# Provenance: reconstructed from git patch 03ae09ecaa04c8108e7ba9b16be82b1ee29a7378
# ("Add rules module: security and antipattern detection rules",
# author 7000pctAUTO, Thu, 29 Jan 2026 23:09:45 +0000), which created
# src/rules/antipatterns.py.
"""Anti-pattern detection rules."""

import re
from pathlib import Path
from typing import Iterator, Optional
import tree_sitter

from src.analyzers.base import (
    Analyzer,
    Finding,
    FindingCategory,
    SeverityLevel,
)

# Text fallback used only when a handler exposes no ``block`` child: an
# ``except`` header followed by nothing at all, or by a lone ``pass``.
_EMPTY_HANDLER_RE = re.compile(r"except\b[^:]*:\s*(?:pass\s*)?\Z")

# Children of a handler body that carry no behaviour of their own.
_IGNORED_BODY_TYPES = frozenset({"comment", ";"})


def _iter_nodes(root: tree_sitter.Node) -> Iterator[tree_sitter.Node]:
    """Yield ``root`` and every descendant in pre-order.

    The walk is iterative so that very deep syntax trees cannot exhaust
    Python's recursion limit.
    """
    stack = [root]
    while stack:
        node = stack.pop()
        yield node
        children = getattr(node, "children", None) or ()
        # Reversed so the left-most child is popped (and yielded) first.
        stack.extend(reversed(children))


def _node_text(node: tree_sitter.Node, source_code: str) -> str:
    """Return the source text covered by ``node`` ("" if it has no offsets).

    ``start_byte``/``end_byte`` are byte offsets, so non-ASCII source is
    sliced as bytes rather than as characters.
    NOTE(review): assumes the tree was parsed from the UTF-8 encoding of
    ``source_code`` -- confirm against the caller.
    """
    start = getattr(node, "start_byte", None)
    end = getattr(node, "end_byte", None)
    if start is None or end is None:
        return ""
    if source_code.isascii():
        # Byte and character offsets coincide; skip the re-encoding.
        return source_code[start:end]
    encoded = source_code.encode("utf-8")
    return encoded[start:end].decode("utf-8", errors="replace")


def _node_line(node: tree_sitter.Node, source_code: str) -> int:
    """Return the 1-based line on which ``node`` starts.

    Uses the parser-reported ``start_point`` row when available; otherwise
    counts the newlines that precede ``start_byte`` in the encoded source.
    """
    point = getattr(node, "start_point", None)
    if point is not None:
        return point[0] + 1
    start_byte = getattr(node, "start_byte", 0)
    return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1


def _node_column(node: tree_sitter.Node) -> int:
    """Return the 0-based start column of ``node`` (0 when unknown).

    A ``start_column`` attribute is honoured first for duck-typed nodes that
    provide one; tree-sitter nodes report the column via ``start_point``.
    """
    legacy = getattr(node, "start_column", None)
    if legacy is not None:
        return legacy
    point = getattr(node, "start_point", None)
    return point[1] if point is not None else 0


class ExceptionSwallowAnalyzer(Analyzer):
    """Detect ``try`` statements whose handlers silently discard exceptions."""

    _TRY_TYPES = frozenset({"try_statement", "try_clause"})
    _HANDLER_TYPES = frozenset({"except_clause", "except_handler"})

    def rule_id(self) -> str:
        return "antipattern.exception_swallow"

    def rule_name(self) -> str:
        return "Exception Swallowing Detection"

    def severity(self) -> SeverityLevel:
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per ``try`` node that has a swallowing handler.

        Every handler of the ``try`` is inspected, not just the first one.
        The finding is located at the ``try`` node itself.
        """
        findings = []
        for try_block in self._get_try_blocks(tree.root_node):
            handlers = self._get_exception_handlers(try_block)
            if not any(self._is_swallowing(h, source_code) for h in handlers):
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message="Empty except clause that swallows exceptions",
                    suggestion="At minimum, log the exception or re-raise it",
                    file_path=file_path,
                    line_number=self._get_line_number(try_block, source_code),
                    column=self._get_column(try_block),
                    node=try_block,
                )
            )
        return findings

    def _get_try_blocks(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every try node at or below ``node`` in pre-order."""
        return [
            candidate
            for candidate in _iter_nodes(node)
            if getattr(candidate, "type", None) in self._TRY_TYPES
        ]

    def _get_exception_handlers(
        self, try_node: tree_sitter.Node
    ) -> list[tree_sitter.Node]:
        """Return all handler clauses that are direct children of ``try_node``."""
        children = getattr(try_node, "children", None) or ()
        return [
            child
            for child in children
            if getattr(child, "type", None) in self._HANDLER_TYPES
        ]

    def _get_exception_handler(
        self, try_node: tree_sitter.Node
    ) -> Optional[tree_sitter.Node]:
        """Return the first handler clause of ``try_node``, or ``None``."""
        return next(iter(self._get_exception_handlers(try_node)), None)

    def _is_swallowing(self, handler: tree_sitter.Node, source_code: str) -> bool:
        """Report whether ``handler`` does nothing with the exception.

        When the handler has a ``block`` child, the handler swallows if that
        block contains only ``pass`` statements (comments and ``;`` tokens
        are ignored). This covers typed handlers such as
        ``except ValueError as e: pass`` and does not flag bodies that merely
        begin with ``pass`` or with an identifier starting with "pass".
        Without a ``block`` child the handler text is matched against an
        anchored pattern instead.
        """
        children = getattr(handler, "children", None) or ()
        body = next(
            (c for c in children if getattr(c, "type", None) == "block"), None
        )
        if body is not None:
            statements = [
                stmt
                for stmt in (getattr(body, "children", None) or ())
                if getattr(stmt, "type", None) not in _IGNORED_BODY_TYPES
            ]
            return all(
                getattr(stmt, "type", None) == "pass_statement"
                for stmt in statements
            )
        handler_text = self._get_node_text(handler, source_code)
        # ``match`` anchors at the handler's own ``except`` keyword so that a
        # nested handler inside the body cannot trigger the pattern.
        return _EMPTY_HANDLER_RE.match(handler_text) is not None

    def _get_node_text(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the source text spanned by ``node``."""
        return _node_text(node, source_code)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based start line of ``node``."""
        return _node_line(node, source_code)

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the 0-based start column of ``node``."""
        return _node_column(node)


class MagicNumberAnalyzer(Analyzer):
    """Detect magic numbers in code."""

    # Integer literals with an absolute value at or below this are ignored.
    MAGIC_THRESHOLD = 5

    def rule_id(self) -> str:
        return "antipattern.magic_number"

    def rule_name(self) -> str:
        return "Magic Number Detection"

    def severity(self) -> SeverityLevel:
        return SeverityLevel.LOW

    def category(self) -> FindingCategory:
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per integer literal classified as magic."""
        findings = []
        for num in self._get_numbers(tree.root_node):
            if not self._is_magic(num, source_code):
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Magic number detected: {self._get_number_value(num, source_code)}",
                    suggestion="Define as a named constant for better readability",
                    file_path=file_path,
                    line_number=self._get_line_number(num, source_code),
                    column=self._get_column(num),
                    node=num,
                )
            )
        return findings

    def _get_numbers(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every ``integer`` node at or below ``node`` in pre-order."""
        return [
            candidate
            for candidate in _iter_nodes(node)
            if getattr(candidate, "type", None) == "integer"
        ]

    def _is_magic(self, node: tree_sitter.Node, source_code: str) -> bool:
        """Report whether the literal exceeds ``MAGIC_THRESHOLD`` in magnitude.

        The literal is parsed as decimal first; if that fails it is parsed
        with base auto-detection so prefixed forms (``0x``, ``0o``, ``0b``)
        are evaluated instead of being skipped. Unparseable text is treated
        as not magic.
        """
        value = self._get_number_value(node, source_code)
        try:
            num = int(value)
        except ValueError:
            try:
                num = int(value, 0)
            except ValueError:
                return False
        return abs(num) > self.MAGIC_THRESHOLD and num not in {-1, 0, 1}

    def _get_number_value(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the literal's text, preferring ``node.text``.

        Falls back to slicing ``source_code`` when the node carries no text.
        """
        text = getattr(node, "text", None)
        if text is None:
            return _node_text(node, source_code)
        return text.decode() if isinstance(text, bytes) else str(text)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based start line of ``node``."""
        return _node_line(node, source_code)

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the 0-based start column of ``node``."""
        return _node_column(node)


class DeepNestingAnalyzer(Analyzer):
    """Detect deeply nested code blocks."""

    # Deepest nesting level that is still accepted.
    MAX_NESTING = 4

    # Node types that open a new nesting level. ``elif``/``else``/``except``
    # clauses are deliberately absent: they are children of a statement that
    # is already counted, so counting them too would place those branches one
    # level deeper than the ``if``/``try`` body beside them.
    _NESTING_TYPES = frozenset(
        {
            "if_statement",
            "for_statement",
            "while_statement",
            "try_statement",
            "with_statement",
            "function_definition",
            "class_definition",
        }
    )

    def rule_id(self) -> str:
        return "antipattern.deep_nesting"

    def rule_name(self) -> str:
        return "Deep Nesting Detection"

    def severity(self) -> SeverityLevel:
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per source line nested beyond ``MAX_NESTING``."""
        findings = []
        nesting_info = self._check_nesting(tree.root_node, 0)

        for line_num, depth in nesting_info.items():
            if depth <= self.MAX_NESTING:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Code nested {depth} levels deep (max: {self.MAX_NESTING})",
                    suggestion="Consider refactoring with early returns or extracting functions",
                    file_path=file_path,
                    line_number=line_num,
                    column=0,
                )
            )
        return findings

    def _check_nesting(
        self, node: tree_sitter.Node, current_depth: int
    ) -> dict[int, int]:
        """Map each over-nested line to the deepest level found on it.

        Args:
            node: Root of the subtree to inspect.
            current_depth: Nesting level already accumulated above ``node``.

        Returns:
            ``{line: depth}`` for every node whose depth exceeds
            ``MAX_NESTING``; lines are 1-based when derived from
            ``start_point``.
        """
        line_depths: dict[int, int] = {}
        pending = [(node, current_depth)]
        while pending:
            current, depth = pending.pop()
            if getattr(current, "type", None) in self._NESTING_TYPES:
                depth += 1

            if depth > self.MAX_NESTING:
                line = self._start_line(current)
                if line is not None:
                    # Keep the maximum so a shallower sibling on the same
                    # line cannot overwrite a deeper one.
                    line_depths[line] = max(depth, line_depths.get(line, 0))

            children = getattr(current, "children", None) or ()
            pending.extend((child, depth) for child in reversed(children))
        return line_depths

    @staticmethod
    def _start_line(node: tree_sitter.Node) -> Optional[int]:
        """Return the line ``node`` starts on, or ``None`` if unknown.

        A ``start_line`` attribute is used as-is when a duck-typed node
        provides one; otherwise the ``start_point`` row is converted to a
        1-based line number.
        """
        legacy = getattr(node, "start_line", None)
        if legacy is not None:
            return legacy
        point = getattr(node, "start_point", None)
        return point[0] + 1 if point is not None else None

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based start line of ``node``."""
        return _node_line(node, source_code)

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the 0-based start column of ``node``."""
        return _node_column(node)


class LongFunctionAnalyzer(Analyzer):
    """Detect functions that are too long."""

    # Longest function, in source lines, that is still accepted.
    MAX_LINES = 50

    _FUNCTION_TYPES = frozenset(
        {
            "function_definition",
            "async_function_definition",
            "function_declaration",
            "method_definition",
        }
    )

    def rule_id(self) -> str:
        return "antipattern.long_function"

    def rule_name(self) -> str:
        return "Long Function Detection"

    def severity(self) -> SeverityLevel:
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per function longer than ``MAX_LINES``."""
        findings = []
        for func in self._get_functions(tree.root_node):
            lines = self._get_function_lines(func, source_code)
            if lines <= self.MAX_LINES:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Function is {lines} lines long (max: {self.MAX_LINES})",
                    suggestion="Consider splitting into smaller, focused functions",
                    file_path=file_path,
                    line_number=self._get_line_number(func, source_code),
                    column=self._get_column(func),
                    node=func,
                )
            )
        return findings

    def _get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every function-like node at or below ``node`` in pre-order."""
        return [
            candidate
            for candidate in _iter_nodes(node)
            if getattr(candidate, "type", None) in self._FUNCTION_TYPES
        ]

    def _get_function_lines(self, func: tree_sitter.Node, source_code: str) -> int:
        """Return the number of source lines spanned by ``func``.

        Computed from the start/end rows when the node reports them;
        otherwise by counting newlines in the node's text. Returns 0 when the
        node exposes neither positions nor byte offsets.
        """
        start_point = getattr(func, "start_point", None)
        end_point = getattr(func, "end_point", None)
        if start_point is not None and end_point is not None:
            return end_point[0] - start_point[0] + 1
        if hasattr(func, "end_byte") and hasattr(func, "start_byte"):
            return _node_text(func, source_code).count("\n") + 1
        return 0

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based start line of ``node``."""
        return _node_line(node, source_code)

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the 0-based start column of ``node``."""
        return _node_column(node)