# src/rules/performance.py
"""Performance issue detection rules.

Three tree-sitter based analyzers live here:

* ``InefficientLoopAnalyzer``    - loops that match known slow idioms.
* ``RedundantOperationAnalyzer`` - ``list``/``str``/``dict``/``set`` calls
  wrapping a value that already has that type.
* ``UnnecessaryCopyAnalyzer``    - doubled-up copy expressions.

All detection is heuristic (regular expressions over node text); the rules
favour cheap checks over full semantic analysis.
"""

import re
from pathlib import Path
from typing import Optional

import tree_sitter

from src.analyzers.base import (
    Analyzer,
    Finding,
    FindingCategory,
    SeverityLevel,
)


# Node types treated as loops. ``for_in_statement`` is kept from the original
# rule set so grammars that use that name are still covered.
_LOOP_NODE_TYPES = frozenset(
    {"for_statement", "while_statement", "for_in_statement"}
)

# Compiled once at import time. Each pattern is applied with ``match`` so it
# is anchored at the loop's own header; the optional ``async`` prefix keeps
# ``async for`` loops covered.
_INEFFICIENT_LOOP_PATTERNS = tuple(
    re.compile(r"(?:async\s+)?" + pattern)
    for pattern in (
        r"for\s+\w+\s+in\s+range\s*\(\s*len\s*\(",
        r"while\s+True\s*:.*\s+if.*:\s+break",
        r"for\s+\w+\s+in\s+.*:\s+\w+\.append",
    )
)

# Constructors checked by the redundant-operation rule, mapped to the
# prefixes of an argument that already has the constructed type.
_REDUNDANT_PREFIXES = {
    "list": ("list(", "["),
    "str": ("str(", "'", '"'),
    "dict": ("dict(", "{"),
    "set": ("set(", "{"),
}

_UNNECESSARY_COPY_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"list\s*\(\s*list\s*\(",
        r"\[:\]\s*\[:\]",
        r"copy\s*\(\s*copy\s*\(",
    )
)


def _walk(root: tree_sitter.Node):
    """Yield ``root`` and all of its descendants in pre-order.

    Iterative on purpose: deeply nested syntax trees (for example long
    chains of binary operators) would otherwise risk ``RecursionError``.
    """
    pending = [root]
    while pending:
        node = pending.pop()
        yield node
        children = getattr(node, "children", None)
        if children:
            # Reverse so the left-most child is popped (and yielded) first.
            pending.extend(reversed(children))


def _decode(text) -> str:
    """Return node text as ``str``; ``None`` becomes an empty string."""
    if text is None:
        return ""
    if isinstance(text, bytes):
        return text.decode("utf-8", errors="replace")
    return str(text)


def _node_text(node: tree_sitter.Node, source_code: str) -> str:
    """Return the slice of ``source_code`` covered by ``node``.

    tree-sitter offsets are byte offsets, so the source is sliced as bytes
    whenever it contains non-ASCII characters.
    NOTE(review): assumes the tree was parsed from the UTF-8 encoding of
    ``source_code`` - confirm against the caller that builds the tree.
    """
    start = getattr(node, "start_byte", None)
    end = getattr(node, "end_byte", None)
    if start is None or end is None:
        return ""
    if source_code.isascii():
        # Byte and character offsets coincide; no re-encoding needed.
        return source_code[start:end]
    raw = source_code.encode("utf-8")
    return raw[start:end].decode("utf-8", errors="replace")


def _node_line(node: tree_sitter.Node, source_code: str) -> int:
    """Return the 1-based line number on which ``node`` starts.

    Uses the node's ``start_point`` (0-based row) when available and falls
    back to counting newlines before ``start_byte`` otherwise. Returns 1
    when the node carries no position information at all.
    """
    point = getattr(node, "start_point", None)
    if point is not None:
        return point[0] + 1
    start = getattr(node, "start_byte", None)
    if start is None:
        return 1
    return source_code.encode("utf-8")[:start].count(b"\n") + 1


def _node_column(node: tree_sitter.Node) -> int:
    """Return the start column of ``node`` as reported by tree-sitter.

    The value is ``start_point[1]`` (0-based); 0 is returned when the node
    has no ``start_point``.
    """
    point = getattr(node, "start_point", None)
    return point[1] if point is not None else 0


def _callee_name(call: tree_sitter.Node) -> str:
    """Return the text of the first child of ``call`` (the called expression).

    Returns an empty string when the node has no children or no text.
    """
    children = getattr(call, "children", None)
    if not children:
        return ""
    return _decode(getattr(children[0], "text", None))


def _argument_container(call: tree_sitter.Node) -> Optional[tree_sitter.Node]:
    """Return the node holding the arguments of ``call``, or ``None``."""
    by_field = getattr(call, "child_by_field_name", None)
    if callable(by_field):
        container = by_field("arguments")
        if container is not None:
            return container
    for child in getattr(call, "children", None) or ():
        if getattr(child, "type", None) == "argument_list":
            return child
    return None


def _build_finding(
    analyzer: Analyzer,
    node: tree_sitter.Node,
    source_code: str,
    file_path: Path,
    message: str,
    suggestion: str,
) -> Finding:
    """Create a ``Finding`` for ``node`` using ``analyzer``'s rule metadata."""
    return Finding(
        rule_id=analyzer.rule_id(),
        rule_name=analyzer.rule_name(),
        severity=analyzer.severity(),
        category=analyzer.category(),
        message=message,
        suggestion=suggestion,
        file_path=file_path,
        line_number=analyzer._get_line_number(node, source_code),
        column=analyzer._get_column(node),
        node=node,
    )


class InefficientLoopAnalyzer(Analyzer):
    """Detect inefficient loop patterns."""

    def rule_id(self) -> str:
        return "performance.inefficient_loop"

    def rule_name(self) -> str:
        return "Inefficient Loop Detection"

    def severity(self) -> SeverityLevel:
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        return FindingCategory.PERFORMANCE

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per loop whose header matches a slow idiom.

        Args:
            source_code: Full text of the analysed file.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree for ``source_code``.
        """
        return [
            _build_finding(
                self,
                loop,
                source_code,
                file_path,
                "Inefficient loop pattern detected",
                "Consider using list comprehension or built-in functions",
            )
            for loop in self._get_loops(tree.root_node)
            if self._is_inefficient(loop, source_code)
        ]

    def _get_loops(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every loop node under ``node`` (inclusive), in pre-order."""
        return [
            candidate
            for candidate in _walk(node)
            if getattr(candidate, "type", None) in _LOOP_NODE_TYPES
        ]

    def _is_inefficient(self, loop: tree_sitter.Node, source_code: str) -> bool:
        """Return True when the loop itself starts with a known slow idiom.

        Patterns are anchored at the start of the loop text so that an outer
        loop is not reported merely because a loop nested inside it matches;
        the nested loop is visited (and reported) on its own.
        """
        loop_text = self._get_node_text(loop, source_code)
        return any(
            pattern.match(loop_text) for pattern in _INEFFICIENT_LOOP_PATTERNS
        )

    def _get_node_text(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the source text spanned by ``node``."""
        return _node_text(node, source_code)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts."""
        return _node_line(node, source_code)

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the start column of ``node``."""
        return _node_column(node)


class RedundantOperationAnalyzer(Analyzer):
    """Detect redundant operations."""

    def rule_id(self) -> str:
        return "performance.redundant_operation"

    def rule_name(self) -> str:
        return "Redundant Operation Detection"

    def severity(self) -> SeverityLevel:
        return SeverityLevel.LOW

    def category(self) -> FindingCategory:
        return FindingCategory.PERFORMANCE

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per redundant ``list``/``str``/``dict``/``set`` call.

        Args:
            source_code: Full text of the analysed file.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree for ``source_code``.
        """
        findings = []
        for call in self._get_calls(tree.root_node):
            func_name = self._get_function_name(call)
            if func_name not in _REDUNDANT_PREFIXES:
                continue
            if not self._is_redundant(func_name, self._get_arguments(call)):
                continue
            findings.append(
                _build_finding(
                    self,
                    call,
                    source_code,
                    file_path,
                    f"Redundant {func_name}() call detected",
                    "Remove unnecessary type conversion",
                )
            )
        return findings

    def _get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every ``call`` node under ``node`` (inclusive), in pre-order."""
        return [
            candidate
            for candidate in _walk(node)
            if getattr(candidate, "type", None) == "call"
        ]

    def _get_function_name(self, call: tree_sitter.Node) -> str:
        """Return the text of the called expression, or ``""`` if unavailable."""
        return _callee_name(call)

    def _get_arguments(self, call: tree_sitter.Node) -> list[str]:
        """Return the source text of each argument passed to ``call``.

        Only real arguments are returned: the callee, parentheses, commas
        and comments are excluded. When the arguments node is not an
        ``argument_list`` (for example a bare generator expression), its
        whole text is returned as the single argument.
        """
        container = _argument_container(call)
        if container is None:
            return []
        if getattr(container, "type", None) != "argument_list":
            return [_decode(getattr(container, "text", None))]
        return [
            _decode(getattr(argument, "text", None))
            for argument in getattr(container, "named_children", None) or ()
            if getattr(argument, "type", None) != "comment"
        ]

    def _is_redundant(self, func_name: str, args: list[str]) -> bool:
        """Return True when ``func_name(arg)`` converts a value of the same type.

        Only single-argument calls are considered: a call without arguments
        converts nothing, and extra arguments (``str(b, "utf-8")``,
        ``dict(mapping, key=value)``) change what the constructor does.
        The check is a textual heuristic on how the argument starts, and is
        case-sensitive so that e.g. ``List(...)`` is not mistaken for
        ``list(...)``.
        """
        if len(args) != 1:
            return False
        prefixes = _REDUNDANT_PREFIXES.get(func_name, ())
        return args[0].lstrip().startswith(prefixes)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts."""
        return _node_line(node, source_code)

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the start column of ``node``."""
        return _node_column(node)


class UnnecessaryCopyAnalyzer(Analyzer):
    """Detect unnecessary list copies."""

    def rule_id(self) -> str:
        return "performance.unnecessary_copy"

    def rule_name(self) -> str:
        return "Unnecessary Copy Detection"

    def severity(self) -> SeverityLevel:
        return SeverityLevel.LOW

    def category(self) -> FindingCategory:
        return FindingCategory.PERFORMANCE

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per call that contains a doubled-up copy.

        Args:
            source_code: Full text of the analysed file.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree for ``source_code``.
        """
        return [
            _build_finding(
                self,
                copy_call,
                source_code,
                file_path,
                "Unnecessary list copy detected",
                "Avoid copying lists when not needed",
            )
            for copy_call in self._get_copy_calls(tree.root_node, source_code)
            if self._is_unnecessary(copy_call, source_code)
        ]

    def _get_copy_calls(
        self, node: tree_sitter.Node, source_code: str = ""
    ) -> list[tree_sitter.Node]:
        """Collect ``call`` nodes that may perform a copy, in pre-order.

        A call qualifies when its callee text contains ``copy``, when the
        callee is ``list`` (so the ``list(list(...))`` pattern checked by
        ``_is_unnecessary`` is reachable), or when the call's source text
        contains a ``[:]`` slice.

        Args:
            node: Root of the subtree to search.
            source_code: Text of the analysed file, needed for the ``[:]``
                check. Defaults to ``""`` for backward compatibility, in
                which case only the callee-based checks apply.
        """
        selected = []
        for candidate in _walk(node):
            if getattr(candidate, "type", None) != "call":
                continue
            callee = self._get_function_name(candidate)
            if (
                "copy" in callee.lower()
                or callee == "list"
                or "[:]" in self._get_node_text(candidate, source_code)
            ):
                selected.append(candidate)
        return selected

    def _get_function_name(self, call: tree_sitter.Node) -> str:
        """Return the text of the called expression, or ``""`` if unavailable."""
        return _callee_name(call)

    def _get_node_text(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the source text spanned by ``node``."""
        return _node_text(node, source_code)

    def _is_unnecessary(self, call: tree_sitter.Node, source_code: str) -> bool:
        """Return True when the call text contains a doubled-up copy idiom."""
        call_text = self._get_node_text(call, source_code)
        return any(
            pattern.search(call_text) for pattern in _UNNECESSARY_COPY_PATTERNS
        )

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts."""
        return _node_line(node, source_code)

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the start column of ``node``."""
        return _node_column(node)