Add performance and secrets detection rules

2026-01-29 23:10:44 +00:00
parent 03ae09ecaa
commit c3617a2802
1 changed files with 272 additions and 0 deletions
--- a/src/rules/performance.py
+++ b/src/rules/performance.py
@@ -0,0 +1,272 @@
+"""Performance issue detection rules."""
+
+import re
+from pathlib import Path
+from typing import Optional
+import tree_sitter
+
+from src.analyzers.base import (
+    Analyzer,
+    Finding,
+    FindingCategory,
+    SeverityLevel,
+)
+
+
+class InefficientLoopAnalyzer(Analyzer):
+    """Detect inefficient loop patterns.
+
+    Every loop statement in the syntax tree is tested against a small set of
+    regular expressions anchored at the loop's own header:
+
+    * ``for i in range(len(...))``
+    * ``while True:`` whose first statement is ``if ...: break``
+    * a ``for`` loop whose first statement is an ``.append`` call
+
+    The check is purely textual, so it is a heuristic, not a semantic proof.
+    """
+
+    # Node types that are treated as loops.
+    _LOOP_TYPES = frozenset(
+        {"for_statement", "while_statement", "for_in_statement"}
+    )
+
+    # Compiled once at class creation instead of once per visited loop.
+    _INEFFICIENT_PATTERNS = tuple(
+        re.compile(pattern)
+        for pattern in (
+            r"for\s+\w+\s+in\s+range\s*\(\s*len\s*\(",
+            r"while\s+True\s*:.*\s+if.*:\s+break",
+            r"for\s+\w+\s+in\s+.*:\s+\w+\.append",
+        )
+    )
+
+    # ``async for`` loops carry the keyword inside the loop node's text; it is
+    # skipped so the anchored patterns above still see the ``for`` header.
+    _ASYNC_PREFIX = re.compile(r"async\s+")
+
+    def rule_id(self) -> str:
+        """Return the stable identifier of this rule."""
+        return "performance.inefficient_loop"
+
+    def rule_name(self) -> str:
+        """Return the human-readable rule name."""
+        return "Inefficient Loop Detection"
+
+    def severity(self) -> SeverityLevel:
+        """Return the severity attached to every finding of this rule."""
+        return SeverityLevel.MEDIUM
+
+    def category(self) -> FindingCategory:
+        """Return the category attached to every finding of this rule."""
+        return FindingCategory.PERFORMANCE
+
+    def analyze(
+        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
+    ) -> list[Finding]:
+        """Return one finding per loop that matches an inefficient pattern.
+
+        Args:
+            source_code: Text of the file being analysed.
+            file_path: Path recorded on each finding.
+            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.
+
+        Returns:
+            Findings in document (pre-order) order; empty when nothing matches.
+        """
+        return [
+            Finding(
+                rule_id=self.rule_id(),
+                rule_name=self.rule_name(),
+                severity=self.severity(),
+                category=self.category(),
+                message="Inefficient loop pattern detected",
+                suggestion="Consider using list comprehension or built-in functions",
+                file_path=file_path,
+                line_number=self._get_line_number(loop, source_code),
+                column=self._get_column(loop),
+                node=loop,
+            )
+            for loop in self._get_loops(tree.root_node)
+            if self._is_inefficient(loop, source_code)
+        ]
+
+    def _get_loops(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
+        """Collect every loop node under ``node`` (inclusive) in pre-order.
+
+        Uses an explicit stack rather than recursion so that very deep syntax
+        trees cannot exhaust Python's recursion limit.
+        """
+        loops = []
+        pending = [node]
+        while pending:
+            current = pending.pop()
+            if getattr(current, "type", None) in self._LOOP_TYPES:
+                loops.append(current)
+            children = getattr(current, "children", None) or []
+            # Reversed so the left-most child is popped (visited) first.
+            pending.extend(reversed(children))
+        return loops
+
+    def _is_inefficient(self, loop: tree_sitter.Node, source_code: str) -> bool:
+        """Return True when the loop's own header matches a known pattern.
+
+        The patterns are applied with ``match`` (anchored at the start of the
+        loop text). An unanchored search would also hit nested loops and make
+        every enclosing loop report the same problem a second time.
+        """
+        loop_text = self._get_node_text(loop, source_code)
+        async_prefix = self._ASYNC_PREFIX.match(loop_text)
+        if async_prefix:
+            loop_text = loop_text[async_prefix.end():]
+        return any(
+            pattern.match(loop_text) for pattern in self._INEFFICIENT_PATTERNS
+        )
+
+    def _get_node_text(self, node: tree_sitter.Node, source_code: str) -> str:
+        """Return the source text covered by ``node``, or "" without offsets.
+
+        tree-sitter offsets count bytes, not characters, so non-ASCII source
+        must be sliced in its encoded form.
+        """
+        if not (hasattr(node, "start_byte") and hasattr(node, "end_byte")):
+            return ""
+        if source_code.isascii():
+            # Byte and character offsets coincide; skip the encode round-trip.
+            return source_code[node.start_byte:node.end_byte]
+        # NOTE(review): assumes the tree was parsed from the UTF-8 encoding of
+        # source_code - confirm against the caller that builds the tree.
+        encoded = source_code.encode("utf-8")
+        return encoded[node.start_byte:node.end_byte].decode(
+            "utf-8", errors="replace"
+        )
+
+    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
+        """Return the 1-based line on which ``node`` starts."""
+        start_point = getattr(node, "start_point", None)
+        if start_point is not None:
+            # tree-sitter rows are 0-based.
+            return start_point[0] + 1
+        # Fallback for node-like objects that only expose a byte offset:
+        # count the newlines that precede it in the encoded source.
+        start_byte = getattr(node, "start_byte", 0)
+        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1
+
+    def _get_column(self, node: tree_sitter.Node) -> int:
+        """Return the 0-based start column of ``node`` (0 when unknown).
+
+        NOTE(review): the base Finding.column expects is not visible here; the
+        tree-sitter value is passed through unchanged.
+        """
+        start_point = getattr(node, "start_point", None)
+        if start_point is not None:
+            return start_point[1]
+        return getattr(node, "start_column", 0)
+
+
+class RedundantOperationAnalyzer(Analyzer):
+    """Detect redundant operations.
+
+    Flags calls to ``list``, ``str``, ``dict`` or ``set`` whose single
+    argument already looks like a value of that type: a literal, or another
+    call to the same constructor.
+
+    The test is a text-prefix heuristic. Expressions that merely begin with a
+    literal (``[1, 2][0]``) also match, and ``{`` cannot tell a set literal
+    from a dict literal.
+    """
+
+    # Constructor name -> argument prefixes that make the call redundant.
+    _REDUNDANT_PREFIXES = {
+        "list": ("list(", "["),
+        "str": ("str(", "'", '"'),
+        "dict": ("dict(", "{"),
+        "set": ("set(", "{"),
+    }
+
+    def rule_id(self) -> str:
+        """Return the stable identifier of this rule."""
+        return "performance.redundant_operation"
+
+    def rule_name(self) -> str:
+        """Return the human-readable rule name."""
+        return "Redundant Operation Detection"
+
+    def severity(self) -> SeverityLevel:
+        """Return the severity attached to every finding of this rule."""
+        return SeverityLevel.LOW
+
+    def category(self) -> FindingCategory:
+        """Return the category attached to every finding of this rule."""
+        return FindingCategory.PERFORMANCE
+
+    def analyze(
+        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
+    ) -> list[Finding]:
+        """Return one finding per redundant constructor call.
+
+        Args:
+            source_code: Text of the file being analysed.
+            file_path: Path recorded on each finding.
+            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.
+
+        Returns:
+            Findings in document (pre-order) order; empty when nothing matches.
+        """
+        findings = []
+        for call in self._get_calls(tree.root_node):
+            func_name = self._get_function_name(call)
+            if func_name not in self._REDUNDANT_PREFIXES:
+                continue
+            if not self._is_redundant(func_name, self._get_arguments(call)):
+                continue
+            findings.append(
+                Finding(
+                    rule_id=self.rule_id(),
+                    rule_name=self.rule_name(),
+                    severity=self.severity(),
+                    category=self.category(),
+                    message=f"Redundant {func_name}() call detected",
+                    suggestion="Remove unnecessary type conversion",
+                    file_path=file_path,
+                    line_number=self._get_line_number(call, source_code),
+                    column=self._get_column(call),
+                    node=call,
+                )
+            )
+        return findings
+
+    def _get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
+        """Collect every ``call`` node under ``node`` (inclusive) in pre-order.
+
+        Uses an explicit stack rather than recursion so that very deep syntax
+        trees cannot exhaust Python's recursion limit.
+        """
+        calls = []
+        pending = [node]
+        while pending:
+            current = pending.pop()
+            if getattr(current, "type", None) == "call":
+                calls.append(current)
+            children = getattr(current, "children", None) or []
+            # Reversed so the left-most child is popped (visited) first.
+            pending.extend(reversed(children))
+        return calls
+
+    @staticmethod
+    def _node_text(node: tree_sitter.Node) -> str:
+        """Return ``node.text`` as ``str`` ("" when the node has no text)."""
+        text = getattr(node, "text", None)
+        if text is None:
+            return ""
+        return text.decode() if isinstance(text, bytes) else str(text)
+
+    def _get_function_name(self, call: tree_sitter.Node) -> str:
+        """Return the text of the callee (the call's first child), or ""."""
+        children = getattr(call, "children", None)
+        if not children:
+            return ""
+        return self._node_text(children[0])
+
+    def _get_arguments(self, call: tree_sitter.Node) -> list[str]:
+        """Return the source text of each argument passed to ``call``.
+
+        The call node's direct children are the callee and the parenthesised
+        argument list, so the arguments have to be read from inside the
+        ``arguments`` field. Iterating the call's own children would yield the
+        function name as the first "argument".
+        """
+        lookup = getattr(call, "child_by_field_name", None)
+        arguments = lookup("arguments") if callable(lookup) else None
+        if arguments is None:
+            return []
+        if getattr(arguments, "type", None) != "argument_list":
+            # e.g. ``list(x for x in xs)``: a bare generator expression takes
+            # the place of the argument list and is the sole argument.
+            return [self._node_text(arguments)]
+        return [
+            self._node_text(child)
+            for child in getattr(arguments, "named_children", None) or []
+            if getattr(child, "type", None) != "comment"
+        ]
+
+    def _is_redundant(self, func_name: str, args: list[str]) -> bool:
+        """Return True when ``func_name(arg)`` converts a value to its own type.
+
+        Only single-argument calls qualify. A zero-argument call such as
+        ``set()`` builds a new empty value rather than converting one, and
+        extra arguments (``dict({...}, key=1)``) change the result. Matching
+        is case-sensitive so that ``List(x)`` is not mistaken for ``list(x)``.
+        """
+        if len(args) != 1:
+            return False
+        prefixes = self._REDUNDANT_PREFIXES.get(func_name)
+        if not prefixes:
+            return False
+        return args[0].lstrip().startswith(prefixes)
+
+    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
+        """Return the 1-based line on which ``node`` starts."""
+        start_point = getattr(node, "start_point", None)
+        if start_point is not None:
+            # tree-sitter rows are 0-based.
+            return start_point[0] + 1
+        # Fallback for node-like objects that only expose a byte offset:
+        # count the newlines that precede it in the encoded source.
+        start_byte = getattr(node, "start_byte", 0)
+        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1
+
+    def _get_column(self, node: tree_sitter.Node) -> int:
+        """Return the 0-based start column of ``node`` (0 when unknown).
+
+        NOTE(review): the base Finding.column expects is not visible here; the
+        tree-sitter value is passed through unchanged.
+        """
+        start_point = getattr(node, "start_point", None)
+        if start_point is not None:
+            return start_point[1]
+        return getattr(node, "start_column", 0)
+
+
+class UnnecessaryCopyAnalyzer(Analyzer):
+    """Detect unnecessary list copies.
+
+    Candidate calls are those whose callee text contains ``copy``, whose
+    callee is ``list``, or whose text contains a ``[:]`` slice. A candidate
+    is reported when its text contains a doubled copy: ``list(list(``,
+    ``[:][:]`` or ``copy(copy(``. The check is textual and therefore a
+    heuristic.
+    """
+
+    # Compiled once at class creation instead of once per visited call.
+    _UNNECESSARY_PATTERNS = tuple(
+        re.compile(pattern)
+        for pattern in (
+            r"list\s*\(\s*list\s*\(",
+            r"\[:\]\s*\[:\]",
+            r"copy\s*\(\s*copy\s*\(",
+        )
+    )
+
+    def rule_id(self) -> str:
+        """Return the stable identifier of this rule."""
+        return "performance.unnecessary_copy"
+
+    def rule_name(self) -> str:
+        """Return the human-readable rule name."""
+        return "Unnecessary Copy Detection"
+
+    def severity(self) -> SeverityLevel:
+        """Return the severity attached to every finding of this rule."""
+        return SeverityLevel.LOW
+
+    def category(self) -> FindingCategory:
+        """Return the category attached to every finding of this rule."""
+        return FindingCategory.PERFORMANCE
+
+    def analyze(
+        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
+    ) -> list[Finding]:
+        """Return one finding per candidate call that contains a doubled copy.
+
+        Args:
+            source_code: Text of the file being analysed.
+            file_path: Path recorded on each finding.
+            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.
+
+        Returns:
+            Findings in document (pre-order) order; empty when nothing matches.
+        """
+        return [
+            Finding(
+                rule_id=self.rule_id(),
+                rule_name=self.rule_name(),
+                severity=self.severity(),
+                category=self.category(),
+                message="Unnecessary list copy detected",
+                suggestion="Avoid copying lists when not needed",
+                file_path=file_path,
+                line_number=self._get_line_number(call, source_code),
+                column=self._get_column(call),
+                node=call,
+            )
+            for call in self._get_copy_calls(tree.root_node)
+            if self._is_unnecessary(call, source_code)
+        ]
+
+    def _get_copy_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
+        """Collect candidate copy calls under ``node`` (inclusive), pre-order.
+
+        Uses an explicit stack rather than recursion so that very deep syntax
+        trees cannot exhaust Python's recursion limit.
+        """
+        calls = []
+        pending = [node]
+        while pending:
+            current = pending.pop()
+            if getattr(current, "type", None) == "call" and self._is_copy_candidate(
+                current
+            ):
+                calls.append(current)
+            children = getattr(current, "children", None) or []
+            # Reversed so the left-most child is popped (visited) first.
+            pending.extend(reversed(children))
+        return calls
+
+    def _is_copy_candidate(self, call: tree_sitter.Node) -> bool:
+        """Return True when ``call`` could match one of the copy patterns.
+
+        The slice test reads the node's own text. No source string is
+        available during traversal, and slicing an empty placeholder string
+        would make the ``[:]`` test always false. ``list`` callees are
+        included because the ``list(list(`` pattern is otherwise unreachable
+        for a plain ``list(list(x))``.
+        """
+        func_name = self._get_function_name(call).lower()
+        if "copy" in func_name or func_name == "list":
+            return True
+        return "[:]" in self._own_text(call)
+
+    @staticmethod
+    def _own_text(node: tree_sitter.Node) -> str:
+        """Return ``node.text`` as ``str`` ("" when the node has no text)."""
+        text = getattr(node, "text", None)
+        if text is None:
+            return ""
+        return text.decode() if isinstance(text, bytes) else str(text)
+
+    def _get_function_name(self, call: tree_sitter.Node) -> str:
+        """Return the text of the callee (the call's first child), or ""."""
+        children = getattr(call, "children", None)
+        if not children:
+            return ""
+        return self._own_text(children[0])
+
+    def _get_node_text(self, node: tree_sitter.Node, source_code: str) -> str:
+        """Return the source text covered by ``node``, or "" without offsets.
+
+        tree-sitter offsets count bytes, not characters, so non-ASCII source
+        must be sliced in its encoded form.
+        """
+        if not (hasattr(node, "start_byte") and hasattr(node, "end_byte")):
+            return ""
+        if source_code.isascii():
+            # Byte and character offsets coincide; skip the encode round-trip.
+            return source_code[node.start_byte:node.end_byte]
+        # NOTE(review): assumes the tree was parsed from the UTF-8 encoding of
+        # source_code - confirm against the caller that builds the tree.
+        encoded = source_code.encode("utf-8")
+        return encoded[node.start_byte:node.end_byte].decode(
+            "utf-8", errors="replace"
+        )
+
+    def _is_unnecessary(self, call: tree_sitter.Node, source_code: str) -> bool:
+        """Return True when the call's text contains a doubled copy."""
+        call_text = self._get_node_text(call, source_code)
+        return any(
+            pattern.search(call_text) for pattern in self._UNNECESSARY_PATTERNS
+        )
+
+    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
+        """Return the 1-based line on which ``node`` starts."""
+        start_point = getattr(node, "start_point", None)
+        if start_point is not None:
+            # tree-sitter rows are 0-based.
+            return start_point[0] + 1
+        # Fallback for node-like objects that only expose a byte offset:
+        # count the newlines that precede it in the encoded source.
+        start_byte = getattr(node, "start_byte", 0)
+        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1
+
+    def _get_column(self, node: tree_sitter.Node) -> int:
+        """Return the 0-based start column of ``node`` (0 when unknown).
+
+        NOTE(review): the base Finding.column expects is not visible here; the
+        tree-sitter value is passed through unchanged.
+        """
+        start_point = getattr(node, "start_point", None)
+        if start_point is not None:
+            return start_point[1]
+        return getattr(node, "start_column", 0)