Add rules module: security and antipattern detection rules
Some checks failed
CI / test (push) Failing after 11s
CI / build (push) Has been skipped

This commit is contained in:
2026-01-29 23:09:45 +00:00
parent d32059b3fc
commit 03ae09ecaa

328
src/rules/antipatterns.py Normal file
View File

@@ -0,0 +1,328 @@
"""Anti-pattern detection rules."""
import re
from pathlib import Path
from typing import Optional
import tree_sitter
from src.analyzers.base import (
Analyzer,
Finding,
FindingCategory,
SeverityLevel,
)
class ExceptionSwallowAnalyzer(Analyzer):
    """Flag ``try`` statements whose exception handler discards the error.

    A handler counts as "swallowing" when its body holds nothing except
    ``pass`` statements and comments (or is empty).  At most one finding is
    emitted per ``try`` statement, positioned at the ``try`` node itself.
    """

    # Node types treated as the start of a try construct.
    _TRY_NODE_TYPES = frozenset({"try_statement", "try_clause"})
    # Node types treated as an exception handler attached to a try construct.
    _HANDLER_NODE_TYPES = frozenset({"except_clause", "except_handler"})
    # Children of a handler body that perform no work.
    _NOOP_NODE_TYPES = frozenset({"pass_statement", "comment"})
    # Text fallback used only when a handler exposes no ``block`` child:
    # an ``except ...:`` header followed by nothing, or by a lone ``pass``.
    # ``pass\b`` keeps identifiers such as ``password`` from matching.
    _EMPTY_HANDLER_RE = re.compile(r"except\b[^\n]*:\s*(?:pass\b\s*)?\Z")

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.exception_swallow"

    def rule_name(self) -> str:
        """Return the human-readable rule name."""
        return "Exception Swallowing Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to every finding of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to every finding of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per ``try`` statement with a swallowing handler.

        Args:
            source_code: Text of the file being analysed.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.

        Returns:
            Findings in document (pre-order) order of the ``try`` nodes.
        """
        findings = []
        for try_block in self._get_try_blocks(tree.root_node):
            # Inspect every handler, not just the first: a swallowing
            # ``except`` may follow a well-behaved one.
            handlers = self._get_exception_handlers(try_block)
            if not any(self._is_swallowing(h, source_code) for h in handlers):
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message="Empty except clause that swallows exceptions",
                    suggestion="At minimum, log the exception or re-raise it",
                    file_path=file_path,
                    line_number=self._get_line_number(try_block, source_code),
                    column=self._get_column(try_block),
                    node=try_block,
                )
            )
        return findings

    def _get_try_blocks(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every try node at or below *node*, in pre-order."""
        blocks = []
        # Explicit stack instead of recursion so very deep trees cannot hit
        # the interpreter's recursion limit.
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) in self._TRY_NODE_TYPES:
                blocks.append(current)
            # Reversed so that children are popped left-to-right.
            stack.extend(reversed(getattr(current, "children", None) or ()))
        return blocks

    def _get_exception_handlers(
        self, try_node: tree_sitter.Node
    ) -> list[tree_sitter.Node]:
        """Return all direct handler children of *try_node*, in source order."""
        children = getattr(try_node, "children", None) or ()
        return [
            child
            for child in children
            if getattr(child, "type", None) in self._HANDLER_NODE_TYPES
        ]

    def _get_exception_handler(
        self, try_node: tree_sitter.Node
    ) -> Optional[tree_sitter.Node]:
        """Return the first direct handler child of *try_node*, or ``None``."""
        handlers = self._get_exception_handlers(try_node)
        return handlers[0] if handlers else None

    def _is_swallowing(self, handler: tree_sitter.Node, source_code: str) -> bool:
        """Return ``True`` when *handler* does nothing with the exception.

        The handler's ``block`` child is inspected structurally when present;
        otherwise the handler text is matched against a conservative pattern.
        """
        children = getattr(handler, "children", None) or ()
        body = next(
            (c for c in children if getattr(c, "type", None) == "block"), None
        )
        if body is not None:
            # Skip anonymous tokens (e.g. ``;`` separators); only named nodes
            # are statements or comments.
            statements = [
                c
                for c in (getattr(body, "children", None) or ())
                if getattr(c, "is_named", True)
            ]
            return all(
                getattr(s, "type", None) in self._NOOP_NODE_TYPES
                for s in statements
            )
        handler_text = self._get_node_text(handler, source_code).strip()
        return self._EMPTY_HANDLER_RE.match(handler_text) is not None

    def _get_node_text(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the source text spanned by *node* ("" if it has no offsets)."""
        if hasattr(node, "start_byte") and hasattr(node, "end_byte"):
            # Node offsets count bytes, not characters, so slice the encoded
            # form.  Assumes the tree was parsed from the UTF-8 encoding of
            # source_code -- TODO confirm against the caller.
            raw = source_code.encode("utf-8")
            return raw[node.start_byte:node.end_byte].decode(
                "utf-8", errors="replace"
            )
        return ""

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which *node* starts."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            # start_point is (row, column) with a 0-based row.
            return start_point[0] + 1
        # Fallback for node-like objects that only carry a byte offset: count
        # newlines in the encoded prefix so multi-byte characters are handled.
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the column at which *node* starts (0 when unknown)."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        return getattr(node, "start_column", 0)
class MagicNumberAnalyzer(Analyzer):
    """Flag integer literals whose magnitude exceeds ``MAGIC_THRESHOLD``.

    Only nodes of type ``integer`` are examined.  The values -1, 0 and 1 are
    never reported, whatever the threshold is set to.
    """

    # Literals with an absolute value at or below this are not reported.
    MAGIC_THRESHOLD = 5
    # Values that are never considered magic.
    _TRIVIAL_VALUES = frozenset({-1, 0, 1})

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.magic_number"

    def rule_name(self) -> str:
        """Return the human-readable rule name."""
        return "Magic Number Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to every finding of this rule."""
        return SeverityLevel.LOW

    def category(self) -> FindingCategory:
        """Return the category attached to every finding of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per magic integer literal in *tree*.

        Args:
            source_code: Text of the file being analysed.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.

        Returns:
            Findings in document (pre-order) order of the literal nodes.
        """
        findings = []
        for num in self._get_numbers(tree.root_node):
            if not self._is_magic(num, source_code):
                continue
            value = self._get_number_value(num, source_code)
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Magic number detected: {value}",
                    suggestion="Define as a named constant for better readability",
                    file_path=file_path,
                    line_number=self._get_line_number(num, source_code),
                    column=self._get_column(num),
                    node=num,
                )
            )
        return findings

    def _get_numbers(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every ``integer`` node at or below *node*, in pre-order."""
        numbers = []
        # Explicit stack instead of recursion so very deep trees cannot hit
        # the interpreter's recursion limit.
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) == "integer":
                numbers.append(current)
            # Reversed so that children are popped left-to-right.
            stack.extend(reversed(getattr(current, "children", None) or ()))
        return numbers

    def _is_magic(self, node: tree_sitter.Node, source_code: str) -> bool:
        """Return ``True`` when *node* is an integer literal worth reporting."""
        num = self._parse_int(self._get_number_value(node, source_code))
        if num is None:
            return False
        return abs(num) > self.MAGIC_THRESHOLD and num not in self._TRIVIAL_VALUES

    @staticmethod
    def _parse_int(literal: str) -> Optional[int]:
        """Parse *literal* as an integer, or return ``None`` if it is not one.

        Plain decimal parsing is tried first.  Base 0 is tried second so that
        prefixed literals (``0xFF``, ``0o17``, ``0b1010``) are recognised
        instead of being silently skipped.
        """
        for base in (10, 0):
            try:
                return int(literal, base)
            except ValueError:
                continue
        return None

    def _get_number_value(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the literal text of *node* ("" when it cannot be determined)."""
        text = getattr(node, "text", None)
        if isinstance(text, bytes):
            return text.decode("utf-8", errors="replace")
        if text is not None:
            return str(text)
        # No text on the node: recover it from the source via byte offsets.
        # Assumes the tree was parsed from the UTF-8 encoding of source_code
        # -- TODO confirm against the caller.
        if hasattr(node, "start_byte") and hasattr(node, "end_byte"):
            raw = source_code.encode("utf-8")
            return raw[node.start_byte:node.end_byte].decode(
                "utf-8", errors="replace"
            )
        return ""

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which *node* starts."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            # start_point is (row, column) with a 0-based row.
            return start_point[0] + 1
        # Fallback for node-like objects that only carry a byte offset: count
        # newlines in the encoded prefix so multi-byte characters are handled.
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the column at which *node* starts (0 when unknown)."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        return getattr(node, "start_column", 0)
class DeepNestingAnalyzer(Analyzer):
    """Flag nesting constructs that sit more than ``MAX_NESTING`` levels deep.

    Depth is the number of enclosing nodes (including the node itself) whose
    type is listed in ``_NESTING_TYPES``.  Function and class definitions
    count towards the depth.
    """

    # Deepest nesting level that is still accepted.
    MAX_NESTING = 4
    # Node types that add one level of nesting.
    # NOTE(review): clause nodes (elif/else/except) are counted in addition to
    # their parent statement, so their bodies appear one level deeper than the
    # sibling ``if``/``try`` body -- confirm this is intended.
    _NESTING_TYPES = frozenset({
        "if_statement", "elif_clause", "else_clause", "for_statement",
        "while_statement", "try_statement", "except_clause", "with_statement",
        "function_definition", "class_definition",
    })

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.deep_nesting"

    def rule_name(self) -> str:
        """Return the human-readable rule name."""
        return "Deep Nesting Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to every finding of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to every finding of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per line that starts an over-nested construct.

        Args:
            source_code: Text of the file being analysed (not read here).
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.

        Returns:
            Findings in the order the offending lines were first encountered.
        """
        findings = []
        for line_num, depth in self._check_nesting(tree.root_node, 0).items():
            if depth <= self.MAX_NESTING:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Code nested {depth} levels deep (max: {self.MAX_NESTING})",
                    suggestion="Consider refactoring with early returns or extracting functions",
                    file_path=file_path,
                    line_number=line_num,
                    column=0,
                )
            )
        return findings

    def _check_nesting(
        self, node: tree_sitter.Node, current_depth: int
    ) -> dict[int, int]:
        """Map start line -> depth for every over-nested construct under *node*.

        Args:
            node: Root of the subtree to inspect.
            current_depth: Nesting depth already accumulated above *node*.

        Returns:
            A dict keyed by line number.  When several offending constructs
            start on one line, the one visited last (pre-order) wins.
        """
        line_depths = {}
        # Explicit stack instead of recursion so very deep trees cannot hit
        # the interpreter's recursion limit.
        stack = [(node, current_depth)]
        while stack:
            current, depth = stack.pop()
            if getattr(current, "type", None) in self._NESTING_TYPES:
                depth += 1
                if depth > self.MAX_NESTING:
                    line = self._start_line(current)
                    if line is not None:
                        line_depths[line] = depth
            children = getattr(current, "children", None) or ()
            # Reversed so that children are popped left-to-right.
            stack.extend((child, depth) for child in reversed(children))
        return line_depths

    @staticmethod
    def _start_line(node: tree_sitter.Node) -> Optional[int]:
        """Return the line on which *node* starts, or ``None`` if unknown.

        tree-sitter nodes expose ``start_point`` rather than ``start_line``;
        relying on ``start_line`` alone meant nothing was ever recorded.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            # start_point is (row, column) with a 0-based row.
            return start_point[0] + 1
        # Kept for node-like objects that do provide ``start_line``.
        return getattr(node, "start_line", None)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which *node* starts."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1
        # Fallback for node-like objects that only carry a byte offset: count
        # newlines in the encoded prefix so multi-byte characters are handled.
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the column at which *node* starts (0 when unknown)."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        return getattr(node, "start_column", 0)
class LongFunctionAnalyzer(Analyzer):
    """Flag function definitions spanning more than ``MAX_LINES`` lines.

    The length is the number of source lines covered by the function node,
    from the line it starts on to the line it ends on, inclusive.
    """

    # Longest function, in lines, that is still accepted.
    MAX_LINES = 50
    # Node types treated as a function definition.
    _FUNCTION_NODE_TYPES = frozenset({
        "function_definition", "async_function_definition",
        "function_declaration", "method_definition",
    })

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.long_function"

    def rule_name(self) -> str:
        """Return the human-readable rule name."""
        return "Long Function Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to every finding of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to every finding of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per function longer than ``MAX_LINES``.

        Args:
            source_code: Text of the file being analysed.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.

        Returns:
            Findings in document (pre-order) order; nested functions are
            measured and reported independently of their enclosing function.
        """
        findings = []
        for func in self._get_functions(tree.root_node):
            length = self._get_function_lines(func, source_code)
            if length <= self.MAX_LINES:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Function is {length} lines long (max: {self.MAX_LINES})",
                    suggestion="Consider splitting into smaller, focused functions",
                    file_path=file_path,
                    line_number=self._get_line_number(func, source_code),
                    column=self._get_column(func),
                    node=func,
                )
            )
        return findings

    def _get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every function node at or below *node*, in pre-order."""
        functions = []
        # Explicit stack instead of recursion so very deep trees cannot hit
        # the interpreter's recursion limit.
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) in self._FUNCTION_NODE_TYPES:
                functions.append(current)
            # Reversed so that children are popped left-to-right.
            stack.extend(reversed(getattr(current, "children", None) or ()))
        return functions

    def _get_function_lines(self, func: tree_sitter.Node, source_code: str) -> int:
        """Return how many source lines *func* spans (0 when unknown)."""
        start_point = getattr(func, "start_point", None)
        end_point = getattr(func, "end_point", None)
        if start_point is not None and end_point is not None:
            # Points are (row, column); the row difference is the number of
            # newlines inside the node.
            return end_point[0] - start_point[0] + 1
        if hasattr(func, "end_byte") and hasattr(func, "start_byte"):
            # Offsets count bytes, so slice the encoded text rather than the
            # str.  Assumes the tree was parsed from the UTF-8 encoding of
            # source_code -- TODO confirm against the caller.
            raw = source_code.encode("utf-8")
            return raw[func.start_byte:func.end_byte].count(b"\n") + 1
        return 0

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which *node* starts."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            # start_point is (row, column) with a 0-based row.
            return start_point[0] + 1
        # Fallback for node-like objects that only carry a byte offset: count
        # newlines in the encoded prefix so multi-byte characters are handled.
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the column at which *node* starts (0 when unknown)."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        return getattr(node, "start_column", 0)