Add rules module: security and antipattern detection rules
This commit is contained in:
328
src/rules/antipatterns.py
Normal file
328
src/rules/antipatterns.py
Normal file
@@ -0,0 +1,328 @@
|
||||
"""Anti-pattern detection rules."""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import tree_sitter
|
||||
|
||||
from src.analyzers.base import (
|
||||
Analyzer,
|
||||
Finding,
|
||||
FindingCategory,
|
||||
SeverityLevel,
|
||||
)
|
||||
|
||||
|
||||
class ExceptionSwallowAnalyzer(Analyzer):
    """Detect ``try`` statements whose ``except`` handlers swallow exceptions.

    A handler is considered to swallow the exception when its body is empty
    or consists only of ``pass`` / ``...`` (comments are ignored). One
    finding is reported per ``try`` block that has at least one such handler,
    positioned at the start of the ``try`` block.
    """

    # Node types treated as the start of a try construct.
    _TRY_NODE_TYPES = frozenset({"try_statement", "try_clause"})
    # Node types treated as exception handlers of a try construct.
    _HANDLER_NODE_TYPES = frozenset({"except_clause", "except_handler"})
    # Statements that, on their own, make a handler body a no-op.
    _NOOP_STATEMENTS = frozenset({"pass", "..."})
    # Matches the handler header up to and including its first colon, e.g.
    # ``except:``, ``except ValueError:``, ``except (A, B) as exc:``.
    _HANDLER_HEADER = re.compile(r"\s*except\b[^:]*:")

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.exception_swallow"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Exception Swallowing Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to findings of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to findings of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per ``try`` block with a swallowing handler.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on every finding.
            tree: Parsed syntax tree; the walk starts at ``tree.root_node``.

        Returns:
            Findings in the order the ``try`` blocks are met in a pre-order
            walk of the tree.
        """
        findings = []
        for try_block in self._get_try_blocks(tree.root_node):
            # Every handler is inspected, not just the first one: a try with
            # a real handler followed by ``except Exception: pass`` still
            # swallows exceptions.
            handlers = self._get_exception_handlers(try_block)
            if not any(self._is_swallowing(h, source_code) for h in handlers):
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message="Empty except clause that swallows exceptions",
                    suggestion="At minimum, log the exception or re-raise it",
                    file_path=file_path,
                    line_number=self._get_line_number(try_block, source_code),
                    column=self._get_column(try_block),
                    node=try_block,
                )
            )
        return findings

    def _get_try_blocks(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every try node at or below ``node`` in pre-order.

        The walk uses an explicit stack instead of recursion so that very
        deep syntax trees cannot exhaust Python's recursion limit.
        """
        blocks = []
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) in self._TRY_NODE_TYPES:
                blocks.append(current)
            # Reversed so that children are popped in source order.
            stack.extend(reversed(getattr(current, "children", None) or ()))
        return blocks

    def _get_exception_handlers(
        self, try_node: tree_sitter.Node
    ) -> list[tree_sitter.Node]:
        """Return all direct children of ``try_node`` that are handlers."""
        return [
            child
            for child in (getattr(try_node, "children", None) or ())
            if getattr(child, "type", None) in self._HANDLER_NODE_TYPES
        ]

    def _get_exception_handler(
        self, try_node: tree_sitter.Node
    ) -> Optional[tree_sitter.Node]:
        """Return the first handler of ``try_node``, or ``None`` if it has none."""
        handlers = self._get_exception_handlers(try_node)
        return handlers[0] if handlers else None

    def _is_swallowing(self, handler: tree_sitter.Node, source_code: str) -> bool:
        """Tell whether ``handler`` has an empty or no-op body.

        The handler text is split into its ``except ...:`` header and its
        body. The body swallows the exception when, after dropping blank
        lines and ``#`` comments, nothing is left or only ``pass`` / ``...``
        statements remain.

        Returns:
            ``True`` for a swallowing handler; ``False`` otherwise, including
            when the text does not start with an ``except`` header.
        """
        handler_text = self._get_node_text(handler, source_code)
        header = self._HANDLER_HEADER.match(handler_text)
        if header is None:
            return False

        statements = []
        for raw_line in handler_text[header.end():].splitlines():
            # Cutting at '#' may truncate a statement that has '#' inside a
            # string literal, but such a statement is never a bare no-op, so
            # the verdict is unaffected.
            code = raw_line.split("#", 1)[0].strip()
            if code:
                statements.append(code)
        return all(stmt in self._NOOP_STATEMENTS for stmt in statements)

    def _get_node_text(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the source text covered by ``node`` ("" if offsets are missing).

        ``start_byte`` / ``end_byte`` are byte offsets, so the slice is taken
        on the encoded source rather than on the ``str``.
        """
        start = getattr(node, "start_byte", None)
        end = getattr(node, "end_byte", None)
        if start is None or end is None:
            return ""
        # NOTE(review): assumes the tree was parsed from the UTF-8 encoding
        # of source_code -- confirm against the parser setup.
        encoded = source_code.encode("utf-8")
        return encoded[start:end].decode("utf-8", errors="replace")

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts.

        Uses the node's ``start_point`` (zero-based row, column) when
        available; otherwise counts newlines before ``start_byte`` in the
        UTF-8 encoded source.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the zero-based start column of ``node`` (0 if unknown)."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        # Kept for node-like objects that expose ``start_column`` directly.
        return getattr(node, "start_column", 0)
|
||||
|
||||
|
||||
class MagicNumberAnalyzer(Analyzer):
    """Detect integer literals that should probably be named constants.

    An ``integer`` node is reported when its absolute value is greater than
    ``MAGIC_THRESHOLD`` and it is not one of -1, 0 or 1.
    """

    # Integers whose absolute value is at most this are never reported.
    MAGIC_THRESHOLD = 5

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.magic_number"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Magic Number Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to findings of this rule."""
        return SeverityLevel.LOW

    def category(self) -> FindingCategory:
        """Return the category attached to findings of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per magic integer literal in ``tree``.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on every finding.
            tree: Parsed syntax tree; the walk starts at ``tree.root_node``.

        Returns:
            Findings in the order the literals are met in a pre-order walk.
        """
        findings = []
        for num in self._get_numbers(tree.root_node):
            if not self._is_magic(num, source_code):
                continue
            literal = self._get_number_value(num, source_code)
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Magic number detected: {literal}",
                    suggestion="Define as a named constant for better readability",
                    file_path=file_path,
                    line_number=self._get_line_number(num, source_code),
                    column=self._get_column(num),
                    node=num,
                )
            )
        return findings

    def _get_numbers(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every ``integer`` node at or below ``node`` in pre-order.

        The walk uses an explicit stack instead of recursion so that very
        deep syntax trees cannot exhaust Python's recursion limit.
        """
        numbers = []
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) == "integer":
                numbers.append(current)
            # Reversed so that children are popped in source order.
            stack.extend(reversed(getattr(current, "children", None) or ()))
        return numbers

    def _is_magic(self, node: tree_sitter.Node, source_code: str) -> bool:
        """Tell whether the integer literal at ``node`` counts as magic.

        The literal is parsed as base 10 first; if that fails it is parsed
        with base auto-detection so prefixed literals (``0x``, ``0o``,
        ``0b``) are evaluated too. Text that is not an integer in either
        form is never magic.
        """
        value = self._get_number_value(node, source_code)
        try:
            num = int(value)
        except ValueError:
            try:
                num = int(value, 0)
            except ValueError:
                return False
        return abs(num) > self.MAGIC_THRESHOLD and num not in {-1, 0, 1}

    def _get_number_value(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the literal text of ``node`` ("" when it cannot be read).

        ``node.text`` is preferred. When it is missing or ``None`` the text
        is sliced out of ``source_code`` using the node's byte offsets.
        """
        text = getattr(node, "text", None)
        if text is not None:
            return text.decode() if isinstance(text, bytes) else str(text)

        start = getattr(node, "start_byte", None)
        end = getattr(node, "end_byte", None)
        if start is None or end is None:
            return ""
        # NOTE(review): assumes the tree was parsed from the UTF-8 encoding
        # of source_code -- confirm against the parser setup.
        encoded = source_code.encode("utf-8")
        return encoded[start:end].decode("utf-8", errors="replace")

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts.

        Uses the node's ``start_point`` (zero-based row, column) when
        available; otherwise counts newlines before ``start_byte`` in the
        UTF-8 encoded source.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the zero-based start column of ``node`` (0 if unknown)."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        # Kept for node-like objects that expose ``start_column`` directly.
        return getattr(node, "start_column", 0)
|
||||
|
||||
|
||||
class DeepNestingAnalyzer(Analyzer):
    """Detect code that sits inside too many nested constructs.

    Depth grows by one for every enclosing node whose type is listed in
    ``_NESTING_TYPES``. Each source line on which a node starts deeper than
    ``MAX_NESTING`` produces one finding.
    """

    # Deepest nesting level that is still accepted.
    MAX_NESTING = 4

    # Node types that add one nesting level.
    # NOTE(review): clause types (elif/else/except) are counted in addition
    # to their enclosing statement, so code inside such a clause may be
    # counted one level deeper than it visually appears -- confirm intended.
    _NESTING_TYPES = frozenset({
        "if_statement", "elif_clause", "else_clause", "for_statement",
        "while_statement", "try_statement", "except_clause", "with_statement",
        "function_definition", "class_definition",
    })

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.deep_nesting"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Deep Nesting Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to findings of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to findings of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per line that is nested beyond ``MAX_NESTING``.

        Args:
            source_code: Text of the analyzed file (not read by this rule).
            file_path: Path recorded on every finding.
            tree: Parsed syntax tree; the walk starts at ``tree.root_node``.

        Returns:
            Findings with ``column=0``, in the order the offending lines are
            first met in a pre-order walk of the tree.
        """
        findings = []
        nesting_info = self._check_nesting(tree.root_node, 0)
        for line_num, depth in nesting_info.items():
            if depth <= self.MAX_NESTING:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Code nested {depth} levels deep (max: {self.MAX_NESTING})",
                    suggestion="Consider refactoring with early returns or extracting functions",
                    file_path=file_path,
                    line_number=line_num,
                    column=0,
                )
            )
        return findings

    def _check_nesting(
        self, node: tree_sitter.Node, current_depth: int
    ) -> dict[int, int]:
        """Map each too-deep line to the deepest nesting level seen on it.

        Args:
            node: Root of the subtree to inspect.
            current_depth: Nesting depth of the context enclosing ``node``.

        Returns:
            ``{line: depth}`` for every line on which a node starts at a
            depth greater than ``MAX_NESTING``. Nodes whose start line
            cannot be determined are skipped.
        """
        line_depths: dict[int, int] = {}
        # Explicit stack instead of recursion so that very deep syntax trees
        # cannot exhaust Python's recursion limit.
        stack = [(node, current_depth)]
        while stack:
            current, depth = stack.pop()
            if getattr(current, "type", None) in self._NESTING_TYPES:
                depth += 1

            if depth > self.MAX_NESTING:
                line = self._start_line(current)
                if line is not None:
                    line_depths[line] = max(depth, line_depths.get(line, 0))

            # Reversed so that children are popped in source order.
            for child in reversed(getattr(current, "children", None) or ()):
                stack.append((child, depth))
        return line_depths

    def _start_line(self, node: tree_sitter.Node) -> Optional[int]:
        """Return the line on which ``node`` starts, or ``None`` if unknown.

        tree-sitter nodes carry their position in ``start_point`` (zero-based
        row, column), which is converted to a 1-based line here. A
        ``start_line`` attribute, if a node-like object provides one, is
        returned unchanged as a fallback.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1
        return getattr(node, "start_line", None)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts.

        Uses the node's ``start_point`` (zero-based row, column) when
        available; otherwise counts newlines before ``start_byte`` in the
        UTF-8 encoded source.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the zero-based start column of ``node`` (0 if unknown)."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        # Kept for node-like objects that expose ``start_column`` directly.
        return getattr(node, "start_column", 0)
|
||||
|
||||
|
||||
class LongFunctionAnalyzer(Analyzer):
    """Detect function definitions that span more than ``MAX_LINES`` lines."""

    # Longest function, in source lines, that is still accepted.
    MAX_LINES = 50

    # Node types treated as function definitions.
    _FUNCTION_NODE_TYPES = frozenset({
        "function_definition", "async_function_definition",
        "function_declaration", "method_definition",
    })

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.long_function"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Long Function Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to findings of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to findings of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Return one finding per function longer than ``MAX_LINES``.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on every finding.
            tree: Parsed syntax tree; the walk starts at ``tree.root_node``.

        Returns:
            Findings in the order the functions are met in a pre-order walk;
            nested functions are measured independently of their parents.
        """
        findings = []
        for func in self._get_functions(tree.root_node):
            length = self._get_function_lines(func, source_code)
            if length <= self.MAX_LINES:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Function is {length} lines long (max: {self.MAX_LINES})",
                    suggestion="Consider splitting into smaller, focused functions",
                    file_path=file_path,
                    line_number=self._get_line_number(func, source_code),
                    column=self._get_column(func),
                    node=func,
                )
            )
        return findings

    def _get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every function node at or below ``node`` in pre-order.

        The walk uses an explicit stack instead of recursion so that very
        deep syntax trees cannot exhaust Python's recursion limit.
        """
        functions = []
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) in self._FUNCTION_NODE_TYPES:
                functions.append(current)
            # Reversed so that children are popped in source order.
            stack.extend(reversed(getattr(current, "children", None) or ()))
        return functions

    def _get_function_lines(self, func: tree_sitter.Node, source_code: str) -> int:
        """Return how many source lines ``func`` spans (0 if unknown).

        The span is computed from the node's start and end rows when
        ``start_point`` / ``end_point`` are available. Otherwise newlines are
        counted between ``start_byte`` and ``end_byte`` in the UTF-8 encoded
        source, because those offsets are bytes, not characters.
        """
        start_point = getattr(func, "start_point", None)
        end_point = getattr(func, "end_point", None)
        if start_point is not None and end_point is not None:
            return end_point[0] - start_point[0] + 1

        start = getattr(func, "start_byte", None)
        end = getattr(func, "end_byte", None)
        if start is None or end is None:
            return 0
        # NOTE(review): assumes the tree was parsed from the UTF-8 encoding
        # of source_code -- confirm against the parser setup.
        return source_code.encode("utf-8")[start:end].count(b"\n") + 1

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts.

        Uses the node's ``start_point`` (zero-based row, column) when
        available; otherwise counts newlines before ``start_byte`` in the
        UTF-8 encoded source.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the zero-based start column of ``node`` (0 if unknown)."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        # Kept for node-like objects that expose ``start_column`` directly.
        return getattr(node, "start_column", 0)
|
||||
Reference in New Issue
Block a user