Add rules module: security and antipattern detection rules
This commit is contained in:
328
src/rules/antipatterns.py
Normal file
328
src/rules/antipatterns.py
Normal file
@@ -0,0 +1,328 @@
|
|||||||
|
"""Anti-pattern detection rules."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
import tree_sitter
|
||||||
|
|
||||||
|
from src.analyzers.base import (
|
||||||
|
Analyzer,
|
||||||
|
Finding,
|
||||||
|
FindingCategory,
|
||||||
|
SeverityLevel,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ExceptionSwallowAnalyzer(Analyzer):
    """Report ``try`` constructs whose handler silently discards the exception.

    A handler is treated as swallowing when nothing but ``pass`` statements
    (and comments) follows its ``except ...:`` header.  Every handler attached
    to a ``try`` node is inspected, and at most one finding is produced per
    ``try`` node, positioned at the ``try`` node itself.
    """

    # Node types collected as "try" constructs and as their handlers.
    _TRY_TYPES = frozenset({"try_statement", "try_clause"})
    _HANDLER_TYPES = frozenset({"except_clause", "except_handler"})

    # Matches the handler header up to and including the first colon, e.g.
    # ``except:``, ``except ValueError:`` or ``except (A, B) as e:``.
    # A header that itself contains a colon (e.g. a slice expression) is cut
    # short, which can only make the check miss a handler, never over-report.
    _HEADER_RE = re.compile(r"\s*except\b[^:]*:")

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.exception_swallow"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Exception Swallowing Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to findings of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to findings of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Collect one finding per ``try`` node that has a swallowing handler.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on every finding.
            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.

        Returns:
            The findings, in pre-order of the ``try`` nodes in the tree.
        """
        findings = []
        for try_block in self._get_try_blocks(tree.root_node):
            handlers = self._get_exception_handlers(try_block)
            if not any(self._is_swallowing(h, source_code) for h in handlers):
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message="Empty except clause that swallows exceptions",
                    suggestion="At minimum, log the exception or re-raise it",
                    file_path=file_path,
                    line_number=self._get_line_number(try_block, source_code),
                    column=self._get_column(try_block),
                    node=try_block,
                )
            )
        return findings

    def _get_try_blocks(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every try node in the subtree rooted at ``node`` (pre-order).

        The walk uses an explicit stack so very deep syntax trees cannot
        exhaust Python's recursion limit.
        """
        blocks = []
        pending = [node]
        while pending:
            current = pending.pop()
            if getattr(current, "type", None) in self._TRY_TYPES:
                blocks.append(current)
            children = getattr(current, "children", None) or ()
            # Reversed so the left-most child is popped (visited) first.
            pending.extend(reversed(children))
        return blocks

    def _get_exception_handlers(
        self, try_node: tree_sitter.Node
    ) -> list[tree_sitter.Node]:
        """Return all direct children of ``try_node`` that are handlers."""
        children = getattr(try_node, "children", None) or ()
        return [
            child
            for child in children
            if getattr(child, "type", None) in self._HANDLER_TYPES
        ]

    def _get_exception_handler(
        self, try_node: tree_sitter.Node
    ) -> Optional[tree_sitter.Node]:
        """Return the first handler of ``try_node``, or ``None`` if it has none."""
        handlers = self._get_exception_handlers(try_node)
        return handlers[0] if handlers else None

    def _is_swallowing(self, handler: tree_sitter.Node, source_code: str) -> bool:
        """Tell whether the handler body contains nothing but ``pass``.

        The check is textual: the ``except ...:`` header is removed, comments
        are dropped with a plain split on ``#``, and each remaining statement
        (split on newlines and ``;``) must be exactly ``pass``.  An empty body
        also counts as swallowing.  Text that does not start with an
        ``except`` header yields ``False``.
        """
        handler_text = self._get_node_text(handler, source_code)
        header = self._HEADER_RE.match(handler_text)
        if header is None:
            return False
        for raw_line in handler_text[header.end():].splitlines():
            # Naive comment stripping can only shorten a line; a line holding
            # real code never collapses to "pass", so it cannot over-report.
            code = raw_line.split("#", 1)[0]
            for statement in code.split(";"):
                statement = statement.strip()
                if statement and statement != "pass":
                    return False
        return True

    def _get_node_text(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the source text covered by ``node`` ("" if offsets are missing).

        ``start_byte``/``end_byte`` are byte offsets, so non-ASCII source is
        sliced as bytes rather than as characters.
        NOTE(review): assumes the tree was parsed from the UTF-8 encoding of
        ``source_code`` - confirm against the caller that builds the tree.
        """
        if not (hasattr(node, "start_byte") and hasattr(node, "end_byte")):
            return ""
        if source_code.isascii():
            # Byte and character offsets coincide; skip the re-encoding.
            return source_code[node.start_byte:node.end_byte]
        encoded = source_code.encode("utf-8")
        return encoded[node.start_byte:node.end_byte].decode(
            "utf-8", errors="replace"
        )

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts.

        Uses the node's ``start_point`` row when available; otherwise counts
        newlines before ``start_byte`` in the UTF-8 encoded source.  Returns 1
        when the offset is missing or lies beyond the end of the source.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1  # rows are 0-based
        start_byte = getattr(node, "start_byte", 0)
        encoded = source_code.encode("utf-8")
        if start_byte > len(encoded):
            return 1
        return encoded.count(b"\n", 0, start_byte) + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the start column of ``node``, or 0 when it is unknown.

        ``start_column`` is honoured when the node provides it; otherwise the
        column component of ``start_point`` is used.
        """
        if hasattr(node, "start_column"):
            return node.start_column
        start_point = getattr(node, "start_point", None)
        return start_point[1] if start_point is not None else 0
|
||||||
|
|
||||||
|
|
||||||
|
class MagicNumberAnalyzer(Analyzer):
    """Report integer literals whose magnitude exceeds ``MAGIC_THRESHOLD``."""

    # Integers with an absolute value up to this bound are never reported.
    MAGIC_THRESHOLD = 5

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.magic_number"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Magic Number Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to findings of this rule."""
        return SeverityLevel.LOW

    def category(self) -> FindingCategory:
        """Return the category attached to findings of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Collect one finding per ``integer`` node judged to be a magic number.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on every finding.
            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.

        Returns:
            The findings, in pre-order of the integer nodes in the tree.
        """
        findings = []
        for num in self._get_numbers(tree.root_node):
            if not self._is_magic(num, source_code):
                continue
            literal = self._get_number_value(num, source_code)
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Magic number detected: {literal}",
                    suggestion="Define as a named constant for better readability",
                    file_path=file_path,
                    line_number=self._get_line_number(num, source_code),
                    column=self._get_column(num),
                    node=num,
                )
            )
        return findings

    def _get_numbers(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every ``integer`` node in the subtree of ``node`` (pre-order).

        The walk uses an explicit stack so very deep syntax trees cannot
        exhaust Python's recursion limit.
        """
        numbers = []
        pending = [node]
        while pending:
            current = pending.pop()
            if getattr(current, "type", None) == "integer":
                numbers.append(current)
            children = getattr(current, "children", None) or ()
            # Reversed so the left-most child is popped (visited) first.
            pending.extend(reversed(children))
        return numbers

    def _is_magic(self, node: tree_sitter.Node, source_code: str) -> bool:
        """Tell whether the literal at ``node`` counts as a magic number.

        The literal text is parsed with ``int(text, 0)`` so prefixed forms
        (``0x..``, ``0o..``, ``0b..``) are understood, with a plain decimal
        parse as fallback.  Text that parses with neither yields ``False``.
        """
        value = self._get_number_value(node, source_code)
        try:
            num = int(value, 0)
        except ValueError:
            # Base 0 rejects decimals with leading zeros; retry as base 10.
            try:
                num = int(value)
            except ValueError:
                return False
        return abs(num) > self.MAGIC_THRESHOLD and num not in {-1, 0, 1}

    def _get_number_value(self, node: tree_sitter.Node, source_code: str) -> str:
        """Return the literal text of ``node`` ("" when it cannot be obtained).

        ``node.text`` is preferred.  When it is absent or ``None`` the text is
        sliced out of ``source_code`` using the node's byte offsets.
        NOTE(review): the fallback assumes the tree was parsed from the UTF-8
        encoding of ``source_code`` - confirm against the caller.
        """
        text = getattr(node, "text", None)
        if text is not None:
            return text.decode() if isinstance(text, bytes) else str(text)
        if hasattr(node, "start_byte") and hasattr(node, "end_byte"):
            encoded = source_code.encode("utf-8")
            return encoded[node.start_byte:node.end_byte].decode(
                "utf-8", errors="replace"
            )
        return ""

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts.

        Uses the node's ``start_point`` row when available; otherwise counts
        newlines before ``start_byte`` in the UTF-8 encoded source.  Returns 1
        when the offset is missing or lies beyond the end of the source.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1  # rows are 0-based
        start_byte = getattr(node, "start_byte", 0)
        encoded = source_code.encode("utf-8")
        if start_byte > len(encoded):
            return 1
        return encoded.count(b"\n", 0, start_byte) + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the start column of ``node``, or 0 when it is unknown.

        ``start_column`` is honoured when the node provides it; otherwise the
        column component of ``start_point`` is used.
        """
        if hasattr(node, "start_column"):
            return node.start_column
        start_point = getattr(node, "start_point", None)
        return start_point[1] if start_point is not None else 0
|
||||||
|
|
||||||
|
|
||||||
|
class DeepNestingAnalyzer(Analyzer):
    """Report source lines that sit deeper than ``MAX_NESTING`` block levels."""

    # Deepest nesting level that is still accepted without a finding.
    MAX_NESTING = 4

    # Node types that open a new nesting level.  ``elif``/``else``/``except``
    # clauses are deliberately absent: in the tree-sitter Python grammar they
    # are children of their ``if``/``try`` statement, so counting them as well
    # would rate those branches one level deeper than the ``if``/``try`` body.
    _NESTING_TYPES = frozenset(
        {
            "if_statement",
            "for_statement",
            "while_statement",
            "try_statement",
            "with_statement",
            "function_definition",
            "class_definition",
        }
    )

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.deep_nesting"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Deep Nesting Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to findings of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to findings of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Collect one finding per line nested deeper than ``MAX_NESTING``.

        Args:
            source_code: Text of the analyzed file (not read by this rule).
            file_path: Path recorded on every finding.
            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.

        Returns:
            The findings, one per offending line, each reported at column 0.
        """
        findings = []
        nesting_info = self._check_nesting(tree.root_node, 0)
        for line_num, depth in nesting_info.items():
            if depth <= self.MAX_NESTING:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Code nested {depth} levels deep (max: {self.MAX_NESTING})",
                    suggestion="Consider refactoring with early returns or extracting functions",
                    file_path=file_path,
                    line_number=line_num,
                    column=0,
                )
            )
        return findings

    def _check_nesting(
        self, node: tree_sitter.Node, current_depth: int
    ) -> dict[int, int]:
        """Map each over-nested line to the deepest level found on it.

        Args:
            node: Root of the subtree to inspect.
            current_depth: Nesting level already accumulated above ``node``.

        Returns:
            ``{line: depth}`` for every node start line whose depth exceeds
            ``MAX_NESTING``.  Nodes without line information are skipped.
        """
        line_depths: dict[int, int] = {}
        # Explicit stack instead of recursion: very deep trees must not hit
        # Python's recursion limit.
        pending = [(node, current_depth)]
        while pending:
            current, depth = pending.pop()
            if getattr(current, "type", None) in self._NESTING_TYPES:
                depth += 1
            if depth > self.MAX_NESTING:
                line = self._node_line(current)
                if line is not None:
                    # Keep the maximum so a shallower node on the same line
                    # cannot overwrite a deeper one.
                    line_depths[line] = max(depth, line_depths.get(line, 0))
            children = getattr(current, "children", None) or ()
            # Reversed so the left-most child is popped (visited) first.
            pending.extend((child, depth) for child in reversed(children))
        return line_depths

    def _node_line(self, node: tree_sitter.Node) -> Optional[int]:
        """Return the start line of ``node``, or ``None`` when it is unknown.

        A ``start_line`` attribute is used verbatim when the node provides
        one; otherwise the 0-based ``start_point`` row is converted to a
        1-based line number.
        """
        if hasattr(node, "start_line"):
            return node.start_line
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1
        return None

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts.

        Uses the node's ``start_point`` row when available; otherwise counts
        newlines before ``start_byte`` in the UTF-8 encoded source.  Returns 1
        when the offset is missing or lies beyond the end of the source.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1  # rows are 0-based
        start_byte = getattr(node, "start_byte", 0)
        encoded = source_code.encode("utf-8")
        if start_byte > len(encoded):
            return 1
        return encoded.count(b"\n", 0, start_byte) + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the start column of ``node``, or 0 when it is unknown.

        ``start_column`` is honoured when the node provides it; otherwise the
        column component of ``start_point`` is used.
        """
        if hasattr(node, "start_column"):
            return node.start_column
        start_point = getattr(node, "start_point", None)
        return start_point[1] if start_point is not None else 0
|
||||||
|
|
||||||
|
|
||||||
|
class LongFunctionAnalyzer(Analyzer):
    """Report function definitions that span more than ``MAX_LINES`` lines."""

    # Longest function, in source lines, that is accepted without a finding.
    MAX_LINES = 50

    # Node types treated as function definitions.
    _FUNCTION_TYPES = frozenset(
        {
            "function_definition",
            "async_function_definition",
            "function_declaration",
            "method_definition",
        }
    )

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "antipattern.long_function"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Long Function Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to findings of this rule."""
        return SeverityLevel.MEDIUM

    def category(self) -> FindingCategory:
        """Return the category attached to findings of this rule."""
        return FindingCategory.ANTIPATTERN

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Collect one finding per function longer than ``MAX_LINES``.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on every finding.
            tree: Parsed syntax tree; traversal starts at ``tree.root_node``.

        Returns:
            The findings, in pre-order of the function nodes in the tree.
            Nested functions are measured and reported independently.
        """
        findings = []
        for func in self._get_functions(tree.root_node):
            lines = self._get_function_lines(func, source_code)
            if lines <= self.MAX_LINES:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Function is {lines} lines long (max: {self.MAX_LINES})",
                    suggestion="Consider splitting into smaller, focused functions",
                    file_path=file_path,
                    line_number=self._get_line_number(func, source_code),
                    column=self._get_column(func),
                    node=func,
                )
            )
        return findings

    def _get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every function node in the subtree of ``node`` (pre-order).

        The walk uses an explicit stack so very deep syntax trees cannot
        exhaust Python's recursion limit.
        """
        functions = []
        pending = [node]
        while pending:
            current = pending.pop()
            if getattr(current, "type", None) in self._FUNCTION_TYPES:
                functions.append(current)
            children = getattr(current, "children", None) or ()
            # Reversed so the left-most child is popped (visited) first.
            pending.extend(reversed(children))
        return functions

    def _get_function_lines(self, func: tree_sitter.Node, source_code: str) -> int:
        """Return how many source lines ``func`` spans (0 if it cannot be told).

        The span is taken from the ``start_point``/``end_point`` rows when the
        node has them.  Otherwise newlines are counted between ``start_byte``
        and ``end_byte`` of the UTF-8 encoded source, because those offsets
        are byte positions rather than character positions.
        """
        start_point = getattr(func, "start_point", None)
        end_point = getattr(func, "end_point", None)
        if start_point is not None and end_point is not None:
            return end_point[0] - start_point[0] + 1
        if hasattr(func, "end_byte") and hasattr(func, "start_byte"):
            encoded = source_code.encode("utf-8")
            return encoded.count(b"\n", func.start_byte, func.end_byte) + 1
        return 0

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts.

        Uses the node's ``start_point`` row when available; otherwise counts
        newlines before ``start_byte`` in the UTF-8 encoded source.  Returns 1
        when the offset is missing or lies beyond the end of the source.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1  # rows are 0-based
        start_byte = getattr(node, "start_byte", 0)
        encoded = source_code.encode("utf-8")
        if start_byte > len(encoded):
            return 1
        return encoded.count(b"\n", 0, start_byte) + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the start column of ``node``, or 0 when it is unknown.

        ``start_column`` is honoured when the node provides it; otherwise the
        column component of ``start_point`` is used.
        """
        if hasattr(node, "start_column"):
            return node.start_column
        start_point = getattr(node, "start_point", None)
        return start_point[1] if start_point is not None else 0
|
||||||
Reference in New Issue
Block a user