Add rules module: security and antipattern detection rules
This commit is contained in:
289
src/rules/security.py
Normal file
289
src/rules/security.py
Normal file
@@ -0,0 +1,289 @@
|
|||||||
|
"""Security vulnerability detection rules."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
import tree_sitter
|
||||||
|
|
||||||
|
from src.analyzers.base import (
|
||||||
|
Analyzer,
|
||||||
|
Finding,
|
||||||
|
FindingCategory,
|
||||||
|
SeverityLevel,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SQLInjectionAnalyzer(Analyzer):
    """Detect SQL injection vulnerabilities.

    A finding is reported once per call whose callee ends in one of
    ``DANGEROUS_FUNCTIONS`` (so ``cursor.execute(...)`` matches ``execute``)
    and which has at least one argument that both mentions an SQL keyword
    and is built dynamically: a ``+`` or ``%`` operator outside string
    literals, an f-string, or a ``.format(`` call.

    The check is a textual heuristic on the argument source text; it does not
    track where values come from.
    """

    DANGEROUS_FUNCTIONS = {
        "execute": "cursor.execute",
        "executemany": "cursor.executemany",
        "raw_query": "db.raw_query",
        "query": "db.query",
    }

    SQL_KEYWORDS = {
        "select", "insert", "update", "delete", "drop", "create", "alter",
        "union", "and", "or", "where", "from", "table", "database",
    }

    # One Python string literal (triple- or single-delimited), prefix excluded.
    _STRING_LITERAL_RE = re.compile(
        r'"""(?:\\.|[^\\])*?"""'
        r"|'''(?:\\.|[^\\])*?'''"
        r'|"(?:\\.|[^"\\\n])*"'
        r"|'(?:\\.|[^'\\\n])*'",
        re.DOTALL,
    )
    # An f-string prefix directly in front of a (blanked-out) literal.
    _FSTRING_PREFIX_RE = re.compile(r'(?<![A-Za-z0-9_])(?:[fF][rR]?|[rR][fF])""')
    # A ``.format(`` method call.
    _FORMAT_CALL_RE = re.compile(r"\.\s*format\s*\(")

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "security.sql_injection"

    def rule_name(self) -> str:
        """Return the human-readable rule name."""
        return "SQL Injection Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to every finding of this rule."""
        return SeverityLevel.CRITICAL

    def category(self) -> FindingCategory:
        """Return the category attached to every finding of this rule."""
        return FindingCategory.SECURITY

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Scan ``tree`` for database calls whose query text is built dynamically.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree of ``source_code``.

        Returns:
            At most one finding per offending call, in source order.
        """
        findings = []
        for call in self._get_calls(tree.root_node):
            func_name = self._get_function_name(call)
            # Match on the last dotted segment: the table keys are bare method
            # names, while the callee text is usually ``receiver.method``.
            method_name = func_name.rpartition(".")[2].strip()
            if method_name not in self.DANGEROUS_FUNCTIONS:
                continue
            # ``any`` stops at the first dangerous argument, so a call is
            # reported once no matter how many of its arguments look unsafe.
            if not any(
                self._is_dangerous_sql(arg, source_code)
                for arg in self._get_arguments(call)
            ):
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message="Potential SQL injection detected",
                    suggestion="Use parameterized queries instead of string formatting",
                    file_path=file_path,
                    line_number=self._get_line_number(call, source_code),
                    column=self._get_column(call),
                    node=call,
                )
            )
        return findings

    def _node_text(self, node: tree_sitter.Node) -> str:
        """Return the source text of ``node`` as ``str`` ("" when unavailable)."""
        text = getattr(node, "text", None)
        if text is None:
            return ""
        return text.decode() if isinstance(text, bytes) else str(text)

    def _get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every ``call`` node under ``node`` in pre-order.

        Uses an explicit stack so deeply nested trees cannot exhaust the
        interpreter's recursion limit.
        """
        calls = []
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) == "call":
                calls.append(current)
            # Reversed so the left-most child is popped (visited) first.
            stack.extend(reversed(getattr(current, "children", None) or []))
        return calls

    def _get_function_name(self, call: tree_sitter.Node) -> str:
        """Return the full callee text of ``call`` (e.g. ``cursor.execute``)."""
        children = getattr(call, "children", None) or []
        if not children:
            return ""
        return self._node_text(children[0])

    def _get_arguments(self, call: tree_sitter.Node) -> list[str]:
        """Return the source text of each individual argument of ``call``.

        The first child of a call is the callee and is skipped. An
        ``argument_list`` child is expanded into its named children, which
        leaves out the parentheses and commas; any other child (for example a
        bare generator expression) is treated as a single argument.
        """
        args = []
        for child in (getattr(call, "children", None) or [])[1:]:
            child_type = getattr(child, "type", None)
            if child_type == "comment":
                continue
            if child_type == "argument_list":
                args.extend(
                    self._node_text(arg)
                    for arg in child.named_children
                    if arg.type != "comment"
                )
            else:
                args.append(self._node_text(child))
        return args

    def _is_dangerous_sql(self, arg: str, source_code: str) -> bool:
        """Return True when ``arg`` looks like dynamically built SQL.

        Args:
            arg: Source text of one call argument.
            source_code: Unused; kept so the signature stays compatible.
        """
        lowered = arg.lower()
        # Keywords must not be glued to other letters/digits, so "or" no longer
        # matches inside "for". Underscores do not count as glue, so an
        # identifier such as ``select_sql`` is still recognised.
        has_sql_keyword = any(
            re.search(rf"(?<![a-z0-9]){re.escape(keyword)}(?![a-z0-9])", lowered)
            for keyword in self.SQL_KEYWORDS
        )
        if not has_sql_keyword:
            return False

        # Blank out literal bodies so that characters inside a string, such as
        # the ``%s`` placeholder of a parameterized query or a quoted value,
        # are not mistaken for operators.
        code_only = self._STRING_LITERAL_RE.sub('""', arg)
        return (
            "+" in code_only
            or "%" in code_only
            or self._FSTRING_PREFIX_RE.search(code_only) is not None
            or self._FORMAT_CALL_RE.search(code_only) is not None
        )

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            # start_point is (row, column) with a 0-based row.
            return start_point[0] + 1
        # Fallback for node-like objects exposing only a byte offset.
        # NOTE(review): assumes the offset refers to UTF-8 encoded source.
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the 0-based start column of ``node`` (0 when unknown)."""
        start_point = getattr(node, "start_point", None)
        return start_point[1] if start_point is not None else 0
|
||||||
|
|
||||||
|
|
||||||
|
class EvalUsageAnalyzer(Analyzer):
    """Detect eval() and exec() usage.

    Every call whose callee text is exactly one of ``DANGEROUS_FUNCTIONS`` is
    reported. The comparison uses the full callee text, so an attribute call
    such as ``re.compile(...)`` is not reported.
    """

    DANGEROUS_FUNCTIONS = {"eval", "exec", "execfile", "compile"}

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "security.eval_usage"

    def rule_name(self) -> str:
        """Return the human-readable rule name."""
        return "Eval/Exec Usage Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to every finding of this rule."""
        return SeverityLevel.CRITICAL

    def category(self) -> FindingCategory:
        """Return the category attached to every finding of this rule."""
        return FindingCategory.SECURITY

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Scan ``tree`` for calls to dynamic-code-execution functions.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree of ``source_code``.

        Returns:
            One finding per matching call, in source order.
        """
        findings = []
        for call in self._get_calls(tree.root_node):
            func_name = self._get_function_name(call)
            if func_name not in self.DANGEROUS_FUNCTIONS:
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message=f"Dangerous {func_name}() call detected",
                    suggestion="Avoid using eval/exec as they can execute arbitrary code",
                    file_path=file_path,
                    line_number=self._get_line_number(call, source_code),
                    column=self._get_column(call),
                    node=call,
                )
            )
        return findings

    def _get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every ``call`` node under ``node`` in pre-order.

        Uses an explicit stack so deeply nested trees cannot exhaust the
        interpreter's recursion limit.
        """
        calls = []
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) == "call":
                calls.append(current)
            # Reversed so the left-most child is popped (visited) first.
            stack.extend(reversed(getattr(current, "children", None) or []))
        return calls

    def _get_function_name(self, call: tree_sitter.Node) -> str:
        """Return the callee text of ``call``, or "" when it has no children."""
        children = getattr(call, "children", None) or []
        if not children:
            return ""
        text = getattr(children[0], "text", None)
        if text is None:
            return ""
        return text.decode() if isinstance(text, bytes) else str(text)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            # start_point is (row, column) with a 0-based row.
            return start_point[0] + 1
        # Fallback for node-like objects exposing only a byte offset.
        # NOTE(review): assumes the offset refers to UTF-8 encoded source.
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the 0-based start column of ``node`` (0 when unknown)."""
        start_point = getattr(node, "start_point", None)
        return start_point[1] if start_point is not None else 0
|
||||||
|
|
||||||
|
|
||||||
|
class PathTraversalAnalyzer(Analyzer):
    """Detect path traversal vulnerabilities.

    A call is reported when its callee text is one of
    ``VULNERABLE_FUNCTIONS`` and the source text of the whole call expression
    matches any of ``DANGEROUS_PATTERNS``.
    """

    DANGEROUS_PATTERNS = [
        r"\.\./",
        r"\.\.\\",
        r"join\s*\(\s*[\"'].*\.\.[\"']",
    ]

    VULNERABLE_FUNCTIONS = {
        "open": "open()",
        "file": "file()",
        "os.path.join": "os.path.join()",
        "Path": "Path()",
    }

    def rule_id(self) -> str:
        """Return the stable identifier of this rule."""
        return "security.path_traversal"

    def rule_name(self) -> str:
        """Return the human-readable rule name."""
        return "Path Traversal Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to every finding of this rule."""
        return SeverityLevel.HIGH

    def category(self) -> FindingCategory:
        """Return the category attached to every finding of this rule."""
        return FindingCategory.SECURITY

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Scan ``tree`` for file-system calls that contain traversal sequences.

        Args:
            source_code: Text of the analyzed file.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree of ``source_code``.

        Returns:
            At most one finding per offending call, in source order.
        """
        findings = []
        for call in self._get_calls(tree.root_node):
            func_name = self._get_function_name(call)
            if func_name not in self.VULNERABLE_FUNCTIONS:
                continue
            # Test the whole call expression: the ``join(...)`` pattern spans
            # the callee and its arguments, so it cannot match when the two are
            # inspected as separate strings.
            if not self._contains_path_traversal(self._node_text(call)):
                continue
            findings.append(
                Finding(
                    rule_id=self.rule_id(),
                    rule_name=self.rule_name(),
                    severity=self.severity(),
                    category=self.category(),
                    message="Potential path traversal detected",
                    suggestion="Validate and sanitize file paths, use os.path.abspath()",
                    file_path=file_path,
                    line_number=self._get_line_number(call, source_code),
                    column=self._get_column(call),
                    node=call,
                )
            )
        return findings

    def _node_text(self, node: tree_sitter.Node) -> str:
        """Return the source text of ``node`` as ``str`` ("" when unavailable)."""
        text = getattr(node, "text", None)
        if text is None:
            return ""
        return text.decode() if isinstance(text, bytes) else str(text)

    def _get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Collect every ``call`` node under ``node`` in pre-order.

        Uses an explicit stack so deeply nested trees cannot exhaust the
        interpreter's recursion limit.
        """
        calls = []
        stack = [node]
        while stack:
            current = stack.pop()
            if getattr(current, "type", None) == "call":
                calls.append(current)
            # Reversed so the left-most child is popped (visited) first.
            stack.extend(reversed(getattr(current, "children", None) or []))
        return calls

    def _get_function_name(self, call: tree_sitter.Node) -> str:
        """Return the callee text of ``call``, or "" when it has no children."""
        children = getattr(call, "children", None) or []
        if not children:
            return ""
        return self._node_text(children[0])

    def _get_arguments(self, call: tree_sitter.Node) -> list[str]:
        """Return the source text of each individual argument of ``call``.

        Not used by ``analyze``, which tests the whole call text instead; kept
        as a helper. The first child of a call is the callee and is skipped.
        An ``argument_list`` child is expanded into its named children, which
        leaves out the parentheses and commas; any other child is treated as a
        single argument.
        """
        args = []
        for child in (getattr(call, "children", None) or [])[1:]:
            child_type = getattr(child, "type", None)
            if child_type == "comment":
                continue
            if child_type == "argument_list":
                args.extend(
                    self._node_text(arg)
                    for arg in child.named_children
                    if arg.type != "comment"
                )
            else:
                args.append(self._node_text(child))
        return args

    def _contains_path_traversal(self, arg: str) -> bool:
        """Return True when ``arg`` matches any of ``DANGEROUS_PATTERNS``."""
        return any(re.search(pattern, arg) for pattern in self.DANGEROUS_PATTERNS)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line on which ``node`` starts."""
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            # start_point is (row, column) with a 0-based row.
            return start_point[0] + 1
        # Fallback for node-like objects exposing only a byte offset.
        # NOTE(review): assumes the offset refers to UTF-8 encoded source.
        start_byte = getattr(node, "start_byte", 0)
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the 0-based start column of ``node`` (0 when unknown)."""
        start_point = getattr(node, "start_point", None)
        return start_point[1] if start_point is not None else 0
|
||||||
Reference in New Issue
Block a user