"""Hardcoded secret detection rules."""

import re
from pathlib import Path
from typing import Optional

import tree_sitter

from src.analyzers.base import (
    Analyzer,
    Finding,
    FindingCategory,
    SeverityLevel,
)


class HardcodedSecretAnalyzer(Analyzer):
    """Detect hardcoded secrets in source code.

    Every regex in ``SECRET_PATTERNS`` is run over the raw source text; each
    match that is not dismissed by ``_is_false_positive`` becomes one
    ``Finding``.
    """

    # Maps pattern id -> (regex, human-readable secret type used in messages).
    SECRET_PATTERNS = {
        "aws_access_key": (
            r"(?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}",
            "AWS Access Key",
        ),
        # NOTE(review): matches any run of 40 base64-alphabet characters, so
        # 40-character hex digests and slices of longer blobs match as well.
        "aws_secret_key": (
            r"[A-Za-z0-9/+=]{40}",
            "AWS Secret Key",
        ),
        "github_token": (
            r"gh[pousr]_[A-Za-z0-9_]{36,}",
            "GitHub Token",
        ),
        # The hyphen is placed last in the class so it is a literal. The
        # previous class, written `\\-_` inside a raw string, was the range
        # backslash..underscore and therefore rejected keys containing "-".
        "google_api_key": (
            r"AIza[0-9A-Za-z_-]{35}",
            "Google API Key",
        ),
        "slack_token": (
            r"xox[baprs]-([0-9a-zA-Z]{10,48})",
            "Slack Token",
        ),
        "private_key_header": (
            r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----",
            "Private Key",
        ),
        "jwt_token": (
            r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*",
            "JWT Token",
        ),
        # NOTE(review): case-sensitive, so upper-case names such as
        # PASSWORD = "..." are not matched by this pattern.
        "password_assignment": (
            r"(?:password|passwd|pwd|secret|token|api_key|apikey)\s*[:=]\s*['\"][^'\"]+['\"]",
            "Password/Secret Assignment",
        ),
        "database_url": (
            r"(?:mongodb(\+srv)?|postgres|postgresql|mysql|mssql)://[^:]+:[^@]+@",
            "Database Connection URL",
        ),
    }

    # Lower-case substrings that mark a match as a placeholder rather than a
    # real secret. These are plain text, not regexes: they are tested with
    # `in`, so regex escapes (e.g. "example\.com") would never match.
    FALSE_POSITIVE_MARKERS = (
        "example.com",
        "localhost",
        "test",
        "placeholder",
        "your_",
        "<",
        ">",
    )

    def rule_id(self) -> str:
        """Return the identifier attached to every finding of this rule."""
        return "secret.hardcoded_secret"

    def rule_name(self) -> str:
        """Return the human-readable name of this rule."""
        return "Hardcoded Secret Detection"

    def severity(self) -> SeverityLevel:
        """Return the severity attached to every finding of this rule."""
        return SeverityLevel.CRITICAL

    def category(self) -> FindingCategory:
        """Return the category attached to every finding of this rule."""
        return FindingCategory.SECRET

    def analyze(
        self, source_code: str, file_path: Path, tree: tree_sitter.Tree
    ) -> list[Finding]:
        """Scan ``source_code`` with every secret pattern.

        Args:
            source_code: Full text of the file being analyzed.
            file_path: Path recorded on each finding.
            tree: Parsed syntax tree; not consulted by this text-based rule.

        Returns:
            One ``Finding`` per regex match that is not a false positive,
            grouped by pattern in ``SECRET_PATTERNS`` order. ``line_number``
            is 1-based; ``column`` is the 0-based character offset of the
            match within its line.
        """
        findings = []

        for pattern, secret_type in self.SECRET_PATTERNS.values():
            for match in re.finditer(pattern, source_code):
                if self._is_false_positive(match.group(), source_code):
                    continue

                start = match.start()
                # Index of the first character of the line holding the match;
                # rfind() yields -1 on the first line, which makes this 0.
                line_start = source_code.rfind("\n", 0, start) + 1
                findings.append(
                    Finding(
                        rule_id=self.rule_id(),
                        rule_name=self.rule_name(),
                        severity=self.severity(),
                        category=self.category(),
                        message=f"Potential {secret_type} found in code",
                        suggestion="Move secret to environment variables or secure vault",
                        file_path=file_path,
                        line_number=source_code.count("\n", 0, start) + 1,
                        column=start - line_start,
                    )
                )
        return findings

    def _is_false_positive(self, match: str, source_code: str) -> bool:
        """Return True when the matched text looks like a placeholder.

        Args:
            match: The text matched by a secret pattern.
            source_code: Full source text; accepted for interface
                compatibility and currently unused.

        Returns:
            True if the lower-cased match contains any entry of
            ``FALSE_POSITIVE_MARKERS``.
        """
        lower_match = match.lower()
        return any(marker in lower_match for marker in self.FALSE_POSITIVE_MARKERS)

    def _get_line_number(self, node: tree_sitter.Node, source_code: str) -> int:
        """Return the 1-based line number on which ``node`` starts.

        Uses the node's ``start_point`` row when it is available. Otherwise
        the line is derived from ``start_byte`` by counting newlines in the
        encoded source, because ``start_byte`` is a byte offset and cannot be
        compared against character counts once the text is non-ASCII. A node
        exposing neither attribute is reported on line 1.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[0] + 1

        start_byte = getattr(node, "start_byte", 0)
        # Assumes the tree was parsed from the UTF-8 encoding of source_code
        # -- TODO confirm against the caller that builds the tree.
        return source_code.encode("utf-8")[:start_byte].count(b"\n") + 1

    def _get_column(self, node: tree_sitter.Node) -> int:
        """Return the 0-based column at which ``node`` starts.

        Reads the column from ``start_point`` (a ``(row, column)`` pair); a
        ``start_column`` attribute is still honoured as a fallback, and 0 is
        returned when neither is present.
        """
        start_point = getattr(node, "start_point", None)
        if start_point is not None:
            return start_point[1]
        return getattr(node, "start_column", 0)