This commit is contained in:
139
src/code_privacy_shield/patterns.py
Normal file
139
src/code_privacy_shield/patterns.py
Normal file
@@ -0,0 +1,139 @@
|
||||
import re
|
||||
from typing import Dict, List, Pattern, Tuple
|
||||
|
||||
|
||||
class PatternLibrary:
    """Catalog of regular expressions for spotting sensitive data in text.

    Each ``*_PATTERNS`` class attribute is a list of ``(regex_source, label)``
    tuples. The ``get_*`` accessors compile a category on first use and cache
    the compiled list on the instance, so repeated calls return the same list
    object.

    Note that every pattern is compiled with ``re.IGNORECASE`` (see
    ``_compile_patterns``), regardless of whether the source string carries an
    inline ``(?i)`` flag.
    """

    # Credentials and provider-specific keys. Most entries capture the
    # "name = " prefix in group 1 and the secret value in group 2; the bare
    # token formats (OpenAI, GitHub prefix, Google) match the token itself.
    API_KEY_PATTERNS = [
        (r"(?i)(api[_-]?key['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "API Key"),
        (r"(?i)(secret['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Secret"),
        (r"(?i)(token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Token"),
        (r"(?i)(auth[_-]?token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Auth Token"),
        (r"sk-[a-zA-Z0-9]{20,}", "OpenAI API Key"),
        (r"sk-proj-[a-zA-Z0-9_-]{20,}", "OpenAI Project Key"),
        (r"(?i)(ghp_|gho_|ghu_|ghs_|ghr_)[a-zA-Z0-9]{36,}", "GitHub Token"),
        (r"(?i)(github[_-]?token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{30,})", "GitHub Token"),
        # The hyphen is placed last in the class so it is a literal "-".
        # The previous form (backslash-backslash-hyphen-underscore in a raw
        # string) was parsed as a range from backslash to underscore and
        # therefore never matched "-".
        (r"(?i)AIza[0-9A-Za-z_-]{35}", "Google API Key"),
        (r"(?i)(firebase['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Firebase Key"),
        (r"(?i)(aws[_-]?access[_-]?key[_-]?id['\"]?\s*[:=]\s*['\"]?)([A-Z0-9]{20})", "AWS Access Key ID"),
        (r"(?i)(aws[_-]?secret[_-]?access[_-]?key['\"]?\s*[:=]\s*['\"]?)([A-Za-z0-9/+=]{40})", "AWS Secret Key"),
        (r"(?i)(slack[_-]?token['\"]?\s*[:=]\s*['\"]?)(xox[baprs]-([0-9a-zA-Z]{10,48})?)", "Slack Token"),
        (r"(?i)(stripe[_-]?key['\"]?\s*[:=]\s*['\"]?)(sk_live_[0-9a-zA-Z]{24,})", "Stripe Secret Key"),
        (r"(?i)(stripe[_-]?pub[_-]?key['\"]?\s*[:=]\s*['\"]?)(pk_live_[0-9a-zA-Z]{24,})", "Stripe Public Key"),
        (r"(?i)(sendgrid['\"]?\s*[:=]\s*['\"]?)(SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43})", "SendGrid API Key"),
        (r"(?i)(twilio['\"]?\s*[:=]\s*['\"]?)(SK[0-9a-f]{32})", "Twilio API Key"),
        (r"(?i)(twilio[_-]?auth[_-]?token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9]{32})", "Twilio Auth Token"),
        (r"(?i)(heroku[_-]?api[_-]?key['\"]?\s*[:=]\s*['\"]?)([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})", "Heroku API Key"),
        (r"(?i)(new[_-]?relic['\"]?\s*[:=]\s*['\"]?)(NRRA-[a-zA-Z0-9]{32})", "New Relic API Key"),
        (r"(?i)(private[_-]?key['\"]?\s*[:=]\s*['\"]?)(-----BEGIN RSA PRIVATE KEY-----)", "RSA Private Key Header"),
        (r"(?i)(private[_-]?key['\"]?\s*[:=]\s*['\"]?)(-----BEGIN EC PRIVATE KEY-----)", "EC Private Key Header"),
        (r"(?i)(private[_-]?key['\"]?\s*[:=]\s*['\"]?)(-----BEGIN PRIVATE KEY-----)", "Private Key Header"),
    ]

    # Personally identifiable information and account credentials.
    PII_PATTERNS = [
        # TLD class is letters only; a "|" inside a character class is a
        # literal pipe, not alternation, so it must not appear here.
        (r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", "Email Address"),
        (r"\b(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b", "Phone Number"),
        (r"\b\d{3}-\d{2}-\d{4}\b", "SSN"),
        (r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b", "Credit Card"),
        (r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14})\b", "Credit Card (Visa/Mastercard)"),
        (r"\b(?:3[47][0-9]{13})\b", "Credit Card (Amex)"),
        (r"(?i)(name['\"]?\s*[:=]\s*['\"]?)([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)", "Full Name"),
        (r"(?i)(first[_-]?name['\"]?\s*[:=]\s*['\"]?)([A-Z][a-z]+)", "First Name"),
        (r"(?i)(last[_-]?name['\"]?\s*[:=]\s*['\"]?)([A-Z][a-z]+)", "Last Name"),
        (r"(?i)(address['\"]?\s*[:=]\s*['\"]?)([0-9]{1,5}\s+[A-Za-z0-9\s,]+(?:\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl))?)", "Street Address"),
        (r"\b[0-9]{5}(?:-[0-9]{4})?\b", "ZIP Code"),
        (r"(?i)(password['\"]?\s*[:=]\s*['\"]?)([^\s'\"]{8,})", "Password"),
        (r"(?i)(username['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_.-]{3,})", "Username"),
        (r"(?i)(user[_-]?id['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{8,})", "User ID"),
    ]

    # Database connection URLs (scheme://user:password@host...) plus a
    # generic "connection_string = ..." assignment.
    DATABASE_PATTERNS = [
        (r"(?i)(postgresql://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "PostgreSQL Connection"),
        (r"(?i)(postgres://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "PostgreSQL Connection (alt)"),
        (r"(?i)(mysql://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "MySQL Connection"),
        (r"(?i)(mysql://[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "MySQL Connection (no password)"),
        (r"(?i)(mongodb(\+srv)?://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+(/\w+)?)", "MongoDB Connection"),
        (r"(?i)(redis://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+(/\d+)?)", "Redis Connection"),
        (r"(?i)(rediss://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+(/\d+)?)", "Redis Connection (SSL)"),
        (r"(?i)(sqlite:///[\w/.-]+\.db)", "SQLite Connection"),
        (r"(?i)(sqlserver://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "SQL Server Connection"),
        (r"(?i)(oracle://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "Oracle Connection"),
        (r"(?i)(db2://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "DB2 Connection"),
        (r"(?i)(cockroachdb://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "CockroachDB Connection"),
        (r"(?i)(cassandra://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "Cassandra Connection"),
        (r"(?i)(connection[_-]?string['\"]?\s*[:=]\s*['\"]?)([^'\"]+)", "Generic Connection String"),
    ]

    # Environment-variable reads, dotenv usage and NAME= style assignments.
    ENV_VAR_PATTERNS = [
        (r"os\.environ(?:\[|\.get\()\s*['\"]([A-Z0-9_]+)['\"]", "Environment Variable Access"),
        (r"os\.getenv\s*\(\s*['\"]([A-Z0-9_]+)['\"]\s*\)", "Environment Variable Access (getenv)"),
        (r"dotenv\.load\(['\"]([^'\")]+)['\"]\)", "Dotenv File Load"),
        (r"from dotenv import.*['\"]([^'\")]+)['\"]", "Dotenv Import"),
        (r"(?i)(export\s+[A-Z0-9_]+=)", "Shell Export Statement"),
        (r"(?i)([A-Z0-9_]+=)", "Environment Variable Assignment"),
    ]

    # Dotted-quad IPv4 addresses, bare or assigned to a "host" key.
    IP_ADDRESS_PATTERNS = [
        (r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b", "IPv4 Address"),
        (r"(?i)(host['\"]?\s*[:=]\s*['\"]?)((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))", "Host IP Address"),
    ]

    # HTTP authorization headers and header-like token assignments.
    AUTHORIZATION_PATTERNS = [
        (r"(?i)(Bearer\s+)([a-zA-Z0-9_-]{20,})", "Bearer Token"),
        (r"(?i)(Basic\s+)([a-zA-Z0-9+/=]{20,})", "Basic Auth Header"),
        (r"(?i)(Authorization:\s*)(Bearer\s+[a-zA-Z0-9_-]{20,})", "Authorization Header"),
        (r"(?i)(auth[_-]?header['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Auth Header Value"),
        (r"(?i)(x-api-key['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "X-API-Key Header"),
        (r"(?i)(api[_-]?token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "API Token"),
    ]

    def __init__(self):
        """Create a library with an empty per-instance compiled-pattern cache."""
        # Maps a category key (e.g. "pii") to its compiled (pattern, label) list.
        self._compiled_patterns: Dict[str, List[Tuple[Pattern, str]]] = {}

    def _compile_patterns(self, patterns: List[Tuple[str, str]]) -> List[Tuple[Pattern, str]]:
        """Compile ``(regex_source, label)`` tuples into ``(Pattern, label)`` tuples.

        Every pattern is compiled with ``re.IGNORECASE``. A pattern that fails
        to compile is skipped (the remaining patterns are still returned), and
        a warning is logged so the missing detector does not go unnoticed.

        Args:
            patterns: Regex source strings paired with their labels.

        Returns:
            The successfully compiled patterns, in input order.
        """
        # Local import: keeps this block self-contained without touching the
        # module's import section.
        import logging

        compiled = []
        for pattern, name in patterns:
            try:
                compiled.append((re.compile(pattern, re.IGNORECASE), name))
            except re.error as exc:
                logging.getLogger(__name__).warning(
                    "Skipping invalid pattern %r (%s): %s", name, pattern, exc
                )
        return compiled

    def _get_cached(self, key: str, patterns: List[Tuple[str, str]]) -> List[Tuple[Pattern, str]]:
        """Return the compiled list for *key*, compiling *patterns* on first use."""
        if key not in self._compiled_patterns:
            self._compiled_patterns[key] = self._compile_patterns(patterns)
        return self._compiled_patterns[key]

    def get_api_key_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``API_KEY_PATTERNS`` (cached under "api_keys")."""
        return self._get_cached("api_keys", self.API_KEY_PATTERNS)

    def get_pii_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``PII_PATTERNS`` (cached under "pii")."""
        return self._get_cached("pii", self.PII_PATTERNS)

    def get_database_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``DATABASE_PATTERNS`` (cached under "database")."""
        return self._get_cached("database", self.DATABASE_PATTERNS)

    def get_env_var_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``ENV_VAR_PATTERNS`` (cached under "env_var")."""
        return self._get_cached("env_var", self.ENV_VAR_PATTERNS)

    def get_ip_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``IP_ADDRESS_PATTERNS`` (cached under "ip")."""
        return self._get_cached("ip", self.IP_ADDRESS_PATTERNS)

    def get_authorization_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``AUTHORIZATION_PATTERNS`` (cached under "authorization")."""
        return self._get_cached("authorization", self.AUTHORIZATION_PATTERNS)

    def get_all_patterns(self) -> Dict[str, List[Tuple[Pattern, str]]]:
        """Return every category's compiled patterns, keyed by category name."""
        return {
            "api_keys": self.get_api_key_patterns(),
            "pii": self.get_pii_patterns(),
            "database": self.get_database_patterns(),
            "env_var": self.get_env_var_patterns(),
            "ip": self.get_ip_patterns(),
            "authorization": self.get_authorization_patterns(),
        }
|
||||
Reference in New Issue
Block a user