Initial upload: Code Privacy Shield v0.1.0
Some checks failed
CI / test (push) Has been cancelled

This commit is contained in:
2026-02-02 20:50:58 +00:00
parent 662baeb13c
commit bc3114422c

View File

@@ -0,0 +1,139 @@
import logging
import re
from typing import Dict, List, Pattern, Tuple
class PatternLibrary:
    """Catalogue of regular expressions for spotting sensitive data in text.

    Each ``*_PATTERNS`` class attribute is a list of ``(regex_source, label)``
    tuples.  The ``get_*_patterns`` methods return the same lists with the
    regex source compiled, as ``(compiled_pattern, label)`` tuples.  Compiled
    lists are built lazily on first request and cached per instance in
    ``self._compiled_patterns``.

    Note that every pattern is compiled with ``re.IGNORECASE`` (see
    ``_compile_patterns``), so patterns without an inline ``(?i)`` flag are
    matched case-insensitively as well.
    """

    # Credentials: generic "key = value" assignments plus vendor-specific
    # token formats.
    API_KEY_PATTERNS = [
        (r"(?i)(api[_-]?key['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "API Key"),
        (r"(?i)(secret['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Secret"),
        (r"(?i)(token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Token"),
        (r"(?i)(auth[_-]?token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Auth Token"),
        (r"sk-[a-zA-Z0-9]{20,}", "OpenAI API Key"),
        (r"sk-proj-[a-zA-Z0-9_-]{20,}", "OpenAI Project Key"),
        (r"(?i)(ghp_|gho_|ghu_|ghs_|ghr_)[a-zA-Z0-9]{36,}", "GitHub Token"),
        (r"(?i)(github[_-]?token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{30,})", "GitHub Token"),
        # The hyphen is placed last in the class so it is a literal "-".
        # The previous spelling ``\\-_`` inside a raw string was parsed as a
        # range from backslash to underscore and excluded the hyphen.
        (r"(?i)AIza[0-9A-Za-z_-]{35}", "Google API Key"),
        (r"(?i)(firebase['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Firebase Key"),
        (r"(?i)(aws[_-]?access[_-]?key[_-]?id['\"]?\s*[:=]\s*['\"]?)([A-Z0-9]{20})", "AWS Access Key ID"),
        (r"(?i)(aws[_-]?secret[_-]?access[_-]?key['\"]?\s*[:=]\s*['\"]?)([A-Za-z0-9/+=]{40})", "AWS Secret Key"),
        (r"(?i)(slack[_-]?token['\"]?\s*[:=]\s*['\"]?)(xox[baprs]-([0-9a-zA-Z]{10,48})?)", "Slack Token"),
        (r"(?i)(stripe[_-]?key['\"]?\s*[:=]\s*['\"]?)(sk_live_[0-9a-zA-Z]{24,})", "Stripe Secret Key"),
        (r"(?i)(stripe[_-]?pub[_-]?key['\"]?\s*[:=]\s*['\"]?)(pk_live_[0-9a-zA-Z]{24,})", "Stripe Public Key"),
        (r"(?i)(sendgrid['\"]?\s*[:=]\s*['\"]?)(SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43})", "SendGrid API Key"),
        (r"(?i)(twilio['\"]?\s*[:=]\s*['\"]?)(SK[0-9a-f]{32})", "Twilio API Key"),
        (r"(?i)(twilio[_-]?auth[_-]?token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9]{32})", "Twilio Auth Token"),
        (r"(?i)(heroku[_-]?api[_-]?key['\"]?\s*[:=]\s*['\"]?)([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})", "Heroku API Key"),
        (r"(?i)(new[_-]?relic['\"]?\s*[:=]\s*['\"]?)(NRRA-[a-zA-Z0-9]{32})", "New Relic API Key"),
        (r"(?i)(private[_-]?key['\"]?\s*[:=]\s*['\"]?)(-----BEGIN RSA PRIVATE KEY-----)", "RSA Private Key Header"),
        (r"(?i)(private[_-]?key['\"]?\s*[:=]\s*['\"]?)(-----BEGIN EC PRIVATE KEY-----)", "EC Private Key Header"),
        (r"(?i)(private[_-]?key['\"]?\s*[:=]\s*['\"]?)(-----BEGIN PRIVATE KEY-----)", "Private Key Header"),
    ]

    # Personally identifiable information and login credentials.
    PII_PATTERNS = [
        # TLD class is letters only; a "|" inside [...] is a literal pipe,
        # not alternation, so it must not appear here.
        (r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", "Email Address"),
        (r"\b(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b", "Phone Number"),
        (r"\b\d{3}-\d{2}-\d{4}\b", "SSN"),
        (r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b", "Credit Card"),
        (r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14})\b", "Credit Card (Visa/Mastercard)"),
        (r"\b(?:3[47][0-9]{13})\b", "Credit Card (Amex)"),
        (r"(?i)(name['\"]?\s*[:=]\s*['\"]?)([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)", "Full Name"),
        (r"(?i)(first[_-]?name['\"]?\s*[:=]\s*['\"]?)([A-Z][a-z]+)", "First Name"),
        (r"(?i)(last[_-]?name['\"]?\s*[:=]\s*['\"]?)([A-Z][a-z]+)", "Last Name"),
        (r"(?i)(address['\"]?\s*[:=]\s*['\"]?)([0-9]{1,5}\s+[A-Za-z0-9\s,]+(?:\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl))?)", "Street Address"),
        (r"\b[0-9]{5}(?:-[0-9]{4})?\b", "ZIP Code"),
        (r"(?i)(password['\"]?\s*[:=]\s*['\"]?)([^\s'\"]{8,})", "Password"),
        (r"(?i)(username['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_.-]{3,})", "Username"),
        (r"(?i)(user[_-]?id['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{8,})", "User ID"),
    ]

    # Database connection URLs and generic connection-string assignments.
    DATABASE_PATTERNS = [
        (r"(?i)(postgresql://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "PostgreSQL Connection"),
        (r"(?i)(postgres://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "PostgreSQL Connection (alt)"),
        (r"(?i)(mysql://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "MySQL Connection"),
        (r"(?i)(mysql://[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "MySQL Connection (no password)"),
        (r"(?i)(mongodb(\+srv)?://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+(/\w+)?)", "MongoDB Connection"),
        (r"(?i)(redis://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+(/\d+)?)", "Redis Connection"),
        (r"(?i)(rediss://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+(/\d+)?)", "Redis Connection (SSL)"),
        (r"(?i)(sqlite:///[\w/.-]+\.db)", "SQLite Connection"),
        (r"(?i)(sqlserver://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "SQL Server Connection"),
        (r"(?i)(oracle://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "Oracle Connection"),
        (r"(?i)(db2://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "DB2 Connection"),
        (r"(?i)(cockroachdb://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "CockroachDB Connection"),
        (r"(?i)(cassandra://[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+@[a-zA-Z0-9.-]+:\d+/\w+)", "Cassandra Connection"),
        (r"(?i)(connection[_-]?string['\"]?\s*[:=]\s*['\"]?)([^'\"]+)", "Generic Connection String"),
    ]

    # Environment-variable reads, dotenv usage and shell-style assignments.
    ENV_VAR_PATTERNS = [
        (r"os\.environ(?:\[|\.get\()\s*['\"]([A-Z0-9_]+)['\"]", "Environment Variable Access"),
        (r"os\.getenv\s*\(\s*['\"]([A-Z0-9_]+)['\"]\s*\)", "Environment Variable Access (getenv)"),
        (r"dotenv\.load\(['\"]([^'\")]+)['\"]\)", "Dotenv File Load"),
        (r"from dotenv import.*['\"]([^'\")]+)['\"]", "Dotenv Import"),
        (r"(?i)(export\s+[A-Z0-9_]+=)", "Shell Export Statement"),
        (r"(?i)([A-Z0-9_]+=)", "Environment Variable Assignment"),
    ]

    # Dotted-quad IPv4 addresses, bare or assigned to a "host" key.
    IP_ADDRESS_PATTERNS = [
        (r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b", "IPv4 Address"),
        (r"(?i)(host['\"]?\s*[:=]\s*['\"]?)((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))", "Host IP Address"),
    ]

    # HTTP authorization headers and related token assignments.
    AUTHORIZATION_PATTERNS = [
        (r"(?i)(Bearer\s+)([a-zA-Z0-9_-]{20,})", "Bearer Token"),
        (r"(?i)(Basic\s+)([a-zA-Z0-9+/=]{20,})", "Basic Auth Header"),
        (r"(?i)(Authorization:\s*)(Bearer\s+[a-zA-Z0-9_-]{20,})", "Authorization Header"),
        (r"(?i)(auth[_-]?header['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "Auth Header Value"),
        (r"(?i)(x-api-key['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "X-API-Key Header"),
        (r"(?i)(api[_-]?token['\"]?\s*[:=]\s*['\"]?)([a-zA-Z0-9_-]{20,})", "API Token"),
    ]

    def __init__(self):
        """Create a library with an empty compiled-pattern cache."""
        # Maps a category key (e.g. "pii") to its compiled pattern list.
        self._compiled_patterns: Dict[str, List[Tuple[Pattern, str]]] = {}

    def _compile_patterns(self, patterns: List[Tuple[str, str]]) -> List[Tuple[Pattern, str]]:
        """Compile ``(regex_source, label)`` tuples into ``(pattern, label)`` tuples.

        Every pattern is compiled with ``re.IGNORECASE``.  A pattern that
        fails to compile is skipped (best effort) and reported through the
        ``logging`` module so that a broken detection rule does not vanish
        unnoticed.

        Args:
            patterns: Regex source strings paired with a human-readable label.

        Returns:
            The successfully compiled patterns, in input order.
        """
        compiled = []
        for pattern, name in patterns:
            try:
                regex = re.compile(pattern, re.IGNORECASE)
            except re.error as exc:
                logging.getLogger(__name__).warning(
                    "Skipping invalid pattern %r (%s): %s", name, pattern, exc
                )
                continue
            compiled.append((regex, name))
        return compiled

    def _get_compiled(self, category: str, patterns: List[Tuple[str, str]]) -> List[Tuple[Pattern, str]]:
        """Return the cached compiled list for *category*, compiling on first use."""
        if category not in self._compiled_patterns:
            self._compiled_patterns[category] = self._compile_patterns(patterns)
        return self._compiled_patterns[category]

    def get_api_key_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``API_KEY_PATTERNS`` (cached under ``"api_keys"``)."""
        return self._get_compiled("api_keys", self.API_KEY_PATTERNS)

    def get_pii_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``PII_PATTERNS`` (cached under ``"pii"``)."""
        return self._get_compiled("pii", self.PII_PATTERNS)

    def get_database_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``DATABASE_PATTERNS`` (cached under ``"database"``)."""
        return self._get_compiled("database", self.DATABASE_PATTERNS)

    def get_env_var_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``ENV_VAR_PATTERNS`` (cached under ``"env_var"``)."""
        return self._get_compiled("env_var", self.ENV_VAR_PATTERNS)

    def get_ip_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``IP_ADDRESS_PATTERNS`` (cached under ``"ip"``)."""
        return self._get_compiled("ip", self.IP_ADDRESS_PATTERNS)

    def get_authorization_patterns(self) -> List[Tuple[Pattern, str]]:
        """Return the compiled ``AUTHORIZATION_PATTERNS`` (cached under ``"authorization"``)."""
        return self._get_compiled("authorization", self.AUTHORIZATION_PATTERNS)

    def get_all_patterns(self) -> Dict[str, List[Tuple[Pattern, str]]]:
        """Return every compiled pattern list, keyed by category name."""
        return {
            "api_keys": self.get_api_key_patterns(),
            "pii": self.get_pii_patterns(),
            "database": self.get_database_patterns(),
            "env_var": self.get_env_var_patterns(),
            "ip": self.get_ip_patterns(),
            "authorization": self.get_authorization_patterns(),
        }