Add scanners module
Some checks failed
CI / test (3.10) (push) Failing after 15s
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-02-02 17:21:58 +00:00
parent 8bc7618953
commit 92ddc1666c

View File

@@ -0,0 +1,191 @@
"""String scanner for detecting hardcoded strings."""
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, Set
from i18n_guardian.parsers.base import StringLiteral
from i18n_guardian.parsers.simple import get_default_registry
@dataclass
class Violation:
    """A single string literal that was flagged as hardcoded.

    Pairs the offending literal with the translation key proposed for it and
    a short explanation of why it was reported.
    """

    # The literal, as handed over by the parser, that triggered the report.
    literal: StringLiteral
    # Translation key proposed as a replacement for the literal.
    suggested_key: str
    # Human-readable explanation; callers may override the generic default.
    reason: str = "Hardcoded string detected"
@dataclass
class ScanResult:
    """Aggregated outcome of one scan run.

    All counters start at zero and the violation list starts empty, so a
    freshly constructed result describes a scan that has not seen anything.
    """

    # Reported violations; default_factory gives each result its own list.
    violations: List[Violation] = field(default_factory=list)
    # Number of files whose literals were extracted.
    files_scanned: int = 0
    # Number of string literals inspected, whether or not they were flagged.
    strings_found: int = 0
    # Stored count of violations (kept as a separate field from the list).
    violations_count: int = 0
class StringScanner:
    """Find string literals that look like untranslated, hardcoded text.

    Literals are obtained from the parsers in the default registry. Each
    literal is run through a set of heuristics (minimum length, URL, e-mail,
    regex-like, i18n call, numeric, single character); whatever survives is
    reported as a violation together with a suggested translation key.
    """

    # Values that begin with an http:// or https:// scheme (case-insensitive).
    URL_PATTERN = re.compile(
        r"^https?://",
        re.IGNORECASE,
    )
    # Values that are, in their entirety, a simple e-mail address.
    EMAIL_PATTERN = re.compile(
        r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
    )
    # Values that open with "/" or "^" and close with "/" or "$".
    REGEX_PATTERN = re.compile(
        r"^[/^].*[/$]$",
    )
    # A leading identifier followed by "("; the identifier is captured.
    I18N_CALL_PATTERN = re.compile(
        r"^(\w+)\s*\(",
    )
    # Upper bound on the length of a suggested translation key.
    MAX_KEY_LENGTH = 100

    def __init__(
        self,
        min_length: int = 3,
        exclude_patterns: Optional[List[str]] = None,
        i18n_functions: Optional[List[str]] = None,
    ) -> None:
        """Configure the scanner.

        Args:
            min_length: Literals shorter than this are never reported.
            exclude_patterns: Glob patterns (as understood by ``Path.match``)
                for files that must be skipped.
            i18n_functions: Function names whose calls are treated as already
                translated.
        """
        self.min_length = min_length
        self.exclude_patterns = exclude_patterns or []
        self.i18n_functions = set(i18n_functions or [])
        self.registry = get_default_registry()

    def scan(self, path: Path, i18n_functions: Optional[Set[str]] = None) -> ScanResult:
        """Scan a file or directory tree for hardcoded strings.

        Args:
            path: File or directory to scan.
            i18n_functions: Extra function names to treat as i18n calls for
                this run only; they are merged with the ones given to the
                constructor without modifying the scanner.

        Returns:
            A ``ScanResult`` with the violations found and the counters
            filled in.
        """
        result = ScanResult()
        # Work on a copy so per-call names never leak into the instance.
        functions_to_exclude = self.i18n_functions.copy()
        if i18n_functions:
            functions_to_exclude.update(i18n_functions)

        for file_path in self._iter_files(path):
            if not self._should_include(file_path):
                continue
            literals = self._extract_literals(file_path)
            result.files_scanned += 1
            for literal in literals:
                result.strings_found += 1
                if not self._is_violation(literal, functions_to_exclude):
                    continue
                result.violations.append(
                    Violation(
                        literal=literal,
                        suggested_key=self._generate_key(literal),
                    )
                )

        result.violations_count = len(result.violations)
        return result

    def _iter_files(self, path: Path) -> List[Path]:
        """Collect the files to scan under ``path``.

        A path that is itself a file is returned as-is. For a directory,
        every registered extension is searched recursively; directories whose
        name happens to end in an extension (e.g. ``highlight.js/``) are
        skipped, duplicates from overlapping extensions are removed, and the
        result is sorted so scans are reproducible.
        """
        if path.is_file():
            return [path]

        found: Set[Path] = set()
        for ext in self.registry.list_extensions():
            found.update(
                candidate
                for candidate in path.rglob(f"*{ext}")
                if candidate.is_file()
            )
        return sorted(found)

    def _should_include(self, file_path: Path) -> bool:
        """Return False when ``file_path`` matches any exclude pattern."""
        # Path.match raises ValueError on an empty pattern, so ignore blanks
        # (e.g. from a stray empty config entry) instead of crashing the scan.
        return not any(
            file_path.match(pattern)
            for pattern in self.exclude_patterns
            if pattern
        )

    def _extract_literals(self, file_path: Path) -> List[StringLiteral]:
        """Parse ``file_path`` with its registered parser.

        Returns an empty list when the registry has no parser for the file.
        """
        parser = self.registry.get(file_path)
        if parser:
            return parser.parse(file_path)
        return []

    def _is_violation(self, literal: StringLiteral, exclude_functions: Set[str]) -> bool:
        """Decide whether ``literal`` should be reported.

        A literal is reported unless it is shorter than ``min_length`` or is
        recognised as a URL, e-mail address, regex-like pattern, call to one
        of ``exclude_functions``, number, or single character.
        """
        value = literal.value
        if len(value) < self.min_length:
            return False

        is_exempt = (
            self._is_url(value)
            or self._is_email(value)
            or self._is_regex(value)
            or self._is_i18n_call(value, exclude_functions)
            or self._is_numeric(value)
            or self._is_single_char(value)
        )
        return not is_exempt

    def _is_url(self, value: str) -> bool:
        """Return True when ``value`` starts with an http(s) scheme."""
        return bool(self.URL_PATTERN.match(value))

    def _is_email(self, value: str) -> bool:
        """Return True when the whole of ``value`` is an e-mail address."""
        return bool(self.EMAIL_PATTERN.match(value))

    def _is_regex(self, value: str) -> bool:
        """Return True when ``value`` is delimited like a regex pattern."""
        return bool(self.REGEX_PATTERN.match(value))

    def _is_i18n_call(self, value: str, exclude_functions: Set[str]) -> bool:
        """Return True when ``value`` starts with a call to a known i18n function."""
        match = self.I18N_CALL_PATTERN.match(value)
        return bool(match) and match.group(1) in exclude_functions

    def _is_numeric(self, value: str) -> bool:
        """Return True when ``value`` is the text of a number.

        ``float()`` also accepts the words "nan", "inf" and "infinity"; those
        are ordinary text that may need translating, so a value must contain
        at least one digit to count as numeric.
        """
        if not any(char.isdigit() for char in value):
            return False
        try:
            float(value)
        except ValueError:
            return False
        return True

    def _is_single_char(self, value: str) -> bool:
        """Return True when ``value`` is exactly one character long."""
        return len(value) == 1

    def _generate_key(self, literal: StringLiteral) -> str:
        """Build a suggested translation key for ``literal``.

        The key is the stem of the literal's file name followed by a slug of
        the lower-cased text in which every run of characters outside
        ``a-z0-9`` becomes a single underscore. Keys are capped at
        ``MAX_KEY_LENGTH`` characters.
        """
        slug = re.sub(r"[^a-z0-9]+", "_", literal.value.lower()).strip("_")
        prefix = Path(literal.file_path).stem
        # Join only the non-empty parts so text without any ASCII
        # alphanumerics (e.g. "!!!") does not yield a dangling underscore.
        suggested = "_".join(part for part in (prefix, slug) if part)
        if len(suggested) > self.MAX_KEY_LENGTH:
            suggested = suggested[: self.MAX_KEY_LENGTH].rstrip("_")
        return suggested