Add scanners module
This commit is contained in:
191
i18n_guardian/scanners/string_scanner.py
Normal file
191
i18n_guardian/scanners/string_scanner.py
Normal file
@@ -0,0 +1,191 @@
|
||||
"""String scanner for detecting hardcoded strings."""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Set
|
||||
|
||||
from i18n_guardian.parsers.base import StringLiteral
|
||||
from i18n_guardian.parsers.simple import get_default_registry
|
||||
|
||||
|
||||
@dataclass
class Violation:
    """A single hardcoded-string finding.

    Attributes:
        literal: The string literal that was flagged.
        suggested_key: Translation key proposed for the literal.
        reason: Human-readable explanation of the finding; a generic
            message is used when none is supplied.
    """

    literal: StringLiteral
    suggested_key: str
    reason: str = field(default="Hardcoded string detected")
|
||||
|
||||
|
||||
@dataclass
class ScanResult:
    """Aggregated outcome of one scan.

    Attributes:
        violations: Findings collected during the scan; every instance
            starts with its own empty list.
        files_scanned: Number of files processed.
        strings_found: Number of string literals examined.
        violations_count: Number of violations recorded.
    """

    violations: List[Violation] = field(default_factory=list)
    files_scanned: int = field(default=0)
    strings_found: int = field(default=0)
    violations_count: int = field(default=0)
|
||||
|
||||
|
||||
class StringScanner:
    """Scanner that reports string literals which look like untranslated text.

    Literals are obtained from the parsers held by the registry returned
    from ``get_default_registry()``. A literal is reported unless it is
    shorter than ``min_length`` or matches one of the exemptions (URL,
    e-mail address, regex-looking text, i18n call text, number, single
    character).
    """

    # Value starts with "http://" or "https://" (case-insensitive).
    URL_PATTERN = re.compile(
        r"^https?://",
        re.IGNORECASE,
    )

    # Whole value has the shape local@domain.tld.
    EMAIL_PATTERN = re.compile(
        r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
    )

    # Value starts with "/" or "^" and ends with "/" or "$".
    REGEX_PATTERN = re.compile(
        r"^[/^].*[/$]$",
    )

    # Value starts with an identifier followed by "("; group 1 is the name.
    I18N_CALL_PATTERN = re.compile(
        r"^(\w+)\s*\(",
    )

    # Runs of characters that are not allowed in a generated key.
    KEY_SEPARATOR_PATTERN = re.compile(r"[^a-z0-9]+")

    # Upper bound on the length of a suggested translation key.
    MAX_KEY_LENGTH = 100

    def __init__(
        self,
        min_length: int = 3,
        exclude_patterns: Optional[List[str]] = None,
        i18n_functions: Optional[List[str]] = None,
    ) -> None:
        """Configure the scanner.

        Args:
            min_length: Literals shorter than this are never reported.
            exclude_patterns: Glob patterns, tested with ``Path.match``, for
                files to skip. ``None`` means no exclusions.
            i18n_functions: Function names whose call text is exempt from
                reporting. ``None`` means none.
        """
        self.min_length = min_length
        self.exclude_patterns = exclude_patterns or []
        self.i18n_functions = set(i18n_functions or [])
        self.registry = get_default_registry()

    def scan(self, path: Path, i18n_functions: Optional[Set[str]] = None) -> ScanResult:
        """Scan a file or directory tree for hardcoded strings.

        Args:
            path: File or directory to scan.
            i18n_functions: Extra function names to exempt for this call
                only, in addition to those given to the constructor.

        Returns:
            A ``ScanResult`` holding the violations and the counters.
        """
        # Merge into a new set so per-call names never leak into the instance.
        exempt_functions = self.i18n_functions | set(i18n_functions or ())

        result = ScanResult()
        for file_path in self._iter_files(path):
            if not self._should_include(file_path):
                continue

            literals = self._extract_literals(file_path)
            result.files_scanned += 1

            for literal in literals:
                result.strings_found += 1
                if not self._is_violation(literal, exempt_functions):
                    continue
                result.violations.append(
                    Violation(
                        literal=literal,
                        suggested_key=self._generate_key(literal),
                    )
                )

        result.violations_count = len(result.violations)
        return result

    def _iter_files(self, path: Path) -> List[Path]:
        """Return the candidate files for ``path``.

        A path that is a file is returned as-is. Otherwise the tree is
        searched recursively once per extension reported by the registry.
        Directories that happen to match an extension are dropped, and a
        file matched by several extensions (e.g. ``.ts`` and ``.d.ts``) is
        returned only once, in first-seen order.
        """
        if path.is_file():
            return [path]

        # dict keys give order-preserving de-duplication.
        unique = {}
        for ext in self.registry.list_extensions():
            # presumably extensions carry their leading dot (".py") -- TODO confirm
            for candidate in path.rglob(f"*{ext}"):
                if candidate.is_file():
                    unique.setdefault(candidate, None)
        return list(unique)

    def _should_include(self, file_path: Path) -> bool:
        """Return False when ``file_path`` matches any exclude pattern.

        Patterns are evaluated with ``Path.match``.
        """
        return not any(file_path.match(pattern) for pattern in self.exclude_patterns)

    def _extract_literals(self, file_path: Path) -> List[StringLiteral]:
        """Parse ``file_path`` with the registry's parser for it.

        Returns an empty list when the registry yields no parser.
        """
        parser = self.registry.get(file_path)
        if not parser:
            return []
        return parser.parse(file_path)

    def _is_violation(self, literal: StringLiteral, exclude_functions: Set[str]) -> bool:
        """Decide whether ``literal`` should be reported.

        Args:
            literal: Literal to test; only its ``value`` is read.
            exclude_functions: Function names whose call text is exempt.

        Returns:
            True when the value is at least ``min_length`` long and none of
            the exemption checks applies.
        """
        value = literal.value
        if len(value) < self.min_length:
            return False

        # Evaluated left to right; the first exemption that applies wins.
        exempt = (
            self._is_url(value)
            or self._is_email(value)
            or self._is_regex(value)
            or self._is_i18n_call(value, exclude_functions)
            or self._is_numeric(value)
            or self._is_single_char(value)
        )
        return not exempt

    def _is_url(self, value: str) -> bool:
        """Return True when ``value`` starts with ``http://`` or ``https://``."""
        return self.URL_PATTERN.match(value) is not None

    def _is_email(self, value: str) -> bool:
        """Return True when the whole of ``value`` looks like an e-mail address."""
        return self.EMAIL_PATTERN.match(value) is not None

    def _is_regex(self, value: str) -> bool:
        """Return True when ``value`` is delimited like a regex pattern.

        That is: it starts with ``/`` or ``^`` and ends with ``/`` or ``$``.
        """
        return self.REGEX_PATTERN.match(value) is not None

    def _is_i18n_call(self, value: str, exclude_functions: Set[str]) -> bool:
        """Return True when ``value`` starts with a call to an exempt function.

        The text must begin with ``name(`` (optional whitespace before the
        parenthesis) and ``name`` must be in ``exclude_functions``.
        """
        match = self.I18N_CALL_PATTERN.match(value)
        return match is not None and match.group(1) in exclude_functions

    def _is_numeric(self, value: str) -> bool:
        """Return True when ``value`` is the text of a number.

        ``float()`` also accepts the words ``nan``, ``inf`` and ``infinity``
        (any case, optionally signed). Those are words rather than numbers,
        so at least one digit is required as well.
        """
        try:
            float(value)
        except ValueError:
            return False
        return any(char.isdigit() for char in value)

    def _is_single_char(self, value: str) -> bool:
        """Return True when ``value`` is exactly one character long."""
        return len(value) == 1

    def _generate_key(self, literal: StringLiteral) -> str:
        """Build a suggested translation key for ``literal``.

        The key is ``<file stem>_<slug>``, where the slug is the lower-cased
        value with every run of characters outside ``[a-z0-9]`` collapsed to
        a single underscore. When the slug is empty (the value has no such
        characters) the file stem alone is returned, so the key never ends
        in a dangling underscore. Keys longer than ``MAX_KEY_LENGTH`` are
        truncated and trailing underscores are removed.
        """
        slug = self.KEY_SEPARATOR_PATTERN.sub("_", literal.value.lower()).strip("_")
        prefix = literal.file_path.stem

        suggested = f"{prefix}_{slug}" if slug else prefix
        if len(suggested) > self.MAX_KEY_LENGTH:
            suggested = suggested[: self.MAX_KEY_LENGTH].rstrip("_")
        return suggested
|
||||
Reference in New Issue
Block a user