"""Key extraction from source code using regex patterns.""" import re from pathlib import Path from typing import List, Set, Tuple class ExtractKeys: """Extract i18n keys from source code files.""" PATTERNS = { "_": r'\b_\([\'"]([\'")]+)[\'"]\)', "t": r'\bt\([\'"]([\'")]+)[\'"]\)', "i18n.t": r'i18n\.t\([\'"]([\'")]+)[\'"]\)', "gettext": r'gettext\([\'"]([\'")]+)[\'"]\)', "ngettext": r'ngettext\([\'"]([\'")]+)[\'"],\s*[^\'"]+', } JS_PATTERNS = { "template": r'`([^`]*\$\{[^}]+\}[^`]*)`', "jsx": r'\{_\([\'"]([\'")]+)[\'"]\)\}', } def __init__(self, patterns: List[str] | None = None): """Initialize with custom patterns. Args: patterns: List of pattern names to use. If None, uses default patterns. """ self.patterns = patterns or list(self.PATTERNS.keys()) self._compiled_patterns = self._compile_patterns() def _compile_patterns(self) -> dict[str, re.Pattern]: """Compile regex patterns for faster matching.""" compiled = {} for pattern_name in self.patterns: if pattern_name in self.PATTERNS: compiled[pattern_name] = re.compile(self.PATTERNS[pattern_name]) elif pattern_name in self.JS_PATTERNS: compiled[pattern_name] = re.compile(self.JS_PATTERNS[pattern_name]) return compiled def extract_from_file(self, file_path: Path) -> Set[str]: """Extract keys from a single file. Args: file_path: Path to the source file. Returns: Set of extracted i18n keys. """ keys = set() content = file_path.read_text(encoding="utf-8", errors="ignore") for pattern_name, compiled_pattern in self._compiled_patterns.items(): for match in compiled_pattern.finditer(content): if pattern_name in self.JS_PATTERNS: if pattern_name == "template": keys.update(self._extract_from_template(match.group(1))) elif pattern_name == "jsx": keys.add(match.group(1)) else: keys.add(match.group(1)) return keys def _extract_from_template(self, template: str) -> Set[str]: """Extract keys from template literals. Args: template: Template literal content. Returns: Set of extracted keys. """ keys = set() pattern = re.compile(r'\$\{([^}]+)\') for match in pattern.finditer(template): expr = match.group(1) key_match = re.search(r'[\'"]([^\'"]+)[\'"]', expr) if key_match: keys.add(key_match.group(1)) return keys def extract_from_files(self, file_paths: List[Path]) -> Set[str]: """Extract keys from multiple files. Args: file_paths: List of file paths to scan. Returns: Set of all extracted keys. """ keys = set() for file_path in file_paths: keys.update(self.extract_from_file(file_path)) return keys def extract_keys( paths: Tuple[str, ...], patterns: List[str], file_types: List[str], ) -> Set[str]: """Extract i18n keys from source files. Args: paths: Tuple of directory or file paths to scan. patterns: List of i18n function patterns to look for. file_types: List of file extensions to include. Returns: Set of unique i18n keys found. """ extractor = ExtractKeys(patterns) file_paths = [] for path_str in paths: path = Path(path_str) if path.is_file(): if path.suffix.lstrip(".") in file_types: file_paths.append(path) elif path.is_dir(): for ext in file_types: file_paths.extend(path.rglob(f"*.{ext}")) return extractor.extract_from_files(file_paths)