133 lines
3.9 KiB
Python
133 lines
3.9 KiB
Python
"""Key extraction from source code using regex patterns."""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import List, Set, Tuple
|
|
|
|
|
|
class ExtractKeys:
|
|
"""Extract i18n keys from source code files."""
|
|
|
|
PATTERNS = {
|
|
"_": r'\b_\([\'"]([\'")]+)[\'"]\)',
|
|
"t": r'\bt\([\'"]([\'")]+)[\'"]\)',
|
|
"i18n.t": r'i18n\.t\([\'"]([\'")]+)[\'"]\)',
|
|
"gettext": r'gettext\([\'"]([\'")]+)[\'"]\)',
|
|
"ngettext": r'ngettext\([\'"]([\'")]+)[\'"],\s*[^\'"]+',
|
|
}
|
|
|
|
JS_PATTERNS = {
|
|
"template": r'`([^`]*\$\{[^}]+\}[^`]*)`',
|
|
"jsx": r'\{_\([\'"]([\'")]+)[\'"]\)\}',
|
|
}
|
|
|
|
def __init__(self, patterns: List[str] | None = None):
|
|
"""Initialize with custom patterns.
|
|
|
|
Args:
|
|
patterns: List of pattern names to use. If None, uses default patterns.
|
|
|
|
"""
|
|
self.patterns = patterns or list(self.PATTERNS.keys())
|
|
self._compiled_patterns = self._compile_patterns()
|
|
|
|
def _compile_patterns(self) -> dict[str, re.Pattern]:
|
|
"""Compile regex patterns for faster matching."""
|
|
compiled = {}
|
|
for pattern_name in self.patterns:
|
|
if pattern_name in self.PATTERNS:
|
|
compiled[pattern_name] = re.compile(self.PATTERNS[pattern_name])
|
|
elif pattern_name in self.JS_PATTERNS:
|
|
compiled[pattern_name] = re.compile(self.JS_PATTERNS[pattern_name])
|
|
return compiled
|
|
|
|
def extract_from_file(self, file_path: Path) -> Set[str]:
|
|
"""Extract keys from a single file.
|
|
|
|
Args:
|
|
file_path: Path to the source file.
|
|
|
|
Returns:
|
|
Set of extracted i18n keys.
|
|
|
|
"""
|
|
keys = set()
|
|
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
|
|
|
for pattern_name, compiled_pattern in self._compiled_patterns.items():
|
|
for match in compiled_pattern.finditer(content):
|
|
if pattern_name in self.JS_PATTERNS:
|
|
if pattern_name == "template":
|
|
keys.update(self._extract_from_template(match.group(1)))
|
|
elif pattern_name == "jsx":
|
|
keys.add(match.group(1))
|
|
else:
|
|
keys.add(match.group(1))
|
|
|
|
return keys
|
|
|
|
def _extract_from_template(self, template: str) -> Set[str]:
|
|
"""Extract keys from template literals.
|
|
|
|
Args:
|
|
template: Template literal content.
|
|
|
|
Returns:
|
|
Set of extracted keys.
|
|
|
|
"""
|
|
keys = set()
|
|
pattern = re.compile(r'\$\{([^}]+)\')
|
|
for match in pattern.finditer(template):
|
|
expr = match.group(1)
|
|
key_match = re.search(r'[\'"]([^\'"]+)[\'"]', expr)
|
|
if key_match:
|
|
keys.add(key_match.group(1))
|
|
return keys
|
|
|
|
def extract_from_files(self, file_paths: List[Path]) -> Set[str]:
|
|
"""Extract keys from multiple files.
|
|
|
|
Args:
|
|
file_paths: List of file paths to scan.
|
|
|
|
Returns:
|
|
Set of all extracted keys.
|
|
|
|
"""
|
|
keys = set()
|
|
for file_path in file_paths:
|
|
keys.update(self.extract_from_file(file_path))
|
|
return keys
|
|
|
|
|
|
def extract_keys(
    paths: Tuple[str, ...],
    patterns: List[str],
    file_types: List[str],
) -> Set[str]:
    """Extract i18n keys from source files.

    Args:
        paths: Tuple of directory or file paths to scan. Directories are
            searched recursively; paths that are neither an existing file
            nor an existing directory are skipped.
        patterns: List of i18n function patterns to look for.
        file_types: List of file extensions to include, with or without a
            leading dot (``"py"`` and ``".py"`` are equivalent).

    Returns:
        Set of unique i18n keys found.
    """
    extractor = ExtractKeys(patterns)

    # Normalise ".py" -> "py" so both spellings work for the suffix check
    # and the glob below; dict.fromkeys drops duplicates but keeps order.
    extensions = list(dict.fromkeys(ext.lstrip(".") for ext in file_types))

    file_paths: List[Path] = []
    for path_str in paths:
        path = Path(path_str)
        if path.is_file():
            if path.suffix.lstrip(".") in extensions:
                file_paths.append(path)
        elif path.is_dir():
            for ext in extensions:
                # rglob also yields directories whose name matches the
                # pattern; reading those would raise, so keep files only.
                file_paths.extend(
                    candidate
                    for candidate in path.rglob(f"*.{ext}")
                    if candidate.is_file()
                )

    return extractor.extract_from_files(file_paths)
|