This commit is contained in:
132
i18n_key_sync/extract.py
Normal file
132
i18n_key_sync/extract.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""Key extraction from source code using regex patterns."""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Set, Tuple
|
||||
|
||||
|
||||
class ExtractKeys:
|
||||
"""Extract i18n keys from source code files."""
|
||||
|
||||
PATTERNS = {
|
||||
"_": r'\b_\([\'"]([\'")]+)[\'"]\)',
|
||||
"t": r'\bt\([\'"]([\'")]+)[\'"]\)',
|
||||
"i18n.t": r'i18n\.t\([\'"]([\'")]+)[\'"]\)',
|
||||
"gettext": r'gettext\([\'"]([\'")]+)[\'"]\)',
|
||||
"ngettext": r'ngettext\([\'"]([\'")]+)[\'"],\s*[^\'"]+',
|
||||
}
|
||||
|
||||
JS_PATTERNS = {
|
||||
"template": r'`([^`]*\$\{[^}]+\}[^`]*)`',
|
||||
"jsx": r'\{_\([\'"]([\'")]+)[\'"]\)\}',
|
||||
}
|
||||
|
||||
def __init__(self, patterns: List[str] | None = None):
|
||||
"""Initialize with custom patterns.
|
||||
|
||||
Args:
|
||||
patterns: List of pattern names to use. If None, uses default patterns.
|
||||
|
||||
"""
|
||||
self.patterns = patterns or list(self.PATTERNS.keys())
|
||||
self._compiled_patterns = self._compile_patterns()
|
||||
|
||||
def _compile_patterns(self) -> dict[str, re.Pattern]:
|
||||
"""Compile regex patterns for faster matching."""
|
||||
compiled = {}
|
||||
for pattern_name in self.patterns:
|
||||
if pattern_name in self.PATTERNS:
|
||||
compiled[pattern_name] = re.compile(self.PATTERNS[pattern_name])
|
||||
elif pattern_name in self.JS_PATTERNS:
|
||||
compiled[pattern_name] = re.compile(self.JS_PATTERNS[pattern_name])
|
||||
return compiled
|
||||
|
||||
def extract_from_file(self, file_path: Path) -> Set[str]:
|
||||
"""Extract keys from a single file.
|
||||
|
||||
Args:
|
||||
file_path: Path to the source file.
|
||||
|
||||
Returns:
|
||||
Set of extracted i18n keys.
|
||||
|
||||
"""
|
||||
keys = set()
|
||||
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
||||
|
||||
for pattern_name, compiled_pattern in self._compiled_patterns.items():
|
||||
for match in compiled_pattern.finditer(content):
|
||||
if pattern_name in self.JS_PATTERNS:
|
||||
if pattern_name == "template":
|
||||
keys.update(self._extract_from_template(match.group(1)))
|
||||
elif pattern_name == "jsx":
|
||||
keys.add(match.group(1))
|
||||
else:
|
||||
keys.add(match.group(1))
|
||||
|
||||
return keys
|
||||
|
||||
def _extract_from_template(self, template: str) -> Set[str]:
|
||||
"""Extract keys from template literals.
|
||||
|
||||
Args:
|
||||
template: Template literal content.
|
||||
|
||||
Returns:
|
||||
Set of extracted keys.
|
||||
|
||||
"""
|
||||
keys = set()
|
||||
pattern = re.compile(r'\$\{([^}]+)\')
|
||||
for match in pattern.finditer(template):
|
||||
expr = match.group(1)
|
||||
key_match = re.search(r'[\'"]([^\'"]+)[\'"]', expr)
|
||||
if key_match:
|
||||
keys.add(key_match.group(1))
|
||||
return keys
|
||||
|
||||
def extract_from_files(self, file_paths: List[Path]) -> Set[str]:
|
||||
"""Extract keys from multiple files.
|
||||
|
||||
Args:
|
||||
file_paths: List of file paths to scan.
|
||||
|
||||
Returns:
|
||||
Set of all extracted keys.
|
||||
|
||||
"""
|
||||
keys = set()
|
||||
for file_path in file_paths:
|
||||
keys.update(self.extract_from_file(file_path))
|
||||
return keys
|
||||
|
||||
|
||||
def extract_keys(
    paths: Tuple[str, ...],
    patterns: List[str],
    file_types: List[str],
) -> Set[str]:
    """Extract i18n keys from source files.

    Each entry of ``paths`` may be a file or a directory. Files are scanned
    only if their extension is listed in ``file_types``; directories are
    searched recursively for files with those extensions. Paths that are
    neither an existing file nor a directory are ignored.

    Args:
        paths: Tuple of directory or file paths to scan.
        patterns: List of i18n function patterns to look for.
        file_types: List of file extensions to include, with or without a
            leading dot (``"py"`` and ``".py"`` are equivalent).

    Returns:
        Set of unique i18n keys found.
    """
    extractor = ExtractKeys(patterns)

    # Normalise ".py" -> "py" so both spellings work; dict.fromkeys removes
    # duplicates while keeping the caller's order.
    extensions = list(dict.fromkeys(ext.lstrip(".") for ext in file_types))

    # Keyed by path so a file reached through overlapping inputs is read once.
    file_paths = {}

    for path_str in paths:
        path = Path(path_str)
        if path.is_file():
            if path.suffix.lstrip(".") in extensions:
                file_paths[path] = None
        elif path.is_dir():
            for ext in extensions:
                for candidate in path.rglob(f"*.{ext}"):
                    # rglob also yields directories whose name matches the
                    # pattern; reading those would raise.
                    if candidate.is_file():
                        file_paths[candidate] = None

    return extractor.extract_from_files(list(file_paths))
|
||||
Reference in New Issue
Block a user