Files
i18n-key-sync/i18n_key_sync/extract.py
7000pctAUTO 38bb42f8d5
Some checks failed
CI / test (push) Has been cancelled
Add source files
2026-02-02 03:56:42 +00:00

133 lines
3.9 KiB
Python

"""Key extraction from source code using regex patterns."""
import re
from pathlib import Path
from typing import List, Set, Tuple
class ExtractKeys:
"""Extract i18n keys from source code files."""
PATTERNS = {
"_": r'\b_\([\'"]([\'")]+)[\'"]\)',
"t": r'\bt\([\'"]([\'")]+)[\'"]\)',
"i18n.t": r'i18n\.t\([\'"]([\'")]+)[\'"]\)',
"gettext": r'gettext\([\'"]([\'")]+)[\'"]\)',
"ngettext": r'ngettext\([\'"]([\'")]+)[\'"],\s*[^\'"]+',
}
JS_PATTERNS = {
"template": r'`([^`]*\$\{[^}]+\}[^`]*)`',
"jsx": r'\{_\([\'"]([\'")]+)[\'"]\)\}',
}
def __init__(self, patterns: List[str] | None = None):
"""Initialize with custom patterns.
Args:
patterns: List of pattern names to use. If None, uses default patterns.
"""
self.patterns = patterns or list(self.PATTERNS.keys())
self._compiled_patterns = self._compile_patterns()
def _compile_patterns(self) -> dict[str, re.Pattern]:
"""Compile regex patterns for faster matching."""
compiled = {}
for pattern_name in self.patterns:
if pattern_name in self.PATTERNS:
compiled[pattern_name] = re.compile(self.PATTERNS[pattern_name])
elif pattern_name in self.JS_PATTERNS:
compiled[pattern_name] = re.compile(self.JS_PATTERNS[pattern_name])
return compiled
def extract_from_file(self, file_path: Path) -> Set[str]:
"""Extract keys from a single file.
Args:
file_path: Path to the source file.
Returns:
Set of extracted i18n keys.
"""
keys = set()
content = file_path.read_text(encoding="utf-8", errors="ignore")
for pattern_name, compiled_pattern in self._compiled_patterns.items():
for match in compiled_pattern.finditer(content):
if pattern_name in self.JS_PATTERNS:
if pattern_name == "template":
keys.update(self._extract_from_template(match.group(1)))
elif pattern_name == "jsx":
keys.add(match.group(1))
else:
keys.add(match.group(1))
return keys
def _extract_from_template(self, template: str) -> Set[str]:
"""Extract keys from template literals.
Args:
template: Template literal content.
Returns:
Set of extracted keys.
"""
keys = set()
pattern = re.compile(r'\$\{([^}]+)\')
for match in pattern.finditer(template):
expr = match.group(1)
key_match = re.search(r'[\'"]([^\'"]+)[\'"]', expr)
if key_match:
keys.add(key_match.group(1))
return keys
def extract_from_files(self, file_paths: List[Path]) -> Set[str]:
"""Extract keys from multiple files.
Args:
file_paths: List of file paths to scan.
Returns:
Set of all extracted keys.
"""
keys = set()
for file_path in file_paths:
keys.update(self.extract_from_file(file_path))
return keys
def extract_keys(
    paths: Tuple[str, ...],
    patterns: List[str],
    file_types: List[str],
) -> Set[str]:
    """Extract i18n keys from source files.

    Each entry of ``paths`` that is a file is scanned if its extension is
    in ``file_types``; each entry that is a directory is searched
    recursively for files with those extensions. Entries that are neither
    are ignored.

    Args:
        paths: Tuple of directory or file paths to scan.
        patterns: List of i18n function patterns to look for.
        file_types: List of file extensions to include, with or without a
            leading dot (``"py"`` and ``".py"`` are equivalent).

    Returns:
        Set of unique i18n keys found.
    """
    extractor = ExtractKeys(patterns)
    # Normalise once so ".py" and "py" behave the same in both branches;
    # dict.fromkeys drops duplicates while keeping the caller's order.
    extensions = list(dict.fromkeys(ext.lstrip(".") for ext in file_types))
    file_paths = []
    for path_str in paths:
        path = Path(path_str)
        if path.is_file():
            if path.suffix.lstrip(".") in extensions:
                file_paths.append(path)
        elif path.is_dir():
            for ext in extensions:
                # rglob also yields directories whose names match the
                # pattern; reading those would raise, so keep files only.
                file_paths.extend(
                    found for found in path.rglob(f"*.{ext}") if found.is_file()
                )
    return extractor.extract_from_files(file_paths)