"""Key extraction from source code using regex patterns."""

# Provenance: content of i18n_key_sync/extract.py as created by commit
# 38bb42f8d5c7e31c89a1b2b1efebc2111889faaf ("Add source files").

import re
from pathlib import Path
from typing import List, Optional, Set, Tuple


class ExtractKeys:
    """Extract i18n keys from source code files.

    Every pattern captures the key as regex group 1. The key is the content
    of a single- or double-quoted string literal and may not itself contain
    a quote character or a closing parenthesis.
    """

    # Call-style lookups, e.g. ``_("key")`` or ``i18n.t('key')``.
    # For ``ngettext`` only the first (singular) argument is captured.
    PATTERNS = {
        "_": r'\b_\([\'"]([^\'")]+)[\'"]\)',
        "t": r'\bt\([\'"]([^\'")]+)[\'"]\)',
        "i18n.t": r'i18n\.t\([\'"]([^\'")]+)[\'"]\)',
        "gettext": r'gettext\([\'"]([^\'")]+)[\'"]\)',
        "ngettext": r'ngettext\([\'"]([^\'")]+)[\'"],\s*[^\'"]+',
    }

    # Opt-in JavaScript patterns; they are not part of the default set.
    # "template" captures a whole template literal that contains at least
    # one ``${...}`` placeholder; "jsx" captures the key in ``{_("key")}``.
    JS_PATTERNS = {
        "template": r'`([^`]*\$\{[^}]+\}[^`]*)`',
        "jsx": r'\{_\([\'"]([^\'")]+)[\'"]\)\}',
    }

    # Helpers for digging keys out of a template literal, compiled once
    # instead of on every call.
    _TEMPLATE_EXPR = re.compile(r'\$\{([^}]+)\}')
    _QUOTED_STRING = re.compile(r'[\'"]([^\'"]+)[\'"]')

    def __init__(self, patterns: Optional[List[str]] = None):
        """Initialize with custom patterns.

        Args:
            patterns: List of pattern names to use (keys of ``PATTERNS`` or
                ``JS_PATTERNS``). If None or empty, all ``PATTERNS`` names
                are used.

        """
        self.patterns = patterns or list(self.PATTERNS.keys())
        self._compiled_patterns = self._compile_patterns()

    def _compile_patterns(self) -> dict[str, re.Pattern]:
        """Compile the selected regex patterns for faster matching.

        Returns:
            Mapping of pattern name to compiled regex. Names that appear in
            neither ``PATTERNS`` nor ``JS_PATTERNS`` are skipped.

        """
        # PATTERNS is merged last so it wins if a name exists in both tables.
        available = {**self.JS_PATTERNS, **self.PATTERNS}
        return {
            name: re.compile(available[name])
            for name in self.patterns
            if name in available
        }

    def extract_from_file(self, file_path: Path) -> Set[str]:
        """Extract keys from a single file.

        Args:
            file_path: Path to the source file.

        Returns:
            Set of extracted i18n keys.

        """
        keys: Set[str] = set()
        # Undecodable bytes are dropped so one bad byte does not abort a scan.
        content = file_path.read_text(encoding="utf-8", errors="ignore")

        for pattern_name, compiled_pattern in self._compiled_patterns.items():
            for match in compiled_pattern.finditer(content):
                if pattern_name == "template":
                    # Group 1 is the whole literal; keys sit inside ${...}.
                    keys.update(self._extract_from_template(match.group(1)))
                else:
                    keys.add(match.group(1))

        return keys

    def _extract_from_template(self, template: str) -> Set[str]:
        """Extract keys from template literals.

        For every ``${...}`` placeholder the first quoted string inside it,
        if any, is taken as a key.

        Args:
            template: Template literal content (without the backticks).

        Returns:
            Set of extracted keys.

        """
        keys: Set[str] = set()
        for placeholder in ExtractKeys._TEMPLATE_EXPR.finditer(template):
            key_match = ExtractKeys._QUOTED_STRING.search(placeholder.group(1))
            if key_match:
                keys.add(key_match.group(1))
        return keys

    def extract_from_files(self, file_paths: List[Path]) -> Set[str]:
        """Extract keys from multiple files.

        Args:
            file_paths: List of file paths to scan.

        Returns:
            Set of all extracted keys.

        """
        keys: Set[str] = set()
        for file_path in file_paths:
            keys.update(self.extract_from_file(file_path))
        return keys


def extract_keys(
    paths: Tuple[str, ...],
    patterns: List[str],
    file_types: List[str],
) -> Set[str]:
    """Extract i18n keys from source files.

    Args:
        paths: Tuple of directory or file paths to scan. Directories are
            searched recursively; paths that are neither an existing file
            nor a directory are ignored.
        patterns: List of i18n function patterns to look for.
        file_types: List of file extensions to include, with or without a
            leading dot (``"py"`` and ``".py"`` are equivalent).

    Returns:
        Set of unique i18n keys found.

    """
    extractor = ExtractKeys(patterns)
    # Normalise ".py" -> "py" and drop repeated extensions, keeping order.
    extensions = list(dict.fromkeys(ext.lstrip(".") for ext in file_types))
    # Dict used as an ordered set so each file is read at most once.
    file_paths: dict = {}

    for path_str in paths:
        path = Path(path_str)
        if path.is_file():
            if path.suffix.lstrip(".") in extensions:
                file_paths[path] = None
        elif path.is_dir():
            for ext in extensions:
                for candidate in path.rglob(f"*.{ext}"):
                    # A directory may also be named "something.py".
                    if candidate.is_file():
                        file_paths[candidate] = None

    return extractor.extract_from_files(list(file_paths))