"""Simple regex-based parser for string literals.""" import re from pathlib import Path from typing import List, Optional from i18n_guardian.parsers.base import Parser, StringLiteral class SimpleParser(Parser): """Simple regex-based parser for string literals.""" def __init__(self, name: str, extensions: List[str]) -> None: self._name = name self._extensions = extensions self._string_pattern = re.compile( r'(?:(?P\'(?:[^\'\\]|\\.)*\')|(?P"(?:[^"\\]|\\.)*")|(?P`(?:[^`\\]|\\.)*`))', re.MULTILINE, ) @property def name(self) -> str: return self._name @property def extensions(self) -> List[str]: return self._extensions def parse(self, file_path: Path) -> List[StringLiteral]: """Parse file and extract string literals.""" literals: List[StringLiteral] = [] try: content = file_path.read_text(encoding="utf-8") except (OSError, UnicodeDecodeError): return literals lines = content.split("\n") for line_num, line in enumerate(lines, start=1): for match in self._string_pattern.finditer(line): quote_type = "single" if match.group("single") else "double" if match.group("double") else "backtick" raw_value = match.group(0) value = self._extract_value(raw_value, quote_type) is_template = quote_type == "backtick" column = match.start() + 1 literals.append( StringLiteral( value=value, file_path=file_path, line=line_num, column=column, is_template=is_template, ) ) return literals def _extract_value(self, raw: str, quote_type: str) -> str: """Extract the actual string value without quotes.""" if len(raw) < 2: return raw inner = raw[1:-1] if quote_type == "backtick": inner = inner.replace("\\`", "`") escapes = { "\\\\": "\\", "\\n": "\n", "\\t": "\t", "\\r": "\r", "\\'": "'", '\\"': '"', } for old, new in escapes.items(): inner = inner.replace(old, new) return inner class PythonParser(SimpleParser): """Parser for Python files.""" def __init__(self) -> None: super().__init__("python", [".py"]) class JavaScriptParser(SimpleParser): """Parser for JavaScript files.""" def __init__(self) -> None: super().__init__("javascript", [".js", ".mjs"]) class TypeScriptParser(SimpleParser): """Parser for TypeScript files.""" def __init__(self) -> None: super().__init__("typescript", [".ts", ".tsx"]) class ParserRegistry: """Registry for parsers by file extension.""" def __init__(self) -> None: self._parsers: dict = {} def register(self, parser: Parser) -> None: """Register a parser.""" for ext in parser.extensions: self._parsers[ext] = parser def get(self, file_path: Path) -> Optional[Parser]: """Get parser for file extension.""" ext = file_path.suffix.lower() return self._parsers.get(ext) def list_extensions(self) -> List[str]: """List all supported extensions.""" return list(self._parsers.keys()) def get_default_registry() -> ParserRegistry: """Get default parser registry.""" registry = ParserRegistry() registry.register(PythonParser()) registry.register(JavaScriptParser()) registry.register(TypeScriptParser()) return registry