"""Simple regex-based parser for string literals."""

import re
from pathlib import Path
from typing import Dict, List, Optional

from i18n_guardian.parsers.base import Parser, StringLiteral

# A backslash followed by exactly one character. Decoding escapes with a
# single left-to-right pass keeps an escaped backslash from being re-read as
# the start of another escape (source text ``\\n`` is a backslash plus "n",
# not a newline).
_ESCAPE_PATTERN = re.compile(r"\\(.)")

# Escape character -> decoded character. Escapes not listed here are left
# untouched (backslash included).
_SIMPLE_ESCAPES = {
    "\\": "\\",
    "n": "\n",
    "t": "\t",
    "r": "\r",
    "'": "'",
    '"': '"',
}


class SimpleParser(Parser):
    """Simple regex-based parser for string literals.

    Files are scanned one line at a time, so only literals that open and
    close on the same line are reported.
    """

    def __init__(self, name: str, extensions: List[str]) -> None:
        """Create a parser.

        Args:
            name: Identifier exposed through the ``name`` property.
            extensions: File extensions exposed through ``extensions``.
        """
        self._name = name
        self._extensions = extensions
        # One named group per quote style; the group that matched tells
        # ``parse`` which kind of literal was found.
        self._string_pattern = re.compile(
            r'(?:(?P<single>\'(?:[^\'\\]|\\.)*\')|(?P<double>"(?:[^"\\]|\\.)*")|(?P<backtick>`(?:[^`\\]|\\.)*`))'
        )

    @property
    def name(self) -> str:
        """Return the parser name."""
        return self._name

    @property
    def extensions(self) -> List[str]:
        """Return the file extensions this parser was created with."""
        return self._extensions

    def parse(self, file_path: Path) -> List[StringLiteral]:
        """Parse file and extract string literals.

        Args:
            file_path: File to read as UTF-8 text.

        Returns:
            One ``StringLiteral`` per quoted string, in file order, with
            1-based ``line`` and ``column``. An empty list is returned when
            the file cannot be read or is not valid UTF-8.
        """
        literals: List[StringLiteral] = []

        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable file contributes no literals.
            return literals

        for line_num, line in enumerate(content.split("\n"), start=1):
            for match in self._string_pattern.finditer(line):
                # Exactly one of the named groups participates in a match.
                quote_type = match.lastgroup or "backtick"
                literals.append(
                    StringLiteral(
                        value=self._extract_value(match.group(0), quote_type),
                        file_path=file_path,
                        line=line_num,
                        column=match.start() + 1,
                        is_template=quote_type == "backtick",
                    )
                )

        return literals

    def _extract_value(self, raw: str, quote_type: str) -> str:
        """Extract the actual string value without quotes.

        Args:
            raw: The literal including its surrounding quote characters.
            quote_type: ``"single"``, ``"double"`` or ``"backtick"``.

        Returns:
            The text between the quotes with the supported escapes decoded.
            ``raw`` is returned unchanged when it is shorter than two
            characters.
        """
        if len(raw) < 2:
            return raw

        def decode(match: "re.Match[str]") -> str:
            char = match.group(1)
            # An escaped backtick is only meaningful inside a backtick literal.
            if char == "`" and quote_type == "backtick":
                return "`"
            return _SIMPLE_ESCAPES.get(char, match.group(0))

        return _ESCAPE_PATTERN.sub(decode, raw[1:-1])


class PythonParser(SimpleParser):
    """Parser for Python files."""

    def __init__(self) -> None:
        super().__init__("python", [".py"])


class JavaScriptParser(SimpleParser):
    """Parser for JavaScript files."""

    def __init__(self) -> None:
        super().__init__("javascript", [".js", ".mjs"])


class TypeScriptParser(SimpleParser):
    """Parser for TypeScript files."""

    def __init__(self) -> None:
        super().__init__("typescript", [".ts", ".tsx"])


class ParserRegistry:
    """Registry for parsers by file extension."""

    def __init__(self) -> None:
        # Lower-cased extension -> parser registered for it.
        self._parsers: Dict[str, Parser] = {}

    def register(self, parser: Parser) -> None:
        """Register a parser under each of its extensions.

        Extensions are stored lower-cased because ``get`` lower-cases the
        file suffix before looking it up. A later registration for the same
        extension replaces the earlier one.
        """
        for ext in parser.extensions:
            self._parsers[ext.lower()] = parser

    def get(self, file_path: Path) -> Optional[Parser]:
        """Get parser for file extension, or ``None`` if none is registered."""
        return self._parsers.get(file_path.suffix.lower())

    def list_extensions(self) -> List[str]:
        """List all supported extensions."""
        return list(self._parsers)


def get_default_registry() -> ParserRegistry:
    """Get default parser registry.

    Returns:
        A new registry with the Python, JavaScript and TypeScript parsers
        registered.
    """
    registry = ParserRegistry()
    for parser in (PythonParser(), JavaScriptParser(), TypeScriptParser()):
        registry.register(parser)
    return registry