Files
i18n-guardian/i18n_guardian/parsers/simple.py
7000pctAUTO 1b207b9d71
Some checks failed
CI / test (3.10) (push) Failing after 13s
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / build (push) Has been cancelled
Add parsers module
2026-02-02 17:21:28 +00:00

135 lines
3.7 KiB
Python

"""Simple regex-based parser for string literals."""
import re
from pathlib import Path
from typing import List, Optional
from i18n_guardian.parsers.base import Parser, StringLiteral
class SimpleParser(Parser):
    """Regex-based parser that extracts quoted string literals line by line.

    Single-quoted, double-quoted and backtick-quoted literals are recognised.
    Each line is scanned independently, so a literal must open and close on
    the same line to be found; triple-quoted strings get no special handling.
    Backtick literals are reported with ``is_template=True``.
    """

    # Character following a backslash -> decoded character, for every quote
    # style. Escapes not listed here are left in the value untouched.
    _ESCAPES = {
        "\\": "\\",
        "n": "\n",
        "t": "\t",
        "r": "\r",
        "'": "'",
        '"': '"',
    }
    # A backslash followed by any single character.
    _ESCAPE_RE = re.compile(r"\\(.)", re.DOTALL)

    def __init__(self, name: str, extensions: List[str]) -> None:
        """Create a parser.

        Args:
            name: Identifier returned by the ``name`` property.
            extensions: File extensions returned by the ``extensions``
                property (e.g. ``[".py"]``).
        """
        self._name = name
        self._extensions = extensions
        # One named alternative per quote style; each body accepts either a
        # non-quote, non-backslash character or a backslash escape pair.
        self._string_pattern = re.compile(
            r'(?:(?P<single>\'(?:[^\'\\]|\\.)*\')|(?P<double>"(?:[^"\\]|\\.)*")|(?P<backtick>`(?:[^`\\]|\\.)*`))',
            re.MULTILINE,
        )

    @property
    def name(self) -> str:
        """Name given to this parser at construction time."""
        return self._name

    @property
    def extensions(self) -> List[str]:
        """File extensions given to this parser at construction time."""
        return self._extensions

    def parse(self, file_path: Path) -> List[StringLiteral]:
        """Read ``file_path`` as UTF-8 and return the string literals found.

        Args:
            file_path: File to scan.

        Returns:
            One ``StringLiteral`` per match, in file order, with 1-based
            ``line`` and ``column``. An empty list is returned when the file
            cannot be read or is not valid UTF-8.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable file simply contributes no literals.
            return []

        literals: List[StringLiteral] = []
        for line_num, line in enumerate(content.split("\n"), start=1):
            for match in self._string_pattern.finditer(line):
                # Exactly one named alternative matches; its name is the
                # quote style ("single", "double" or "backtick").
                quote_type = match.lastgroup or "backtick"
                literals.append(
                    StringLiteral(
                        value=self._extract_value(match.group(0), quote_type),
                        file_path=file_path,
                        line=line_num,
                        column=match.start() + 1,
                        is_template=quote_type == "backtick",
                    )
                )
        return literals

    def _extract_value(self, raw: str, quote_type: str) -> str:
        """Strip the surrounding quotes from ``raw`` and decode its escapes.

        Escapes are decoded in a single left-to-right pass so that the output
        of one escape is never re-read as the start of another; for example
        an escaped backslash followed by ``n`` stays a backslash and an
        ``n`` instead of becoming a newline.

        Args:
            raw: The literal including its opening and closing quote.
            quote_type: ``"single"``, ``"double"`` or ``"backtick"``.

        Returns:
            The decoded value, or ``raw`` unchanged when it is shorter than
            two characters.
        """
        if len(raw) < 2:
            return raw

        escapes = self._ESCAPES
        if quote_type == "backtick":
            # An escaped backtick is only decoded inside backtick literals.
            escapes = {**escapes, "`": "`"}

        def decode(escape: "re.Match[str]") -> str:
            # Unknown escapes are kept verbatim (backslash included).
            return escapes.get(escape.group(1), escape.group(0))

        return self._ESCAPE_RE.sub(decode, raw[1:-1])
class PythonParser(SimpleParser):
    """Simple parser registered as ``python`` for ``.py`` files."""

    def __init__(self) -> None:
        """Configure the parser name and the extensions it handles."""
        super().__init__(name="python", extensions=[".py"])
class JavaScriptParser(SimpleParser):
    """Simple parser registered as ``javascript`` for ``.js`` and ``.mjs`` files."""

    def __init__(self) -> None:
        """Configure the parser name and the extensions it handles."""
        super().__init__(name="javascript", extensions=[".js", ".mjs"])
class TypeScriptParser(SimpleParser):
    """Simple parser registered as ``typescript`` for ``.ts`` and ``.tsx`` files."""

    def __init__(self) -> None:
        """Configure the parser name and the extensions it handles."""
        super().__init__(name="typescript", extensions=[".ts", ".tsx"])
class ParserRegistry:
    """Registry mapping file extensions to the parser that handles them.

    Extensions are stored lower-cased, and lookups lower-case the file
    suffix, so registration and retrieval agree regardless of letter case.
    """

    def __init__(self) -> None:
        """Create an empty registry."""
        # Lower-cased extension (e.g. ".py") -> parser registered for it.
        self._parsers: dict = {}

    def register(self, parser: "Parser") -> None:
        """Register ``parser`` for every extension it declares.

        A later registration for the same extension replaces the earlier one.

        Args:
            parser: Object exposing an ``extensions`` iterable of strings.
        """
        for ext in parser.extensions:
            # Normalise case here because get() looks up the lower-cased
            # suffix; storing the extension verbatim would make an
            # upper- or mixed-case registration unreachable.
            self._parsers[ext.lower()] = parser

    def get(self, file_path: Path) -> Optional["Parser"]:
        """Return the parser registered for ``file_path``'s suffix.

        Args:
            file_path: Path whose final suffix selects the parser.

        Returns:
            The matching parser, or ``None`` when no parser is registered
            for that suffix.
        """
        return self._parsers.get(file_path.suffix.lower())

    def list_extensions(self) -> List[str]:
        """Return the registered extensions, in registration order."""
        return list(self._parsers)
def get_default_registry() -> ParserRegistry:
    """Build a registry pre-populated with the bundled parsers.

    Returns:
        A new ``ParserRegistry`` with the Python, JavaScript and TypeScript
        parsers registered, in that order.
    """
    default_registry = ParserRegistry()
    for parser_cls in (PythonParser, JavaScriptParser, TypeScriptParser):
        default_registry.register(parser_cls())
    return default_registry