135 lines
3.7 KiB
Python
135 lines
3.7 KiB
Python
"""Simple regex-based parser for string literals."""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import List, Optional
|
|
|
|
from i18n_guardian.parsers.base import Parser, StringLiteral
|
|
|
|
|
|
class SimpleParser(Parser):
    """Simple regex-based parser for string literals.

    Matches single-quoted, double-quoted and backtick-quoted literals with a
    regular expression. The file is scanned one line at a time, so a literal
    that spans several lines is not matched as a single unit, and no attempt
    is made to skip comments.
    """

    # Characters that may follow a backslash and the text they decode to.
    # Any other backslash sequence is kept exactly as written.
    _ESCAPES = {
        "\\": "\\",
        "n": "\n",
        "t": "\t",
        "r": "\r",
        "'": "'",
        '"': '"',
    }

    # A backslash plus the character after it. Consuming escapes pairwise,
    # left to right, keeps an escaped backslash from being re-read as the
    # start of another escape.
    _ESCAPE_PATTERN = re.compile(r"\\(.)", re.DOTALL)

    def __init__(self, name: str, extensions: List[str]) -> None:
        """Store the parser name and the file extensions it handles.

        Args:
            name: Identifier returned by the ``name`` property.
            extensions: File extensions returned by the ``extensions`` property.
        """
        self._name = name
        self._extensions = extensions
        # One named group per quote style; the group that matched tells
        # ``parse`` which kind of literal was found.
        self._string_pattern = re.compile(
            r'(?:(?P<single>\'(?:[^\'\\]|\\.)*\')|(?P<double>"(?:[^"\\]|\\.)*")|(?P<backtick>`(?:[^`\\]|\\.)*`))',
            re.MULTILINE,
        )

    @property
    def name(self) -> str:
        """Return the parser name given at construction."""
        return self._name

    @property
    def extensions(self) -> List[str]:
        """Return the extensions list given at construction."""
        return self._extensions

    def parse(self, file_path: Path) -> List[StringLiteral]:
        """Parse file and extract string literals.

        Args:
            file_path: File to read as UTF-8 text.

        Returns:
            One ``StringLiteral`` per match, in file order. ``line`` and
            ``column`` are 1-based and point at the opening quote. The list
            is empty when the file cannot be read or decoded.
        """
        literals: List[StringLiteral] = []

        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable file yields no literals.
            return literals

        for line_num, line in enumerate(content.split("\n"), start=1):
            for match in self._string_pattern.finditer(line):
                # Only the three named groups capture, so ``lastgroup`` is
                # the name of the alternative that matched.
                quote_type = match.lastgroup or "backtick"
                literals.append(
                    StringLiteral(
                        value=self._extract_value(match.group(0), quote_type),
                        file_path=file_path,
                        line=line_num,
                        column=match.start() + 1,
                        is_template=quote_type == "backtick",
                    )
                )

        return literals

    def _extract_value(self, raw: str, quote_type: str) -> str:
        """Extract the actual string value without quotes.

        Args:
            raw: Literal text including its surrounding quote characters.
            quote_type: ``"single"``, ``"double"`` or ``"backtick"``.

        Returns:
            The text between the quotes with the escapes listed in
            ``_ESCAPES`` decoded. An escaped backtick is decoded only for
            backtick literals. ``raw`` is returned unchanged when it is
            shorter than two characters.
        """
        if len(raw) < 2:
            return raw

        is_backtick = quote_type == "backtick"

        def decode(match):
            char = match.group(1)
            if char == "`" and is_backtick:
                return "`"
            # Unknown escapes stay verbatim (backslash included).
            return self._ESCAPES.get(char, match.group(0))

        # Single pass: chained str.replace calls would re-read the output of
        # an earlier replacement, turning an escaped backslash followed by
        # "n" into a newline.
        return self._ESCAPE_PATTERN.sub(decode, raw[1:-1])
|
|
|
|
|
|
class PythonParser(SimpleParser):
    """String-literal parser named ``python`` that handles ``.py`` files."""

    def __init__(self) -> None:
        """Configure the base parser with the Python name and extension."""
        super().__init__(name="python", extensions=[".py"])
|
|
|
|
|
|
class JavaScriptParser(SimpleParser):
    """String-literal parser named ``javascript`` for ``.js`` and ``.mjs`` files."""

    def __init__(self) -> None:
        """Configure the base parser with the JavaScript name and extensions."""
        super().__init__(name="javascript", extensions=[".js", ".mjs"])
|
|
|
|
|
|
class TypeScriptParser(SimpleParser):
    """String-literal parser named ``typescript`` for ``.ts`` and ``.tsx`` files."""

    def __init__(self) -> None:
        """Configure the base parser with the TypeScript name and extensions."""
        super().__init__(name="typescript", extensions=[".ts", ".tsx"])
|
|
|
|
|
|
class ParserRegistry:
    """Registry for parsers by file extension.

    Extensions are stored lower-cased so that registration agrees with
    ``get``, which lower-cases the suffix of the path it is given.
    """

    def __init__(self) -> None:
        """Create an empty registry."""
        # Maps a lower-cased extension to the parser registered for it.
        self._parsers: dict = {}

    def register(self, parser: "Parser") -> None:
        """Register a parser for every extension in ``parser.extensions``.

        A later registration for the same extension replaces the earlier one.
        """
        for ext in parser.extensions:
            # Normalize case here; ``get`` looks up the lower-cased suffix,
            # so a key such as ".PY" would otherwise never be found.
            self._parsers[ext.lower()] = parser

    def get(self, file_path: Path) -> Optional["Parser"]:
        """Get parser for file extension.

        Returns:
            The parser registered for the lower-cased suffix of
            ``file_path``, or ``None`` when no parser is registered for it.
        """
        return self._parsers.get(file_path.suffix.lower())

    def list_extensions(self) -> List[str]:
        """List all supported extensions, in registration order."""
        return list(self._parsers)
|
|
|
|
|
|
def get_default_registry() -> ParserRegistry:
    """Build a registry holding the Python, JavaScript and TypeScript parsers.

    A new ``ParserRegistry`` is created on every call, and the parsers are
    registered in that order.
    """
    default_registry = ParserRegistry()
    for parser_cls in (PythonParser, JavaScriptParser, TypeScriptParser):
        default_registry.register(parser_cls())
    return default_registry
|