Add parsers module
This commit is contained in:
134
i18n_guardian/parsers/simple.py
Normal file
134
i18n_guardian/parsers/simple.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Simple regex-based parser for string literals."""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from i18n_guardian.parsers.base import Parser, StringLiteral
|
||||
|
||||
|
||||
class SimpleParser(Parser):
    """Regex-based parser that extracts quoted string literals from a file.

    The file is scanned one line at a time, so a literal is only reported
    when its opening and closing quote sit on the same line.  Single-quoted,
    double-quoted and backtick-quoted literals are recognised.  Nothing
    besides the quotes is inspected: string prefixes, comments and
    triple-quoted strings get no special treatment.
    """

    # One named alternative per quote style.  Inside a literal a backslash
    # always consumes the next character, so an escaped quote does not
    # terminate the match.
    _STRING_PATTERN = re.compile(
        r'(?:(?P<single>\'(?:[^\'\\]|\\.)*\')|(?P<double>"(?:[^"\\]|\\.)*")|(?P<backtick>`(?:[^`\\]|\\.)*`))',
        re.MULTILINE,
    )

    # A backslash followed by any single character.
    _ESCAPE_PATTERN = re.compile(r"\\(.)", re.DOTALL)

    # Escape character -> decoded character, valid for every quote style.
    _COMMON_ESCAPES = {
        "\\": "\\",
        "n": "\n",
        "t": "\t",
        "r": "\r",
        "'": "'",
        '"': '"',
    }

    def __init__(self, name: str, extensions: List[str]) -> None:
        """Create a parser.

        Args:
            name: Identifier exposed through the ``name`` property.
            extensions: File extensions exposed through the ``extensions``
                property (e.g. ``[".py"]``).
        """
        self._name = name
        self._extensions = extensions
        # Kept as an instance attribute so code that reads or overrides
        # ``_string_pattern`` keeps working.
        self._string_pattern = self._STRING_PATTERN

    @property
    def name(self) -> str:
        """Return the parser name given at construction time."""
        return self._name

    @property
    def extensions(self) -> List[str]:
        """Return the file extensions given at construction time."""
        return self._extensions

    def parse(self, file_path: Path) -> List[StringLiteral]:
        """Parse a file and extract its string literals.

        Args:
            file_path: File to read; it is decoded as UTF-8.

        Returns:
            One ``StringLiteral`` per match, in file order, carrying the
            decoded value, 1-based line and column of the opening quote, and
            ``is_template=True`` for backtick literals.  An empty list is
            returned when the file cannot be read or is not valid UTF-8.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: unreadable files simply contribute no literals.
            return []

        literals: List[StringLiteral] = []
        for line_num, line in enumerate(content.split("\n"), start=1):
            for match in self._string_pattern.finditer(line):
                # Exactly one of the three named groups takes part in a
                # match, so ``lastgroup`` names the quote style.
                quote_type = match.lastgroup or "backtick"
                literals.append(
                    StringLiteral(
                        value=self._extract_value(match.group(0), quote_type),
                        file_path=file_path,
                        line=line_num,
                        column=match.start() + 1,
                        is_template=quote_type == "backtick",
                    )
                )

        return literals

    def _extract_value(self, raw: str, quote_type: str) -> str:
        """Return the value of a literal without quotes, escapes decoded.

        Decoding happens in one left-to-right pass so that every backslash
        is interpreted exactly once: an escaped backslash followed by ``n``
        stays a backslash plus ``n`` instead of collapsing into a newline,
        which is what chained ``str.replace`` calls would produce.

        Args:
            raw: The literal including its surrounding quote characters.
            quote_type: ``"single"``, ``"double"`` or ``"backtick"``.

        Returns:
            The decoded inner text.  Escape sequences that are not
            recognised are left unchanged.  Input shorter than two
            characters is returned as is.
        """
        if len(raw) < 2:
            return raw

        escapes = self._COMMON_ESCAPES
        if quote_type == "backtick":
            # An escaped backtick is only meaningful inside template literals.
            escapes = {**escapes, "`": "`"}

        def _decode(match: "re.Match[str]") -> str:
            # Unknown escapes fall back to the original two characters.
            return escapes.get(match.group(1), match.group(0))

        return self._ESCAPE_PATTERN.sub(_decode, raw[1:-1])
|
||||
|
||||
|
||||
class PythonParser(SimpleParser):
    """SimpleParser preconfigured with the name "python" for ``.py`` files."""

    def __init__(self) -> None:
        """Set up the parser with its fixed name and extension list."""
        super().__init__(name="python", extensions=[".py"])
|
||||
|
||||
|
||||
class JavaScriptParser(SimpleParser):
    """SimpleParser preconfigured as "javascript" for ``.js`` and ``.mjs`` files."""

    def __init__(self) -> None:
        """Set up the parser with its fixed name and extension list."""
        super().__init__(name="javascript", extensions=[".js", ".mjs"])
|
||||
|
||||
|
||||
class TypeScriptParser(SimpleParser):
    """SimpleParser preconfigured as "typescript" for ``.ts`` and ``.tsx`` files."""

    def __init__(self) -> None:
        """Set up the parser with its fixed name and extension list."""
        super().__init__(name="typescript", extensions=[".ts", ".tsx"])
|
||||
|
||||
|
||||
class ParserRegistry:
    """Registry that maps file extensions to the parser handling them.

    Extensions are matched case-insensitively: they are lower-cased both
    when a parser is registered and when a file is looked up.
    """

    def __init__(self) -> None:
        """Create an empty registry."""
        # Lower-cased extension (as declared by the parser) -> parser.
        self._parsers: dict = {}

    def register(self, parser: "Parser") -> None:
        """Register a parser for every extension it declares.

        A later registration for the same extension replaces the earlier one.

        Args:
            parser: Object exposing an ``extensions`` iterable of strings.
        """
        for ext in parser.extensions:
            # Normalise here because ``get`` lower-cases the file suffix;
            # without this an extension declared as ".PY" would never match.
            self._parsers[ext.lower()] = parser

    def get(self, file_path: Path) -> Optional["Parser"]:
        """Return the parser registered for the file's suffix.

        Args:
            file_path: Path whose suffix selects the parser.

        Returns:
            The matching parser, or ``None`` when the suffix is not registered.
        """
        return self._parsers.get(file_path.suffix.lower())

    def list_extensions(self) -> List[str]:
        """Return all registered extensions, lower-cased, in registration order."""
        return list(self._parsers)
|
||||
|
||||
|
||||
def get_default_registry() -> ParserRegistry:
    """Build a new registry holding the Python, JavaScript and TypeScript parsers."""
    default_registry = ParserRegistry()
    # Instantiate and register in this fixed order.
    for parser_cls in (PythonParser, JavaScriptParser, TypeScriptParser):
        default_registry.register(parser_cls())
    return default_registry
|
||||
Reference in New Issue
Block a user