"""Core language parser interface and implementations.""" from abc import ABC, abstractmethod from pathlib import Path from typing import Optional import tree_sitter class LanguageParser(ABC): """Abstract base class for language parsers.""" @property @abstractmethod def language_name(self) -> str: """Return the name of the language.""" pass @abstractmethod def parse(self, source_code: str) -> tree_sitter.Tree: """Parse source code and return AST tree.""" pass @abstractmethod def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: """Extract function definitions from AST.""" pass @abstractmethod def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: """Extract function calls from AST.""" pass @abstractmethod def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: """Extract string literals from AST.""" pass class PythonParser(LanguageParser): """Python language parser using tree-sitter.""" def __init__(self): self._parser: Optional[tree_sitter.Parser] = None @property def language_name(self) -> str: return "python" def _ensure_initialized(self): if self._parser is None: from tree_sitter_python import language lang = tree_sitter.Language(language()) self._parser = tree_sitter.Parser(lang) def parse(self, source_code: str) -> tree_sitter.Tree: self._ensure_initialized() return self._parser.parse(source_code.encode()) def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: functions = [] if hasattr(node, "type") and node.type in {"function_definition", "async_function_definition"}: functions.append(node) if hasattr(node, "children"): for child in node.children: functions.extend(self.get_functions(child)) return functions def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: calls = [] if hasattr(node, "type") and node.type == "call": calls.append(node) if hasattr(node, "children"): for child in node.children: calls.extend(self.get_calls(child)) return calls def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: strings = [] if hasattr(node, "type") and node.type == "string": strings.append(node) if hasattr(node, "children"): for child in node.children: strings.extend(self.get_strings(child)) return strings class JavaScriptParser(LanguageParser): """JavaScript language parser using tree-sitter.""" def __init__(self): self._parser: Optional[tree_sitter.Parser] = None @property def language_name(self) -> str: return "javascript" def _ensure_initialized(self): if self._parser is None: from tree_sitter_javascript import language lang = tree_sitter.Language(language()) self._parser = tree_sitter.Parser(lang) def parse(self, source_code: str) -> tree_sitter.Tree: self._ensure_initialized() return self._parser.parse(source_code.encode()) def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: functions = [] if hasattr(node, "type") and node.type in {"function_declaration", "arrow_function", "method_definition"}: functions.append(node) if hasattr(node, "children"): for child in node.children: functions.extend(self.get_functions(child)) return functions def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: calls = [] if hasattr(node, "type") and node.type == "call_expression": calls.append(node) if hasattr(node, "children"): for child in node.children: calls.extend(self.get_calls(child)) return calls def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: strings = [] if hasattr(node, "type") and node.type in {"string", "template_string"}: strings.append(node) if hasattr(node, "children"): for child in node.children: strings.extend(self.get_strings(child)) return strings class TypeScriptParser(LanguageParser): """TypeScript language parser using tree-sitter.""" def __init__(self): self._parser: Optional[tree_sitter.Parser] = None @property def language_name(self) -> str: return "typescript" def _ensure_initialized(self): if self._parser is None: from tree_sitter_typescript import language_typescript lang = tree_sitter.Language(language_typescript()) self._parser = tree_sitter.Parser(lang) def parse(self, source_code: str) -> tree_sitter.Tree: self._ensure_initialized() return self._parser.parse(source_code.encode()) def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: functions = [] if hasattr(node, "type") and node.type in {"function_declaration", "arrow_function", "method_definition"}: functions.append(node) if hasattr(node, "children"): for child in node.children: functions.extend(self.get_functions(child)) return functions def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: calls = [] if hasattr(node, "type") and node.type == "call_expression": calls.append(node) if hasattr(node, "children"): for child in node.children: calls.extend(self.get_calls(child)) return calls def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]: strings = [] if hasattr(node, "type") and node.type in {"string", "template_string"}: strings.append(node) if hasattr(node, "children"): for child in node.children: strings.extend(self.get_strings(child)) return strings class ParserFactory: """Factory for creating language parsers.""" _parsers: dict[str, LanguageParser] = { "python": PythonParser(), "javascript": JavaScriptParser(), "typescript": TypeScriptParser(), } @classmethod def get_parser(cls, language: str) -> Optional[LanguageParser]: return cls._parsers.get(language.lower()) @classmethod def supported_languages(cls) -> list[str]: return list(cls._parsers.keys())