# Reconstructed from patch 377a3c3c8608d0bb38149b8fc5251fe12a8c1046
# ("Add core analyzer modules: base classes and language parsers"),
# which creates src/analyzers/__init__.py.
"""Core language parser interface and implementations."""

from abc import ABC, abstractmethod
from pathlib import Path
from typing import ClassVar, Optional

import tree_sitter


class LanguageParser(ABC):
    """Abstract base class for language parsers."""

    @property
    @abstractmethod
    def language_name(self) -> str:
        """Return the name of the language."""

    @abstractmethod
    def parse(self, source_code: str) -> tree_sitter.Tree:
        """Parse source code and return AST tree."""

    @abstractmethod
    def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Extract function definitions from AST."""

    @abstractmethod
    def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Extract function calls from AST."""

    @abstractmethod
    def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Extract string literals from AST."""


def _collect_nodes(
    root: tree_sitter.Node, node_types: frozenset[str]
) -> list[tree_sitter.Node]:
    """Return every node under *root* (inclusive) whose type is in *node_types*.

    Nodes are returned in pre-order (a node before its children, children
    left to right). The walk uses an explicit stack instead of recursion so
    that deeply nested trees cannot raise ``RecursionError``.

    Objects lacking a ``type`` or ``children`` attribute are tolerated: they
    simply never match / are treated as leaves.
    """
    matches: list[tree_sitter.Node] = []
    stack = [root]
    while stack:
        current = stack.pop()
        if getattr(current, "type", None) in node_types:
            matches.append(current)
        children = getattr(current, "children", None)
        if children:
            # Push in reverse so the leftmost child is popped (visited) first.
            stack.extend(reversed(children))
    return matches


class _TreeSitterParser(LanguageParser):
    """Shared tree-sitter plumbing for the concrete language parsers.

    Subclasses provide ``language_name``, ``_load_language`` and the three
    node-type sets; parser construction and AST traversal live here.
    """

    # Node type names that count as function definitions / calls / strings.
    _FUNCTION_TYPES: ClassVar[frozenset[str]] = frozenset()
    _CALL_TYPES: ClassVar[frozenset[str]] = frozenset()
    _STRING_TYPES: ClassVar[frozenset[str]] = frozenset()

    def __init__(self) -> None:
        # Built on first use so that the grammar package is only imported
        # when a parse is actually requested.
        self._parser: Optional[tree_sitter.Parser] = None

    @abstractmethod
    def _load_language(self) -> object:
        """Import the grammar package and return its language handle."""

    def _ensure_initialized(self) -> tree_sitter.Parser:
        """Create the underlying parser on first call and return it."""
        if self._parser is None:
            lang = tree_sitter.Language(self._load_language())
            self._parser = tree_sitter.Parser(lang)
        return self._parser

    def parse(self, source_code: str) -> tree_sitter.Tree:
        """Encode *source_code* as UTF-8 and parse it into a syntax tree."""
        return self._ensure_initialized().parse(source_code.encode("utf-8"))

    def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return function-definition nodes at or below *node*, in pre-order."""
        return _collect_nodes(node, self._FUNCTION_TYPES)

    def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return call nodes at or below *node*, in pre-order."""
        return _collect_nodes(node, self._CALL_TYPES)

    def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return string-literal nodes at or below *node*, in pre-order."""
        return _collect_nodes(node, self._STRING_TYPES)


class PythonParser(_TreeSitterParser):
    """Python language parser using tree-sitter."""

    _FUNCTION_TYPES = frozenset(
        {"function_definition", "async_function_definition"}
    )
    _CALL_TYPES = frozenset({"call"})
    _STRING_TYPES = frozenset({"string"})

    @property
    def language_name(self) -> str:
        return "python"

    def _load_language(self) -> object:
        from tree_sitter_python import language

        return language()


class JavaScriptParser(_TreeSitterParser):
    """JavaScript language parser using tree-sitter."""

    _FUNCTION_TYPES = frozenset(
        {"function_declaration", "arrow_function", "method_definition"}
    )
    _CALL_TYPES = frozenset({"call_expression"})
    _STRING_TYPES = frozenset({"string", "template_string"})

    @property
    def language_name(self) -> str:
        return "javascript"

    def _load_language(self) -> object:
        from tree_sitter_javascript import language

        return language()


class TypeScriptParser(_TreeSitterParser):
    """TypeScript language parser using tree-sitter."""

    _FUNCTION_TYPES = frozenset(
        {"function_declaration", "arrow_function", "method_definition"}
    )
    _CALL_TYPES = frozenset({"call_expression"})
    _STRING_TYPES = frozenset({"string", "template_string"})

    @property
    def language_name(self) -> str:
        return "typescript"

    def _load_language(self) -> object:
        from tree_sitter_typescript import language_typescript

        return language_typescript()


class ParserFactory:
    """Factory for creating language parsers."""

    # One shared parser instance per supported language, keyed by lowercase
    # language name. Instances are cheap until first parse (lazy init).
    _parsers: ClassVar[dict[str, LanguageParser]] = {
        "python": PythonParser(),
        "javascript": JavaScriptParser(),
        "typescript": TypeScriptParser(),
    }

    @classmethod
    def get_parser(cls, language: str) -> Optional[LanguageParser]:
        """Return the parser registered for *language* (case-insensitive).

        Returns ``None`` when the language is not supported.
        """
        return cls._parsers.get(language.lower())

    @classmethod
    def supported_languages(cls) -> list[str]:
        """Return the names of all registered languages."""
        return list(cls._parsers)