Add core analyzer modules: base classes and language parsers
This commit is contained in:
198
src/analyzers/__init__.py
Normal file
198
src/analyzers/__init__.py
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
"""Core language parser interface and implementations."""
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
import tree_sitter
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageParser(ABC):
    """Interface that every language-specific parser implements.

    A concrete subclass supplies the language name, a way to turn source
    text into a tree-sitter tree, and three extractors that pull function
    definitions, calls and string literals out of a node.
    """

    @property
    @abstractmethod
    def language_name(self) -> str:
        """Name of the language handled by this parser."""
        ...

    @abstractmethod
    def parse(self, source_code: str) -> tree_sitter.Tree:
        """Parse ``source_code`` and return the resulting AST tree."""
        ...

    @abstractmethod
    def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return the function definition nodes found in ``node``."""
        ...

    @abstractmethod
    def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return the function call nodes found in ``node``."""
        ...

    @abstractmethod
    def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return the string literal nodes found in ``node``."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class PythonParser(LanguageParser):
    """Python implementation of ``LanguageParser`` backed by tree-sitter.

    The underlying ``tree_sitter.Parser`` is created lazily on the first call
    to :meth:`parse`; constructing this object does not import
    ``tree_sitter_python``.
    """

    # Node type names matched by each extractor.
    # NOTE(review): "async_function_definition" is carried over unchanged from
    # the previous implementation; confirm the installed grammar emits it.
    _FUNCTION_TYPES = frozenset({"function_definition", "async_function_definition"})
    _CALL_TYPES = frozenset({"call"})
    _STRING_TYPES = frozenset({"string"})

    def __init__(self):
        # Built on demand by _ensure_initialized().
        self._parser: Optional[tree_sitter.Parser] = None

    @property
    def language_name(self) -> str:
        """Return the language identifier, ``"python"``."""
        return "python"

    def _ensure_initialized(self):
        """Create the tree-sitter parser on first use; later calls do nothing."""
        if self._parser is None:
            # Imported here so the grammar package is only needed once Python
            # parsing is actually requested.
            from tree_sitter_python import language

            lang = tree_sitter.Language(language())
            self._parser = tree_sitter.Parser(lang)

    def parse(self, source_code: str) -> tree_sitter.Tree:
        """Parse ``source_code`` and return the tree-sitter tree.

        The text is encoded with ``str.encode()`` (UTF-8) before parsing.
        """
        self._ensure_initialized()
        return self._parser.parse(source_code.encode())

    @staticmethod
    def _collect(root: tree_sitter.Node, wanted: frozenset) -> list[tree_sitter.Node]:
        """Return ``root`` and all descendants whose ``type`` is in ``wanted``.

        Results are in pre-order (a parent before its children, siblings left
        to right). An explicit stack replaces recursion so that deeply nested
        trees cannot raise ``RecursionError``.
        """
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            if getattr(current, "type", None) in wanted:
                found.append(current)
            # Push children reversed so the leftmost child is visited first.
            pending.extend(reversed(getattr(current, "children", ())))
        return found

    def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every function definition node at or below ``node``."""
        return self._collect(node, self._FUNCTION_TYPES)

    def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every ``call`` node at or below ``node``."""
        return self._collect(node, self._CALL_TYPES)

    def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every ``string`` node at or below ``node``."""
        return self._collect(node, self._STRING_TYPES)
|
||||||
|
|
||||||
|
|
||||||
|
class JavaScriptParser(LanguageParser):
    """JavaScript implementation of ``LanguageParser`` backed by tree-sitter.

    The underlying ``tree_sitter.Parser`` is created lazily on the first call
    to :meth:`parse`; constructing this object does not import
    ``tree_sitter_javascript``.
    """

    # Node type names matched by each extractor. Function expressions and the
    # generator forms are included so that `const f = function () {}` and
    # `function* g() {}` are reported alongside declarations, arrow functions
    # and methods.
    _FUNCTION_TYPES = frozenset(
        {
            "function_declaration",
            "function_expression",
            "generator_function",
            "generator_function_declaration",
            "arrow_function",
            "method_definition",
        }
    )
    _CALL_TYPES = frozenset({"call_expression"})
    _STRING_TYPES = frozenset({"string", "template_string"})

    def __init__(self):
        # Built on demand by _ensure_initialized().
        self._parser: Optional[tree_sitter.Parser] = None

    @property
    def language_name(self) -> str:
        """Return the language identifier, ``"javascript"``."""
        return "javascript"

    def _ensure_initialized(self):
        """Create the tree-sitter parser on first use; later calls do nothing."""
        if self._parser is None:
            # Imported here so the grammar package is only needed once
            # JavaScript parsing is actually requested.
            from tree_sitter_javascript import language

            lang = tree_sitter.Language(language())
            self._parser = tree_sitter.Parser(lang)

    def parse(self, source_code: str) -> tree_sitter.Tree:
        """Parse ``source_code`` and return the tree-sitter tree.

        The text is encoded with ``str.encode()`` (UTF-8) before parsing.
        """
        self._ensure_initialized()
        return self._parser.parse(source_code.encode())

    @staticmethod
    def _collect(root: tree_sitter.Node, wanted: frozenset) -> list[tree_sitter.Node]:
        """Return ``root`` and all descendants whose ``type`` is in ``wanted``.

        Results are in pre-order (a parent before its children, siblings left
        to right). An explicit stack replaces recursion so that deeply nested
        trees cannot raise ``RecursionError``.
        """
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            if getattr(current, "type", None) in wanted:
                found.append(current)
            # Push children reversed so the leftmost child is visited first.
            pending.extend(reversed(getattr(current, "children", ())))
        return found

    def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every function-like node at or below ``node``."""
        return self._collect(node, self._FUNCTION_TYPES)

    def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every ``call_expression`` node at or below ``node``."""
        return self._collect(node, self._CALL_TYPES)

    def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every ``string`` / ``template_string`` node at or below ``node``."""
        return self._collect(node, self._STRING_TYPES)
|
||||||
|
|
||||||
|
|
||||||
|
class TypeScriptParser(LanguageParser):
    """TypeScript implementation of ``LanguageParser`` backed by tree-sitter.

    The underlying ``tree_sitter.Parser`` is created lazily on the first call
    to :meth:`parse`; constructing this object does not import
    ``tree_sitter_typescript``. Only the ``language_typescript`` grammar is
    loaded here.
    """

    # Node type names matched by each extractor. Function expressions and the
    # generator forms are included so that `const f = function () {}` and
    # `function* g() {}` are reported alongside declarations, arrow functions
    # and methods.
    _FUNCTION_TYPES = frozenset(
        {
            "function_declaration",
            "function_expression",
            "generator_function",
            "generator_function_declaration",
            "arrow_function",
            "method_definition",
        }
    )
    _CALL_TYPES = frozenset({"call_expression"})
    _STRING_TYPES = frozenset({"string", "template_string"})

    def __init__(self):
        # Built on demand by _ensure_initialized().
        self._parser: Optional[tree_sitter.Parser] = None

    @property
    def language_name(self) -> str:
        """Return the language identifier, ``"typescript"``."""
        return "typescript"

    def _ensure_initialized(self):
        """Create the tree-sitter parser on first use; later calls do nothing."""
        if self._parser is None:
            # Imported here so the grammar package is only needed once
            # TypeScript parsing is actually requested.
            from tree_sitter_typescript import language_typescript

            lang = tree_sitter.Language(language_typescript())
            self._parser = tree_sitter.Parser(lang)

    def parse(self, source_code: str) -> tree_sitter.Tree:
        """Parse ``source_code`` and return the tree-sitter tree.

        The text is encoded with ``str.encode()`` (UTF-8) before parsing.
        """
        self._ensure_initialized()
        return self._parser.parse(source_code.encode())

    @staticmethod
    def _collect(root: tree_sitter.Node, wanted: frozenset) -> list[tree_sitter.Node]:
        """Return ``root`` and all descendants whose ``type`` is in ``wanted``.

        Results are in pre-order (a parent before its children, siblings left
        to right). An explicit stack replaces recursion so that deeply nested
        trees cannot raise ``RecursionError``.
        """
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            if getattr(current, "type", None) in wanted:
                found.append(current)
            # Push children reversed so the leftmost child is visited first.
            pending.extend(reversed(getattr(current, "children", ())))
        return found

    def get_functions(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every function-like node at or below ``node``."""
        return self._collect(node, self._FUNCTION_TYPES)

    def get_calls(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every ``call_expression`` node at or below ``node``."""
        return self._collect(node, self._CALL_TYPES)

    def get_strings(self, node: tree_sitter.Node) -> list[tree_sitter.Node]:
        """Return every ``string`` / ``template_string`` node at or below ``node``."""
        return self._collect(node, self._STRING_TYPES)
|
||||||
|
|
||||||
|
|
||||||
|
class ParserFactory:
    """Registry that hands out the shared parser instance for a language name."""

    # One parser object per supported language, created when the class body
    # runs and reused for every lookup.
    _parsers: dict[str, LanguageParser] = dict(
        python=PythonParser(),
        javascript=JavaScriptParser(),
        typescript=TypeScriptParser(),
    )

    @classmethod
    def get_parser(cls, language: str) -> Optional[LanguageParser]:
        """Look up the parser for ``language`` (case-insensitive).

        Returns ``None`` when the language is not registered.
        """
        key = language.lower()
        registry = cls._parsers
        return registry[key] if key in registry else None

    @classmethod
    def supported_languages(cls) -> list[str]:
        """Return the registered language names in registration order."""
        return [*cls._parsers]
|
||||||
Reference in New Issue
Block a user