Add code analyzers (Python, JS, Go, Rust) with tree-sitter
This commit is contained in:
207
src/auto_readme/analyzers/javascript_analyzer.py
Normal file
207
src/auto_readme/analyzers/javascript_analyzer.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
"""JavaScript/TypeScript code analyzer using tree-sitter."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
from tree_sitter import Language, Node, Parser
|
||||||
|
|
||||||
|
from tree_sitter_javascript import language as javascript_language
|
||||||
|
|
||||||
|
from . import BaseAnalyzer
|
||||||
|
from ..models import Function, Class, ImportStatement
|
||||||
|
|
||||||
|
|
||||||
|
class JavaScriptAnalyzer(BaseAnalyzer):
    """Analyzer for JavaScript and TypeScript source files.

    Every supported extension is parsed with the tree-sitter JavaScript
    grammar, which is the only grammar this class loads.

    NOTE(review): TypeScript-only syntax is presumably reported as error
    nodes by the JavaScript grammar, so results for ``.ts``/``.tsx`` files
    may be incomplete -- confirm against real inputs.
    """

    SUPPORTED_EXTENSIONS = {".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"}

    # Node types that declare a function carrying its own ``name`` field.
    _FUNCTION_NODE_TYPES = (
        "function_declaration",
        "method_definition",
        "generator_function_declaration",
    )

    # Parent node type -> field holding the name an arrow function is bound to.
    _ARROW_OWNER_FIELDS = {
        "variable_declarator": "name",
        "pair": "key",
        "field_definition": "property",
    }

    # Name nodes that are plain identifiers (strings, computed keys and
    # destructuring patterns are deliberately not treated as function names).
    _ARROW_NAME_TYPES = (
        "identifier",
        "property_identifier",
        "private_property_identifier",
    )

    @staticmethod
    def _node_text(node: Node, content_bytes: bytes) -> str:
        """Return the source text covered by *node*."""
        return content_bytes[node.start_byte : node.end_byte].decode("utf-8")

    @staticmethod
    def _iter_nodes(root: Node):
        """Yield *root* and all of its descendants in pre-order.

        The traversal uses an explicit stack instead of recursion so that
        deeply nested sources (e.g. minified bundles) cannot exhaust the
        Python recursion limit.
        """
        stack = [root]
        while stack:
            current = stack.pop()
            yield current
            # Reversed so that children are popped in source order.
            stack.extend(reversed(current.children))

    def can_analyze(self, path: Path) -> bool:
        """Return True when the file extension of *path* is supported."""
        return path.suffix.lower() in self.SUPPORTED_EXTENSIONS

    def analyze(self, path: Path) -> dict:
        """Analyze a JavaScript/TypeScript file.

        Args:
            path: File to analyze.

        Returns:
            A dict with the keys ``"functions"``, ``"classes"`` and
            ``"imports"``. All three lists are empty when the file has no
            content or the parser could not be created or run.
        """
        content = self._get_file_content(path)
        if not content:
            return {"functions": [], "classes": [], "imports": []}

        content_bytes = content.encode("utf-8")

        try:
            parser = Parser(language=Language(javascript_language()))
            tree = parser.parse(content_bytes)
        except Exception:
            # Analysis is best effort: a parser failure must not abort the
            # caller, but leave a trace for debugging instead of hiding it.
            import logging

            logging.getLogger(__name__).debug(
                "tree-sitter could not parse %s", path, exc_info=True
            )
            return {"functions": [], "classes": [], "imports": []}

        root = tree.root_node
        return {
            "functions": self._extract_functions(root, content, content_bytes),
            "classes": self._extract_classes(root, content, content_bytes),
            "imports": self._extract_imports(root, content_bytes),
        }

    def _extract_functions(self, node: Node, content: str, content_bytes: bytes) -> list[Function]:
        """Collect every function found in the subtree rooted at *node*.

        Declarations, methods and generator declarations are always
        reported; arrow functions only when they are bound to a name (see
        ``_find_parent_function``). Results are in source order.
        """
        functions = []
        for current in self._iter_nodes(node):
            if current.type in self._FUNCTION_NODE_TYPES:
                func = self._parse_function(current, content, content_bytes)
            elif current.type == "arrow_function":
                func = self._find_parent_function(current, content, content_bytes)
            else:
                continue
            if func is not None:
                functions.append(func)
        return functions

    def _extract_classes(self, node: Node, content: str, content_bytes: bytes) -> list[Class]:
        """Collect every ``class_declaration`` in the subtree rooted at *node*."""
        classes = []
        for current in self._iter_nodes(node):
            if current.type != "class_declaration":
                continue
            cls = self._parse_class(current, content, content_bytes)
            if cls is not None:
                classes.append(cls)
        return classes

    def _extract_imports(self, node: Node, content_bytes: bytes) -> list[ImportStatement]:
        """Collect every ``import_statement`` in the subtree rooted at *node*."""
        imports = []
        for current in self._iter_nodes(node):
            if current.type != "import_statement":
                continue
            imp = self._parse_import(current, content_bytes)
            if imp is not None:
                imports.append(imp)
        return imports

    def _parse_function(self, node: Node, content: str, content_bytes: bytes) -> Optional[Function]:
        """Build a ``Function`` from a declaration or method node.

        The name and parameter list are read from the node's ``name`` and
        ``parameters`` fields, which covers both function declarations
        (``identifier`` name) and class/object methods
        (``property_identifier`` name). The name falls back to
        ``"anonymous"`` when the node has no name field.
        """
        name_node = node.child_by_field_name("name")
        params_node = node.child_by_field_name("parameters")

        name = "anonymous"
        if name_node is not None:
            name = self._node_text(name_node, content_bytes)

        parameters = []
        if params_node is not None:
            parameters = self._parse_parameters(params_node, content_bytes)

        return Function(
            name=name,
            parameters=parameters,
            docstring=None,
            line_number=node.start_point[0] + 1,
        )

    def _find_parent_function(self, node: Node, content: str, content_bytes: bytes) -> Optional[Function]:
        """Build a ``Function`` for an arrow function that is bound to a name.

        Only the construct that directly owns the arrow function is
        inspected (ignoring wrapping parentheses): a variable declarator, an
        object-literal pair or a class field. Anonymous arrows such as
        callbacks return ``None`` instead of being attributed to a distant
        enclosing declaration, which would report the same name twice.
        """
        owner = node.parent
        while owner is not None and owner.type == "parenthesized_expression":
            owner = owner.parent
        if owner is None:
            return None

        name_field = self._ARROW_OWNER_FIELDS.get(owner.type)
        if name_field is None:
            return None

        name_node = owner.child_by_field_name(name_field)
        if name_node is None or name_node.type not in self._ARROW_NAME_TYPES:
            return None

        # Arrow functions have either a parenthesized ``parameters`` list or
        # a single bare ``parameter`` identifier (``x => x``).
        params_node = node.child_by_field_name("parameters")
        if params_node is not None:
            parameters = self._parse_parameters(params_node, content_bytes)
        else:
            single = node.child_by_field_name("parameter")
            parameters = [] if single is None else [self._node_text(single, content_bytes)]

        return Function(
            name=self._node_text(name_node, content_bytes),
            parameters=parameters,
            line_number=node.start_point[0] + 1,
        )

    def _parse_class(self, node: Node, content: str, content_bytes: bytes) -> Optional[Class]:
        """Build a ``Class`` from a ``class_declaration`` node.

        The class name comes from the ``name`` field (``"Unknown"`` when it
        is missing), base classes from the ``extends`` clause, and methods
        from every function found inside the class body.
        """
        name_node = node.child_by_field_name("name")
        name = None
        if name_node is not None:
            name = self._node_text(name_node, content_bytes)

        base_classes = []
        methods = []
        for child in node.children:
            if child.type == "class_heritage":
                # ``extends Base`` or a dotted ``extends ns.Base``.
                base_classes.extend(
                    self._node_text(base, content_bytes)
                    for base in child.named_children
                    if base.type in ("identifier", "member_expression")
                )
            elif child.type == "class_body":
                methods = self._extract_functions(child, content, content_bytes)

        return Class(
            name=name or "Unknown",
            base_classes=base_classes,
            methods=methods,
            docstring=None,
            line_number=node.start_point[0] + 1,
        )

    def _parse_import(self, node: Node, content_bytes: bytes) -> Optional[ImportStatement]:
        """Build an ``ImportStatement`` from an ``import_statement`` node.

        ``items`` receives the default import and every named import (the
        imported name, not its local alias); ``alias`` is the local name of
        a namespace import (``import * as ns``); ``module`` is the source
        string without its quotes, or ``""`` when no source was found.
        """
        module = None
        items = []
        alias = None

        for child in node.children:
            if child.type == "import_clause":
                for part in child.named_children:
                    if part.type == "identifier":
                        # Default import: ``import React from "react"``.
                        items.append(self._node_text(part, content_bytes))
                    elif part.type == "namespace_import":
                        local = next(
                            (sub for sub in part.named_children if sub.type == "identifier"),
                            part,
                        )
                        alias = self._node_text(local, content_bytes)
                    elif part.type == "named_imports":
                        for spec in part.named_children:
                            if spec.type != "import_specifier":
                                continue
                            imported = spec.child_by_field_name("name") or spec
                            items.append(self._node_text(imported, content_bytes))
            elif child.type == "string":
                module = self._node_text(child, content_bytes).strip('"').strip("'")

        return ImportStatement(
            module=module or "",
            items=items,
            alias=alias,
            line_number=node.start_point[0] + 1,
        )

    def _parse_parameters(self, node: Node, content_bytes: bytes) -> list[str]:
        """Return the parameter names of a ``formal_parameters`` node.

        Plain identifiers, rest parameters and destructuring patterns are
        returned as written; for a parameter with a default value only the
        left-hand side is kept.
        """
        params = []
        for child in node.named_children:
            kind = child.type
            if kind in (
                "identifier",
                "rest_pattern",
                "rest_parameter",
                "optional_parameter",
                "object_pattern",
                "array_pattern",
            ):
                params.append(self._node_text(child, content_bytes))
            elif kind == "assignment_pattern":
                # ``b = 1`` -> ``b``
                target = child.child_by_field_name("left")
                if target is not None:
                    params.append(self._node_text(target, content_bytes))
            elif kind in ("typed_parameter", "parameter"):
                for grandchild in child.children:
                    if grandchild.type == "identifier":
                        params.append(self._node_text(grandchild, content_bytes))
                        break
        return params
|
||||||
Reference in New Issue
Block a user