"""JavaScript/TypeScript code analyzer using tree-sitter."""

from pathlib import Path
from typing import Optional

from tree_sitter import Language, Node, Parser
from tree_sitter_javascript import language as javascript_language

from . import BaseAnalyzer
from ..models import Class, Function, ImportStatement


class JavaScriptAnalyzer(BaseAnalyzer):
    """Analyzer for JavaScript and TypeScript source files.

    Walks the tree-sitter JavaScript parse tree to extract function
    definitions (including named arrow functions), class declarations with
    their methods and base classes, and ES-module import statements.
    """

    SUPPORTED_EXTENSIONS = {".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"}

    def can_analyze(self, path: Path) -> bool:
        """Return True if *path* has a JavaScript/TypeScript extension."""
        return path.suffix.lower() in self.SUPPORTED_EXTENSIONS

    def analyze(self, path: Path) -> dict:
        """Analyze a JS/TS file and extract functions, classes, and imports.

        Returns a dict with keys ``"functions"``, ``"classes"`` and
        ``"imports"``; all three lists are empty when the file cannot be
        read or the parser cannot be constructed.
        """
        content = self._get_file_content(path)
        if not content:
            return {"functions": [], "classes": [], "imports": []}

        content_bytes = content.encode("utf-8")

        try:
            lang = Language(javascript_language())
            parser = Parser(language=lang)
            tree = parser.parse(content_bytes)
        except Exception:
            # Grammar/binding failures degrade to an empty result rather
            # than aborting the whole analysis run.
            return {"functions": [], "classes": [], "imports": []}

        root = tree.root_node
        # NOTE(review): methods inside class bodies are reported BOTH in
        # "functions" (the recursion below descends into class_body) and in
        # each Class.methods — confirm callers expect that duplication.
        return {
            "functions": self._extract_functions(root, content, content_bytes),
            "classes": self._extract_classes(root, content, content_bytes),
            "imports": self._extract_imports(root, content_bytes),
        }

    def _extract_functions(
        self, node: Node, content: str, content_bytes: bytes
    ) -> list[Function]:
        """Recursively collect function definitions under *node*."""
        functions: list[Function] = []

        if node.type in (
            "function_declaration",
            "method_definition",
            "generator_function_declaration",
        ):
            func = self._parse_function(node, content, content_bytes)
            if func:
                functions.append(func)
        elif node.type == "arrow_function":
            # Arrow functions are anonymous; take the name from the variable
            # or object property they are assigned to, if any.
            named = self._find_parent_function(node, content, content_bytes)
            if named:
                functions.append(named)

        for child in node.children:
            functions.extend(self._extract_functions(child, content, content_bytes))

        return functions

    def _extract_classes(
        self, node: Node, content: str, content_bytes: bytes
    ) -> list[Class]:
        """Recursively collect class declarations under *node*."""
        classes: list[Class] = []

        if node.type == "class_declaration":
            cls = self._parse_class(node, content, content_bytes)
            if cls:
                classes.append(cls)

        for child in node.children:
            classes.extend(self._extract_classes(child, content, content_bytes))

        return classes

    def _extract_imports(self, node: Node, content_bytes: bytes) -> list[ImportStatement]:
        """Recursively collect ES-module import statements under *node*."""
        imports: list[ImportStatement] = []

        if node.type == "import_statement":
            imp = self._parse_import(node, content_bytes)
            if imp:
                imports.append(imp)

        for child in node.children:
            imports.extend(self._extract_imports(child, content_bytes))

        return imports

    def _parse_function(
        self, node: Node, content: str, content_bytes: bytes
    ) -> Optional[Function]:
        """Parse a function or method node into a Function model.

        Handles plain and generator function declarations (name node type
        ``identifier``) and class method definitions (name node type
        ``property_identifier``). Anonymous nodes keep the name
        ``"anonymous"``. JSDoc/docstring extraction is not implemented.
        """
        name = "anonymous"
        parameters: list[str] = []
        line_number = node.start_point[0] + 1

        for child in node.children:
            # FIX: the previous version only matched "identifier", so class
            # methods (whose name is a property_identifier) were all reported
            # as "anonymous"; it also had a dead nested method_definition
            # branch and set `name` to the full property_signature text.
            if child.type in ("identifier", "property_identifier"):
                name = content_bytes[child.start_byte : child.end_byte].decode("utf-8")
            elif child.type == "formal_parameters":
                parameters = self._parse_parameters(child, content_bytes)

        return Function(
            name=name,
            parameters=parameters,
            docstring=None,
            line_number=line_number,
        )

    def _find_parent_function(
        self, node: Node, content: str, content_bytes: bytes
    ) -> Optional[Function]:
        """Name an arrow function from its assignment target, if any.

        Walks up the tree looking for ``const f = () => ...`` (a
        ``variable_declarator``) or ``{ f: () => ... }`` (a ``pair``).
        Returns None for truly anonymous arrows. Parameters are not
        extracted for arrow functions.
        """
        parent = node.parent
        while parent:
            if parent.type == "variable_declarator":
                for child in parent.children:
                    if child.type == "identifier":
                        return Function(
                            name=content_bytes[child.start_byte : child.end_byte].decode("utf-8"),
                            parameters=[],
                            line_number=node.start_point[0] + 1,
                        )
            if parent.type == "pair":
                for child in parent.children:
                    if child.type == "property_identifier":
                        return Function(
                            name=content_bytes[child.start_byte : child.end_byte].decode("utf-8"),
                            parameters=[],
                            line_number=node.start_point[0] + 1,
                        )
            parent = parent.parent
        return None

    def _parse_class(
        self, node: Node, content: str, content_bytes: bytes
    ) -> Optional[Class]:
        """Parse a class declaration node into a Class model.

        Extracts the class name, base classes from the ``extends`` clause
        (``class_heritage``), and methods found inside the class body.
        """
        name = None
        base_classes: list[str] = []
        methods: list[Function] = []
        line_number = node.start_point[0] + 1

        for child in node.children:
            if child.type == "identifier":
                # FIX: the previous version never captured the class name,
                # so every class was reported as "Unknown".
                name = content_bytes[child.start_byte : child.end_byte].decode("utf-8")
            elif child.type == "class_heritage":
                for grandchild in child.children:
                    if grandchild.type == "identifier":
                        base_classes.append(
                            content_bytes[grandchild.start_byte : grandchild.end_byte].decode("utf-8")
                        )
            elif child.type == "class_body":
                methods = self._extract_functions(child, content, content_bytes)

        return Class(
            name=name or "Unknown",  # "Unknown" only for malformed declarations
            base_classes=base_classes,
            methods=methods,
            docstring=None,
            line_number=line_number,
        )

    def _parse_import(self, node: Node, content_bytes: bytes) -> Optional[ImportStatement]:
        """Parse an ``import_statement`` node into an ImportStatement model.

        Supports default imports (``import foo from "m"``), namespace
        imports (``import * as ns from "m"`` → ``alias``), and named
        imports (``import {a, b as c} from "m"`` → ``items``).
        """
        line_number = node.start_point[0] + 1
        module = None
        items: list[str] = []
        alias = None

        for child in node.children:
            if child.type == "import_clause":
                for grandchild in child.children:
                    if grandchild.type == "identifier":
                        # Default import binding.
                        items.append(
                            content_bytes[grandchild.start_byte : grandchild.end_byte].decode("utf-8")
                        )
                    elif grandchild.type == "namespace_import":
                        alias = content_bytes[grandchild.start_byte : grandchild.end_byte].decode("utf-8")
                    elif grandchild.type == "named_imports":
                        # FIX: named imports ({a, b as c}) were previously
                        # dropped entirely.
                        for spec in grandchild.children:
                            if spec.type == "import_specifier":
                                items.append(
                                    content_bytes[spec.start_byte : spec.end_byte].decode("utf-8")
                                )
            elif child.type == "string":
                # String node text includes the surrounding quotes.
                module = (
                    content_bytes[child.start_byte : child.end_byte]
                    .decode("utf-8")
                    .strip('"')
                    .strip("'")
                )

        return ImportStatement(
            module=module or "",
            items=items,
            alias=alias,
            line_number=line_number,
        )

    def _parse_parameters(self, node: Node, content_bytes: bytes) -> list[str]:
        """Parse a ``formal_parameters`` node into a list of parameter names.

        Simple, rest, and optional parameters are taken verbatim; for
        wrapped/typed parameter nodes only the identifier is kept.
        """
        params: list[str] = []
        for child in node.children:
            if child.type in ("identifier", "rest_parameter", "optional_parameter"):
                params.append(
                    content_bytes[child.start_byte : child.end_byte].decode("utf-8")
                )
            elif child.type in ("typed_parameter", "parameter"):
                # NOTE(review): these node types come from TypeScript-style
                # grammars; the plain JS grammar may never emit them — confirm
                # against the grammar actually loaded.
                for grandchild in child.children:
                    if grandchild.type == "identifier":
                        params.append(
                            content_bytes[grandchild.start_byte : grandchild.end_byte].decode("utf-8")
                        )
                        break
        return params