fix: resolve CI/CD issues with proper package structure and imports
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-02-03 03:54:41 +00:00
parent 9773c9e46c
commit 8750e7574b

View File

@@ -0,0 +1,544 @@
"""Code comment indexer for Python, JavaScript, and TypeScript files."""
import ast
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
from local_api_docs_search.indexer.base import BaseIndexer
from local_api_docs_search.models.document import Document, SourceType
class CodeIndexer(BaseIndexer):
    """Indexer for code comments and docstrings.

    Python sources are parsed with ``ast`` to collect module, class and
    function docstrings; JavaScript and TypeScript sources are scanned for
    JSDoc block comments with regular expressions.
    """

    # Source type stamped on every Document this indexer creates.
    source_type = SourceType.CODE
    # Lower-cased file suffix -> language name used to pick a parser.
    SUPPORTED_EXTENSIONS = {
        ".py": "python",
        ".js": "javascript",
        ".jsx": "javascript",
        ".ts": "typescript",
        ".tsx": "typescript",
    }
def __init__(self):
self._documents: List[Document] = []
self._parsed_files: Dict[str, Any] = {}
def index(
    self, path: Path, recursive: bool = False, batch_size: int = 32
) -> List[Document]:
    """Index the code files located under ``path``.

    Results from any earlier run are discarded first. A file whose parsing
    raises is reported with a printed warning and skipped, so one bad file
    does not abort the run.

    Args:
        path: Path to file or directory
        recursive: Whether to search recursively (forwarded to
            ``self._find_files``)
        batch_size: Documents per batch (for progress tracking); this
            method does not read it

    Returns:
        List of Document objects collected during this run
    """
    # Reset state so repeated calls never accumulate stale results.
    self._documents = []
    self._parsed_files = {}
    candidates = self._find_files(path, recursive)
    for source_file in candidates:
        try:
            parsed = self._parse_file(source_file)
            self._documents.extend(parsed)
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            print(f"Warning: Failed to parse {source_file}: {e}")
    return self._documents
def _parse_file(self, file_path: Path) -> List[Document]:
    """Read one code file and hand it to the parser for its language.

    Args:
        file_path: Path to the code file

    Returns:
        List of Document objects; empty when the suffix is not listed in
        ``SUPPORTED_EXTENSIONS``
    """
    suffix = file_path.suffix.lower()
    language = self.SUPPORTED_EXTENSIONS.get(suffix)
    # Guard clause: unsupported files are never opened.
    if language is None:
        return []

    with open(file_path, "r", encoding="utf-8") as handle:
        content = handle.read()
    # Keep the raw text around, keyed by the path string.
    self._parsed_files[str(file_path)] = content

    if language == "python":
        return self._parse_python(content, file_path)
    if language in ("javascript", "typescript"):
        return self._parse_js_ts(content, file_path, language)
    return []
def _parse_python(self, content: str, file_path: Path) -> List[Document]:
    """Parse a Python file and build Documents from its docstrings.

    Produces, in order: one Document for the module docstring (if any), one
    per documented function or class found anywhere in the tree, and - when
    at least one Document was produced - a final index Document.

    Args:
        content: Python file content
        file_path: Path to the file

    Returns:
        List of Document objects; empty when the source has a syntax error
    """
    documents: List[Document] = []
    doc_id_base = self._generate_id(file_path)
    try:
        tree = ast.parse(content)
    except SyntaxError:
        # Unparseable source yields no documents instead of an error.
        return []

    # Read the module docstring from the tree we already built rather than
    # parsing the whole file a second time.
    module_doc = self._get_docstring(tree)
    if module_doc:
        documents.append(
            Document(
                id=f"{doc_id_base}_module",
                content=module_doc,
                source_type=self.source_type,
                title=f"Module: {file_path.stem}",
                file_path=str(file_path),
                metadata={"doc_type": "module"},
            )
        )

    # NOTE(review): ast.walk also visits methods and nested functions, and
    # the per-function id is built from the bare name, so two same-named
    # functions in one file end up with the same id - confirm whether the
    # downstream store tolerates that.
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            doc = self._parse_python_function(node, file_path, doc_id_base)
        elif isinstance(node, ast.ClassDef):
            doc = self._parse_python_class(node, file_path, doc_id_base)
        else:
            continue
        if doc:
            documents.append(doc)

    # The index is only worth emitting when something was documented.
    if documents:
        documents.append(
            Document(
                id=f"{doc_id_base}_index",
                content=self._generate_python_index(tree, file_path),
                source_type=self.source_type,
                title=f"Index: {file_path.stem}",
                file_path=str(file_path),
                metadata={"doc_type": "index"},
            )
        )
    return documents
def _get_module_docstring(self, content: str) -> Optional[str]:
"""Extract module docstring.
Args:
content: Python file content
Returns:
Module docstring or None
"""
tree = ast.parse(content)
if tree.body and isinstance(tree.body[0], ast.Expr):
docstring = tree.body[0].value
if isinstance(docstring, ast.Constant) and isinstance(
docstring.value, str
):
return docstring.value
return None
def _parse_python_function(
    self, node: ast.FunctionDef, file_path: Path, doc_id_base: str
) -> Optional[Document]:
    """Build a Document for a function that carries a docstring.

    Args:
        node: AST function node
        file_path: Path to the file
        doc_id_base: Base ID for document generation

    Returns:
        Document describing the function, or None when it has no docstring
    """
    docstring = self._get_docstring(node)
    if not docstring:
        return None

    details = self._extract_python_function_info(node)
    name = node.name
    line_number = node.lineno
    # Assemble the searchable text: name, docstring, parameters, return
    # annotation and the line the definition starts on.
    content = "".join(
        [
            f"Function: {name}\n",
            f"Docstring:\n{docstring}\n",
            f"Parameters: {', '.join(details['args'])}\n",
            f"Returns: {details['returns']}\n",
            f"Line: {line_number}",
        ]
    )
    metadata = {
        "doc_type": "function",
        "function_name": name,
        "line": line_number,
    }
    return Document(
        id=f"{doc_id_base}_func_{name}",
        content=content,
        source_type=self.source_type,
        title=f"Function: {name}",
        file_path=str(file_path),
        metadata=metadata,
    )
def _parse_python_class(
    self, node: ast.ClassDef, file_path: Path, doc_id_base: str
) -> Optional[Document]:
    """Build a Document for a class that carries a docstring.

    The content lists the class docstring, its annotated class-level
    attributes and its public methods (names not starting with ``_``).

    Args:
        node: AST class node
        file_path: Path to the file
        doc_id_base: Base ID for document generation

    Returns:
        Document describing the class, or None when it has no docstring
    """
    docstring = self._get_docstring(node)
    if not docstring:
        return None

    methods = []
    attributes = []
    # Only direct children of the class body are inspected.
    for item in node.body:
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
            if not item.name.startswith("_"):
                methods.append(item.name)
        elif isinstance(item, ast.AnnAssign) and isinstance(
            item.target, ast.Name
        ):
            # ast.Name keeps its identifier in ``id``; it has no ``name``
            # attribute, so reading ``.name`` raised AttributeError here.
            attributes.append(item.target.id)

    content = f"Class: {node.name}\n"
    content += f"Docstring:\n{docstring}\n"
    if attributes:
        content += f"Attributes: {', '.join(attributes)}\n"
    if methods:
        content += f"Methods: {', '.join(methods)}\n"
    content += f"Line: {node.lineno}"

    return Document(
        id=f"{doc_id_base}_class_{node.name}",
        content=content,
        source_type=self.source_type,
        title=f"Class: {node.name}",
        file_path=str(file_path),
        metadata={
            "doc_type": "class",
            "class_name": node.name,
            "line": node.lineno,
        },
    )
def _get_docstring(self, node: ast.AST) -> Optional[str]:
"""Extract docstring from an AST node.
Args:
node: AST node
Returns:
Docstring or None
"""
if hasattr(node, "body") and node.body:
first = node.body[0]
if isinstance(first, ast.Expr) and isinstance(first.value, ast.Constant):
value = first.value.value
if isinstance(value, str):
return value
return None
def _extract_python_function_info(
self, node: ast.FunctionDef
) -> Dict[str, Any]:
"""Extract function information.
Args:
node: AST function node
Returns:
Dictionary with function information
"""
args = []
defaults = []
for arg in node.args.args:
if arg.arg != "self" and arg.arg != "cls":
args.append(arg.arg)
for default in node.args.defaults:
if isinstance(default, ast.Constant):
defaults.append(str(default.value))
returns = "unknown"
if node.returns:
if isinstance(node.returns, ast.Name):
returns = node.returns.id
elif isinstance(node.returns, ast.Constant):
returns = str(node.returns.value)
return {"args": args, "defaults": defaults, "returns": returns}
def _generate_python_index(
    self, tree: ast.AST, file_path: Path
) -> str:
    """Render a plain-text listing of the documented items in a file.

    Args:
        tree: Parsed AST tree
        file_path: Path to the file

    Returns:
        Text starting with the file name, followed by a "Classes" section
        (every class with a docstring) and a "Functions" section (every
        public function with a docstring); empty sections are omitted
    """
    functions: List[str] = []
    classes: List[str] = []
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # Private functions are left out even when documented.
            if not node.name.startswith("_") and self._get_docstring(node):
                functions.append(node.name)
        elif isinstance(node, ast.ClassDef) and self._get_docstring(node):
            classes.append(node.name)

    sections = [f"File: {file_path.name}\n\n"]
    if classes:
        sections.append(
            "Classes:\n" + "\n".join(f" - {c}" for c in classes) + "\n\n"
        )
    if functions:
        sections.append(
            "Functions:\n" + "\n".join(f" - {f}" for f in functions)
        )
    return "".join(sections)
def _parse_js_ts(
    self, content: str, file_path: Path, language: str
) -> List[Document]:
    """Build Documents from the JSDoc comments of a JS/TS file.

    Args:
        content: File content
        file_path: Path to the file
        language: Language identifier (not read by this method)

    Returns:
        List of Document objects: an optional module Document followed by
        one Document per JSDoc comment; empty when the file has no JSDoc
        comment attached to a declaration
    """
    doc_id_base = self._generate_id(file_path)
    jsdocs = self._extract_jsdocs(content)
    # Without any declaration-level JSDoc nothing is emitted at all, not
    # even a module Document.
    if not jsdocs:
        return []

    documents = []
    module_doc = self._extract_js_module_doc(content)
    if module_doc:
        documents.append(
            Document(
                id=f"{doc_id_base}_module",
                content=module_doc,
                source_type=self.source_type,
                title=f"Module: {file_path.stem}",
                file_path=str(file_path),
                metadata={"doc_type": "module"},
            )
        )
    documents.extend(
        self._create_jsdoc_document(entry, file_path, doc_id_base, position)
        for position, entry in enumerate(jsdocs)
    )
    return documents
def _extract_jsdocs(self, content: str) -> List[Dict[str, Any]]:
    """Find JSDoc comments that sit directly above a declaration.

    A comment qualifies when only whitespace separates its closing ``*/``
    from an optional ``export`` / ``export default``, an optional
    ``async`` and one of ``function``, ``const``, ``let``, ``var``,
    ``class``, ``interface``, ``type`` or ``enum`` followed by a name.

    Args:
        content: File content

    Returns:
        List of JSDoc dictionaries: the fields produced by
        ``_parse_jsdoc_comment`` plus ``name``, ``type`` (the declaration
        keyword), ``exported`` and ``async``
    """
    # The comment body is matched one character at a time with a negative
    # lookahead so it can never run past its own closing "*/". A plain lazy
    # "[\s\S]*?" would merge an earlier unattached comment (for example a
    # licence header) and the code after it into the next documented
    # declaration.
    pattern = (
        r"/\*\*((?:(?!\*/)[\s\S])*)\*/\s*"
        r"(export\s+(?:default\s+)?)?(async\s+)?"
        r"(function|const|let|var|class|interface|type|enum)\s+(\w+)"
    )
    jsdocs = []
    for body, export_kw, async_kw, decl_type, name in re.findall(
        pattern, content
    ):
        parsed = self._parse_jsdoc_comment(f"/**{body}*/")
        parsed.update({
            "name": name,
            "type": decl_type,
            "exported": bool(export_kw),
            "async": bool(async_kw),
        })
        jsdocs.append(parsed)
    return jsdocs
def _parse_jsdoc_comment(self, comment: str) -> Dict[str, Any]:
"""Parse a JSDoc comment.
Args:
comment: JSDoc comment string
Returns:
Parsed JSDoc dictionary
"""
result = {
"description": "",
"params": [],
"returns": None,
"examples": [],
"throws": [],
"see": [],
}
lines = comment.strip("/**").strip("*/").split("\n")
current_description = []
for line in lines:
line = line.strip().lstrip("*").strip()
if line.startswith("@param"):
param_match = re.match(r"@param\s+\{([^}]+)\}\s+(\w+)(?:\s+-)?\s*(.*)", line)
if param_match:
result["params"].append({
"type": param_match.group(1),
"name": param_match.group(2),
"description": param_match.group(3),
})
elif line.startswith("@returns") or line.startswith("@return"):
return_match = re.match(r"@returns?\{([^}]+)\}\s*(.*)", line)
if return_match:
result["returns"] = {
"type": return_match.group(1),
"description": return_match.group(2),
}
elif line.startswith("@example"):
result["examples"].append(line[8:].strip())
elif line.startswith("@throws"):
throw_match = re.match(r"@throws\{([^}]+)\}\s*(.*)", line)
if throw_match:
result["throws"].append({
"type": throw_match.group(1),
"description": throw_match.group(2),
})
elif line.startswith("@see"):
result["see"].append(line[4:].strip())
elif line and not line.startswith("@"):
current_description.append(line)
result["description"] = " ".join(current_description)
return result
def _extract_js_module_doc(self, content: str) -> Optional[str]:
"""Extract module-level documentation.
Args:
content: File content
Returns:
Module docstring or None
"""
file_doc_pattern = r"/\*\*([\s\S]*?)\*/\s*@module\s+(\w+)"
match = re.search(file_doc_pattern, content)
if match:
return f"Module: {match.group(2)}\n\n{match.group(1).strip()}"
return None
def _create_jsdoc_document(
    self,
    jsdoc: Dict[str, Any],
    file_path: Path,
    doc_id_base: str,
    index: int,
) -> Document:
    """Turn one parsed JSDoc dictionary into a Document.

    The content starts with a signature line (``export``/``async`` prefixes,
    declaration keyword, name) and then adds a section for each of
    description, parameters, return value and examples that is present.

    Args:
        jsdoc: Parsed JSDoc dictionary
        file_path: Path to the source file
        doc_id_base: Base ID for document generation
        index: Position of this comment in the file, used in the ID

    Returns:
        Document object
    """
    decl_type = jsdoc.get("type", "unknown")
    name = jsdoc.get("name", "unknown")
    export_prefix = "export " if jsdoc.get("exported") else ""
    async_prefix = "async " if jsdoc.get("async") else ""

    sections = [f"{export_prefix}{async_prefix}{decl_type} {name}"]

    description = jsdoc.get("description")
    if description:
        sections.append(f"\nDescription: {description}")

    params = jsdoc.get("params")
    if params:
        rendered = [
            f" - {param['name']} ({param['type']}): {param['description']}"
            for param in params
        ]
        sections.append("\n".join(["\nParameters:"] + rendered))

    ret = jsdoc.get("returns")
    if ret:
        sections.append(f"\nReturns ({ret['type']}): {ret['description']}")

    examples = jsdoc.get("examples")
    if examples:
        numbered = "\n".join(
            f" {number}. {ex}" for number, ex in enumerate(examples, start=1)
        )
        sections.append("\nExamples:\n" + numbered)

    # Each section already starts with "\n", so joining with "\n" leaves a
    # blank line between sections.
    content = "\n".join(sections)
    metadata = {
        "doc_type": "jsdoc",
        "name": name,
        "jsdoc_type": decl_type,
    }
    return Document(
        id=f"{doc_id_base}_jsdoc_{index}",
        content=content,
        source_type=self.source_type,
        title=f"{decl_type.capitalize()}: {name}",
        file_path=str(file_path),
        metadata=metadata,
    )
def _is_supported_file(self, path: Path) -> bool:
"""Check if the file is a supported code file.
Args:
path: Path to the file
Returns:
True if the file extension is supported
"""
return path.suffix.lower() in self.SUPPORTED_EXTENSIONS
def get_documents(self) -> List[Document]:
    """Return the documents gathered by the most recent indexing run.

    Returns:
        List of Document objects (the indexer's own list, not a copy)
    """
    indexed = self._documents
    return indexed