Add generators and parsers modules
Some checks failed
CI / test (push) Failing after 6s

This commit is contained in:
2026-01-29 16:51:53 +00:00
parent 3191bd2f9a
commit e8e939cff9

View File

@@ -0,0 +1,325 @@
"""Python parser using AST and regex patterns."""
import ast
import re
from typing import Optional, List
from .base import Parser, DocElement, ElementType, Parameter
class PythonParser(Parser):
"""Parser for Python source files."""
EXTENSIONS = [".py", ".pyw"]
def __init__(self, file_path: str):
    """Create a parser bound to one Python source file.

    Args:
        file_path: Path of the file to parse; forwarded unchanged to the
            base ``Parser`` initializer.
    """
    super().__init__(file_path)
    # No AST exists yet; ``parse()`` assigns the parsed tree here.
    self.tree: Optional[ast.AST] = None
def get_language_name(self) -> str:
    """Return the identifier of the language handled by this parser."""
    language = "python"
    return language
@classmethod
def supports_file(cls, file_path: str) -> bool:
ext = cls._get_extension(file_path)
return ext in cls.EXTENSIONS
@staticmethod
def _get_extension(file_path: str) -> str:
import os
return os.path.splitext(file_path)[1].lower()
def parse(self) -> list[DocElement]:
    """Parse the Python file and collect its documentation elements.

    The file content is obtained from ``self._read_content()`` and parsed
    with ``ast.parse``. A MODULE element is recorded when the module has a
    docstring; after that every top-level function (sync or async) and
    class is handed to ``_parse_function`` / ``_parse_class`` in source
    order.

    Returns:
        The list stored in ``self.elements``.

    Raises:
        ValueError: If the content is not valid Python. The original
            ``SyntaxError`` is attached as ``__cause__``.
    """
    self.content = self._read_content()
    # Only ast.parse can raise SyntaxError here, so keep the guard tight.
    try:
        self.tree = ast.parse(self.content)
    except SyntaxError as e:
        raise ValueError(f"Syntax error in Python file: {e}") from e

    self.elements = []

    module_docstring = ast.get_docstring(self.tree)
    if module_docstring:
        self.elements.append(
            DocElement(
                name=self._get_module_name(),
                element_type=ElementType.MODULE,
                description=module_docstring,
                full_docstring=module_docstring,
                source_file=self.file_path,
            )
        )

    # Direct children only: nested definitions are not visited here.
    for node in ast.iter_child_nodes(self.tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            self._parse_function(node)
        elif isinstance(node, ast.ClassDef):
            self._parse_class(node)

    return self.elements
def _get_module_name(self) -> str:
"""Extract module name from file path."""
import os
base = os.path.basename(self.file_path)
return os.path.splitext(base)[0]
def _parse_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
    """Build a ``DocElement`` for a function node and append it to ``self.elements``.

    Args:
        node: The (sync or async) function definition to document.
    """
    doc = ast.get_docstring(node) or ""

    # A definition that starts in column 0 is recorded as a FUNCTION;
    # anything indented is recorded as a METHOD.
    if node.col_offset == 0:
        kind = ElementType.FUNCTION
    else:
        kind = ElementType.METHOD

    decorator_names = [self._format_decorator(dec) for dec in node.decorator_list]

    self.elements.append(
        DocElement(
            name=node.name,
            element_type=kind,
            description=self._extract_summary(doc),
            full_docstring=doc,
            parameters=self._extract_parameters(node.args),
            return_type=self._extract_return_type(node.returns),
            return_description=self._extract_return_description(doc),
            raises=self._extract_raises(doc),
            examples=self._extract_examples(doc),
            source_file=self.file_path,
            line_number=node.lineno,
            visibility=self._get_visibility(node.name),
            decorators=decorator_names,
        )
    )
def _parse_class(self, node: ast.ClassDef) -> None:
    """Record a class and the methods defined directly in its body.

    Base classes are stored as ``Parameter`` entries whose description is
    ``"Base class: <name>"``. Each method's element is appended to
    ``self.elements`` first; the class element itself is appended last.

    Args:
        node: The class definition to document.
    """
    doc = ast.get_docstring(node) or ""
    base_names = [self._get_base_name(expr) for expr in node.bases]
    class_attributes = self._extract_class_attributes(node)

    # Only functions defined directly in the class body count as methods.
    method_nodes = [
        child
        for child in node.body
        if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]

    class_elem = DocElement(
        name=node.name,
        element_type=ElementType.CLASS,
        description=self._extract_summary(doc),
        full_docstring=doc,
        attributes=class_attributes,
        parameters=[
            Parameter(name=base, description=f"Base class: {base}")
            for base in base_names
        ],
        source_file=self.file_path,
        line_number=node.lineno,
        visibility=self._get_visibility(node.name),
        decorators=[self._format_decorator(dec) for dec in node.decorator_list],
    )

    for method_node in method_nodes:
        self.elements.append(self._parse_method(method_node, node.name))
    self.elements.append(class_elem)
def _parse_method(self, node: ast.FunctionDef | ast.AsyncFunctionDef, class_name: str) -> DocElement:
    """Build the ``DocElement`` for a method defined inside a class.

    The implicit receiver (``self`` / ``cls``) is left out of the
    documented parameters. Methods decorated with ``staticmethod`` have no
    receiver, so their first parameter is kept.

    Args:
        node: The (sync or async) method definition.
        class_name: Name of the enclosing class; used as the prefix of the
            element name (``"Class.method"``).

    Returns:
        The element describing the method. It is not appended to
        ``self.elements`` here.
    """
    docstring = ast.get_docstring(node) or ""

    # Accept both ``@staticmethod`` and a qualified ``@x.staticmethod``.
    is_static = any(
        (isinstance(dec, ast.Name) and dec.id == "staticmethod")
        or (isinstance(dec, ast.Attribute) and dec.attr == "staticmethod")
        for dec in node.decorator_list
    )
    parameters = self._extract_parameters(node.args, skip_first=not is_static)
    returns = self._extract_return_type(node.returns)

    return DocElement(
        name=f"{class_name}.{node.name}",
        element_type=ElementType.METHOD,
        description=self._extract_summary(docstring),
        full_docstring=docstring,
        parameters=parameters,
        return_type=returns,
        return_description=self._extract_return_description(docstring),
        raises=self._extract_raises(docstring),
        examples=self._extract_examples(docstring),
        source_file=self.file_path,
        line_number=node.lineno,
        visibility=self._get_visibility(node.name),
        decorators=[self._format_decorator(d) for d in node.decorator_list],
    )
def _extract_parameters(self, args: ast.arguments, skip_first: bool = False) -> list[Parameter]:
    """Turn an ``ast.arguments`` node into a list of ``Parameter`` objects.

    Parameters are emitted in signature order: positional-only and regular
    positional parameters, ``*args``, keyword-only parameters, ``**kwargs``.

    Args:
        args: The arguments node of a function definition.
        skip_first: When true, drop the first positional parameter (the
            implicit receiver of a method), whether or not it is declared
            positional-only.

    Returns:
        One ``Parameter`` per documented parameter.
    """
    # posonlyargs come before args in the signature; treat them as one run
    # so ``def m(self, /, x)`` loses ``self`` and not ``x``.
    positional = [*args.posonlyargs, *args.args]
    if skip_first and positional:
        positional = positional[1:]

    params = [
        Parameter(
            name=arg.arg,
            type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None,
            default_value=self._get_default_value(args, arg.arg),
        )
        for arg in positional
    ]

    if args.vararg:
        params.append(Parameter(
            name=f"*{args.vararg.arg}",
            type_hint="*args",
        ))

    # kw_defaults is parallel to kwonlyargs; None marks "no default".
    for arg, default in zip(args.kwonlyargs, args.kw_defaults):
        params.append(Parameter(
            name=arg.arg,
            type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None,
            default_value=ast.unparse(default) if default is not None else None,
        ))

    if args.kwarg:
        params.append(Parameter(
            name=f"**{args.kwarg.arg}",
            type_hint="**kwargs",
        ))

    return params
def _get_type_hint(self, annotation: ast.AST) -> Optional[str]:
"""Get type hint as string."""
if annotation is None:
return None
try:
return ast.unparse(annotation)
except Exception:
return "Any"
def _get_default_value(self, args: ast.arguments, arg_name: str) -> Optional[str]:
"""Get default value for a parameter."""
defaults = list(args.defaults)
num_defaults = len(defaults)
num_args = len(args.args)
if num_defaults > 0:
start_idx = num_args - num_defaults
for i, arg in enumerate(args.args):
if arg.arg == arg_name:
idx = start_idx + i
if idx < len(defaults):
try:
return ast.unparse(defaults[idx - start_idx])
except Exception:
return None
return None
def _extract_return_type(self, returns: ast.AST | None) -> Optional[str]:
"""Extract return type from AST."""
if returns is None:
return None
try:
return ast.unparse(returns)
except Exception:
return "Any"
def _extract_summary(self, docstring: str) -> str:
"""Extract first line or paragraph as summary."""
if not docstring:
return ""
lines = docstring.strip().split("\n")
if not lines:
return ""
summary = lines[0].strip()
if len(lines) > 1 and not lines[1].strip():
for i in range(1, len(lines)):
if lines[i].strip():
summary = lines[i].strip()
break
return summary
def _extract_return_description(self, docstring: str) -> Optional[str]:
"""Extract return description from docstring."""
patterns = [
r"(?:^|\n)\s*Returns?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
r"(?:^|\n)\s*Returns?\s+(.+?)(?:\n\n|$)",
]
for pattern in patterns:
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
if match:
desc = match.group(1).strip()
lines = desc.split("\n")
result = []
for line in lines:
stripped = line.strip()
if stripped and not stripped.startswith("-"):
result.append(stripped)
elif stripped.startswith("-"):
break
return "\n".join(result) if result else None
return None
def _extract_raises(self, docstring: str) -> list[tuple[str, str]]:
"""Extract raises information from docstring."""
raises = []
pattern = r"(?:^|\n)\s*Raises?\s*:\s*(.+?)(?:\n\n|$)"
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
if match:
content = match.group(1)
lines = content.split("\n")
for line in lines:
line = line.strip()
if line.startswith("-"):
parts = line[1:].split(":", 1)
if len(parts) == 2:
exc_type = parts[0].strip()
exc_desc = parts[1].strip()
raises.append((exc_type, exc_desc))
return raises
def _extract_examples(self, docstring: str) -> list[str]:
"""Extract examples from docstring."""
examples = []
pattern = r"(?:^|\n)\s*Example[s]?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)"
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
if match:
content = match.group(1).strip()
examples.append(content)
return examples
def _get_visibility(self, name: str) -> str:
"""Determine visibility based on name."""
if name.startswith("_"):
if name.startswith("__"):
return "dunder"
return "private"
return "public"
def _format_decorator(self, decorator: ast.AST) -> str:
"""Format decorator as string."""
if isinstance(decorator, ast.Name):
return f"@{decorator.id}"
elif isinstance(decorator, ast.Attribute):
return f"@{ast.unparse(decorator)}"
return f"@{ast.unparse(decorator)}"
def _get_base_name(self, base: ast.AST) -> str:
"""Get base class name."""
if isinstance(base, ast.Name):
return base.id
return ast.unparse(base)
def _extract_class_attributes(self, node: ast.ClassDef) -> list[tuple[str, Optional[str], Optional[str]]]:
"""Extract class attributes from docstring."""
attributes = []
docstring = ast.get_docstring(node) or ""
patterns = [
r"(?:^|\n)\s*Attributes?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
]
for pattern in patterns:
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
if match:
content = match.group(1)
lines = content.split("\n")
for line in lines:
line = line.strip()
if line.startswith("-"):
parts = line[1:].split(":", 1)
if len(parts) >= 1:
attr_name = parts[0].strip()
attr_type = None
attr_desc = None
if len(parts) > 1:
rest = parts[1].strip()
if " " in rest:
attr_type = rest.split(" ", 1)[0]
attr_desc = rest.split(" ", 1)[1]
else:
attr_type = rest
attributes.append((attr_name, attr_type, attr_desc))
break
return attributes