This commit is contained in:
325
.code_doc_cli/parsers/python_parser.py
Normal file
325
.code_doc_cli/parsers/python_parser.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""Python parser using AST and regex patterns."""
|
||||
|
||||
import ast
|
||||
import re
|
||||
from typing import Optional, List
|
||||
from .base import Parser, DocElement, ElementType, Parameter
|
||||
|
||||
|
||||
class PythonParser(Parser):
|
||||
"""Parser for Python source files."""
|
||||
|
||||
EXTENSIONS = [".py", ".pyw"]
|
||||
|
||||
def __init__(self, file_path: str):
    """Set up the parser for ``file_path``.

    The base-class initialiser receives the path; the AST slot starts
    empty and is filled in when the file is parsed.
    """
    super().__init__(file_path)
    # No tree yet: ``parse`` assigns the result of ``ast.parse`` here.
    self.tree: Optional[ast.AST] = None
|
||||
|
||||
def get_language_name(self) -> str:
    """Return the name of the language this parser handles."""
    language = "python"
    return language
|
||||
|
||||
@classmethod
def supports_file(cls, file_path: str) -> bool:
    """Return True when the extension of ``file_path`` is listed in ``EXTENSIONS``."""
    return cls._get_extension(file_path) in cls.EXTENSIONS
|
||||
|
||||
@staticmethod
|
||||
def _get_extension(file_path: str) -> str:
|
||||
import os
|
||||
return os.path.splitext(file_path)[1].lower()
|
||||
|
||||
def parse(self) -> list[DocElement]:
    """Parse the Python file and extract documentation elements.

    The file content is read and parsed into an AST. ``self.elements`` is
    then rebuilt: first an optional MODULE element (only when the module has
    a docstring), then whatever ``_parse_function`` and ``_parse_class``
    append for each top-level function or class, in source order.

    Returns:
        The list stored in ``self.elements``.

    Raises:
        ValueError: If reading or parsing the file raises ``SyntaxError``.
            The original error is chained as ``__cause__``.
    """
    try:
        self.content = self._read_content()
        # Passing the file name makes the SyntaxError text point at the real file.
        self.tree = ast.parse(self.content, filename=self.file_path)
    except SyntaxError as e:
        raise ValueError(f"Syntax error in Python file: {e}") from e

    self.elements = []

    module_docstring = ast.get_docstring(self.tree)
    if module_docstring:
        self.elements.append(
            DocElement(
                name=self._get_module_name(),
                element_type=ElementType.MODULE,
                description=module_docstring,
                full_docstring=module_docstring,
                source_file=self.file_path,
            )
        )

    # Only direct children of the module are visited; nested definitions
    # are reached (for classes) through _parse_class.
    for node in ast.iter_child_nodes(self.tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            self._parse_function(node)
        elif isinstance(node, ast.ClassDef):
            self._parse_class(node)

    return self.elements
|
||||
|
||||
def _get_module_name(self) -> str:
|
||||
"""Extract module name from file path."""
|
||||
import os
|
||||
base = os.path.basename(self.file_path)
|
||||
return os.path.splitext(base)[0]
|
||||
|
||||
def _parse_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
    """Build a DocElement for a function node and append it to ``self.elements``.

    Args:
        node: The (async) function definition to document.
    """
    doc = ast.get_docstring(node) or ""
    parameters = self._extract_parameters(node.args)
    returns = self._extract_return_type(node.returns)

    # A definition starting in column 0 is reported as a function,
    # anything indented as a method.
    if node.col_offset == 0:
        kind = ElementType.FUNCTION
    else:
        kind = ElementType.METHOD

    self.elements.append(
        DocElement(
            name=node.name,
            element_type=kind,
            description=self._extract_summary(doc),
            full_docstring=doc,
            parameters=parameters,
            return_type=returns,
            return_description=self._extract_return_description(doc),
            raises=self._extract_raises(doc),
            examples=self._extract_examples(doc),
            source_file=self.file_path,
            line_number=node.lineno,
            visibility=self._get_visibility(node.name),
            decorators=[self._format_decorator(dec) for dec in node.decorator_list],
        )
    )
|
||||
|
||||
def _parse_class(self, node: ast.ClassDef) -> None:
    """Document a class and its directly contained methods.

    One METHOD element per (async) function in the class body is appended
    to ``self.elements`` first, followed by the CLASS element itself.
    Base classes are recorded as ``Parameter`` entries on the class element.

    Args:
        node: The class definition to document.
    """
    doc = ast.get_docstring(node) or ""
    base_names = [self._get_base_name(base) for base in node.bases]
    attributes = self._extract_class_attributes(node)

    method_nodes = [
        item
        for item in node.body
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]

    class_elem = DocElement(
        name=node.name,
        element_type=ElementType.CLASS,
        description=self._extract_summary(doc),
        full_docstring=doc,
        attributes=attributes,
        # An empty base list simply produces an empty parameter list.
        parameters=[
            Parameter(name=base_name, description=f"Base class: {base_name}")
            for base_name in base_names
        ],
        source_file=self.file_path,
        line_number=node.lineno,
        visibility=self._get_visibility(node.name),
        decorators=[self._format_decorator(dec) for dec in node.decorator_list],
    )

    # Methods go into the element list ahead of their class.
    for method_node in method_nodes:
        self.elements.append(self._parse_method(method_node, node.name))
    self.elements.append(class_elem)
|
||||
|
||||
def _parse_method(self, node: ast.FunctionDef | ast.AsyncFunctionDef, class_name: str) -> DocElement:
    """Build the DocElement for a method defined inside a class.

    The implicit first parameter (``self`` / ``cls``) is left out of the
    documented parameters. Methods decorated with ``staticmethod`` have no
    implicit parameter, so all of their parameters are kept.

    Args:
        node: The (async) function definition found in the class body.
        class_name: Name of the enclosing class; used as the name prefix.

    Returns:
        A METHOD element named ``"<class_name>.<method name>"``.
    """
    docstring = ast.get_docstring(node) or ""

    # Accept both ``@staticmethod`` and a dotted form ending in ``.staticmethod``.
    is_static = any(
        (isinstance(dec, ast.Name) and dec.id == "staticmethod")
        or (isinstance(dec, ast.Attribute) and dec.attr == "staticmethod")
        for dec in node.decorator_list
    )
    parameters = self._extract_parameters(node.args, skip_first=not is_static)
    returns = self._extract_return_type(node.returns)

    return DocElement(
        name=f"{class_name}.{node.name}",
        element_type=ElementType.METHOD,
        description=self._extract_summary(docstring),
        full_docstring=docstring,
        parameters=parameters,
        return_type=returns,
        return_description=self._extract_return_description(docstring),
        raises=self._extract_raises(docstring),
        examples=self._extract_examples(docstring),
        source_file=self.file_path,
        line_number=node.lineno,
        visibility=self._get_visibility(node.name),
        decorators=[self._format_decorator(d) for d in node.decorator_list],
    )
|
||||
|
||||
def _extract_parameters(self, args: ast.arguments, skip_first: bool = False) -> list[Parameter]:
    """Convert an ``ast.arguments`` node into a list of Parameter objects.

    Parameters are reported in signature order: positional-only and regular
    positional parameters, ``*args``, keyword-only parameters, ``**kwargs``.

    Args:
        args: The ``arguments`` node of a function definition.
        skip_first: When true, drop the first positional parameter (the
            implicit ``self`` / ``cls`` of a method).

    Returns:
        One Parameter per reported parameter.
    """
    # Parameters written before "/" are stored separately from ``args.args``.
    positional = [*args.posonlyargs, *args.args]
    if skip_first:
        positional = positional[1:]

    params = [
        Parameter(
            name=arg.arg,
            type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None,
            default_value=self._get_default_value(args, arg.arg),
        )
        for arg in positional
    ]

    if args.vararg:
        params.append(Parameter(
            name=f"*{args.vararg.arg}",
            type_hint="*args",
        ))

    # Keyword-only parameters; ``kw_defaults`` holds None for required ones.
    for arg, default in zip(args.kwonlyargs, args.kw_defaults):
        params.append(Parameter(
            name=arg.arg,
            type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None,
            default_value=ast.unparse(default) if default is not None else None,
        ))

    if args.kwarg:
        params.append(Parameter(
            name=f"**{args.kwarg.arg}",
            type_hint="**kwargs",
        ))

    return params
|
||||
|
||||
def _get_type_hint(self, annotation: ast.AST) -> Optional[str]:
|
||||
"""Get type hint as string."""
|
||||
if annotation is None:
|
||||
return None
|
||||
try:
|
||||
return ast.unparse(annotation)
|
||||
except Exception:
|
||||
return "Any"
|
||||
|
||||
def _get_default_value(self, args: ast.arguments, arg_name: str) -> Optional[str]:
|
||||
"""Get default value for a parameter."""
|
||||
defaults = list(args.defaults)
|
||||
num_defaults = len(defaults)
|
||||
num_args = len(args.args)
|
||||
|
||||
if num_defaults > 0:
|
||||
start_idx = num_args - num_defaults
|
||||
for i, arg in enumerate(args.args):
|
||||
if arg.arg == arg_name:
|
||||
idx = start_idx + i
|
||||
if idx < len(defaults):
|
||||
try:
|
||||
return ast.unparse(defaults[idx - start_idx])
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
def _extract_return_type(self, returns: ast.AST | None) -> Optional[str]:
|
||||
"""Extract return type from AST."""
|
||||
if returns is None:
|
||||
return None
|
||||
try:
|
||||
return ast.unparse(returns)
|
||||
except Exception:
|
||||
return "Any"
|
||||
|
||||
def _extract_summary(self, docstring: str) -> str:
|
||||
"""Extract first line or paragraph as summary."""
|
||||
if not docstring:
|
||||
return ""
|
||||
lines = docstring.strip().split("\n")
|
||||
if not lines:
|
||||
return ""
|
||||
summary = lines[0].strip()
|
||||
if len(lines) > 1 and not lines[1].strip():
|
||||
for i in range(1, len(lines)):
|
||||
if lines[i].strip():
|
||||
summary = lines[i].strip()
|
||||
break
|
||||
return summary
|
||||
|
||||
def _extract_return_description(self, docstring: str) -> Optional[str]:
|
||||
"""Extract return description from docstring."""
|
||||
patterns = [
|
||||
r"(?:^|\n)\s*Returns?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
|
||||
r"(?:^|\n)\s*Returns?\s+(.+?)(?:\n\n|$)",
|
||||
]
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||
if match:
|
||||
desc = match.group(1).strip()
|
||||
lines = desc.split("\n")
|
||||
result = []
|
||||
for line in lines:
|
||||
stripped = line.strip()
|
||||
if stripped and not stripped.startswith("-"):
|
||||
result.append(stripped)
|
||||
elif stripped.startswith("-"):
|
||||
break
|
||||
return "\n".join(result) if result else None
|
||||
return None
|
||||
|
||||
def _extract_raises(self, docstring: str) -> list[tuple[str, str]]:
|
||||
"""Extract raises information from docstring."""
|
||||
raises = []
|
||||
pattern = r"(?:^|\n)\s*Raises?\s*:\s*(.+?)(?:\n\n|$)"
|
||||
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||
if match:
|
||||
content = match.group(1)
|
||||
lines = content.split("\n")
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line.startswith("-"):
|
||||
parts = line[1:].split(":", 1)
|
||||
if len(parts) == 2:
|
||||
exc_type = parts[0].strip()
|
||||
exc_desc = parts[1].strip()
|
||||
raises.append((exc_type, exc_desc))
|
||||
return raises
|
||||
|
||||
def _extract_examples(self, docstring: str) -> list[str]:
|
||||
"""Extract examples from docstring."""
|
||||
examples = []
|
||||
pattern = r"(?:^|\n)\s*Example[s]?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)"
|
||||
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||
if match:
|
||||
content = match.group(1).strip()
|
||||
examples.append(content)
|
||||
return examples
|
||||
|
||||
def _get_visibility(self, name: str) -> str:
|
||||
"""Determine visibility based on name."""
|
||||
if name.startswith("_"):
|
||||
if name.startswith("__"):
|
||||
return "dunder"
|
||||
return "private"
|
||||
return "public"
|
||||
|
||||
def _format_decorator(self, decorator: ast.AST) -> str:
|
||||
"""Format decorator as string."""
|
||||
if isinstance(decorator, ast.Name):
|
||||
return f"@{decorator.id}"
|
||||
elif isinstance(decorator, ast.Attribute):
|
||||
return f"@{ast.unparse(decorator)}"
|
||||
return f"@{ast.unparse(decorator)}"
|
||||
|
||||
def _get_base_name(self, base: ast.AST) -> str:
|
||||
"""Get base class name."""
|
||||
if isinstance(base, ast.Name):
|
||||
return base.id
|
||||
return ast.unparse(base)
|
||||
|
||||
def _extract_class_attributes(self, node: ast.ClassDef) -> list[tuple[str, Optional[str], Optional[str]]]:
|
||||
"""Extract class attributes from docstring."""
|
||||
attributes = []
|
||||
docstring = ast.get_docstring(node) or ""
|
||||
|
||||
patterns = [
|
||||
r"(?:^|\n)\s*Attributes?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
|
||||
]
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||
if match:
|
||||
content = match.group(1)
|
||||
lines = content.split("\n")
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line.startswith("-"):
|
||||
parts = line[1:].split(":", 1)
|
||||
if len(parts) >= 1:
|
||||
attr_name = parts[0].strip()
|
||||
attr_type = None
|
||||
attr_desc = None
|
||||
if len(parts) > 1:
|
||||
rest = parts[1].strip()
|
||||
if " " in rest:
|
||||
attr_type = rest.split(" ", 1)[0]
|
||||
attr_desc = rest.split(" ", 1)[1]
|
||||
else:
|
||||
attr_type = rest
|
||||
attributes.append((attr_name, attr_type, attr_desc))
|
||||
break
|
||||
|
||||
return attributes
|
||||
Reference in New Issue
Block a user