This commit is contained in:
325
.code_doc_cli/parsers/python_parser.py
Normal file
325
.code_doc_cli/parsers/python_parser.py
Normal file
@@ -0,0 +1,325 @@
|
|||||||
|
"""Python parser using AST and regex patterns."""
|
||||||
|
|
||||||
|
import ast
|
||||||
|
import re
|
||||||
|
from typing import Optional, List
|
||||||
|
from .base import Parser, DocElement, ElementType, Parameter
|
||||||
|
|
||||||
|
|
||||||
|
class PythonParser(Parser):
|
||||||
|
"""Parser for Python source files."""
|
||||||
|
|
||||||
|
EXTENSIONS = [".py", ".pyw"]
|
||||||
|
|
||||||
|
def __init__(self, file_path: str):
    """Set up a parser for the file at ``file_path``.

    The path is forwarded to the base-class initialiser. ``tree`` starts
    out as ``None`` and is assigned by ``parse``.
    """
    super().__init__(file_path)
    # No AST is available until parse() has run.
    self.tree: Optional[ast.AST] = None
|
||||||
|
|
||||||
|
def get_language_name(self) -> str:
    """Return the name of the language handled by this parser."""
    language = "python"
    return language
|
||||||
|
|
||||||
|
@classmethod
def supports_file(cls, file_path: str) -> bool:
    """Report whether ``file_path`` has an extension listed in ``EXTENSIONS``.

    The extension is obtained through ``_get_extension`` and then checked
    for membership in the class-level ``EXTENSIONS`` list.
    """
    return cls._get_extension(file_path) in cls.EXTENSIONS
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_extension(file_path: str) -> str:
|
||||||
|
import os
|
||||||
|
return os.path.splitext(file_path)[1].lower()
|
||||||
|
|
||||||
|
def parse(self) -> list[DocElement]:
    """Parse the Python file and extract its documentation elements.

    The source is read with ``_read_content`` and parsed with ``ast.parse``.
    If the module has a docstring, a ``MODULE`` element is emitted first.
    Every top-level function (sync or async) is then passed to
    ``_parse_function`` and every top-level class to ``_parse_class``.

    Returns:
        The collected elements; the same list is stored on ``self.elements``.

    Raises:
        ValueError: If reading or parsing raises ``SyntaxError``. The
            original error is attached as ``__cause__``.
    """
    try:
        self.content = self._read_content()
        self.tree = ast.parse(self.content)
    except SyntaxError as e:
        # Chain the cause so the original location details are not lost.
        raise ValueError(f"Syntax error in Python file: {e}") from e

    self.elements = []

    module_docstring = ast.get_docstring(self.tree)
    if module_docstring:
        self.elements.append(
            DocElement(
                name=self._get_module_name(),
                element_type=ElementType.MODULE,
                description=module_docstring,
                full_docstring=module_docstring,
                source_file=self.file_path,
            )
        )

    # Only direct children of the module are visited here; nested
    # definitions are left to the per-node helpers.
    for node in ast.iter_child_nodes(self.tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            self._parse_function(node)
        elif isinstance(node, ast.ClassDef):
            self._parse_class(node)

    return self.elements
|
||||||
|
|
||||||
|
def _get_module_name(self) -> str:
|
||||||
|
"""Extract module name from file path."""
|
||||||
|
import os
|
||||||
|
base = os.path.basename(self.file_path)
|
||||||
|
return os.path.splitext(base)[0]
|
||||||
|
|
||||||
|
def _parse_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
    """Describe a function node as a ``DocElement`` and append it to ``self.elements``.

    Signature details come from the AST node; the summary, return
    description, raises and examples are extracted from the docstring text.
    """
    doc = ast.get_docstring(node) or ""

    # A definition starting in column 0 is reported as a function,
    # anything indented as a method.
    if node.col_offset == 0:
        kind = ElementType.FUNCTION
    else:
        kind = ElementType.METHOD

    decorator_names = [self._format_decorator(dec) for dec in node.decorator_list]

    self.elements.append(
        DocElement(
            name=node.name,
            element_type=kind,
            description=self._extract_summary(doc),
            full_docstring=doc,
            parameters=self._extract_parameters(node.args),
            return_type=self._extract_return_type(node.returns),
            return_description=self._extract_return_description(doc),
            raises=self._extract_raises(doc),
            examples=self._extract_examples(doc),
            source_file=self.file_path,
            line_number=node.lineno,
            visibility=self._get_visibility(node.name),
            decorators=decorator_names,
        )
    )
|
||||||
|
|
||||||
|
def _parse_class(self, node: ast.ClassDef) -> None:
    """Describe a class node and its methods, appending them to ``self.elements``.

    Base classes are recorded as ``Parameter`` entries on the class element.
    The elements for the methods are appended first and the class element
    itself last.
    """
    doc = ast.get_docstring(node) or ""

    # One pseudo-parameter per base class; empty when there are no bases.
    base_params = [
        Parameter(name=base_name, description=f"Base class: {base_name}")
        for base_name in [self._get_base_name(base) for base in node.bases]
    ]

    class_elem = DocElement(
        name=node.name,
        element_type=ElementType.CLASS,
        description=self._extract_summary(doc),
        full_docstring=doc,
        attributes=self._extract_class_attributes(node),
        parameters=base_params,
        source_file=self.file_path,
        line_number=node.lineno,
        visibility=self._get_visibility(node.name),
        decorators=[self._format_decorator(dec) for dec in node.decorator_list],
    )

    # Only functions defined directly in the class body count as methods.
    self.elements.extend(
        self._parse_method(member, node.name)
        for member in node.body
        if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef))
    )
    self.elements.append(class_elem)
|
||||||
|
|
||||||
|
def _parse_method(self, node: ast.FunctionDef | ast.AsyncFunctionDef, class_name: str) -> DocElement:
    """Build the ``DocElement`` for a method defined inside a class.

    Args:
        node: The function node taken from the class body.
        class_name: Name of the enclosing class; used to qualify the
            element name as ``Class.method``.

    Returns:
        The element describing the method. It is not appended to
        ``self.elements`` here.
    """
    docstring = ast.get_docstring(node) or ""

    # A @staticmethod has no implicit self/cls, so its first parameter is a
    # real one and must not be dropped.
    is_static = any(
        (isinstance(dec, ast.Name) and dec.id == "staticmethod")
        or (isinstance(dec, ast.Attribute) and dec.attr == "staticmethod")
        for dec in node.decorator_list
    )
    parameters = self._extract_parameters(node.args, skip_first=not is_static)
    returns = self._extract_return_type(node.returns)

    return DocElement(
        name=f"{class_name}.{node.name}",
        element_type=ElementType.METHOD,
        description=self._extract_summary(docstring),
        full_docstring=docstring,
        parameters=parameters,
        return_type=returns,
        return_description=self._extract_return_description(docstring),
        raises=self._extract_raises(docstring),
        examples=self._extract_examples(docstring),
        source_file=self.file_path,
        line_number=node.lineno,
        visibility=self._get_visibility(node.name),
        decorators=[self._format_decorator(d) for d in node.decorator_list],
    )
|
||||||
|
|
||||||
|
def _extract_parameters(self, args: ast.arguments, skip_first: bool = False) -> list[Parameter]:
    """Convert an ``ast.arguments`` node into a list of ``Parameter`` objects.

    Parameters are emitted in signature order: positional-only and regular
    positional parameters, ``*args``, keyword-only parameters, ``**kwargs``.

    Args:
        args: The arguments node of a function definition.
        skip_first: When true, drop the first positional parameter
            (used by callers to omit ``self`` / ``cls``).

    Returns:
        One ``Parameter`` per parameter in the signature.
    """
    params = []

    # Positional-only parameters come before the regular ones, so an
    # implicit first parameter may live in either list.
    positional = [*args.posonlyargs, *args.args]
    if skip_first and positional:
        positional = positional[1:]

    for arg in positional:
        params.append(
            Parameter(
                name=arg.arg,
                type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None,
                default_value=self._get_default_value(args, arg.arg),
            )
        )

    if args.vararg:
        params.append(
            Parameter(
                name=f"*{args.vararg.arg}",
                type_hint="*args",
            )
        )

    # kw_defaults is aligned one-to-one with kwonlyargs; None marks a
    # keyword-only parameter that has no default.
    for arg, default in zip(args.kwonlyargs, args.kw_defaults):
        params.append(
            Parameter(
                name=arg.arg,
                type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None,
                default_value=ast.unparse(default) if default is not None else None,
            )
        )

    if args.kwarg:
        params.append(
            Parameter(
                name=f"**{args.kwarg.arg}",
                type_hint="**kwargs",
            )
        )

    return params
|
||||||
|
|
||||||
|
def _get_type_hint(self, annotation: ast.AST) -> Optional[str]:
|
||||||
|
"""Get type hint as string."""
|
||||||
|
if annotation is None:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return ast.unparse(annotation)
|
||||||
|
except Exception:
|
||||||
|
return "Any"
|
||||||
|
|
||||||
|
def _get_default_value(self, args: ast.arguments, arg_name: str) -> Optional[str]:
|
||||||
|
"""Get default value for a parameter."""
|
||||||
|
defaults = list(args.defaults)
|
||||||
|
num_defaults = len(defaults)
|
||||||
|
num_args = len(args.args)
|
||||||
|
|
||||||
|
if num_defaults > 0:
|
||||||
|
start_idx = num_args - num_defaults
|
||||||
|
for i, arg in enumerate(args.args):
|
||||||
|
if arg.arg == arg_name:
|
||||||
|
idx = start_idx + i
|
||||||
|
if idx < len(defaults):
|
||||||
|
try:
|
||||||
|
return ast.unparse(defaults[idx - start_idx])
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _extract_return_type(self, returns: ast.AST | None) -> Optional[str]:
|
||||||
|
"""Extract return type from AST."""
|
||||||
|
if returns is None:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return ast.unparse(returns)
|
||||||
|
except Exception:
|
||||||
|
return "Any"
|
||||||
|
|
||||||
|
def _extract_summary(self, docstring: str) -> str:
|
||||||
|
"""Extract first line or paragraph as summary."""
|
||||||
|
if not docstring:
|
||||||
|
return ""
|
||||||
|
lines = docstring.strip().split("\n")
|
||||||
|
if not lines:
|
||||||
|
return ""
|
||||||
|
summary = lines[0].strip()
|
||||||
|
if len(lines) > 1 and not lines[1].strip():
|
||||||
|
for i in range(1, len(lines)):
|
||||||
|
if lines[i].strip():
|
||||||
|
summary = lines[i].strip()
|
||||||
|
break
|
||||||
|
return summary
|
||||||
|
|
||||||
|
def _extract_return_description(self, docstring: str) -> Optional[str]:
|
||||||
|
"""Extract return description from docstring."""
|
||||||
|
patterns = [
|
||||||
|
r"(?:^|\n)\s*Returns?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
|
||||||
|
r"(?:^|\n)\s*Returns?\s+(.+?)(?:\n\n|$)",
|
||||||
|
]
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
desc = match.group(1).strip()
|
||||||
|
lines = desc.split("\n")
|
||||||
|
result = []
|
||||||
|
for line in lines:
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped and not stripped.startswith("-"):
|
||||||
|
result.append(stripped)
|
||||||
|
elif stripped.startswith("-"):
|
||||||
|
break
|
||||||
|
return "\n".join(result) if result else None
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _extract_raises(self, docstring: str) -> list[tuple[str, str]]:
|
||||||
|
"""Extract raises information from docstring."""
|
||||||
|
raises = []
|
||||||
|
pattern = r"(?:^|\n)\s*Raises?\s*:\s*(.+?)(?:\n\n|$)"
|
||||||
|
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
content = match.group(1)
|
||||||
|
lines = content.split("\n")
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if line.startswith("-"):
|
||||||
|
parts = line[1:].split(":", 1)
|
||||||
|
if len(parts) == 2:
|
||||||
|
exc_type = parts[0].strip()
|
||||||
|
exc_desc = parts[1].strip()
|
||||||
|
raises.append((exc_type, exc_desc))
|
||||||
|
return raises
|
||||||
|
|
||||||
|
def _extract_examples(self, docstring: str) -> list[str]:
|
||||||
|
"""Extract examples from docstring."""
|
||||||
|
examples = []
|
||||||
|
pattern = r"(?:^|\n)\s*Example[s]?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)"
|
||||||
|
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
content = match.group(1).strip()
|
||||||
|
examples.append(content)
|
||||||
|
return examples
|
||||||
|
|
||||||
|
def _get_visibility(self, name: str) -> str:
|
||||||
|
"""Determine visibility based on name."""
|
||||||
|
if name.startswith("_"):
|
||||||
|
if name.startswith("__"):
|
||||||
|
return "dunder"
|
||||||
|
return "private"
|
||||||
|
return "public"
|
||||||
|
|
||||||
|
def _format_decorator(self, decorator: ast.AST) -> str:
|
||||||
|
"""Format decorator as string."""
|
||||||
|
if isinstance(decorator, ast.Name):
|
||||||
|
return f"@{decorator.id}"
|
||||||
|
elif isinstance(decorator, ast.Attribute):
|
||||||
|
return f"@{ast.unparse(decorator)}"
|
||||||
|
return f"@{ast.unparse(decorator)}"
|
||||||
|
|
||||||
|
def _get_base_name(self, base: ast.AST) -> str:
|
||||||
|
"""Get base class name."""
|
||||||
|
if isinstance(base, ast.Name):
|
||||||
|
return base.id
|
||||||
|
return ast.unparse(base)
|
||||||
|
|
||||||
|
def _extract_class_attributes(self, node: ast.ClassDef) -> list[tuple[str, Optional[str], Optional[str]]]:
|
||||||
|
"""Extract class attributes from docstring."""
|
||||||
|
attributes = []
|
||||||
|
docstring = ast.get_docstring(node) or ""
|
||||||
|
|
||||||
|
patterns = [
|
||||||
|
r"(?:^|\n)\s*Attributes?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
|
||||||
|
]
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
content = match.group(1)
|
||||||
|
lines = content.split("\n")
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if line.startswith("-"):
|
||||||
|
parts = line[1:].split(":", 1)
|
||||||
|
if len(parts) >= 1:
|
||||||
|
attr_name = parts[0].strip()
|
||||||
|
attr_type = None
|
||||||
|
attr_desc = None
|
||||||
|
if len(parts) > 1:
|
||||||
|
rest = parts[1].strip()
|
||||||
|
if " " in rest:
|
||||||
|
attr_type = rest.split(" ", 1)[0]
|
||||||
|
attr_desc = rest.split(" ", 1)[1]
|
||||||
|
else:
|
||||||
|
attr_type = rest
|
||||||
|
attributes.append((attr_name, attr_type, attr_desc))
|
||||||
|
break
|
||||||
|
|
||||||
|
return attributes
|
||||||
Reference in New Issue
Block a user