"""Python parser using AST and regex patterns.""" import ast import re from typing import Optional, List from .base import Parser, DocElement, ElementType, Parameter class PythonParser(Parser): """Parser for Python source files.""" EXTENSIONS = [".py", ".pyw"] def __init__(self, file_path: str): super().__init__(file_path) self.tree: Optional[ast.AST] = None def get_language_name(self) -> str: return "python" @classmethod def supports_file(cls, file_path: str) -> bool: ext = cls._get_extension(file_path) return ext in cls.EXTENSIONS @staticmethod def _get_extension(file_path: str) -> str: import os return os.path.splitext(file_path)[1].lower() def parse(self) -> list[DocElement]: """Parse Python file and extract documentation elements.""" try: self.content = self._read_content() self.tree = ast.parse(self.content) self.elements = [] module_docstring = ast.get_docstring(self.tree) if module_docstring: module_elem = DocElement( name=self._get_module_name(), element_type=ElementType.MODULE, description=module_docstring, full_docstring=module_docstring, source_file=self.file_path, ) self.elements.append(module_elem) for node in ast.iter_child_nodes(self.tree): if isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef): self._parse_function(node) elif isinstance(node, ast.ClassDef): self._parse_class(node) return self.elements except SyntaxError as e: raise ValueError(f"Syntax error in Python file: {e}") def _get_module_name(self) -> str: """Extract module name from file path.""" import os base = os.path.basename(self.file_path) return os.path.splitext(base)[0] def _parse_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None: """Parse a function definition.""" docstring = ast.get_docstring(node) or "" parameters = self._extract_parameters(node.args) returns = self._extract_return_type(node.returns) elem = DocElement( name=node.name, element_type=ElementType.FUNCTION if node.col_offset == 0 else ElementType.METHOD, description=self._extract_summary(docstring), full_docstring=docstring, parameters=parameters, return_type=returns, return_description=self._extract_return_description(docstring), raises=self._extract_raises(docstring), examples=self._extract_examples(docstring), source_file=self.file_path, line_number=node.lineno, visibility=self._get_visibility(node.name), decorators=[self._format_decorator(d) for d in node.decorator_list], ) self.elements.append(elem) def _parse_class(self, node: ast.ClassDef) -> None: """Parse a class definition.""" docstring = ast.get_docstring(node) or "" bases = [self._get_base_name(base) for base in node.bases] attributes = self._extract_class_attributes(node) methods = [] for item in node.body: if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)): methods.append(item) elem = DocElement( name=node.name, element_type=ElementType.CLASS, description=self._extract_summary(docstring), full_docstring=docstring, attributes=attributes, parameters=[Parameter(name=b, description=f"Base class: {b}") for b in bases] if bases else [], source_file=self.file_path, line_number=node.lineno, visibility=self._get_visibility(node.name), decorators=[self._format_decorator(d) for d in node.decorator_list], ) for method in methods: method_elem = self._parse_method(method, node.name) self.elements.append(method_elem) self.elements.append(elem) def _parse_method(self, node: ast.FunctionDef | ast.AsyncFunctionDef, class_name: str) -> DocElement: """Parse a method within a class.""" docstring = ast.get_docstring(node) or "" parameters = self._extract_parameters(node.args, skip_first=True) returns = self._extract_return_type(node.returns) return DocElement( name=f"{class_name}.{node.name}", element_type=ElementType.METHOD, description=self._extract_summary(docstring), full_docstring=docstring, parameters=parameters, return_type=returns, return_description=self._extract_return_description(docstring), raises=self._extract_raises(docstring), examples=self._extract_examples(docstring), source_file=self.file_path, line_number=node.lineno, visibility=self._get_visibility(node.name), decorators=[self._format_decorator(d) for d in node.decorator_list], ) def _extract_parameters(self, args: ast.arguments, skip_first: bool = False) -> list[Parameter]: """Extract function parameters from AST arguments.""" params = [] args_list = args.args if skip_first and args_list: args_list = args_list[1:] for arg in args_list: param = Parameter( name=arg.arg, type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None, default_value=self._get_default_value(args, arg.arg), ) params.append(param) if args.vararg: params.append(Parameter( name=f"*{args.vararg.arg}", type_hint="*args", )) if args.kwarg: params.append(Parameter( name=f"**{args.kwarg.arg}", type_hint="**kwargs", )) return params def _get_type_hint(self, annotation: ast.AST) -> Optional[str]: """Get type hint as string.""" if annotation is None: return None try: return ast.unparse(annotation) except Exception: return "Any" def _get_default_value(self, args: ast.arguments, arg_name: str) -> Optional[str]: """Get default value for a parameter.""" defaults = list(args.defaults) num_defaults = len(defaults) num_args = len(args.args) if num_defaults > 0: start_idx = num_args - num_defaults for i, arg in enumerate(args.args): if arg.arg == arg_name: idx = start_idx + i if idx < len(defaults): try: return ast.unparse(defaults[idx - start_idx]) except Exception: return None return None def _extract_return_type(self, returns: ast.AST | None) -> Optional[str]: """Extract return type from AST.""" if returns is None: return None try: return ast.unparse(returns) except Exception: return "Any" def _extract_summary(self, docstring: str) -> str: """Extract first line or paragraph as summary.""" if not docstring: return "" lines = docstring.strip().split("\n") if not lines: return "" summary = lines[0].strip() if len(lines) > 1 and not lines[1].strip(): for i in range(1, len(lines)): if lines[i].strip(): summary = lines[i].strip() break return summary def _extract_return_description(self, docstring: str) -> Optional[str]: """Extract return description from docstring.""" patterns = [ r"(?:^|\n)\s*Returns?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)", r"(?:^|\n)\s*Returns?\s+(.+?)(?:\n\n|$)", ] for pattern in patterns: match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE) if match: desc = match.group(1).strip() lines = desc.split("\n") result = [] for line in lines: stripped = line.strip() if stripped and not stripped.startswith("-"): result.append(stripped) elif stripped.startswith("-"): break return "\n".join(result) if result else None return None def _extract_raises(self, docstring: str) -> list[tuple[str, str]]: """Extract raises information from docstring.""" raises = [] pattern = r"(?:^|\n)\s*Raises?\s*:\s*(.+?)(?:\n\n|$)" match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE) if match: content = match.group(1) lines = content.split("\n") for line in lines: line = line.strip() if line.startswith("-"): parts = line[1:].split(":", 1) if len(parts) == 2: exc_type = parts[0].strip() exc_desc = parts[1].strip() raises.append((exc_type, exc_desc)) return raises def _extract_examples(self, docstring: str) -> list[str]: """Extract examples from docstring.""" examples = [] pattern = r"(?:^|\n)\s*Example[s]?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)" match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE) if match: content = match.group(1).strip() examples.append(content) return examples def _get_visibility(self, name: str) -> str: """Determine visibility based on name.""" if name.startswith("_"): if name.startswith("__"): return "dunder" return "private" return "public" def _format_decorator(self, decorator: ast.AST) -> str: """Format decorator as string.""" if isinstance(decorator, ast.Name): return f"@{decorator.id}" elif isinstance(decorator, ast.Attribute): return f"@{ast.unparse(decorator)}" return f"@{ast.unparse(decorator)}" def _get_base_name(self, base: ast.AST) -> str: """Get base class name.""" if isinstance(base, ast.Name): return base.id return ast.unparse(base) def _extract_class_attributes(self, node: ast.ClassDef) -> list[tuple[str, Optional[str], Optional[str]]]: """Extract class attributes from docstring.""" attributes = [] docstring = ast.get_docstring(node) or "" patterns = [ r"(?:^|\n)\s*Attributes?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)", ] for pattern in patterns: match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE) if match: content = match.group(1) lines = content.split("\n") for line in lines: line = line.strip() if line.startswith("-"): parts = line[1:].split(":", 1) if len(parts) >= 1: attr_name = parts[0].strip() attr_type = None attr_desc = None if len(parts) > 1: rest = parts[1].strip() if " " in rest: attr_type = rest.split(" ", 1)[0] attr_desc = rest.split(" ", 1)[1] else: attr_type = rest attributes.append((attr_name, attr_type, attr_desc)) break return attributes