# Origin: patch e8e939cff96d2f3450dd5f4afc4d1081ffed6f62 ("Add generators and
# parsers modules"), new file .code_doc_cli/parsers/python_parser.py
"""Python parser using AST and regex patterns."""

import ast
import re
from typing import Optional, List
from .base import Parser, DocElement, ElementType, Parameter


class PythonParser(Parser):
    """Parser for Python source files.

    Structure (module, classes, functions, methods, signatures) is read from
    the ``ast`` tree; the free-text parts of docstrings (returns, raises,
    examples, attributes) are picked out with regular expressions.

    NOTE(review): relies on ``_read_content()`` and on the ``file_path``,
    ``content`` and ``elements`` attributes, none of which are defined in this
    file - presumably provided by the base ``Parser``; confirm there.
    """

    EXTENSIONS = [".py", ".pyw"]

    def __init__(self, file_path: str):
        """Initialise the parser for ``file_path``; no parsing happens yet."""
        super().__init__(file_path)
        # Populated by parse(); stays None until then.
        self.tree: Optional[ast.AST] = None

    def get_language_name(self) -> str:
        """Return the language identifier handled by this parser."""
        return "python"

    @classmethod
    def supports_file(cls, file_path: str) -> bool:
        """Return True when ``file_path`` ends with one of ``EXTENSIONS``."""
        return cls._get_extension(file_path) in cls.EXTENSIONS

    @staticmethod
    def _get_extension(file_path: str) -> str:
        """Return the lower-cased extension of ``file_path`` (with the dot)."""
        import os
        return os.path.splitext(file_path)[1].lower()

    def parse(self) -> list[DocElement]:
        """Parse Python file and extract documentation elements.

        Only top-level functions and classes (plus the methods directly in a
        class body) are visited; nested definitions are not descended into.

        Returns:
            The list stored in ``self.elements``: an optional module element
            first (only when the module has a docstring), then one element
            per top-level function, class and method.

        Raises:
            ValueError: If the file content is not valid Python. The original
                ``SyntaxError`` is chained as the cause.
        """
        try:
            self.content = self._read_content()
            self.tree = ast.parse(self.content)
        except SyntaxError as e:
            raise ValueError(f"Syntax error in Python file: {e}") from e

        self.elements = []

        module_docstring = ast.get_docstring(self.tree)
        if module_docstring:
            self.elements.append(
                DocElement(
                    name=self._get_module_name(),
                    element_type=ElementType.MODULE,
                    description=module_docstring,
                    full_docstring=module_docstring,
                    source_file=self.file_path,
                )
            )

        for node in ast.iter_child_nodes(self.tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._parse_function(node)
            elif isinstance(node, ast.ClassDef):
                self._parse_class(node)

        return self.elements

    def _get_module_name(self) -> str:
        """Extract module name from file path (basename without extension)."""
        import os
        base = os.path.basename(self.file_path)
        return os.path.splitext(base)[0]

    def _parse_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
        """Build a DocElement for a function and append it to ``self.elements``.

        The element is typed FUNCTION when the definition starts in column 0
        and METHOD otherwise.
        """
        docstring = ast.get_docstring(node) or ""

        elem = DocElement(
            name=node.name,
            element_type=ElementType.FUNCTION if node.col_offset == 0 else ElementType.METHOD,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            parameters=self._extract_parameters(node.args),
            return_type=self._extract_return_type(node.returns),
            return_description=self._extract_return_description(docstring),
            raises=self._extract_raises(docstring),
            examples=self._extract_examples(docstring),
            source_file=self.file_path,
            line_number=node.lineno,
            visibility=self._get_visibility(node.name),
            decorators=[self._format_decorator(d) for d in node.decorator_list],
        )
        self.elements.append(elem)

    def _parse_class(self, node: ast.ClassDef) -> None:
        """Build DocElements for a class and the methods in its body.

        Base classes are recorded as ``Parameter`` entries on the class
        element. The method elements are appended to ``self.elements``
        *before* the class element itself; that ordering is kept as-is.
        """
        docstring = ast.get_docstring(node) or ""
        bases = [self._get_base_name(base) for base in node.bases]

        elem = DocElement(
            name=node.name,
            element_type=ElementType.CLASS,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            attributes=self._extract_class_attributes(node),
            parameters=[Parameter(name=b, description=f"Base class: {b}") for b in bases],
            source_file=self.file_path,
            line_number=node.lineno,
            visibility=self._get_visibility(node.name),
            decorators=[self._format_decorator(d) for d in node.decorator_list],
        )

        for item in node.body:
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self.elements.append(self._parse_method(item, node.name))

        self.elements.append(elem)

    def _parse_method(self, node: ast.FunctionDef | ast.AsyncFunctionDef, class_name: str) -> DocElement:
        """Build a DocElement for a method, named ``"<class_name>.<method>"``.

        The implicit first parameter (``self``/``cls``) is left out of the
        documented parameters, except for ``@staticmethod`` methods, which
        have no implicit parameter.
        """
        docstring = ast.get_docstring(node) or ""
        decorators = [self._format_decorator(d) for d in node.decorator_list]
        # A static method's first parameter is a real one; do not drop it.
        has_implicit_first = "@staticmethod" not in decorators

        return DocElement(
            name=f"{class_name}.{node.name}",
            element_type=ElementType.METHOD,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            parameters=self._extract_parameters(node.args, skip_first=has_implicit_first),
            return_type=self._extract_return_type(node.returns),
            return_description=self._extract_return_description(docstring),
            raises=self._extract_raises(docstring),
            examples=self._extract_examples(docstring),
            source_file=self.file_path,
            line_number=node.lineno,
            visibility=self._get_visibility(node.name),
            decorators=decorators,
        )

    def _extract_parameters(self, args: ast.arguments, skip_first: bool = False) -> list[Parameter]:
        """Extract function parameters from AST arguments.

        Parameters are emitted in signature order: positional-only and
        regular positional parameters, ``*args``, keyword-only parameters,
        then ``**kwargs``.

        Args:
            args: The ``arguments`` node of a function definition.
            skip_first: Drop the first positional parameter (used for the
                implicit ``self``/``cls`` of methods).
        """
        params = []

        positional = [*args.posonlyargs, *args.args]
        if skip_first and positional:
            positional = positional[1:]

        for arg in [*positional, *args.kwonlyargs]:
            if arg in args.kwonlyargs and args.vararg and not any(
                p.name == f"*{args.vararg.arg}" for p in params
            ):
                # Keep *args ahead of the keyword-only parameters.
                params.append(Parameter(
                    name=f"*{args.vararg.arg}",
                    type_hint="*args",
                ))
            params.append(Parameter(
                name=arg.arg,
                type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None,
                default_value=self._get_default_value(args, arg.arg),
            ))

        if args.vararg and not args.kwonlyargs:
            params.append(Parameter(
                name=f"*{args.vararg.arg}",
                type_hint="*args",
            ))

        if args.kwarg:
            params.append(Parameter(
                name=f"**{args.kwarg.arg}",
                type_hint="**kwargs",
            ))

        return params

    def _get_type_hint(self, annotation: ast.AST) -> Optional[str]:
        """Get type hint as source text.

        Returns None when there is no annotation and ``"Any"`` when the
        annotation cannot be unparsed (best effort).
        """
        if annotation is None:
            return None
        try:
            return ast.unparse(annotation)
        except Exception:
            return "Any"

    def _get_default_value(self, args: ast.arguments, arg_name: str) -> Optional[str]:
        """Get default value for a parameter as source text.

        ``args.defaults`` lines up with the *last* positional parameters
        (positional-only followed by regular ones); ``args.kw_defaults`` has
        one entry per keyword-only parameter, ``None`` meaning "no default".

        Returns:
            The unparsed default expression, or None when the parameter has
            no default, is not found, or the expression cannot be unparsed.
        """
        positional = [*args.posonlyargs, *args.args]
        first_default = len(positional) - len(args.defaults)

        default_node = None
        for index, arg in enumerate(positional):
            if arg.arg == arg_name:
                if index >= first_default:
                    default_node = args.defaults[index - first_default]
                break
        else:
            for arg, kw_default in zip(args.kwonlyargs, args.kw_defaults):
                if arg.arg == arg_name:
                    default_node = kw_default
                    break

        if default_node is None:
            return None
        try:
            return ast.unparse(default_node)
        except Exception:
            # Best effort: an unprintable default is reported as "no default".
            return None

    def _extract_return_type(self, returns: ast.AST | None) -> Optional[str]:
        """Extract the return annotation as source text (see ``_get_type_hint``)."""
        return self._get_type_hint(returns)

    def _extract_summary(self, docstring: str) -> str:
        """Extract the first line of the docstring as summary.

        Returns an empty string for an empty or whitespace-only docstring.
        """
        if not docstring:
            return ""
        return docstring.strip().split("\n", 1)[0].strip()

    def _extract_return_description(self, docstring: str) -> Optional[str]:
        """Extract return description from docstring.

        Tries a ``Return(s):`` section first, then a looser ``Return(s) ...``
        form. The first pattern that matches decides the result: its
        non-empty lines, up to the first line starting with ``-``, joined by
        newlines - or None when nothing is left.
        """
        patterns = [
            r"(?:^|\n)\s*Returns?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
            r"(?:^|\n)\s*Returns?\s+(.+?)(?:\n\n|$)",
        ]
        for pattern in patterns:
            match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
            if not match:
                continue
            kept = []
            for line in match.group(1).strip().split("\n"):
                stripped = line.strip()
                if stripped.startswith("-"):
                    break
                if stripped:
                    kept.append(stripped)
            return "\n".join(kept) if kept else None
        return None

    def _extract_raises(self, docstring: str) -> list[tuple[str, str]]:
        """Extract raises information from docstring.

        Only bullet lines of the form ``- ExcType: description`` inside a
        ``Raise(s):`` section are recognised.

        Returns:
            A list of ``(exception_type, description)`` tuples.
        """
        raises = []
        pattern = r"(?:^|\n)\s*Raises?\s*:\s*(.+?)(?:\n\n|$)"
        match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
        if match:
            for line in match.group(1).split("\n"):
                line = line.strip()
                if not line.startswith("-"):
                    continue
                exc_type, sep, exc_desc = line[1:].partition(":")
                if sep:
                    raises.append((exc_type.strip(), exc_desc.strip()))
        return raises

    def _extract_examples(self, docstring: str) -> list[str]:
        """Extract examples from docstring.

        Returns a list holding the stripped text of the first
        ``Example(s):`` section, or an empty list when there is none.
        """
        examples = []
        pattern = r"(?:^|\n)\s*Example[s]?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)"
        match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
        if match:
            examples.append(match.group(1).strip())
        return examples

    def _get_visibility(self, name: str) -> str:
        """Determine visibility based on name.

        Returns ``"dunder"`` for ``__name__``, ``"private"`` for any other
        name starting with an underscore (including name-mangled
        ``__name``), and ``"public"`` otherwise.
        """
        if not name.startswith("_"):
            return "public"
        if name.startswith("__") and name.endswith("__") and len(name) > 4:
            return "dunder"
        return "private"

    def _format_decorator(self, decorator: ast.AST) -> str:
        """Format decorator as ``"@<source text>"``."""
        return f"@{ast.unparse(decorator)}"

    def _get_base_name(self, base: ast.AST) -> str:
        """Get base class name as source text (e.g. ``Base`` or ``pkg.Base``)."""
        return ast.unparse(base)

    def _extract_class_attributes(self, node: ast.ClassDef) -> list[tuple[str, Optional[str], Optional[str]]]:
        """Extract class attributes from the class docstring.

        Reads bullet lines of the form ``- name: type description`` from the
        ``Attribute(s):`` section.

        Returns:
            A list of ``(name, type, description)`` tuples; ``type`` and
            ``description`` are None when the line does not provide them.
        """
        attributes = []
        docstring = ast.get_docstring(node) or ""

        pattern = r"(?:^|\n)\s*Attributes?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)"
        match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
        if not match:
            return attributes

        for line in match.group(1).split("\n"):
            line = line.strip()
            if not line.startswith("-"):
                continue
            attr_name, sep, rest = line[1:].partition(":")
            attr_type = None
            attr_desc = None
            if sep:
                # First word after the colon is the type, the remainder the
                # description.
                rest = rest.strip()
                if " " in rest:
                    attr_type, attr_desc = rest.split(" ", 1)
                else:
                    attr_type = rest
            attributes.append((attr_name.strip(), attr_type, attr_desc))

        return attributes