From e8e939cff96d2f3450dd5f4afc4d1081ffed6f62 Mon Sep 17 00:00:00 2001
From: 7000pctAUTO <lukems3823@gmail.com>
Date: Thu, 29 Jan 2026 16:51:53 +0000
Subject: [PATCH] Add generators and parsers modules

---
 .code_doc_cli/parsers/python_parser.py | 325 +++++++++++++++++++++++++
 1 file changed, 325 insertions(+)
 create mode 100644 .code_doc_cli/parsers/python_parser.py

diff --git a/.code_doc_cli/parsers/python_parser.py b/.code_doc_cli/parsers/python_parser.py
new file mode 100644
index 0000000..349f240
--- /dev/null
+++ b/.code_doc_cli/parsers/python_parser.py
@@ -0,0 +1,325 @@
+"""Python parser using AST and regex patterns."""
+
+import ast
+import re
+from typing import Optional, List
+from .base import Parser, DocElement, ElementType, Parameter
+
+
+class PythonParser(Parser):
+    """Parser for Python source files."""
+
+    EXTENSIONS = [".py", ".pyw"]
+
+    def __init__(self, file_path: str) -> None:
+        super().__init__(file_path)  # the base Parser (imported from .base) receives the path
+        self.tree: Optional[ast.AST] = None  # set by parse(); stays None until then
+
+    def get_language_name(self) -> str:
+        return "python"  # name of the language handled by this parser
+
+    @classmethod
+    def supports_file(cls, file_path: str) -> bool:
+        """Tell whether ``file_path`` ends in one of ``cls.EXTENSIONS``."""
+        return cls._get_extension(file_path) in cls.EXTENSIONS
+
+    @staticmethod
+    def _get_extension(file_path: str) -> str:
+        import os  # function-local import; os is not imported at file level
+        return os.path.splitext(file_path)[1].lower()  # e.g. "A.PY" -> ".py"; "" without an extension
+
+    def parse(self) -> list[DocElement]:
+        """Parse the Python file and return its documentation elements.
+
+        Raises:
+            ValueError: If the source is not valid Python (SyntaxError chained).
+        """
+        try:
+            self.content = self._read_content()
+            self.tree = ast.parse(self.content)
+            self.elements = []
+            module_docstring = ast.get_docstring(self.tree)
+            if module_docstring:
+                self.elements.append(DocElement(
+                    name=self._get_module_name(),
+                    element_type=ElementType.MODULE,
+                    description=module_docstring,
+                    full_docstring=module_docstring,
+                    source_file=self.file_path,
+                ))
+            # Only top-level definitions are visited here.
+            for node in ast.iter_child_nodes(self.tree):
+                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                    self._parse_function(node)
+                elif isinstance(node, ast.ClassDef):
+                    self._parse_class(node)
+            return self.elements
+        except SyntaxError as e:
+            raise ValueError(f"Syntax error in Python file: {e}") from e
+
+    def _get_module_name(self) -> str:
+        """Return the file's base name with its extension removed."""
+        import os
+        stem, _extension = os.path.splitext(os.path.basename(self.file_path))
+        return stem
+
+    def _parse_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
+        """Append a DocElement describing ``node`` to ``self.elements``.
+
+        The signature is read from the AST, the prose from the docstring.
+        """
+        doc = ast.get_docstring(node) or ""
+        kind = ElementType.FUNCTION if node.col_offset == 0 else ElementType.METHOD
+        self.elements.append(DocElement(
+            name=node.name,
+            element_type=kind,
+            description=self._extract_summary(doc),
+            full_docstring=doc,
+            parameters=self._extract_parameters(node.args),
+            return_type=self._extract_return_type(node.returns),
+            return_description=self._extract_return_description(doc),
+            raises=self._extract_raises(doc),
+            examples=self._extract_examples(doc),
+            source_file=self.file_path,
+            line_number=node.lineno,
+            visibility=self._get_visibility(node.name),
+            decorators=[self._format_decorator(d) for d in node.decorator_list],
+        ))
+
+    def _parse_class(self, node: ast.ClassDef) -> None:
+        """Record a class definition together with its methods.
+
+        Functions defined directly in the class body are appended to
+        ``self.elements`` first, followed by the element for the class.
+        Nested classes and other statements in the body are not visited.
+        """
+        doc = ast.get_docstring(node) or ""
+        base_names = [self._get_base_name(base) for base in node.bases]
+        attribute_rows = self._extract_class_attributes(node)
+
+        class_elem = DocElement(
+            name=node.name,
+            element_type=ElementType.CLASS,
+            description=self._extract_summary(doc),
+            full_docstring=doc,
+            attributes=attribute_rows,
+            # Base classes are stored as parameters, one entry per base.
+            parameters=[Parameter(name=b, description=f"Base class: {b}") for b in base_names],
+            source_file=self.file_path,
+            line_number=node.lineno,
+            visibility=self._get_visibility(node.name),
+            decorators=[self._format_decorator(d) for d in node.decorator_list],
+        )
+
+        # The class element is built above but appended after its methods.
+        for member in node.body:
+            if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                self.elements.append(self._parse_method(member, node.name))
+        self.elements.append(class_elem)
+
+    def _parse_method(self, node: ast.FunctionDef | ast.AsyncFunctionDef, class_name: str) -> DocElement:
+        """Build the DocElement for a method, named ``Class.method``."""
+        doc = ast.get_docstring(node) or ""
+        decorators = [self._format_decorator(d) for d in node.decorator_list]
+        # A @staticmethod has no implicit self/cls, so nothing is skipped for it.
+        has_receiver = "@staticmethod" not in decorators
+        return DocElement(
+            name=f"{class_name}.{node.name}",
+            element_type=ElementType.METHOD,
+            description=self._extract_summary(doc),
+            full_docstring=doc,
+            parameters=self._extract_parameters(node.args, skip_first=has_receiver),
+            return_type=self._extract_return_type(node.returns),
+            return_description=self._extract_return_description(doc),
+            raises=self._extract_raises(doc),
+            examples=self._extract_examples(doc),
+            source_file=self.file_path,
+            line_number=node.lineno,
+            visibility=self._get_visibility(node.name),
+            decorators=decorators,
+        )
+
+    def _extract_parameters(self, args: ast.arguments, skip_first: bool = False) -> list[Parameter]:
+        """Build Parameter entries for a function signature, in signature order.
+
+        Positional-only and keyword-only parameters are included alongside
+        the regular ones, ``*args`` and ``**kwargs``.
+
+        Args:
+            args: The ``ast.arguments`` node of the function.
+            skip_first: Drop the first positional parameter (``self``/``cls``).
+        """
+        # Positional-only parameters sit in their own AST list; merging them
+        # lets ``def f(self, /, x)`` skip ``self`` and still report ``x``.
+        positional = [*args.posonlyargs, *args.args]
+        if skip_first:
+            positional = positional[1:]
+
+        def describe(arg: ast.arg) -> Parameter:
+            return Parameter(
+                name=arg.arg,
+                type_hint=self._get_type_hint(arg.annotation) if arg.annotation else None,
+                default_value=self._get_default_value(args, arg.arg),
+            )
+        params = [describe(arg) for arg in positional]
+        if args.vararg:
+            params.append(Parameter(name=f"*{args.vararg.arg}", type_hint="*args"))
+        params.extend(describe(arg) for arg in args.kwonlyargs)
+        if args.kwarg:
+            params.append(Parameter(name=f"**{args.kwarg.arg}", type_hint="**kwargs"))
+        return params
+
+    def _get_type_hint(self, annotation: ast.AST) -> Optional[str]:
+        """Render an annotation node as source text; None when there is none."""
+        if annotation is not None:
+            try:
+                return ast.unparse(annotation)
+            except Exception:
+                return "Any"
+        return None
+
+    def _get_default_value(self, args: ast.arguments, arg_name: str) -> Optional[str]:
+        """Return the source text of ``arg_name``'s default, or None without one.
+
+        ``args.defaults`` lines up with the tail of the positional parameters
+        (positional-only, then regular); keyword-only defaults are stored in
+        ``args.kw_defaults``, where None marks a parameter with no default.
+        """
+        positional = [*args.posonlyargs, *args.args]
+        padding = [None] * (len(positional) - len(args.defaults))
+        slots = [*padding, *args.defaults, *args.kw_defaults]
+        for param, default in zip([*positional, *args.kwonlyargs], slots):
+            if param.arg == arg_name and default is not None:
+                try:
+                    return ast.unparse(default)
+                except Exception:
+                    return None
+        return None
+
+    def _extract_return_type(self, returns: ast.AST | None) -> Optional[str]:
+        """Render the return annotation as source text; None when absent."""
+        try:
+            return None if returns is None else ast.unparse(returns)
+        except Exception:
+            # A node that cannot be rendered falls back to the permissive
+            # "Any" instead of failing the whole parse.
+            return "Any"
+
+    def _extract_summary(self, docstring: str) -> str:
+        """Return the docstring's summary line.
+
+        The summary is the first non-blank line. Text after the blank line
+        that follows it is the extended description and is never returned.
+
+        Args:
+            docstring: Docstring text; may be empty.
+
+        Returns:
+            The stripped summary line, or "" when there is no text.
+        """
+        stripped = docstring.strip() if docstring else ""
+        return stripped.split("\n", 1)[0].strip()
+
+    def _extract_return_description(self, docstring: str) -> Optional[str]:
+        """Pull the text of a ``Returns`` section out of a docstring."""
+        section_patterns = (
+            r"(?:^|\n)\s*Returns?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
+            r"(?:^|\n)\s*Returns?\s+(.+?)(?:\n\n|$)",
+        )
+        flags = re.DOTALL | re.IGNORECASE
+        for section_pattern in section_patterns:
+            found = re.search(section_pattern, docstring, flags)
+            if found is None:
+                continue
+            kept = []
+            for raw in found.group(1).strip().split("\n"):
+                text = raw.strip()
+                if text.startswith("-"):
+                    break  # a dash-led line ends the description
+                if text:
+                    kept.append(text)
+            return "\n".join(kept) if kept else None
+        return None
+
+    def _extract_raises(self, docstring: str) -> list[tuple[str, str]]:
+        """Collect ``(exception, description)`` pairs from a ``Raises:`` section.
+
+        Only ``- Name: description`` bullet lines are recognised.
+        """
+        found = re.search(
+            r"(?:^|\n)\s*Raises?\s*:\s*(.+?)(?:\n\n|$)", docstring, re.DOTALL | re.IGNORECASE
+        )
+        if found is None:
+            return []
+        pairs = []
+        for entry in map(str.strip, found.group(1).split("\n")):
+            # Bullets without a colon carry no description and are skipped.
+            if entry.startswith("-") and ":" in entry[1:]:
+                exc_type, exc_desc = entry[1:].split(":", 1)
+                pairs.append((exc_type.strip(), exc_desc.strip()))
+        return pairs
+
+    def _extract_examples(self, docstring: str) -> list[str]:
+        """Return the ``Example(s):`` section as a one-item list, else []."""
+        section = re.search(
+            r"(?:^|\n)\s*Example[s]?\s*:\s*(.+?)(?:\n\s*[-=]+\s*|\n\n|$)",
+            docstring,
+            re.DOTALL | re.IGNORECASE,
+        )
+        # The whole section is kept as a single stripped string.
+        return [section.group(1).strip()] if section else []
+
+    def _get_visibility(self, name: str) -> str:
+        """Classify a name by its leading underscores."""
+        if not name.startswith("_"):
+            return "public"
+        # Any name with two leading underscores counts as "dunder",
+        # whether or not it also ends with underscores.
+        return "dunder" if name.startswith("__") else "private"
+
+    def _format_decorator(self, decorator: ast.AST) -> str:
+        """Render a decorator expression as ``@`` plus its source text."""
+        if isinstance(decorator, ast.Name):
+            label = decorator.id
+        else:
+            label = ast.unparse(decorator)
+        return f"@{label}"
+
+    def _get_base_name(self, base: ast.AST) -> str:
+        """Return a base-class expression as text."""
+        # Plain names are read directly; dotted or subscripted bases
+        # are rendered from the AST.
+        return base.id if isinstance(base, ast.Name) else ast.unparse(base)
+
+    def _extract_class_attributes(self, node: ast.ClassDef) -> list[tuple[str, Optional[str], Optional[str]]]:
+        """Read ``(name, type, description)`` triples from the class docstring.
+
+        Parses the bullet lines (``- name: type description``) of an
+        ``Attribute(s):`` section. Type and description are None when the
+        corresponding text is missing.
+        """
+        doc = ast.get_docstring(node) or ""
+        # The section ends at a blank line or at an underline row (---/===).
+        # A lone "-" must not end it: that is how every bullet line starts.
+        section = re.search(
+            r"(?:^|\n)\s*Attributes?\s*:\s*(.+?)(?:\n[ \t]*[-=]{2,}[ \t]*(?:\n|$)|\n\n|$)",
+            doc,
+            re.DOTALL | re.IGNORECASE,
+        )
+        if section is None:
+            return []
+
+        found = []
+        for raw in section.group(1).split("\n"):
+            entry = raw.strip()
+            if not entry.startswith("-"):
+                continue
+            name, colon, rest = entry[1:].partition(":")
+            attr_type = attr_desc = None
+            if colon:
+                # First word after the colon is the type; the rest is the description.
+                attr_type, space, remainder = rest.strip().partition(" ")
+                if space:
+                    attr_desc = remainder
+            found.append((name.strip(), attr_type, attr_desc))
+        return found