"""Python docstring parser for Doc2Man."""

import ast
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


class PythonDocstringParser:
    """Extract structured documentation from Python source code.

    Two docstring conventions are recognised:

    * Google-style sections introduced by ``Args:`` / ``Arguments:`` /
      ``Parameters:``, ``Returns:``, ``Raises:`` and ``Example:`` /
      ``Examples:``.
    * Tag-style fields: ``:param``, ``:parameter``, ``:return:``,
      ``:returns:``, ``:raises`` and their ``@param`` / ``@return`` /
      ``@raises`` counterparts.

    NumPy-style underlined headers (``Parameters`` followed by ``-----``)
    are not recognised by this parser.
    """

    # Section headers, compared against the lower-cased, stripped line.
    _ARG_HEADERS = ("args:", "arguments:", "parameters:")
    _EXAMPLE_HEADERS = ("example:", "examples:")
    _SECTION_HEADERS = frozenset(
        _ARG_HEADERS + _EXAMPLE_HEADERS + ("returns:", "raises:")
    )

    # Leading words of an argument description that are taken to be its type
    # when the ``name: type description`` shorthand is used.
    _KNOWN_TYPES = frozenset((
        "int", "str", "float", "bool", "list", "dict", "tuple", "set", "any",
        "Optional", "List", "Dict", "str,int", "number",
    ))

    # ``name (type): description`` as used in Google-style sections.
    _GOOGLE_ARG_RE = re.compile(
        r"^(?P<name>\*{0,2}\w+)\s*\((?P<type>.+?)\)\s*:\s*(?P<desc>.*)$"
    )
    # ``:param [type] name: description`` (also ``:parameter``).
    _REST_PARAM_RE = re.compile(
        r":param(?:eter)?\s+(?:(?P<type>[^:]+?)\s+)?"
        r"(?P<name>\*{0,2}\w+)\s*:\s*(?P<desc>.*)$"
    )
    # ``@param {type} name description`` and ``@param name: description``.
    _AT_PARAM_RE = re.compile(
        r"@param(?:eter)?\s*(?:\{(?P<type>[^}]*)\}\s*)?"
        r"(?P<name>[^\s:{}]+)\s*:?\s*(?P<desc>.*)$"
    )
    _REST_RETURN_RE = re.compile(r":returns?:\s*(?P<desc>.*)$")
    _AT_RETURN_RE = re.compile(r"@returns?:?\s*(?P<desc>.*)$")
    _RAISE_TAG_RE = re.compile(r"^\s*(?::raises|@raises)")

    def __init__(self):
        """Initialize the parser (it keeps no state between calls)."""
        pass

    def parse(self, source_code: str) -> Dict[str, Any]:
        """Parse Python source code and extract its documentation.

        Args:
            source_code: The Python source code to parse.

        Returns:
            A dictionary with the keys ``title``, ``description``,
            ``module_docstring``, ``functions`` and ``classes``. Source that
            does not compile yields the same dictionary with ``None`` values
            and empty lists.
        """
        result = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
        }

        try:
            tree = ast.parse(source_code)
        except SyntaxError:
            # Best effort: an unparsable file simply contributes nothing.
            return result

        module_docstring = ast.get_docstring(tree)
        if module_docstring:
            result["module_docstring"] = module_docstring
            result["description"] = module_docstring
            result["title"] = self._extract_title(module_docstring)

        # Only top-level definitions are reported; nested ones are ignored.
        for node in ast.iter_child_nodes(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                func_doc = self._parse_function(node, source_code)
                if func_doc:
                    result["functions"].append(func_doc)
            elif isinstance(node, ast.ClassDef):
                class_doc = self._parse_class(node, source_code)
                if class_doc:
                    result["classes"].append(class_doc)

        return result

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the first line of ``docstring``, or ``None`` if it is empty."""
        if not docstring:
            return None
        return docstring.strip().split("\n")[0].strip()

    def _parse_function(self, node: ast.FunctionDef, source_code: str) -> Optional[Dict[str, Any]]:
        """Describe a function or method node.

        Args:
            node: The ``FunctionDef`` / ``AsyncFunctionDef`` node.
            source_code: The full module source (currently unused).

        Returns:
            A dictionary with ``name``, ``description``, ``args``,
            ``returns``, ``raises``, ``examples`` and ``line_number``.
        """
        func_info = {
            "name": node.name,
            "description": None,
            "args": [],
            "returns": None,
            "raises": [],
            "examples": [],
            "line_number": node.lineno,
        }

        docstring = ast.get_docstring(node)
        if docstring:
            parsed = self._parse_docstring(docstring)
            func_info["description"] = parsed.get("description")
            func_info["args"] = parsed.get("args", [])
            func_info["returns"] = parsed.get("returns")
            func_info["raises"] = parsed.get("raises", [])
            func_info["examples"] = parsed.get("examples", [])

        # Append positional parameters from the signature that the docstring
        # did not document, so every one of them shows up in the output.
        documented = {entry.get("name") for entry in func_info["args"]}
        for arg in node.args.args:
            if arg.arg not in documented:
                documented.add(arg.arg)
                func_info["args"].append(
                    {"name": arg.arg, "type": "", "description": ""}
                )

        return func_info

    def _parse_class(self, node: ast.ClassDef, source_code: str) -> Optional[Dict[str, Any]]:
        """Describe a class node and its public methods.

        Args:
            node: The ``ClassDef`` node.
            source_code: The full module source, forwarded to
                ``_parse_function``.

        Returns:
            A dictionary with ``name``, ``description`` (first docstring
            line), ``methods`` and ``line_number``. Methods whose name starts
            with an underscore are left out.
        """
        class_info = {
            "name": node.name,
            "description": None,
            "methods": [],
            "line_number": node.lineno,
        }

        docstring = ast.get_docstring(node)
        if docstring:
            class_info["description"] = docstring.strip().split("\n")[0]

        for item in node.body:
            if not isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if item.name.startswith("_"):
                continue
            method_doc = self._parse_function(item, source_code)
            if method_doc:
                class_info["methods"].append(method_doc)

        return class_info

    def _parse_docstring(self, docstring: str) -> Dict[str, Any]:
        """Split a docstring into description, args, returns, raises, examples.

        Args:
            docstring: The docstring text.

        Returns:
            A dictionary with the keys ``description`` (first free-text
            line), ``args``, ``returns``, ``raises`` and ``examples``.
        """
        result = {
            "description": "",
            "args": [],
            "returns": None,
            "raises": [],
            "examples": [],
        }

        if not docstring:
            return result

        lines = docstring.strip().split("\n")
        total = len(lines)
        i = 0
        while i < total:
            line = lines[i].strip()
            if not line:
                i += 1
                continue

            lowered = line.lower()

            if lowered.startswith(self._ARG_HEADERS):
                # The collector reports where the section ended, so its lines
                # are not examined a second time by this loop.
                entries, i = self._collect_section(lines, i + 1)
                result["args"] = self._entries_to_args(entries)

            elif lowered.startswith("returns:"):
                inline = line[len("returns:"):].strip()
                # With text on the header line, only deeper-indented lines
                # can still belong to the section.
                floor = self._indent_of(lines[i]) + 1 if inline else 0
                body, i = self._collect_section(
                    lines, i + 1, stop_at_blank=True, min_indent=floor
                )
                text = " ".join(([inline] if inline else []) + body)
                result["returns"] = self._parse_returns(text)

            elif lowered.startswith("raises:"):
                entries, i = self._collect_section(lines, i + 1)
                result["raises"] = self._entries_to_raises(entries)

            elif lowered.startswith(self._EXAMPLE_HEADERS):
                # Keep scanning afterwards: sections that follow the examples
                # must not be lost.
                result["examples"], i = self._scan_examples(lines, i + 1)

            elif ":param " in line or ":parameter " in line or line.startswith("@param"):
                param = self._parse_param_tag(line)
                if param:
                    result["args"].append(param)
                i += 1

            elif self._return_tag_text(line) is not None:
                text = self._return_tag_text(line)
                i += 1
                if not text and i < total:
                    # Tag with no inline text: fall back to the next line,
                    # unless that line is itself a tag or a section header.
                    candidate = lines[i].strip()
                    if (
                        candidate
                        and not candidate.startswith((":", "@"))
                        and candidate.lower() not in self._SECTION_HEADERS
                    ):
                        text = candidate
                        i += 1
                parsed = self._parse_returns(text)
                if parsed:
                    result["returns"] = parsed

            elif line.startswith((":raises", "@raises")):
                raise_info = self._parse_raise_tag(line)
                if raise_info:
                    result["raises"].append(raise_info)
                i += 1

            elif not result["description"]:
                result["description"] = line
                i += 1

            else:
                i += 1

        return result

    @staticmethod
    def _indent_of(text: str) -> int:
        """Return the number of leading whitespace characters of ``text``."""
        return len(text) - len(text.lstrip())

    def _collect_section(
        self,
        lines: List[str],
        start: int,
        stop_at_blank: bool = False,
        min_indent: int = 0,
    ) -> Tuple[List[str], int]:
        """Gather the logical entries of a section body.

        Args:
            lines: All docstring lines, indentation intact.
            start: Index of the first body line (the header is ``start - 1``).
            stop_at_blank: End the section at the first blank line instead of
                skipping blank lines.
            min_indent: A line indented less than this ends the section.

        Returns:
            ``(entries, next_index)``. Each entry is stripped text; a line
            indented deeper than the first body line is joined onto the
            previous entry. ``next_index`` is the first line not consumed.
        """
        header_indent = self._indent_of(lines[start - 1]) if 0 < start <= len(lines) else -1
        base_indent = None
        entries = []
        idx = start
        while idx < len(lines):
            raw = lines[idx]
            text = raw.strip()
            if not text:
                if stop_at_blank:
                    break
                idx += 1
                continue
            if text.lower() in self._SECTION_HEADERS:
                break
            indent = self._indent_of(raw)
            if indent < min_indent:
                break
            if base_indent is None:
                base_indent = indent
            elif indent <= header_indent < base_indent:
                # The body was indented under its header and this line is
                # back at header level, so the section is over.
                break
            if entries and indent > base_indent:
                entries[-1] = entries[-1] + " " + text
            else:
                entries.append(text)
            idx += 1
        return entries, idx

    def _entries_to_args(self, entries: List[str]) -> List[Dict[str, str]]:
        """Convert section entries into argument dictionaries."""
        args = []
        for entry in entries:
            if entry.startswith(("- ", "* ")):
                entry = entry[2:].lstrip()
            elif not (entry[0].isalnum() or entry[0] in "_*"):
                # Not a plausible identifier start (e.g. a reST tag).
                continue
            if not entry:
                continue

            google = self._GOOGLE_ARG_RE.match(entry)
            if google:
                args.append({
                    "name": google.group("name"),
                    "type": google.group("type").strip(),
                    "description": google.group("desc").strip(),
                })
                continue

            name, colon, rest = entry.partition(":")
            name = name.strip()
            if not name:
                continue

            arg = {"name": name, "type": "", "description": ""}
            if colon:
                rest = rest.strip()
                words = rest.split(None, 1)
                if words and words[0] in self._KNOWN_TYPES:
                    arg["type"] = words[0]
                    if len(words) > 1:
                        arg["description"] = words[1].strip()
                else:
                    arg["description"] = rest
            args.append(arg)
        return args

    def _parse_args_section(self, lines: List[str], start: int) -> List[Dict[str, str]]:
        """Parse the body of an ``Args:`` section.

        Args:
            lines: All docstring lines, indentation intact.
            start: Index of the first line after the section header.

        Returns:
            One ``{"name", "type", "description"}`` dictionary per argument.
            Both ``name (type): text`` and ``name: text`` are understood, and
            deeper-indented continuation lines extend the description.
        """
        entries, _ = self._collect_section(lines, start)
        return self._entries_to_args(entries)

    def _parse_returns(self, line: str) -> Optional[Dict[str, str]]:
        """Build the ``returns`` entry from a line of description text.

        Args:
            line: The description text; a leading ``:`` is dropped.

        Returns:
            ``{"type": "", "description": text}``, or ``None`` when ``line``
            is empty or whitespace only.
        """
        if not line:
            return None

        text = line.strip()
        if not text:
            return None

        if text.startswith(":"):
            text = text[1:].strip()

        return {"type": "", "description": text}

    @staticmethod
    def _split_exception(text: str) -> Tuple[str, str]:
        """Split ``text`` into ``(exception, description)``.

        The split happens at the first colon; without a colon the first
        whitespace-separated word is taken as the exception name.
        """
        name, colon, detail = text.partition(":")
        if not colon:
            pieces = text.split(None, 1)
            name = pieces[0] if pieces else ""
            detail = pieces[1] if len(pieces) > 1 else ""
        return name.strip(), detail.strip()

    def _entries_to_raises(self, entries: List[str]) -> List[Dict[str, str]]:
        """Convert section entries into exception dictionaries."""
        raises = []
        for entry in entries:
            if entry.startswith(("- ", "* ")):
                entry = entry[2:]
            exception, description = self._split_exception(entry)
            if exception:
                raises.append({"exception": exception, "description": description})
        return raises

    def _parse_raises_section(self, lines: List[str], start: int) -> List[Dict[str, str]]:
        """Parse the body of a ``Raises:`` section.

        Args:
            lines: All docstring lines, indentation intact.
            start: Index of the first line after the section header.

        Returns:
            One ``{"exception", "description"}`` dictionary per entry.
        """
        entries, _ = self._collect_section(lines, start)
        return self._entries_to_raises(entries)

    def _scan_examples(self, lines: List[str], start: int) -> Tuple[List[str], int]:
        """Collect example text and report where the section ended.

        Returns:
            ``(examples, next_index)`` where ``examples`` holds at most one
            string and ``next_index`` is the first line not consumed.
        """
        collected = []
        base_indent = None
        idx = start
        while idx < len(lines):
            raw = lines[idx]
            text = raw.strip()
            if text.lower() in self._SECTION_HEADERS:
                break
            idx += 1
            if not text or text.startswith("#"):
                continue
            # Text before the first indented or ``>>>`` line is prose that
            # introduces the example, not part of it.
            if collected or raw[:1] in (" ", "\t") or text.startswith(">>>"):
                indent = self._indent_of(raw)
                if base_indent is None:
                    base_indent = indent
                # Remove only the shared indent so nested code stays nested.
                collected.append(raw[min(indent, base_indent):].rstrip())
        examples = ["\n".join(collected)] if collected else []
        return examples, idx

    def _parse_examples_section(self, lines: List[str], start: int) -> List[str]:
        """Parse the body of an ``Example:`` / ``Examples:`` section.

        Args:
            lines: All docstring lines, indentation intact.
            start: Index of the first line after the section header.

        Returns:
            A list holding the example text as one newline-joined string, or
            an empty list. Blank lines and lines starting with ``#`` are
            dropped; indentation relative to the first line is preserved.
        """
        examples, _ = self._scan_examples(lines, start)
        return examples

    def _parse_param_tag(self, line: str) -> Optional[Dict[str, str]]:
        """Parse a ``:param`` / ``:parameter`` / ``@param`` tag line.

        Accepted forms are ``:param name: text``, ``:param type name: text``,
        ``@param name: text``, ``@param name text`` and
        ``@param {type} name text``.

        Args:
            line: The line containing the tag.

        Returns:
            ``{"name", "type", "description"}``, or ``None`` when no
            parameter name can be identified.
        """
        if line.lstrip().startswith("@param"):
            match = self._AT_PARAM_RE.match(line.lstrip())
        else:
            match = self._REST_PARAM_RE.search(line)
        if not match:
            return None
        return {
            "name": match.group("name"),
            "type": (match.group("type") or "").strip(),
            "description": match.group("desc").strip(),
        }

    def _return_tag_text(self, line: str) -> Optional[str]:
        """Return the text following a return tag on ``line``.

        Returns:
            The (possibly empty) text after ``:return:``, ``:returns:``,
            ``@return`` or ``@returns``; ``None`` when ``line`` carries no
            such tag.
        """
        match = self._REST_RETURN_RE.search(line) or self._AT_RETURN_RE.match(line)
        if match is None:
            return None
        return match.group("desc").strip()

    def _parse_raise_tag(self, line: str) -> Optional[Dict[str, str]]:
        """Parse a ``:raises`` / ``@raises`` tag line.

        Args:
            line: The tag line, e.g. ``:raises ValueError: if x is bad``.

        Returns:
            ``{"exception", "description"}``, or ``None`` when nothing
            follows the tag.
        """
        content = self._RAISE_TAG_RE.sub("", line, count=1).strip()
        if not content:
            return None
        exception, description = self._split_exception(content)
        return {"exception": exception, "description": description}


def _parse_with_encoding(parser: PythonDocstringParser, file_path: Path, encoding: str) -> Dict[str, Any]:
    """Read ``file_path`` using ``encoding`` and parse its contents."""
    with open(file_path, "r", encoding=encoding) as handle:
        source_code = handle.read()

    result = parser.parse(source_code)
    result["file_path"] = str(file_path)
    result["language"] = "python"
    return result


def parse_python_file(file_path: Path) -> Dict[str, Any]:
    """Parse a Python file and extract documentation.

    The file is read as UTF-8; if that fails to decode it is read again as
    Latin-1.

    Args:
        file_path: Path of the Python file.

    Returns:
        The dictionary produced by ``PythonDocstringParser.parse`` with the
        extra keys ``file_path`` and ``language``.

    Raises:
        ValueError: If the file cannot be read or parsed. The underlying
            exception is attached as ``__cause__``.
    """
    parser = PythonDocstringParser()

    try:
        return _parse_with_encoding(parser, file_path, "utf-8")
    except UnicodeDecodeError:
        try:
            return _parse_with_encoding(parser, file_path, "latin-1")
        except Exception as e:
            raise ValueError(f"Error reading Python file {file_path}: {e}") from e
    except Exception as e:
        raise ValueError(f"Error parsing Python file {file_path}: {e}") from e