"""Go docstring parser for Doc2Man."""

import re
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Directive comments such as ``//go:generate`` or ``//nolint:gosec`` are tool
# instructions, not documentation, so they are excluded from doc text.
_DIRECTIVE_RE = re.compile(r"^//[a-z0-9]+:[a-z0-9]")

# Function name, skipping an optional ``(receiver)`` group.
_FUNC_NAME_RE = re.compile(r"func\s+(?:\([^)]*\)\s*)?(\w+)")

# Everything that may precede the parameter list: the ``func`` keyword, an
# optional receiver (only treated as one when a name and "(" / "[" follow),
# and the function name.
_SIGNATURE_PREFIX_RE = re.compile(
    r"\s*(?:func\b\s*)?(?:\([^)]*\)\s*(?=\w+\s*[\[(]))?(?:\w+\s*)?"
)

# ``type Name struct {`` / ``type Name[T any] interface {`` at line start.
_TYPE_DECL_RE = re.compile(
    r"^[ \t]*type[ \t]+(\w+)(?:[ \t]*\[[^\]\n]*\])?[ \t]+(struct|interface)[ \t]*\{",
    re.MULTILINE,
)

_FIELD_RE = re.compile(r"^((?:\w+\s*,\s*)*\w+)\s+(.+)$")
_METHOD_RE = re.compile(r"^(\w+)\s*(\(.*)$")
_EMBEDDED_RE = re.compile(r"^\*?[\w.]+(?:\[.*\])?$")
_TAG_RE = re.compile(r"`[^`]*`")

_CLOSERS = {"(": ")", "[": "]", "{": "}"}
_TYPE_KEYWORDS = frozenset({"chan", "func", "struct", "interface"})
_TYPE_PREFIXES = (
    "*", "[", "...", "<-", "func(", "map[", "chan<-", "struct{", "interface{",
)

# Section headings emitted by ``go doc -all``.
_GO_DOC_SECTIONS = frozenset({"CONSTANTS", "VARIABLES", "FUNCTIONS", "TYPES"})

# Upper bound on how many lines a multi-line signature may span.
_MAX_SIGNATURE_LINES = 32


class GoDocstringParser:
    """Parser for Go documentation comments using go doc or source scanning."""

    def __init__(self):
        """Initialize the parser (it keeps no state)."""

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a Go source file and extract documentation.

        Args:
            file_path: Path of the Go source file, read as UTF-8.

        Returns:
            The dictionary produced by :meth:`parse_content`.

        Raises:
            ValueError: If the file cannot be read or decoded; the original
                error is attached as ``__cause__``.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as handle:
                content = handle.read()
        except (OSError, UnicodeDecodeError) as exc:
            raise ValueError(f"Error reading Go file {file_path}: {exc}") from exc
        return self.parse_content(content, str(file_path))

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse Go source content and extract documentation.

        Args:
            content: Go source text.
            file_path: Value stored under ``"file_path"`` in the result.

        Returns:
            A dict with keys ``title``, ``description``, ``package_docstring``,
            ``functions``, ``types``, ``file_path`` and ``language``.
        """
        result: Dict[str, Any] = {
            "title": None,
            "description": None,
            "package_docstring": None,
            "functions": [],
            "types": [],
            "file_path": file_path,
            "language": "go",
        }

        package_doc = self._extract_package_documentation(content)
        if package_doc:
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)

        result["functions"] = self._extract_functions(content)
        result["types"] = self._extract_types(content)
        return result

    def _extract_package_documentation(self, content: str) -> Optional[str]:
        """Return the comment group directly above the ``package`` clause.

        Only the comment that touches the package clause counts, so a licence
        header separated by a blank line is not included. Returns ``None``
        when there is no package clause or no adjacent comment.
        """
        lines = content.split("\n")
        in_block_comment = False

        for index, line in enumerate(lines):
            stripped = line.strip()
            if in_block_comment:
                in_block_comment = "*/" not in stripped
                continue
            if stripped.startswith("/*"):
                in_block_comment = "*/" not in stripped[2:]
                continue
            if stripped.startswith("package "):
                doc_lines = self._get_leading_comment(lines, index)
                return "\n".join(doc_lines).strip() if doc_lines else None

        return None

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the first line of *docstring*, or ``None`` if it is empty."""
        if not docstring:
            return None
        return docstring.strip().split("\n", 1)[0].strip()

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract documentation for every ``func`` declaration in *content*.

        Lines inside a ``/* ... */`` block that opens at the start of a line
        are skipped, so commented-out functions are not reported.
        """
        functions: List[Dict[str, Any]] = []
        lines = content.split("\n")
        in_block_comment = False

        for index, line in enumerate(lines):
            stripped = line.strip()
            if in_block_comment:
                in_block_comment = "*/" not in stripped
                continue
            # Only a line-leading "/*" opens a block: a bare substring test
            # would misfire on string literals such as "/api/*".
            if stripped.startswith("/*"):
                in_block_comment = "*/" not in stripped[2:]
                continue
            if not stripped.startswith("func "):
                continue

            func_doc = self._build_function_doc(
                self._extract_func_name(stripped),
                self._read_signature(lines, index),
                self._get_leading_comment(lines, index),
                lines,
                index,
            )
            if func_doc:
                functions.append(func_doc)

        return functions

    def _read_signature(self, lines: List[str], start: int) -> str:
        """Return the signature starting at ``lines[start]``.

        When the parameter list is not closed on the first line, following
        lines (minus ``//`` comments) are joined until it is. If it never
        closes within ``_MAX_SIGNATURE_LINES`` the first line is returned
        unchanged.
        """
        first_line = lines[start].strip()
        signature = first_line
        cursor = start
        limit = min(len(lines), start + _MAX_SIGNATURE_LINES)

        while (
            "(" in signature
            and self._locate_params(signature) is None
            and cursor + 1 < limit
        ):
            cursor += 1
            continuation = lines[cursor].split("//", 1)[0].strip()
            if continuation:
                signature = f"{signature} {continuation}"

        if cursor == start:
            return first_line
        if self._locate_params(signature) is None:
            return first_line
        # Tidy the seams left by joining: "( a int, )" -> "(a int)".
        signature = re.sub(r"\(\s+", "(", signature)
        return re.sub(r",?\s+\)", ")", signature)

    def _extract_func_name(self, line: str) -> str:
        """Extract the function name from a signature line ("" if none)."""
        match = _FUNC_NAME_RE.search(line)
        return match.group(1) if match else ""

    @staticmethod
    def _clean_line_comment(line: str) -> str:
        """Return the text of a ``//`` comment line, or "" for directives."""
        if _DIRECTIVE_RE.match(line):
            return ""
        return line[2:].strip()

    def _get_leading_comment(self, lines: List[str], line_num: int) -> List[str]:
        """Get the comment lines immediately preceding ``lines[line_num]``.

        A blank line or any code line ends the group. A ``/* ... */`` block
        ending on the line directly above is also accepted. Empty comment
        lines and directive comments are dropped.
        """
        comments: List[str] = []
        index = line_num - 1

        while index >= 0:
            line = lines[index].strip()
            if line.startswith("//"):
                text = self._clean_line_comment(line)
                if text:
                    comments.append(text)
                index -= 1
                continue
            if index == line_num - 1 and line.endswith("*/"):
                return self._collect_block_comment(lines, index)
            break

        comments.reverse()
        return comments

    @staticmethod
    def _collect_block_comment(lines: List[str], end: int) -> List[str]:
        """Return the non-empty lines of the block comment ending at *end*.

        Returns an empty list when no line-leading ``/*`` is found above.
        """
        start = end
        while start >= 0 and "/*" not in lines[start]:
            start -= 1
        if start < 0 or not lines[start].strip().startswith("/*"):
            return []

        text = "\n".join(lines[start:end + 1])
        inner = text[text.index("/*") + 2:text.rindex("*/")]

        cleaned: List[str] = []
        for raw in inner.split("\n"):
            piece = raw.strip()
            if piece.startswith("*"):
                # Drop the decorative " * " gutter some block comments use.
                piece = piece[1:].strip()
            if piece:
                cleaned.append(piece)
        return cleaned

    def _is_in_multiline_comment(self, lines: List[str], line_num: int) -> bool:
        """Check if ``lines[line_num]`` is inside a multiline comment.

        Scans every earlier line, flipping state on ``/*`` and ``*/``
        substrings, so markers inside string literals are counted as well.
        """
        in_comment = False
        for line in lines[:line_num]:
            if "/*" in line:
                in_comment = True
            if "*/" in line:
                in_comment = False
        return in_comment

    def _build_function_doc(
        self,
        name: str,
        signature: str,
        comments: List[str],
        lines: List[str],
        start: int,
    ) -> Optional[Dict[str, Any]]:
        """Build a function documentation dictionary.

        Args:
            name: Function name; an empty name yields ``None``.
            signature: Signature text to store and to parse.
            comments: Doc-comment lines, joined with newlines.
            lines: Unused; kept so existing callers keep working.
            start: Zero-based line index, stored as ``start + 1``.
        """
        if not name:
            return None

        return {
            "name": name,
            "signature": signature,
            "description": "\n".join(comments) if comments else "",
            "args": self._extract_params_from_signature(signature),
            "returns": self._extract_returns_from_signature(signature),
            "line_number": start + 1,
        }

    @staticmethod
    def _find_closing(text: str, start: int) -> int:
        """Return the index of the bracket closing ``text[start]``, or -1.

        Only brackets of the same kind as ``text[start]`` are counted.
        """
        opener = text[start]
        closer = _CLOSERS[opener]
        depth = 0
        for index in range(start, len(text)):
            char = text[index]
            if char == opener:
                depth += 1
            elif char == closer:
                depth -= 1
                if depth == 0:
                    return index
        return -1

    @staticmethod
    def _split_top_level(text: str) -> List[str]:
        """Split *text* on commas that are not nested inside brackets."""
        parts: List[str] = []
        current: List[str] = []
        depth = 0
        for char in text:
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth = max(depth - 1, 0)
            if char == "," and depth == 0:
                parts.append("".join(current))
                current = []
            else:
                current.append(char)
        parts.append("".join(current))
        return parts

    @staticmethod
    def _starts_type(token: str) -> bool:
        """Return True if *token* begins a type rather than a parameter name."""
        return token in _TYPE_KEYWORDS or token.startswith(_TYPE_PREFIXES)

    def _locate_params(self, signature: str) -> Optional[Tuple[int, int]]:
        """Return ``(open, close)`` indexes of the parameter list parentheses.

        The receiver and any ``[type parameters]`` are skipped. Returns
        ``None`` when no balanced parameter list is present.
        """
        pos = _SIGNATURE_PREFIX_RE.match(signature).end()
        if signature[pos:pos + 1] == "[":
            bracket_end = self._find_closing(signature, pos)
            if bracket_end == -1:
                return None
            pos = bracket_end + 1

        open_pos = signature.find("(", pos)
        if open_pos == -1:
            return None
        close_pos = self._find_closing(signature, open_pos)
        if close_pos == -1:
            return None
        return open_pos, close_pos

    def _extract_params_from_signature(self, signature: str) -> List[Dict[str, str]]:
        """Extract named parameters from a function signature.

        Grouped names share the following type (``a, b int``). Unnamed
        parameter lists and names starting with ``_`` produce no entries.
        """
        span = self._locate_params(signature)
        if span is None:
            return []

        open_pos, close_pos = span
        pieces = self._split_top_level(signature[open_pos + 1:close_pos])
        parts = [tokens for tokens in (p.strip().split(None, 1) for p in pieces) if tokens]

        # Go parameter lists are either fully named or fully unnamed; with no
        # "name type" pair present every entry is a bare type.
        if not any(len(t) == 2 and not self._starts_type(t[0]) for t in parts):
            return []

        params: List[Dict[str, str]] = []
        pending: List[str] = []
        for tokens in parts:
            if len(tokens) == 1:
                pending.append(tokens[0])  # name waiting for its type
                continue
            if self._starts_type(tokens[0]):
                pending = []
                continue
            type_str = tokens[1].strip()
            for name in [*pending, tokens[0]]:
                if not name.startswith("_"):
                    params.append({"name": name, "type": type_str, "description": ""})
            pending = []

        return params

    def _result_text(self, rest: str) -> str:
        """Return the result type from the text following the parameter list.

        Stops at the body's opening brace or a trailing ``//`` comment, while
        keeping braces that belong to ``interface{...}`` / ``struct{...}``.
        """
        depth = 0
        index = 0
        length = len(rest)

        while index < length:
            char = rest[index]
            if depth == 0 and rest.startswith("//", index):
                break
            if char in "([":
                depth += 1
            elif char in ")]":
                depth -= 1
            elif char == "{":
                if rest[:index].rstrip().endswith(("interface", "struct")):
                    close = self._find_closing(rest, index)
                    if close == -1:
                        index = length
                        break
                    index = close
                elif depth == 0:
                    break
            index += 1

        return rest[:index].strip()

    def _extract_returns_from_signature(self, signature: str) -> Optional[Dict[str, str]]:
        """Extract the return type from a function signature.

        Returns ``None`` when the signature has no parameter list or no
        result. Multiple results keep their parentheses, e.g. ``(int, error)``.
        """
        span = self._locate_params(signature)
        if span is None:
            return None

        return_type = self._result_text(signature[span[1] + 1:])
        if not return_type:
            return None
        return {"type": return_type, "description": ""}

    def _extract_types(self, content: str) -> List[Dict[str, Any]]:
        """Extract struct and interface declarations with their documentation.

        Each entry has ``name``, ``description`` (leading comment),
        ``fields`` and ``line_number`` (1-based line of the ``type`` keyword).
        Declarations without a closing brace are skipped.
        """
        types: List[Dict[str, Any]] = []
        lines = content.split("\n")

        for match in _TYPE_DECL_RE.finditer(content):
            brace_pos = match.end() - 1
            close_pos = self._find_closing(content, brace_pos)
            if close_pos == -1:
                continue

            line_index = content.count("\n", 0, match.start(1))
            types.append({
                "name": match.group(1),
                "description": "\n".join(self._get_leading_comment(lines, line_index)),
                "fields": self._extract_fields(content[brace_pos + 1:close_pos]),
                "line_number": line_index + 1,
            })

        return types

    def _extract_fields(self, body: str) -> List[Dict[str, str]]:
        """Extract fields (or interface methods) from a type body.

        Each entry has ``name``, ``type`` and ``description``. The description
        is the comment group directly above the field, else its trailing
        ``//`` comment. Members of nested literals are skipped.
        """
        fields: List[Dict[str, str]] = []
        doc: List[str] = []
        depth = 0
        in_block_comment = False

        for raw in body.split("\n"):
            line = raw.strip()
            if in_block_comment:
                in_block_comment = "*/" not in line
                continue
            if depth > 0:
                depth = max(depth + line.count("{") - line.count("}"), 0)
                continue
            if not line:
                doc = []
                continue
            if line.startswith("/*"):
                in_block_comment = "*/" not in line[2:]
                continue
            if line.startswith("//"):
                text = self._clean_line_comment(line)
                if text:
                    doc.append(text)
                continue

            # Remove struct tags first: they may legitimately contain "//".
            code, _, trailing = _TAG_RE.sub("", line).partition("//")
            code = code.strip()
            description = "\n".join(doc) if doc else trailing.strip()
            doc = []

            depth = max(code.count("{") - code.count("}"), 0)
            if depth:
                code = code.rstrip("{ \t")

            method = _METHOD_RE.match(code)
            named = None if method else _FIELD_RE.match(code)
            if method:
                names = [method.group(1)]
                type_str = "func" + method.group(2)
            elif named:
                names = [part.strip() for part in named.group(1).split(",")]
                type_str = named.group(2).strip()
                if type_str.startswith("|"):
                    continue  # interface type-set element, not a member
            elif _EMBEDDED_RE.match(code):
                names = [code.lstrip("*").split("[", 1)[0].rsplit(".", 1)[-1]]
                type_str = code
            else:
                continue

            for name in names:
                fields.append({"name": name, "type": type_str, "description": description})

        return fields

    def parse_go_doc(self, package_path: str) -> Dict[str, Any]:
        """Parse Go documentation using the ``go doc -all`` command.

        Args:
            package_path: Package argument passed to ``go doc``.

        Raises:
            ValueError: If ``go`` is missing, times out after 30 seconds, or
                exits with a non-zero status.
        """
        try:
            result = subprocess.run(
                ["go", "doc", "-all", package_path],
                capture_output=True,
                text=True,
                timeout=30,
            )
        except subprocess.TimeoutExpired:
            raise ValueError("go doc command timed out")
        except FileNotFoundError:
            raise ValueError("go command not found - ensure Go is installed")

        if result.returncode != 0:
            raise ValueError(f"go doc failed: {result.stderr}")

        return self._parse_go_doc_output(result.stdout)

    def _parse_go_doc_output(self, output: str) -> Dict[str, Any]:
        """Convert ``go doc -all`` text into the :meth:`parse_content` shape.

        Best-effort reader for the plain-text layout: unindented text before
        the first section or declaration is the package documentation, and
        indented lines after a ``func`` / ``type`` line are its description.
        ``line_number`` is ``None`` because the output has no source
        positions, and ``file_path`` is left empty.
        """
        result: Dict[str, Any] = {
            "title": None,
            "description": None,
            "package_docstring": None,
            "functions": [],
            "types": [],
            "file_path": "",
            "language": "go",
        }

        lines = output.split("\n")
        package_lines: List[str] = []
        body_lines: List[str] = []
        target: Optional[Dict[str, Any]] = None  # entry receiving doc text
        in_header = True
        in_type_body = False

        for index, raw in enumerate(lines):
            if in_type_body:
                if raw.startswith("}"):
                    in_type_body = False
                    if target is not None:
                        target["fields"] = self._extract_fields("\n".join(body_lines))
                else:
                    body_lines.append(raw)
                continue

            stripped = raw.strip()
            if not stripped:
                continue
            top_level = not raw[0].isspace()

            if top_level and stripped in _GO_DOC_SECTIONS:
                in_header = False
                target = None
                continue

            if top_level and stripped.startswith("func "):
                in_header = False
                target = self._build_function_doc(
                    self._extract_func_name(stripped), stripped, [], lines, index
                )
                if target is not None:
                    target["line_number"] = None
                    result["functions"].append(target)
                continue

            if top_level and stripped.startswith("type "):
                in_header = False
                name_match = re.match(r"type\s+(\w+)", stripped)
                target = None
                if name_match:
                    target = {
                        "name": name_match.group(1),
                        "description": "",
                        "fields": [],
                        "line_number": None,
                    }
                    result["types"].append(target)
                in_type_body = stripped.endswith("{")
                body_lines = []
                continue

            if in_header:
                is_clause = (
                    top_level
                    and not package_lines
                    and stripped.startswith("package ")
                )
                if not is_clause:
                    package_lines.append(stripped)
                continue

            if top_level:
                # const / var declarations: their doc text is not collected.
                target = None
                continue

            if target is not None:
                prior = target["description"]
                target["description"] = f"{prior}\n{stripped}" if prior else stripped

        if package_lines:
            package_doc = "\n".join(package_lines)
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)

        return result


def parse_go_file(file_path: Path) -> Dict[str, Any]:
    """Parse a Go file and extract documentation.

    Convenience wrapper around :meth:`GoDocstringParser.parse_file`.
    """
    return GoDocstringParser().parse_file(file_path)