Add parsers: Python, Go, and JavaScript docstring parsers
Some checks failed
CI / test (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-01-31 00:56:36 +00:00
parent 903096e80c
commit fb2fb893aa

292
doc2man/parsers/go.py Normal file
View File

@@ -0,0 +1,292 @@
"""Go docstring parser for Doc2Man."""
import re
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional
class GoDocstringParser:
    """Parser for Go documentation comments using go doc or source scanning.

    Two entry points are offered:

    * ``parse_file`` / ``parse_content`` scan Go source text line by line and
      collect the package comment, function declarations and struct/interface
      type declarations together with their doc comments.
    * ``parse_go_doc`` runs ``go doc -all`` and parses its textual output.

    Both return a dictionary with the keys ``title``, ``description``,
    ``package_docstring``, ``functions``, ``types``, ``file_path`` and
    ``language``.
    """

    # Lexical states tracked across line boundaries by ``_line_start_states``.
    _IN_COMMENT = "comment"  # inside a /* ... */ block comment
    _IN_RAW = "raw"  # inside a back-quoted raw string literal

    # Tool directives (``//go:build``, ``//nolint:x``, ``//export f``) are not
    # documentation and must not end up in descriptions.
    _DIRECTIVE_RE = re.compile(r"^//(?:[a-z0-9]+:[a-z0-9]|line |export |extern )")
    # ``type Name struct {`` / ``type Name[T any] interface {`` on one line.
    _TYPE_DECL_RE = re.compile(
        r"^type\s+(\w+)(?:\[[^\]]*\])?\s+(?:struct|interface)\s*\{"
    )
    _OPEN_BRACKET_RE = re.compile(r"[(\[]")
    # Words that start a type, so they can never be a parameter name.
    _TYPE_KEYWORDS = frozenset({"chan", "func", "interface", "map", "struct"})
    # Section headings printed by ``go doc -all``.
    _GO_DOC_SECTIONS = frozenset({"CONSTANTS", "VARIABLES", "FUNCTIONS", "TYPES"})
    # Upper bound on continuation lines joined into one signature.
    _MAX_SIGNATURE_LINES = 50

    def __init__(self):
        """Initialize the parser (it keeps no state)."""

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a Go source file and extract documentation.

        Args:
            file_path: Location of the ``.go`` file (read as UTF-8).

        Returns:
            The documentation dictionary produced by ``parse_content``.

        Raises:
            ValueError: If the file cannot be read or parsed. The original
                exception is chained as ``__cause__``.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as handle:
                content = handle.read()
            return self.parse_content(content, str(file_path))
        except Exception as exc:
            # Kept broad on purpose: every failure has always surfaced as
            # ValueError from this method.
            raise ValueError(f"Error reading Go file {file_path}: {exc}") from exc

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse Go source content and extract documentation.

        Args:
            content: Full text of a Go source file.
            file_path: Stored verbatim under the ``file_path`` key.

        Returns:
            Dictionary with package documentation, functions and types.
        """
        result = {
            "title": None,
            "description": None,
            "package_docstring": None,
            "functions": [],
            "types": [],
            "file_path": file_path,
            "language": "go",
        }
        package_doc = self._extract_package_documentation(content)
        if package_doc:
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)
        result["functions"] = self._extract_functions(content)
        result["types"] = self._extract_types(content)
        return result

    def parse_go_doc(self, package_path: str) -> Dict[str, Any]:
        """Parse Go documentation using the ``go doc`` command.

        Args:
            package_path: Package path handed to ``go doc -all``.

        Returns:
            Documentation dictionary built from the command output.

        Raises:
            ValueError: If ``go`` is missing, times out (30 s) or exits with
                a non-zero status.
        """
        try:
            completed = subprocess.run(
                ["go", "doc", "-all", package_path],
                capture_output=True,
                text=True,
                timeout=30,
            )
        except subprocess.TimeoutExpired as exc:
            raise ValueError("go doc command timed out") from exc
        except FileNotFoundError as exc:
            raise ValueError("go command not found - ensure Go is installed") from exc
        if completed.returncode != 0:
            raise ValueError(f"go doc failed: {completed.stderr}")
        parsed = self._parse_go_doc_output(completed.stdout)
        if not parsed.get("file_path"):
            parsed["file_path"] = package_path
        return parsed

    # ------------------------------------------------------------------
    # Lexical helpers
    # ------------------------------------------------------------------
    def _advance_state(self, line: str, state: Optional[str]) -> Optional[str]:
        """Return the lexical state after ``line`` given the state before it.

        String, rune and raw-string literals as well as ``//`` comments are
        skipped so that a ``/*`` inside them does not open a block comment.
        """
        pos, end = 0, len(line)
        while pos < end:
            if state == self._IN_COMMENT:
                close = line.find("*/", pos)
                if close == -1:
                    return state
                state, pos = None, close + 2
            elif state == self._IN_RAW:
                close = line.find("`", pos)
                if close == -1:
                    return state
                state, pos = None, close + 1
            elif line.startswith("//", pos):
                return None  # rest of the line is a line comment
            elif line.startswith("/*", pos):
                state, pos = self._IN_COMMENT, pos + 2
            elif line[pos] == "`":
                state, pos = self._IN_RAW, pos + 1
            elif line[pos] in "\"'":
                quote = line[pos]
                pos += 1
                while pos < end and line[pos] != quote:
                    pos += 2 if line[pos] == "\\" else 1  # skip escapes
                pos += 1
            else:
                pos += 1
        return state

    def _line_start_states(self, lines: List[str]) -> List[Optional[str]]:
        """Return the lexical state at the start of every line.

        The list has ``len(lines) + 1`` entries; the last one is the state
        after the final line. ``None`` means ordinary code.
        """
        states: List[Optional[str]] = []
        state: Optional[str] = None
        for line in lines:
            states.append(state)
            state = self._advance_state(line, state)
        states.append(state)
        return states

    def _is_in_multiline_comment(self, lines: List[str], line_num: int) -> bool:
        """Check if line ``line_num`` starts inside a ``/* */`` comment."""
        if line_num <= 0:
            return False
        return self._line_start_states(lines[:line_num])[-1] == self._IN_COMMENT

    @staticmethod
    def _clean_block_text(text: str) -> str:
        """Strip whitespace and a leading ``*`` decoration from comment text."""
        cleaned = text.strip()
        if cleaned.startswith("*"):
            cleaned = cleaned.lstrip("*").strip()
        return cleaned

    @staticmethod
    def _matching_close(text: str, open_index: int) -> int:
        """Return the index of the bracket closing ``text[open_index]`` or -1."""
        depth = 0
        for index in range(open_index, len(text)):
            char = text[index]
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
                if depth == 0:
                    return index
        return -1

    @staticmethod
    def _split_top_level(text: str) -> List[str]:
        """Split ``text`` on commas that are not nested in brackets."""
        parts, depth, start = [], 0, 0
        for index, char in enumerate(text):
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
            elif char == "," and depth == 0:
                parts.append(text[start:index])
                start = index + 1
        parts.append(text[start:])
        return [part.strip() for part in parts if part.strip()]

    # ------------------------------------------------------------------
    # Package documentation
    # ------------------------------------------------------------------
    def _extract_package_documentation(self, content: str) -> Optional[str]:
        """Extract the doc comment directly above the ``package`` clause.

        Only the comment group adjacent to the clause counts: a blank line
        (outside a block comment) or any code discards what was collected,
        so license headers and build constraints are not mistaken for
        package documentation. Both ``//`` and ``/* */`` comments are read.

        Returns:
            Comment text joined with newlines, or ``None`` when absent. If
            no ``package`` clause exists the last comment group is returned.
        """
        lines = content.split("\n")
        states = self._line_start_states(lines)
        group: List[str] = []
        for index, line in enumerate(lines):
            stripped = line.strip()
            state = states[index]
            if state == self._IN_COMMENT:
                text = self._clean_block_text(stripped.partition("*/")[0])
                if text:
                    group.append(text)
            elif state is not None or not stripped:
                group = []  # raw string or blank line: comment is detached
            elif stripped.startswith("package "):
                break
            elif stripped.startswith("//"):
                if not self._DIRECTIVE_RE.match(stripped):
                    text = stripped[2:].strip()
                    if text:
                        group.append(text)
            elif stripped.startswith("/*"):
                text = self._clean_block_text(stripped[2:].partition("*/")[0])
                if text:
                    group.append(text)
            else:
                group = []  # code before the package clause
        return "\n".join(group).strip() if group else None

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the first line of ``docstring`` as title (``None`` if empty)."""
        if not docstring:
            return None
        return docstring.strip().split("\n")[0].strip()

    # ------------------------------------------------------------------
    # Functions
    # ------------------------------------------------------------------
    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract function documentation from Go source.

        Every line that starts with ``func `` outside block comments and raw
        strings yields one entry; declarations without a recognisable name
        are skipped. The lexical state is computed once for the whole file.
        """
        lines = content.split("\n")
        states = self._line_start_states(lines)
        functions = []
        for index, line in enumerate(lines):
            if states[index] is not None:
                continue  # commented-out or quoted text
            if not line.strip().startswith("func "):
                continue
            signature = self._collect_signature(lines, index)
            func_doc = self._build_function_doc(
                self._extract_func_name(signature),
                signature,
                self._get_leading_comment(lines, index),
                lines,
                index,
            )
            if func_doc:
                functions.append(func_doc)
        return functions

    def _collect_signature(self, lines: List[str], index: int) -> str:
        """Return the signature starting at ``lines[index]``.

        A declaration whose first line ends with ``(`` or ``,`` and has no
        ``{`` continues on the following lines; those are joined into one
        line. Otherwise the stripped first line is returned unchanged.
        """
        signature = lines[index].strip()
        head = signature.split("//", 1)[0].rstrip()
        if "{" in head or not head.endswith(("(", ",")):
            return signature
        pieces = [head]
        depth = head.count("(") - head.count(")")
        stop = min(len(lines), index + 1 + self._MAX_SIGNATURE_LINES)
        for extra in lines[index + 1:stop]:
            if depth <= 0:
                break
            extra = extra.split("//", 1)[0].strip()
            if extra:
                pieces.append(extra)
                depth += extra.count("(") - extra.count(")")
        if depth > 0:
            return signature  # never balanced: keep the plain first line
        joined = re.sub(r"\(\s+", "(", " ".join(pieces))
        return re.sub(r",\s*\)", ")", joined)

    def _extract_func_name(self, line: str) -> str:
        """Extract the function name, skipping an optional method receiver."""
        match = re.search(r'func\s+(?:\([^)]*\)\s*)?(\w+)', line)
        return match.group(1) if match else ""

    def _read_block_comment_upwards(self, lines: List[str], index: int):
        """Read a ``/* */`` comment whose last line is ``lines[index]``.

        Returns:
            ``(texts, opener_index)`` with ``texts`` in bottom-up order, or
            ``None`` when the comment trails code or is never opened.
        """
        texts = []
        body = lines[index].strip()[:-2]
        while True:
            opener = body.rfind("/*")
            if opener != -1:
                if body[:opener].strip():
                    return None  # trailing comment after code
                body = body[opener + 2:]
            cleaned = self._clean_block_text(body)
            if cleaned:
                texts.append(cleaned)
            if opener != -1:
                return texts, index
            if index == 0:
                return None
            index -= 1
            body = lines[index].strip()

    def _get_leading_comment(self, lines: List[str], line_num: int) -> List[str]:
        """Get the doc comment immediately preceding ``lines[line_num]``.

        The comment must be adjacent: the first blank or code line above the
        declaration ends it. Empty comment lines and tool directives are
        dropped.

        Returns:
            Comment lines in source order, without comment markers.
        """
        collected = []  # gathered bottom-up, reversed on return
        index = line_num - 1
        while index >= 0:
            text = lines[index].strip()
            if text.startswith("//"):
                if not self._DIRECTIVE_RE.match(text):
                    cleaned = text[2:].strip()
                    if cleaned:
                        collected.append(cleaned)
                index -= 1
            elif text.endswith("*/"):
                block = self._read_block_comment_upwards(lines, index)
                if block is None:
                    break
                texts, opener_index = block
                collected.extend(texts)
                index = opener_index - 1
            else:
                break  # blank line or code ends the comment group
        return collected[::-1]

    def _build_function_doc(self, name: str, signature: str, comments: List[str], lines: List[str], start: int) -> Optional[Dict[str, Any]]:
        """Build a function documentation dictionary.

        Args:
            name: Function name; an empty name yields ``None``.
            signature: Declaration text used to derive args and returns.
            comments: Doc comment lines, joined with newlines.
            lines: Unused; kept for interface compatibility.
            start: Zero-based line index, stored one-based as ``line_number``.
        """
        if not name:
            return None
        return {
            "name": name,
            "signature": signature,
            "description": "\n".join(comments) if comments else "",
            "args": self._extract_params_from_signature(signature),
            "returns": self._extract_returns_from_signature(signature),
            "line_number": start + 1,
        }

    def _split_signature(self, signature: str) -> tuple:
        """Split a signature into ``(parameter text, text after parameters)``.

        The ``func`` keyword, a method receiver and a type-parameter list
        are skipped; brackets are matched so nested parentheses are handled.
        Returns ``("", "")`` when no balanced parameter list is found.
        """
        text = signature.strip()
        keyword = re.match(r"func\b\s*", text)
        pos = keyword.end() if keyword else 0
        if text.startswith("(", pos):
            close = self._matching_close(text, pos)
            if close == -1:
                return "", ""
            # A parenthesised group followed by ``Name(`` is a receiver.
            follower = re.match(r"\s*(\w+)\s*[(\[]", text[close + 1:])
            if follower and follower.group(1) not in self._TYPE_KEYWORDS:
                pos = close + 1
        bracket = self._OPEN_BRACKET_RE.search(text, pos)
        if bracket and bracket.group() == "[":
            close = self._matching_close(text, bracket.start())
            if close == -1:
                return "", ""
            paren = text.find("(", close + 1)
        else:
            paren = bracket.start() if bracket else -1
        if paren == -1:
            return "", ""
        close = self._matching_close(text, paren)
        if close == -1:
            return "", ""
        return text[paren + 1:close], text[close + 1:]

    def _extract_params_from_signature(self, signature: str) -> List[Dict[str, str]]:
        """Extract named parameters from a function signature.

        Grouped names (``a, b int``) share the type that follows them.
        Unnamed parameters and names starting with ``_`` are omitted.

        Returns:
            One ``{"name", "type", "description"}`` dict per parameter.
        """
        param_text, _ = self._split_signature(signature)
        params = []
        pending: List[str] = []  # names still waiting for their type
        for part in self._split_top_level(param_text):
            pieces = part.split(None, 1)
            first = pieces[0]
            is_name = first.isidentifier() and first not in self._TYPE_KEYWORDS
            if is_name and len(pieces) == 2:
                type_str = pieces[1].strip()
                for name in pending + [first]:
                    if not name.startswith("_"):
                        params.append({"name": name, "type": type_str, "description": ""})
                pending = []
            elif is_name:
                pending.append(first)  # grouped name or a bare unnamed type
            else:
                pending = []  # unnamed type such as *T, []int or chan int
        return params

    def _extract_returns_from_signature(self, signature: str) -> Optional[Dict[str, str]]:
        """Extract the result type(s) from a function signature.

        Returns:
            ``{"type", "description"}`` where ``type`` is the text between
            the parameter list and the body (for example ``error``,
            ``*T`` or ``(int, error)``), or ``None`` without results.
        """
        _, rest = self._split_signature(signature)
        rest = rest.split("//", 1)[0]
        search_from = 0
        while True:
            brace = rest.find("{", search_from)
            if brace == -1:
                break
            if rest[:brace].rstrip().endswith(("interface", "struct")):
                # Brace belongs to an inline type literal, not the body.
                close = self._matching_close(rest, brace)
                if close == -1:
                    break
                search_from = close + 1
                continue
            rest = rest[:brace]
            break
        return_type = rest.strip()
        if not return_type:
            return None
        return {"type": return_type, "description": ""}

    # ------------------------------------------------------------------
    # Types
    # ------------------------------------------------------------------
    def _extract_types(self, content: str) -> List[Dict[str, Any]]:
        """Extract struct and interface declarations with their doc comments.

        Declarations inside block comments or raw strings are ignored.
        ``line_number`` is the one-based line of the ``type`` keyword.
        ``fields`` stays empty: member parsing is not implemented.
        """
        lines = content.split("\n")
        states = self._line_start_states(lines)
        types = []
        for index, line in enumerate(lines):
            if states[index] is not None:
                continue
            match = self._TYPE_DECL_RE.match(line.strip())
            if not match:
                continue
            types.append({
                "name": match.group(1),
                "description": "\n".join(self._get_leading_comment(lines, index)),
                "fields": [],
                "line_number": index + 1,
            })
        return types

    # ------------------------------------------------------------------
    # go doc output
    # ------------------------------------------------------------------
    @staticmethod
    def _read_indented_doc(lines: List[str], index: int) -> tuple:
        """Collect the indented text block starting at ``lines[index]``.

        Returns:
            ``(texts, next_index)``; blank lines are consumed but not kept.
        """
        texts = []
        while index < len(lines):
            line = lines[index]
            if line.strip() and not line[0].isspace():
                break
            if line.strip():
                texts.append(line.strip())
            index += 1
        return texts, index

    def _parse_go_doc_output(self, output: str) -> Dict[str, Any]:
        """Parse the text printed by ``go doc -all``.

        Expected layout: a ``package`` line, the package prose, then
        unindented ``func``/``type`` declarations each followed by an
        indented description. ``line_number`` values refer to lines of
        ``output``; source positions are not part of the output.

        Returns:
            Dictionary shaped like the result of ``parse_content``.
        """
        result = {
            "title": None,
            "description": None,
            "package_docstring": None,
            "functions": [],
            "types": [],
            "file_path": "",
            "language": "go",
        }
        lines = output.split("\n")
        total = len(lines)
        declaration_starts = ("func ", "type ", "const ", "var ")

        # Header: package clause followed by the package prose.
        index = 0
        prose = []
        clause_seen = False
        while index < total:
            line = lines[index]
            stripped = line.strip()
            if stripped in self._GO_DOC_SECTIONS or line.startswith(declaration_starts):
                break
            if not clause_seen and line.startswith("package "):
                clause_seen = True
            elif stripped:
                prose.append(stripped)
            index += 1
        if prose:
            package_doc = "\n".join(prose)
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)

        # Declarations: only func and type entries are recorded.
        while index < total:
            line = lines[index]
            start = index
            index += 1
            if line.startswith("func "):
                signature = line.strip()
                comments, index = self._read_indented_doc(lines, index)
                func_doc = self._build_function_doc(
                    self._extract_func_name(signature), signature, comments, lines, start
                )
                if func_doc:
                    result["functions"].append(func_doc)
            elif line.startswith("type "):
                name = re.match(r"type\s+(\w+)", line)
                if line.rstrip().endswith("{"):
                    # Skip the body up to the closing brace in column 0.
                    while index < total and not lines[index].startswith("}"):
                        index += 1
                    index += 1
                comments, index = self._read_indented_doc(lines, index)
                if name:
                    result["types"].append({
                        "name": name.group(1),
                        "description": "\n".join(comments),
                        "fields": [],
                        "line_number": start + 1,
                    })
        return result
def parse_go_file(file_path: Path) -> Dict[str, Any]:
    """Convenience wrapper: parse one Go file with a fresh parser.

    Args:
        file_path: Location of the Go source file.

    Returns:
        Whatever ``GoDocstringParser.parse_file`` returns for that file.
    """
    return GoDocstringParser().parse_file(file_path)