Add parsers: Python, Go, and JavaScript docstring parsers
This commit is contained in:
292
doc2man/parsers/go.py
Normal file
292
doc2man/parsers/go.py
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
"""Go docstring parser for Doc2Man."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class GoDocstringParser:
    """Parser for Go documentation comments.

    Source text is scanned with string operations and regular expressions;
    no real Go parser is involved, so unusually formatted code may not be
    recognised. ``parse_go_doc`` alternatively shells out to ``go doc``.

    Every ``parse_*`` entry point returns a dictionary with the keys
    ``title``, ``description``, ``package_docstring``, ``functions``,
    ``types``, ``file_path`` and ``language``.
    """

    # Section headings looked for in ``go doc -all`` output.
    _GO_DOC_SECTIONS = ("CONSTANTS", "VARIABLES", "FUNCTIONS", "TYPES")
    # Words that begin a type expression and therefore are never a
    # parameter name (``chan int`` is a type, not "chan of type int").
    _TYPE_KEYWORDS = ("chan", "func", "interface", "map", "struct")
    # Upper bound on how many continuation lines a signature may span.
    _MAX_SIGNATURE_LINES = 25
    # Tool directives such as ``//go:build`` or ``// +build``; these are
    # instructions for tools, not documentation.
    _DIRECTIVE = re.compile(r"//(?:[a-z0-9]+:[a-z0-9]|\s*\+build\b)")
    # ``type Name struct {`` / ``type Name[T any] interface {`` at line start.
    _TYPE_HEADER = re.compile(
        r"^[ \t]*type\s+(\w+)(?:\[[^\]]*\])?\s+(struct|interface)\s*\{",
        re.MULTILINE,
    )

    def __init__(self):
        """Initialize the parser (it keeps no per-instance state)."""

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a Go source file and extract documentation.

        Args:
            file_path: Path of the UTF-8 encoded Go file to read.

        Returns:
            The documentation dictionary built by ``parse_content``.

        Raises:
            ValueError: If the file cannot be read or decoded. The original
                error is kept as ``__cause__``.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            raise ValueError(f"Error reading Go file {file_path}: {e}") from e
        return self.parse_content(content, str(file_path))

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse Go source content and extract documentation.

        Args:
            content: Full text of a Go source file.
            file_path: Value stored under ``"file_path"`` in the result.

        Returns:
            Dictionary with package documentation, functions and types.
        """
        result = self._new_result(file_path)

        package_doc = self._extract_package_documentation(content)
        if package_doc:
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)

        result["functions"] = self._extract_functions(content)
        result["types"] = self._extract_types(content)
        return result

    def parse_go_doc(self, package_path: str) -> Dict[str, Any]:
        """Parse Go documentation using the ``go doc`` command.

        Args:
            package_path: Package argument handed to ``go doc -all``.

        Returns:
            Documentation dictionary built from the command's stdout.

        Raises:
            ValueError: If ``go`` is not installed, the command exits with a
                non-zero status, or it does not finish within 30 seconds.
        """
        try:
            result = subprocess.run(
                ["go", "doc", "-all", package_path],
                capture_output=True,
                text=True,
                timeout=30,
            )
        except subprocess.TimeoutExpired as e:
            raise ValueError("go doc command timed out") from e
        except FileNotFoundError as e:
            raise ValueError("go command not found - ensure Go is installed") from e

        if result.returncode != 0:
            raise ValueError(f"go doc failed: {result.stderr}")

        return self._parse_go_doc_output(result.stdout)

    # ------------------------------------------------------------------
    # Source-file extraction
    # ------------------------------------------------------------------

    @staticmethod
    def _new_result(file_path: str) -> Dict[str, Any]:
        """Return an empty documentation dictionary for ``file_path``."""
        return {
            "title": None,
            "description": None,
            "package_docstring": None,
            "functions": [],
            "types": [],
            "file_path": file_path,
            "language": "go",
        }

    def _extract_package_documentation(self, content: str) -> Optional[str]:
        """Extract the package comment that sits directly above ``package``.

        Only the comment group immediately preceding the package clause is
        used: a blank line (for example after a licence header) starts a new
        group. Both ``//`` and ``/* ... */`` comments are supported; empty
        comment lines and tool directives are skipped.

        Returns:
            The comment text joined by newlines, or ``None`` when there is no
            such comment or no package clause at all.
        """
        doc_lines: List[str] = []
        in_block = False

        for line in content.split("\n"):
            stripped = line.strip()

            if in_block:
                end = stripped.find("*/")
                if end != -1:
                    stripped = stripped[:end].strip()
                    in_block = False
                if stripped:
                    doc_lines.append(stripped)
                continue

            if not stripped:
                # A blank line detaches whatever came before it.
                doc_lines = []
                continue

            if stripped.startswith("//"):
                if not self._DIRECTIVE.match(stripped):
                    text = stripped[2:].strip()
                    if text:
                        doc_lines.append(text)
                continue

            if stripped.startswith("/*"):
                text = stripped[2:]
                end = text.find("*/")
                if end == -1:
                    in_block = True
                else:
                    text = text[:end]
                text = text.strip()
                if text:
                    doc_lines.append(text)
                continue

            if re.match(r"package\s", stripped):
                return "\n".join(doc_lines).strip() or None

            # Any other code line means the comments seen so far were not
            # attached to the package clause.
            doc_lines = []

        return None

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the first line of ``docstring``, or ``None`` if it is empty."""
        if not docstring:
            return None
        first_line = docstring.strip().split("\n", 1)[0].strip()
        return first_line or None

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract documentation for every named ``func`` declaration.

        A declaration is a line whose stripped text starts with ``func ``.
        Lines inside a ``/* ... */`` comment that opens at the start of a line
        are ignored. The file is walked once.

        Returns:
            Function dictionaries in source order (see
            ``_build_function_doc``).
        """
        lines = content.split("\n")
        functions: List[Dict[str, Any]] = []
        in_block = False

        for index, line in enumerate(lines):
            stripped = line.strip()

            if in_block:
                in_block = "*/" not in stripped
                continue
            # Only comments that *start* a line switch state, so a "/*"
            # inside a string literal cannot hide the rest of the file.
            if stripped.startswith("/*"):
                in_block = "*/" not in stripped[2:]
                continue
            if not stripped.startswith("func "):
                continue

            func_doc = self._build_function_doc(
                self._extract_func_name(stripped),
                self._collect_signature(lines, index),
                self._get_leading_comment(lines, index),
                lines,
                index,
            )
            if func_doc:
                functions.append(func_doc)

        return functions

    def _collect_signature(self, lines: List[str], start: int) -> str:
        """Return the signature starting at ``lines[start]``.

        If the parentheses on the first line are unbalanced, following lines
        (with ``//`` comments removed) are appended, separated by a space,
        until they balance. When they never balance within
        ``_MAX_SIGNATURE_LINES`` lines, only the first line is returned.
        """
        first = lines[start].strip()
        code = first.split("//", 1)[0]
        depth = code.count("(") - code.count(")")
        if depth <= 0:
            return first

        signature = first
        limit = min(len(lines), start + 1 + self._MAX_SIGNATURE_LINES)
        index = start + 1
        while depth > 0 and index < limit:
            piece = lines[index].split("//", 1)[0].strip()
            if piece:
                signature = f"{signature} {piece}"
                depth += piece.count("(") - piece.count(")")
            index += 1
        return signature if depth == 0 else first

    def _extract_func_name(self, line: str) -> str:
        """Extract the function or method name from a signature line.

        Returns an empty string when no name follows ``func`` (and the
        optional receiver).
        """
        match = re.search(r'func\s+(?:\([^)]*\)\s*)?(\w+)', line)
        return match.group(1) if match else ""

    def _get_leading_comment(self, lines: List[str], line_num: int) -> List[str]:
        """Get the doc comment immediately preceding ``lines[line_num]``.

        The comment must touch the declaration: a blank line or a code line
        ends the search. Either a run of ``//`` lines or a single
        ``/* ... */`` block is accepted. Empty comment lines and tool
        directives are left out.

        Returns:
            Comment text, one entry per line, in source order.
        """
        collected: List[str] = []
        index = line_num - 1

        while index >= 0:
            line = lines[index].strip()
            if line.startswith("//"):
                if not self._DIRECTIVE.match(line):
                    text = line[2:].strip()
                    if text:
                        collected.append(text)
                index -= 1
                continue
            if line.endswith("*/") and not collected:
                return self._read_block_comment_upwards(lines, index)
            break

        collected.reverse()
        return collected

    @staticmethod
    def _read_block_comment_upwards(lines: List[str], end_index: int) -> List[str]:
        """Return the text of the block comment that ends on ``end_index``.

        Walks upwards until the line holding ``/*``. An empty list is
        returned when the opener is missing or follows code on its line.
        """
        block: List[str] = []
        index = end_index
        while index >= 0:
            text = lines[index].strip()
            if index == end_index:
                text = text[:-2]  # drop the closing "*/"
            opener = text.find("/*")
            if opener > 0:
                return []  # trailing comment after code, not a doc comment
            if opener == 0:
                text = text[2:]
            text = text.strip()
            if text:
                block.append(text)
            if opener == 0:
                block.reverse()
                return block
            index -= 1
        return []

    def _is_in_multiline_comment(self, lines: List[str], line_num: int) -> bool:
        """Check if ``lines[line_num]`` is inside a ``/* ... */`` comment.

        Scans every earlier line, so each call is linear in ``line_num``.
        Kept as a helper; the extraction methods track comment state
        themselves while iterating.
        """
        in_comment = False
        for line in lines[:line_num]:
            if "/*" in line:
                in_comment = True
            if "*/" in line:
                in_comment = False
        return in_comment

    def _build_function_doc(self, name: str, signature: str, comments: List[str], lines: List[str], start: int) -> Optional[Dict[str, Any]]:
        """Build a function documentation dictionary.

        Args:
            name: Function name; an empty name yields ``None``.
            signature: Signature text used for parameter/result extraction.
            comments: Doc comment lines, joined with newlines.
            lines: All lines of the input (unused; kept for the interface).
            start: Zero-based index of the declaration line.

        Returns:
            Dictionary with ``name``, ``signature``, ``description``,
            ``args``, ``returns`` and one-based ``line_number``.
        """
        if not name:
            return None

        return {
            "name": name,
            "signature": signature,
            "description": "\n".join(comments) if comments else "",
            "args": self._extract_params_from_signature(signature),
            "returns": self._extract_returns_from_signature(signature),
            "line_number": start + 1,
        }

    # ------------------------------------------------------------------
    # Signature parsing
    # ------------------------------------------------------------------

    @staticmethod
    def _find_closing(text: str, open_index: int) -> int:
        """Return the index of the bracket closing ``text[open_index]``.

        Only brackets of the same kind are counted. Returns ``-1`` when the
        bracket is never closed.
        """
        opener = text[open_index]
        closer = {"(": ")", "[": "]", "{": "}"}[opener]
        depth = 0
        for index in range(open_index, len(text)):
            char = text[index]
            if char == opener:
                depth += 1
            elif char == closer:
                depth -= 1
                if depth == 0:
                    return index
        return -1

    @staticmethod
    def _split_top_level(text: str) -> List[str]:
        """Split ``text`` on commas that are not nested in brackets."""
        parts: List[str] = []
        current: List[str] = []
        depth = 0
        for char in text:
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
            if char == "," and depth == 0:
                parts.append("".join(current))
                current = []
            else:
                current.append(char)
        parts.append("".join(current))
        return parts

    def _locate_param_list(self, signature: str) -> Optional[tuple]:
        """Find the parentheses enclosing the parameter list.

        A method receiver and a type-parameter list (``[T any]``) are
        skipped. Text that does not start with ``func`` falls back to its
        first parenthesised group.

        Returns:
            ``(open_index, close_index)`` or ``None`` if no balanced
            parameter list is present.
        """
        keyword = re.match(r"\s*func\b\s*", signature)
        if not keyword:
            pos = signature.find("(")
            if pos == -1:
                return None
            close = self._find_closing(signature, pos)
            return (pos, close) if close != -1 else None

        pos = keyword.end()
        if signature.startswith("(", pos):
            close = self._find_closing(signature, pos)
            if close == -1:
                return None
            # A receiver is followed by "Name(" or "Name["; otherwise this
            # group already is the parameter list of an unnamed function.
            follow = re.match(r"\s*(\w+)\s*[\[(]", signature[close + 1:])
            if not follow or follow.group(1) == "func":
                return (pos, close)
            pos = close + 1

        name = re.match(r"\s*\w+\s*", signature[pos:])
        if name:
            pos += name.end()
        if signature.startswith("[", pos):
            close = self._find_closing(signature, pos)
            if close == -1:
                return None
            pos = close + 1
        while pos < len(signature) and signature[pos].isspace():
            pos += 1
        if not signature.startswith("(", pos):
            return None
        close = self._find_closing(signature, pos)
        return (pos, close) if close != -1 else None

    def _is_param_name(self, token: str) -> bool:
        """Return True if ``token`` can be a parameter name."""
        return (
            re.fullmatch(r"[^\W\d]\w*", token) is not None
            and token not in self._TYPE_KEYWORDS
        )

    def _extract_params_from_signature(self, signature: str) -> List[Dict[str, str]]:
        """Extract named parameters from a function signature.

        Handles method receivers, grouped names (``a, b int``), multi-word
        types (``ch chan int``) and nested parentheses (``f func(int) int``).
        Unnamed parameters and names starting with ``_`` are omitted.

        Returns:
            One ``{"name", "type", "description"}`` dictionary per parameter,
            with an empty description.
        """
        span = self._locate_param_list(signature)
        if span is None:
            return []

        # Each entry is [name, type]; name is "" until it is known.
        entries: List[List[str]] = []
        for part in self._split_top_level(signature[span[0] + 1:span[1]]):
            part = part.strip()
            if not part:
                continue
            pieces = part.split(None, 1)
            if len(pieces) == 2 and self._is_param_name(pieces[0]):
                entries.append([pieces[0], pieces[1].strip()])
            else:
                entries.append(["", part])

        # In "a, b int" the bare "a" is a name sharing the next entry's type.
        # That only applies when the list is named at all.
        if any(entry[0] for entry in entries):
            shared_type = ""
            for entry in reversed(entries):
                if entry[0]:
                    shared_type = entry[1]
                elif shared_type and self._is_param_name(entry[1]):
                    entry[0], entry[1] = entry[1], shared_type

        return [
            {"name": name, "type": type_str, "description": ""}
            for name, type_str in entries
            if name and not name.startswith("_")
        ]

    def _trim_result_type(self, text: str) -> str:
        """Cut ``text`` at the function body or a trailing ``//`` comment.

        Braces that belong to ``interface{...}`` or ``struct{...}`` types are
        kept; the first other ``{`` is taken as the start of the body.
        """
        index = 0
        while index < len(text):
            if text.startswith("//", index):
                break
            if text[index] == "{":
                before = text[:index].rstrip()
                if before.endswith(("interface", "struct")):
                    close = self._find_closing(text, index)
                    if close == -1:
                        break
                    index = close + 1
                    continue
                break
            index += 1
        return text[:index].strip()

    def _extract_returns_from_signature(self, signature: str) -> Optional[Dict[str, str]]:
        """Extract the result type from a function signature.

        The result is whatever follows the parameter list: either a
        parenthesised list (returned with its parentheses) or a single type
        such as ``error``, ``*T``, ``[]string`` or ``pkg.Type``.

        Returns:
            ``{"type", "description"}`` with an empty description, or
            ``None`` when the function declares no result.
        """
        span = self._locate_param_list(signature)
        if span is None:
            return None

        rest = signature[span[1] + 1:].strip()
        if rest.startswith("("):
            close = self._find_closing(rest, 0)
            return_type = rest[:close + 1] if close != -1 else ""
        else:
            return_type = self._trim_result_type(rest)

        if not return_type:
            return None
        return {"type": return_type, "description": ""}

    # ------------------------------------------------------------------
    # Type extraction
    # ------------------------------------------------------------------

    def _parse_struct_fields(self, body: str) -> List[Dict[str, str]]:
        """Parse the fields in the text between a struct's braces.

        Only top-level fields are reported; the contents of nested
        ``struct {`` bodies are skipped. A field's description is its
        trailing ``//`` comment, or else the ``//`` lines directly above it.
        Struct tags are ignored.

        Returns:
            One ``{"name", "type", "description"}`` dictionary per field.
        """
        fields: List[Dict[str, str]] = []
        pending: List[str] = []
        depth = 0

        for raw in body.split("\n"):
            line = raw.strip()
            if depth > 0:
                depth = max(depth + line.count("{") - line.count("}"), 0)
                continue
            if not line:
                pending = []
                continue
            if line.startswith("//"):
                text = line[2:].strip()
                if text:
                    pending.append(text)
                continue
            if line.startswith("/*"):
                continue

            # Remove the `tag` first so "//" inside a tag is not mistaken
            # for a comment.
            segments = line.split("`")
            if len(segments) >= 3:
                code = segments[0]
                comment = "`".join(segments[2:]).partition("//")[2]
            else:
                code, _, comment = line.partition("//")
            code = code.strip()
            depth = max(depth + code.count("{") - code.count("}"), 0)
            description = comment.strip() or "\n".join(pending)
            pending = []

            parts = [part.strip() for part in self._split_top_level(code)]
            head = parts[-1].split(None, 1)
            if not head:
                continue
            if len(head) == 2:
                names = parts[:-1] + [head[0]]
                type_str = head[1].strip()
                if depth > 0 and type_str.endswith("{"):
                    type_str = type_str[:-1].rstrip()
            else:
                # Embedded field: it is named after its type.
                names = [head[0].lstrip("*").split(".")[-1]]
                type_str = head[0]

            for name in names:
                if re.fullmatch(r"\w+", name):
                    fields.append(
                        {"name": name, "type": type_str, "description": description}
                    )

        return fields

    def _extract_types(self, content: str) -> List[Dict[str, Any]]:
        """Extract ``struct`` and ``interface`` declarations.

        Matches ``type Name struct {`` / ``type Name interface {`` at the
        start of a line (optionally with a type-parameter list).

        Returns:
            One dictionary per type with ``name``, ``description`` (the doc
            comment directly above), ``fields`` (struct fields; empty for
            interfaces) and the one-based ``line_number`` of the ``type``
            line.
        """
        lines = content.split("\n")
        types: List[Dict[str, Any]] = []

        for match in self._TYPE_HEADER.finditer(content):
            open_index = match.end() - 1
            close_index = self._find_closing(content, open_index)
            if close_index == -1:
                body = content[open_index + 1:]
            else:
                body = content[open_index + 1:close_index]

            # The pattern is anchored at the line start, so the number of
            # newlines before the match is the zero-based line index.
            line_index = content.count("\n", 0, match.start())
            is_struct = match.group(2) == "struct"

            types.append({
                "name": match.group(1),
                "description": "\n".join(self._get_leading_comment(lines, line_index)),
                "fields": self._parse_struct_fields(body) if is_struct else [],
                "line_number": line_index + 1,
            })

        return types

    # ------------------------------------------------------------------
    # ``go doc`` output
    # ------------------------------------------------------------------

    @staticmethod
    def _read_indented_doc(lines: List[str], start: int) -> tuple:
        """Collect indented text lines starting at ``lines[start]``.

        Reading stops at the first non-blank line that begins in column 0.

        Returns:
            ``(doc_lines, next_index)`` where ``doc_lines`` holds the
            stripped non-empty lines and ``next_index`` is where reading
            stopped.
        """
        doc: List[str] = []
        index = start
        while index < len(lines):
            line = lines[index]
            text = line.strip()
            if text and not line[0].isspace():
                break
            if text:
                doc.append(text)
            index += 1
        return doc, index

    def _parse_go_doc_output(self, output: str) -> Dict[str, Any]:
        """Convert the text printed by ``go doc -all`` into a result dict.

        NOTE(review): this assumes the plain-text layout ``go doc`` is
        expected to print - a ``package ...`` line, the package overview,
        upper-case section headings, declarations starting in column 0 and
        their documentation indented underneath. Confirm against the Go
        versions that need to be supported.

        ``line_number`` values refer to lines of ``output``, not to Go source
        files, and ``file_path`` is left empty.
        """
        result = self._new_result("")
        lines = output.split("\n")
        total = len(lines)

        index = 0
        clause = next(
            (i for i, line in enumerate(lines) if line.startswith("package ")),
            None,
        )
        if clause is not None:
            index = clause + 1
            overview: List[str] = []
            while index < total and lines[index].strip() not in self._GO_DOC_SECTIONS:
                text = lines[index].strip()
                if text:
                    overview.append(text)
                index += 1
            package_doc = "\n".join(overview)
            if package_doc:
                result["package_docstring"] = package_doc
                result["description"] = package_doc
                result["title"] = self._extract_title(package_doc)

        while index < total:
            line = lines[index]

            if line.startswith("func "):
                doc, next_index = self._read_indented_doc(lines, index + 1)
                func_doc = self._build_function_doc(
                    self._extract_func_name(line), line.strip(), doc, lines, index
                )
                if func_doc:
                    result["functions"].append(func_doc)
                index = next_index
                continue

            header = re.match(r"type\s+(\w+)", line)
            if header:
                body_end = index
                if line.rstrip().endswith("{"):
                    # Skip the declaration body up to its closing brace.
                    body_end = index + 1
                    while body_end < total and not lines[body_end].startswith("}"):
                        body_end += 1
                doc, next_index = self._read_indented_doc(lines, body_end + 1)
                is_struct = re.match(
                    r"type\s+\w+(?:\[[^\]]*\])?\s+struct\b", line
                ) is not None
                body = "\n".join(lines[index + 1:body_end])
                result["types"].append({
                    "name": header.group(1),
                    "description": "\n".join(doc),
                    "fields": self._parse_struct_fields(body) if is_struct else [],
                    "line_number": index + 1,
                })
                index = max(next_index, index + 1)
                continue

            index += 1

        return result
|
||||||
|
|
||||||
|
|
||||||
|
def parse_go_file(file_path: Path) -> Dict[str, Any]:
    """Convenience wrapper: parse one Go file with a fresh parser.

    Creates a ``GoDocstringParser`` and returns whatever its ``parse_file``
    method returns for ``file_path``.
    """
    return GoDocstringParser().parse_file(file_path)
|
||||||
Reference in New Issue
Block a user