Add parsers: Python, Go, and JavaScript docstring parsers
This commit is contained in:
292
doc2man/parsers/go.py
Normal file
292
doc2man/parsers/go.py
Normal file
@@ -0,0 +1,292 @@
|
||||
"""Go docstring parser for Doc2Man."""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class GoDocstringParser:
    """Extract documentation from Go code by scanning source or via ``go doc``.

    Source scanning is line- and regex-based; it is not a real Go parser, so
    braces or comment markers inside string literals can confuse it. Every
    ``parse_*`` entry point returns a dict with the keys ``title``,
    ``description``, ``package_docstring``, ``functions``, ``types``,
    ``file_path`` and ``language``.
    """

    # Section headings that separate the package text from the declarations
    # in the output this parser expects from ``go doc -all``.
    _GO_DOC_SECTIONS = ("CONSTANTS", "VARIABLES", "FUNCTIONS", "TYPES")

    # Keywords that open a type literal and so can never be a parameter name.
    _TYPE_KEYWORDS = frozenset({"chan", "func", "map", "struct", "interface"})

    # Tool directives such as ``//go:generate``; they are not documentation.
    _DIRECTIVE = re.compile(r"//[a-z0-9]+:[a-z0-9]")

    # ``func``, an optional receiver and the function name.
    _FUNC_HEAD = re.compile(r"\s*func\s+(?:\([^)]*\)\s*)?\w+\s*")

    # Start of a struct/interface declaration, anchored to the start of a line
    # so that the word "type" inside comments or identifiers is not matched.
    _TYPE_DECL = re.compile(
        r"^[ \t]*type\s+(\w+)(?:\s*\[[^\]]*\])?\s+(?:struct|interface)\s*\{",
        re.MULTILINE,
    )

    # ``A, B Type`` style field line: one or more names followed by a type.
    _FIELD_DECL = re.compile(r"((?:\w+\s*,\s*)*\w+)\s+(.+)")

    # ``Name(params) results`` style interface method line.
    _METHOD_DECL = re.compile(r"(\w+)\s*(\(.*)")

    def __init__(self):
        """Initialize the parser; it keeps no per-instance state."""

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a Go source file and extract documentation.

        Args:
            file_path: Path of the Go file; it is read as UTF-8.

        Returns:
            The result dict produced by :meth:`parse_content`.

        Raises:
            ValueError: If the file cannot be read or parsed. The original
                exception is attached as ``__cause__``.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return self.parse_content(content, str(file_path))
        except Exception as e:
            # Deliberately broad so that callers only need to handle ValueError.
            raise ValueError(f"Error reading Go file {file_path}: {e}") from e

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse Go source text and extract documentation.

        Args:
            content: Go source code.
            file_path: Stored verbatim under the ``file_path`` key.

        Returns:
            Result dict; ``title``, ``description`` and ``package_docstring``
            stay ``None`` when no package documentation is found.
        """
        result = self._empty_result(file_path)

        package_doc = self._extract_package_documentation(content)
        if package_doc:
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)

        result["functions"] = self._extract_functions(content)
        result["types"] = self._extract_types(content)
        return result

    @staticmethod
    def _empty_result(file_path: str) -> Dict[str, Any]:
        """Return a result dict with every key present and nothing extracted."""
        return {
            "title": None,
            "description": None,
            "package_docstring": None,
            "functions": [],
            "types": [],
            "file_path": file_path,
            "language": "go",
        }

    def _line_comment_text(self, stripped: str) -> str:
        """Return the text of a ``//`` comment line, or ``""`` for a directive."""
        if self._DIRECTIVE.match(stripped):
            return ""
        return stripped[2:].strip()

    def _extract_package_documentation(self, content: str) -> Optional[str]:
        """Extract the comment group directly above the ``package`` clause.

        Both ``//`` and ``/* */`` comments are supported. A blank line detaches
        a comment from the clause, so a license header separated by a blank
        line is not reported as package documentation.

        Returns:
            The comment text (empty comment lines dropped), or ``None`` when
            there is no package clause or no adjacent comment.
        """
        group: List[str] = []  # comment lines adjacent to the current line
        in_block = False

        for line in content.split("\n"):
            stripped = line.strip()

            if in_block:
                end = stripped.find("*/")
                text = stripped if end == -1 else stripped[:end]
                if end != -1:
                    in_block = False
                if text.strip():
                    group.append(text.strip())
                continue

            if not stripped:
                group = []  # a blank line ends the current comment group
            elif stripped.startswith("//"):
                text = self._line_comment_text(stripped)
                if text:
                    group.append(text)
            elif stripped.startswith("/*"):
                inner = stripped[2:]
                end = inner.find("*/")
                if end == -1:
                    in_block = True
                else:
                    inner = inner[:end]
                if inner.strip():
                    group.append(inner.strip())
            elif stripped.startswith("package "):
                return "\n".join(group).strip() or None
            else:
                group = []  # unexpected code before the package clause

        return None

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the first line of *docstring*, or ``None`` if it is blank."""
        if not docstring:
            return None
        return docstring.strip().split("\n", 1)[0].strip() or None

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract one documentation entry per line that starts with ``func ``.

        Entries are returned in source order. Signatures that span several
        lines are joined before parameters and results are parsed.
        """
        functions = []
        lines = content.split("\n")

        for index, line in enumerate(lines):
            stripped = line.strip()
            if not stripped.startswith("func "):
                continue
            func_doc = self._build_function_doc(
                self._extract_func_name(stripped),
                self._collect_signature(lines, index),
                self._get_leading_comment(lines, index),
                lines,
                index,
            )
            if func_doc:
                functions.append(func_doc)

        return functions

    def _collect_signature(self, lines: List[str], start: int) -> str:
        """Return the signature starting at ``lines[start]`` as a single line.

        Continuation lines are appended only while parentheses are unbalanced
        and the first line has no ``{``; otherwise the first line is returned
        unchanged. Trailing ``//`` comments on continuation lines are dropped.
        """
        first = lines[start].strip()
        depth = self._paren_balance(first.split("//", 1)[0])
        if "{" in first or depth <= 0:
            return first

        signature = first
        for index in range(start + 1, len(lines)):
            piece = lines[index].split("//", 1)[0].strip()
            if not piece:
                continue
            signature += piece if signature.endswith("(") else " " + piece
            depth += self._paren_balance(piece)
            if depth <= 0:
                # Remove the trailing comma left by one-parameter-per-line lists.
                return re.sub(r",\s*\)", ")", signature)

        # Never balanced: fall back to the first line instead of the whole file.
        return first

    @staticmethod
    def _paren_balance(text: str) -> int:
        """Return the number of ``(`` minus the number of ``)`` in *text*."""
        return text.count("(") - text.count(")")

    def _extract_func_name(self, line: str) -> str:
        """Extract the function name, skipping an optional receiver.

        Returns ``""`` when no name can be found.
        """
        match = re.search(r'func\s+(?:\([^)]*\)\s*)?(\w+)', line)
        if match:
            return match.group(1)
        return ""

    def _get_leading_comment(self, lines: List[str], line_num: int) -> List[str]:
        """Get the ``//`` comment lines immediately preceding ``lines[line_num]``.

        The scan walks upwards and stops at the first line that is not a
        ``//`` comment (blank line, block comment or code), so comments that
        are separated from the declaration by a blank line are not included.
        Empty comment lines and tool directives are skipped.
        """
        comments: List[str] = []
        for index in range(line_num - 1, -1, -1):
            line = lines[index].strip()
            if not line.startswith("//"):
                break
            text = self._line_comment_text(line)
            if text:
                comments.append(text)
        comments.reverse()
        return comments

    def _is_in_multiline_comment(self, lines: List[str], line_num: int) -> bool:
        """Check whether ``lines[line_num]`` follows an unclosed ``/*``.

        This is a plain substring scan of the preceding lines: ``/*`` or ``*/``
        inside string literals or ``//`` comments is counted as well.
        """
        in_comment = False
        for i in range(line_num):
            line = lines[i].strip()
            if "/*" in line:
                in_comment = True
            if "*/" in line:
                in_comment = False
        return in_comment

    def _build_function_doc(self, name: str, signature: str, comments: List[str], lines: List[str], start: int) -> Optional[Dict[str, Any]]:
        """Build a function documentation dictionary.

        Args:
            name: Function name; an empty name yields ``None``.
            signature: Signature text used to derive ``args`` and ``returns``.
            comments: Documentation lines, joined with newlines.
            lines: Unused; kept so existing callers keep working.
            start: Zero-based line index; stored as ``line_number = start + 1``.
        """
        if not name:
            return None

        return {
            "name": name,
            "signature": signature,
            "description": "\n".join(comments) if comments else "",
            "args": self._extract_params_from_signature(signature),
            "returns": self._extract_returns_from_signature(signature),
            "line_number": start + 1,
        }

    @staticmethod
    def _find_closing(text: str, open_index: int, opener: str = "(", closer: str = ")") -> int:
        """Return the index of the bracket matching ``text[open_index]``, or -1."""
        depth = 0
        for index in range(open_index, len(text)):
            char = text[index]
            if char == opener:
                depth += 1
            elif char == closer:
                depth -= 1
                if depth == 0:
                    return index
        return -1

    def _split_signature(self, signature: str) -> Optional[List[str]]:
        """Split a signature into ``[parameter_text, text_after_parameters]``.

        A receiver and a type-parameter list are skipped, so for a method the
        parameter list is the group after the method name. When the text does
        not look like a named function, the first parenthesised group is used.
        Returns ``None`` when no balanced parameter list is found.
        """
        start = -1
        head = self._FUNC_HEAD.match(signature)
        if head:
            start = head.end()
            if signature.startswith("[", start):
                close = self._find_closing(signature, start, "[", "]")
                start = close + 1 if close != -1 else -1
            if start != -1:
                while start < len(signature) and signature[start].isspace():
                    start += 1
                if not signature.startswith("(", start):
                    start = -1
        if start == -1:
            start = signature.find("(")
        if start == -1:
            return None

        end = self._find_closing(signature, start)
        if end == -1:
            return None
        return [signature[start + 1:end], signature[end + 1:]]

    @staticmethod
    def _split_top_level(text: str) -> List[str]:
        """Split *text* on commas that are not nested inside brackets."""
        parts: List[str] = []
        current: List[str] = []
        depth = 0
        for char in text:
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
            if char == "," and depth == 0:
                parts.append("".join(current))
                current = []
            else:
                current.append(char)
        parts.append("".join(current))
        return parts

    def _is_param_name(self, token: str) -> bool:
        """Return True when *token* is an identifier and not a type keyword."""
        return (
            re.fullmatch(r"[^\W\d]\w*", token) is not None
            and token not in self._TYPE_KEYWORDS
        )

    def _extract_params_from_signature(self, signature: str) -> List[Dict[str, str]]:
        """Extract named parameters from a function signature.

        Names that share a type (``a, b int``) each get that type, and types
        made of several words (``chan int``) are kept whole. Unnamed
        parameters and names starting with ``_`` are omitted.

        Returns:
            A list of ``{"name", "type", "description"}`` dicts in
            declaration order; ``description`` is always ``""``.
        """
        pieces = self._split_signature(signature)
        if pieces is None:
            return []

        # Each entry is (name, type); type is "" for a bare token whose role
        # (name awaiting a type, or unnamed type) is decided below.
        entries = []
        for part in self._split_top_level(pieces[0]):
            tokens = part.split()
            if not tokens:
                continue
            if len(tokens) > 1 and self._is_param_name(tokens[0]):
                entries.append((tokens[0], " ".join(tokens[1:])))
            else:
                entries.append((" ".join(tokens), ""))

        # Go parameter lists are either all named or all unnamed.
        if not any(type_str for _, type_str in entries):
            return []

        params: List[Dict[str, str]] = []
        inherited = ""
        # Walk right to left so a bare name picks up the type that follows it.
        for name, type_str in reversed(entries):
            if type_str:
                inherited = type_str
            elif not self._is_param_name(name):
                continue
            if inherited and not name.startswith("_"):
                params.append({"name": name, "type": inherited, "description": ""})
        params.reverse()
        return params

    def _strip_body(self, text: str) -> str:
        """Drop a trailing ``//`` comment and the function body from *text*.

        Braces that belong to an ``interface{...}`` or ``struct{...}`` type
        literal are kept; the first other ``{`` is taken as the body start.
        """
        text = text.split("//", 1)[0]
        position = 0
        while True:
            brace = text.find("{", position)
            if brace == -1:
                return text.strip()
            before = text[:brace].rstrip()
            if not before.endswith(("interface", "struct")):
                return before.strip()
            close = self._find_closing(text, brace, "{", "}")
            if close == -1:
                return text.strip()
            position = close + 1

    def _extract_returns_from_signature(self, signature: str) -> Optional[Dict[str, str]]:
        """Extract the result type that follows the parameter list.

        A parenthesised result list is returned with its parentheses, e.g.
        ``(int, error)``.

        Returns:
            ``{"type", "description"}`` or ``None`` when there is no result.
        """
        pieces = self._split_signature(signature)
        if pieces is None:
            return None

        rest = pieces[1].strip()
        if rest.startswith("("):
            end = self._find_closing(rest, 0)
            return_type = rest[:end + 1] if end != -1 else ""
        else:
            return_type = self._strip_body(rest)

        if not return_type:
            return None
        return {"type": return_type, "description": ""}

    def _parse_type_fields(self, body: str) -> List[Dict[str, str]]:
        """Parse the lines between the braces of a struct or interface.

        Each entry is ``{"name", "type", "description"}``. The description is
        the ``//`` comment lines directly above the entry plus any trailing
        ``//`` comment. Struct tags are dropped. Interface methods use the text
        from ``(`` onwards as their type; embedded types use their own text as
        both name and type. Lines of nested multi-line literals are skipped.
        """
        fields: List[Dict[str, str]] = []
        pending: List[str] = []
        depth = 0

        for raw in body.split("\n"):
            line = raw.strip()

            if depth > 0:  # inside a nested struct/interface literal
                depth = max(depth + line.count("{") - line.count("}"), 0)
                continue
            if not line:
                pending = []
                continue
            if line.startswith("//"):
                text = self._line_comment_text(line)
                if text:
                    pending.append(text)
                continue
            if line.startswith("/*"):
                continue

            comment_at = line.find("//")
            tick_at = line.find("`")
            if tick_at != -1 and (comment_at == -1 or tick_at < comment_at):
                # A struct tag comes first; a comment can only follow it.
                decl = line[:tick_at]
                tag_end = line.find("`", tick_at + 1)
                remainder = line[tag_end + 1:] if tag_end != -1 else ""
                trailing = remainder.partition("//")[2]
            else:
                decl, _, trailing = line.partition("//")

            decl = decl.strip()
            depth = max(depth + decl.count("{") - decl.count("}"), 0)
            if decl.endswith("{"):
                decl = decl[:-1].rstrip()
            if not decl:
                continue

            notes = pending + ([trailing.strip()] if trailing.strip() else [])
            pending = []
            description = "\n".join(notes)

            named = self._FIELD_DECL.fullmatch(decl)
            method = None if named else self._METHOD_DECL.fullmatch(decl)
            if named:
                for name in named.group(1).split(","):
                    fields.append({
                        "name": name.strip(),
                        "type": named.group(2).strip(),
                        "description": description,
                    })
            elif method:
                fields.append({
                    "name": method.group(1),
                    "type": method.group(2).strip(),
                    "description": description,
                })
            else:
                fields.append({"name": decl, "type": decl, "description": description})

        return fields

    def _extract_types(self, content: str) -> List[Dict[str, Any]]:
        """Extract struct and interface declarations and their documentation.

        Returns:
            A list of ``{"name", "description", "fields", "line_number"}``
            dicts; ``line_number`` is the 1-based line of the ``type`` keyword.
            Declarations whose closing brace cannot be found are skipped.
        """
        types = []
        lines = content.split("\n")

        for match in self._TYPE_DECL.finditer(content):
            open_index = match.end() - 1
            close_index = self._find_closing(content, open_index, "{", "}")
            if close_index == -1:
                continue

            # The pattern is anchored at the start of the declaration line.
            line_index = content.count("\n", 0, match.start())
            types.append({
                "name": match.group(1),
                "description": "\n".join(self._get_leading_comment(lines, line_index)),
                "fields": self._parse_type_fields(content[open_index + 1:close_index]),
                "line_number": line_index + 1,
            })

        return types

    @staticmethod
    def _end_of_indented_block(lines: List[str], start: int) -> int:
        """Return the index of the first non-blank, unindented line from *start*."""
        index = start
        while index < len(lines):
            line = lines[index]
            if line.strip() and line[0] not in " \t":
                break
            index += 1
        return index

    def _parse_go_doc_output(self, output: str) -> Dict[str, Any]:
        """Convert the text printed by ``go doc -all`` into a result dict.

        Expected layout (NOTE(review): confirm against the Go versions in use):
        a ``package`` line, the package text, then upper-case section headings;
        each ``func``/``type`` declaration starts in column 0 and is followed
        by its indented documentation.

        ``line_number`` values refer to lines of *output*, not of any source
        file, and ``file_path`` is left empty.
        """
        result = self._empty_result("")
        lines = output.split("\n")

        # The package text starts after the package line (or at the top).
        index = 0
        for position, line in enumerate(lines):
            if line.startswith("package "):
                index = position + 1
                break

        doc_lines = []
        while index < len(lines) and lines[index].strip() not in self._GO_DOC_SECTIONS:
            doc_lines.append(lines[index].rstrip())
            index += 1
        package_doc = "\n".join(doc_lines).strip()
        if package_doc:
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)

        while index < len(lines):
            line = lines[index].rstrip()

            if line.startswith("func "):
                end = self._end_of_indented_block(lines, index + 1)
                comments = [text.strip() for text in lines[index + 1:end] if text.strip()]
                func_doc = self._build_function_doc(
                    self._extract_func_name(line), line, comments, lines, index
                )
                if func_doc:
                    result["functions"].append(func_doc)
                index = end
            elif line.startswith("type "):
                name = re.match(r"type\s+(\w+)", line)
                body: List[str] = []
                after = index + 1
                if line.endswith("{"):
                    # The declaration body runs to the closing brace in column 0.
                    while after < len(lines) and lines[after].rstrip() != "}":
                        body.append(lines[after])
                        after += 1
                    after += 1
                end = self._end_of_indented_block(lines, after)
                if name:
                    result["types"].append({
                        "name": name.group(1),
                        "description": "\n".join(
                            text.strip() for text in lines[after:end] if text.strip()
                        ),
                        "fields": self._parse_type_fields("\n".join(body)),
                        "line_number": index + 1,
                    })
                index = max(end, index + 1)
            else:
                index += 1

        return result

    def parse_go_doc(self, package_path: str) -> Dict[str, Any]:
        """Parse Go documentation by running ``go doc -all`` on a package.

        Args:
            package_path: Package argument passed to ``go doc``.

        Returns:
            The result dict built from the command output.

        Raises:
            ValueError: If ``go`` is not installed, the command takes longer
                than 30 seconds, or it exits with a non-zero status.
        """
        try:
            result = subprocess.run(
                ["go", "doc", "-all", package_path],
                capture_output=True,
                text=True,
                timeout=30,
            )
        except subprocess.TimeoutExpired as exc:
            raise ValueError("go doc command timed out") from exc
        except FileNotFoundError as exc:
            raise ValueError("go command not found - ensure Go is installed") from exc

        if result.returncode != 0:
            raise ValueError(f"go doc failed: {result.stderr}")

        return self._parse_go_doc_output(result.stdout)
|
||||
|
||||
|
||||
def parse_go_file(file_path: Path) -> Dict[str, Any]:
    """Extract documentation from the Go source file at *file_path*.

    Convenience wrapper that creates a fresh ``GoDocstringParser`` and returns
    the result of its ``parse_file`` method.
    """
    return GoDocstringParser().parse_file(file_path)
|
||||
Reference in New Issue
Block a user