Add parsers: Python, Go, and JavaScript docstring parsers
Some checks failed
CI / test (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-01-31 00:56:36 +00:00
parent e5cc8e3193
commit 903096e80c

364
doc2man/parsers/python.py Normal file
View File

@@ -0,0 +1,364 @@
"""Python docstring parser for Doc2Man."""
import ast
from pathlib import Path
from typing import Any, Dict, List, Optional
class PythonDocstringParser:
    """Extract documentation from Python source code via the ``ast`` module.

    Recognised docstring conventions:

    * Google-style sections introduced by ``Args:`` / ``Arguments:`` /
      ``Parameters:``, ``Returns:``, ``Raises:`` and ``Example(s):``.
    * Tag lines such as ``:param x: ...``, ``:return: ...`` and
      ``:raises Err: ...`` plus their ``@param`` / ``@return`` / ``@raises``
      counterparts.

    NumPy-style underlined headers (``Parameters`` followed by ``-----``)
    are not recognised as sections.
    """

    # Lower-cased, stripped lines that terminate the body of a section.
    _SECTION_HEADERS = frozenset({
        "args:",
        "arguments:",
        "parameters:",
        "returns:",
        "raises:",
        "examples:",
        "example:",
    })

    # Words accepted as a type in the "name: type description" entry form.
    _INLINE_TYPES = frozenset({
        "int", "str", "float", "bool", "list", "dict", "tuple", "set",
        "any", "Optional", "List", "Dict", "str,int", "number",
    })

    def __init__(self):
        """Initialize the parser (no state is kept between calls)."""

    def parse(self, source_code: str) -> Dict[str, Any]:
        """Parse Python source code and extract its documentation.

        Args:
            source_code: The Python source code to parse.

        Returns:
            Dictionary with the keys ``title``, ``description``,
            ``module_docstring``, ``functions`` and ``classes``. Only
            top-level functions and classes are collected. If the source
            cannot be parsed, the dictionary is returned with its initial
            empty values.
        """
        result: Dict[str, Any] = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
        }
        try:
            tree = ast.parse(source_code)
        except (SyntaxError, ValueError):
            # ValueError covers malformed input that some interpreter
            # versions do not report as SyntaxError (e.g. NUL bytes).
            return result

        module_docstring = ast.get_docstring(tree)
        if module_docstring:
            result["module_docstring"] = module_docstring
            result["description"] = module_docstring
            result["title"] = self._extract_title(module_docstring)

        for node in ast.iter_child_nodes(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                func_doc = self._parse_function(node, source_code)
                if func_doc:
                    result["functions"].append(func_doc)
            elif isinstance(node, ast.ClassDef):
                class_doc = self._parse_class(node, source_code)
                if class_doc:
                    result["classes"].append(class_doc)
        return result

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the stripped first line of ``docstring``, or None if empty."""
        if not docstring:
            return None
        return docstring.strip().split("\n", 1)[0].strip()

    def _parse_function(self, node: ast.FunctionDef, source_code: str) -> Optional[Dict[str, Any]]:
        """Build the documentation record for one function or method node.

        Args:
            node: The function (or async function) definition node.
            source_code: Full source text; accepted for interface
                compatibility and not used here.

        Returns:
            Dictionary with ``name``, ``description``, ``args``, ``returns``,
            ``raises``, ``examples`` and ``line_number``. Positional
            parameters from the signature that the docstring does not
            mention are appended to ``args`` with empty type/description.
        """
        func_info: Dict[str, Any] = {
            "name": node.name,
            "description": None,
            "args": [],
            "returns": None,
            "raises": [],
            "examples": [],
            "line_number": node.lineno,
        }
        docstring = ast.get_docstring(node)
        if docstring:
            parsed = self._parse_docstring(docstring)
            func_info["description"] = parsed.get("description")
            func_info["args"] = parsed.get("args", [])
            func_info["returns"] = parsed.get("returns")
            func_info["raises"] = parsed.get("raises", [])
            func_info["examples"] = parsed.get("examples", [])

        # Track documented names in a set so each lookup is O(1).
        documented = {entry.get("name") for entry in func_info["args"]}
        for arg in node.args.args:
            if arg.arg not in documented:
                documented.add(arg.arg)
                func_info["args"].append({
                    "name": arg.arg,
                    "type": "",
                    "description": "",
                })
        return func_info

    def _parse_class(self, node: ast.ClassDef, source_code: str) -> Optional[Dict[str, Any]]:
        """Build the documentation record for one class node.

        Args:
            node: The class definition node.
            source_code: Full source text, forwarded to ``_parse_function``.

        Returns:
            Dictionary with ``name``, ``description`` (first line of the
            class docstring, or None), ``methods`` and ``line_number``.
            Methods whose name starts with an underscore are left out.
        """
        class_info: Dict[str, Any] = {
            "name": node.name,
            "description": None,
            "methods": [],
            "line_number": node.lineno,
        }
        docstring = ast.get_docstring(node)
        if docstring:
            class_info["description"] = docstring.strip().split("\n")[0]

        for item in node.body:
            if not isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if item.name.startswith("_"):
                continue
            method_doc = self._parse_function(item, source_code)
            if method_doc:
                class_info["methods"].append(method_doc)
        return class_info

    def _parse_docstring(self, docstring: str) -> Dict[str, Any]:
        """Split a docstring into description, args, returns, raises, examples.

        Args:
            docstring: The docstring text to analyse.

        Returns:
            Dictionary with ``description`` (first free-text line, or ""),
            ``args``, ``returns`` (dict or None), ``raises`` and ``examples``.
        """
        result: Dict[str, Any] = {
            "description": "",
            "args": [],
            "returns": None,
            "raises": [],
            "examples": [],
        }
        if not docstring:
            return result

        lines = docstring.strip().split("\n")
        total = len(lines)
        i = 0
        while i < total:
            line = lines[i].strip()
            if not line:
                i += 1
                continue
            lowered = line.lower()

            if lowered.startswith(("args:", "arguments:", "parameters:")):
                i += 1
                result["args"] = self._parse_args_section(lines, i)
                i = self._skip_section_body(lines, i)
            elif lowered.startswith("returns:"):
                # Only the first line under the header is used.
                i += 1
                if i < total:
                    result["returns"] = self._parse_returns(lines[i].strip())
                    i += 1
            elif lowered.startswith("raises:"):
                i += 1
                result["raises"] = self._parse_raises_section(lines, i)
                i = self._skip_section_body(lines, i)
            elif lowered.startswith(("example:", "examples:")):
                i += 1
                result["examples"].extend(self._parse_examples_section(lines, i))
                # Resume at the next header instead of abandoning the rest
                # of the docstring, so sections after the examples are kept.
                while i < total and lines[i].strip().lower() not in self._SECTION_HEADERS:
                    i += 1
            elif ":param " in line or ":parameter " in line or line.startswith("@param"):
                param = self._parse_param_tag(line)
                if param:
                    result["args"].append(param)
                i += 1
            elif ":return:" in line or ":returns:" in line or line.startswith("@return"):
                text = self._return_tag_text(line)
                step = 1
                if not text and i + 1 < total:
                    # Tag with nothing after it: accept the following line
                    # as the description unless that line is itself a tag.
                    following = lines[i + 1].strip()
                    if following and not following.startswith((":", "@")):
                        text = following
                        step = 2
                parsed_returns = self._parse_returns(text)
                if parsed_returns:
                    result["returns"] = parsed_returns
                i += step
            elif line.startswith((":raises", "@raises", ":raise ", "@raise ")):
                raise_info = self._parse_raise_tag(line)
                if raise_info:
                    result["raises"].append(raise_info)
                i += 1
            else:
                if not result["description"]:
                    result["description"] = line
                i += 1
        return result

    def _skip_section_body(self, lines: List[str], start: int) -> int:
        """Return the index of the first blank or header line at/after ``start``."""
        i = start
        while i < len(lines):
            text = lines[i].strip()
            if not text or text.lower() in self._SECTION_HEADERS:
                break
            i += 1
        return i

    def _section_entries(self, lines: List[str], start: int) -> List[str]:
        """Collect the stripped entries of a section starting at ``start``.

        Blank lines are ignored and collection stops at the next section
        header. A line indented deeper than the section's first entry is
        treated as a continuation and joined onto the previous entry with a
        single space.
        """
        entries: List[str] = []
        entry_indent: Optional[int] = None
        for raw in lines[start:]:
            text = raw.strip()
            if not text:
                continue
            if text.lower() in self._SECTION_HEADERS:
                break
            indent = len(raw) - len(raw.lstrip())
            if entry_indent is None:
                entry_indent = indent
            if indent > entry_indent and entries:
                entries[-1] = f"{entries[-1]} {text}"
            else:
                entries.append(text)
        return entries

    def _parse_args_section(self, lines: List[str], start: int) -> List[Dict[str, str]]:
        """Parse the entries of an Args section.

        Supported entry forms (an optional ``- `` or ``* `` bullet is
        removed first):

        * ``name: description``
        * ``name (type): description``
        * ``name: type description`` where ``type`` is one of
          ``_INLINE_TYPES``

        Args:
            lines: All lines of the docstring.
            start: Index of the first line after the section header.

        Returns:
            List of ``{"name", "type", "description"}`` dictionaries.
        """
        args: List[Dict[str, str]] = []
        for entry in self._section_entries(lines, start):
            if entry.startswith(("- ", "* ")):
                entry = entry[2:]
            elif not (entry[0].isalnum() or entry[0] in ":_*"):
                # Decorative lines (rules, brackets, ...) are not entries;
                # "_" and "*" are allowed for _private / *args names.
                continue

            head, colon, rest = entry.partition(":")
            name = head.strip()
            if not name:
                continue
            arg = {"name": name, "type": "", "description": ""}

            # "name (type)": move the parenthesised part into the type.
            if name.endswith(")") and "(" in name:
                bare, _, type_text = name[:-1].partition("(")
                if bare.strip():
                    arg["name"] = bare.strip()
                    arg["type"] = type_text.strip()

            if colon:
                rest = rest.strip()
                words = rest.split(None, 1)
                if not arg["type"] and words and words[0] in self._INLINE_TYPES:
                    arg["type"] = words[0]
                    if len(words) > 1:
                        arg["description"] = words[1].strip()
                else:
                    arg["description"] = rest
            args.append(arg)
        return args

    def _parse_returns(self, line: str) -> Optional[Dict[str, str]]:
        """Turn one line of text into a returns record.

        Returns:
            ``{"type": "", "description": text}`` with one leading colon
            removed from the text, or None when ``line`` is empty/blank.
        """
        if not line:
            return None
        text = line.strip()
        if not text:
            return None
        if text.startswith(":"):
            text = text[1:].strip()
        return {"type": "", "description": text}

    def _return_tag_text(self, line: str) -> str:
        """Return the text that follows a return tag on the same line."""
        # "@returns" is tried before "@return" because the latter is its prefix.
        for tag in (":returns:", ":return:", "@returns", "@return"):
            _, found, tail = line.partition(tag)
            if found:
                return tail.strip().lstrip(":").strip()
        return ""

    def _parse_raises_section(self, lines: List[str], start: int) -> List[Dict[str, str]]:
        """Parse the entries of a Raises section.

        Each entry has the form ``Exception: description`` with an optional
        ``- `` or ``* `` bullet. Parsing stops at the next section header.

        Args:
            lines: All lines of the docstring.
            start: Index of the first line after the section header.

        Returns:
            List of ``{"exception", "description"}`` dictionaries.
        """
        raises: List[Dict[str, str]] = []
        for entry in self._section_entries(lines, start):
            if entry.startswith(("- ", "* ")):
                entry = entry[2:]
            name, _, detail = entry.partition(":")
            name = name.strip()
            if name:
                raises.append({"exception": name, "description": detail.strip()})
        return raises

    def _parse_examples_section(self, lines: List[str], start: int) -> List[str]:
        """Parse the body of an Examples section.

        Blank lines and lines starting with ``#`` are dropped. Collection
        begins at the first line that is indented or starts with ``>>>`` and
        then takes every remaining line up to the next section header.

        Args:
            lines: All lines of the docstring.
            start: Index of the first line after the section header.

        Returns:
            An empty list, or a one-element list holding the collected
            lines (stripped) joined with newlines.
        """
        collected: List[str] = []
        for raw in lines[start:]:
            text = raw.strip()
            if not text:
                continue
            if text.lower() in self._SECTION_HEADERS:
                break
            if text.startswith("#"):
                continue
            if collected or raw[:1] in (" ", "\t") or text.startswith(">>>"):
                collected.append(text)
        return ["\n".join(collected)] if collected else []

    def _parse_param_tag(self, line: str) -> Optional[Dict[str, str]]:
        """Parse a ``:param`` / ``:parameter`` / ``@param`` tag line.

        Supported forms::

            :param name: description
            :param type name: description
            @param name description
            @param {type} name description

        Returns:
            ``{"name", "type", "description"}``, or None when the line has
            no tag or no parameter name.
        """
        # ":parameter" is tried before ":param" because the latter is its prefix.
        for tag in (":parameter", ":param", "@param"):
            _, found, body = line.partition(tag)
            if found:
                break
        else:
            return None

        body = body.strip()
        if not body:
            return None

        if body.startswith("{"):
            # "{type} name description"
            type_text, _, remainder = body[1:].partition("}")
            words = remainder.split(None, 1)
            if not words:
                return None
            return {
                "name": words[0].rstrip(":"),
                "type": type_text.strip(),
                "description": words[1].strip() if len(words) > 1 else "",
            }

        if tag.startswith(":") and ":" in body:
            # "[type] name: description" - the name is the last word
            # before the colon, anything preceding it is the type.
            declaration, _, description = body.partition(":")
            tokens = declaration.split()
            if not tokens:
                return None
            return {
                "name": tokens[-1],
                "type": " ".join(tokens[:-1]),
                "description": description.strip(),
            }

        # "name description" (optionally "name: description")
        words = body.split(None, 1)
        name = words[0].rstrip(":")
        if not name:
            return None
        return {
            "name": name,
            "type": "",
            "description": words[1].strip() if len(words) > 1 else "",
        }

    def _parse_raise_tag(self, line: str) -> Optional[Dict[str, str]]:
        """Parse a ``:raises`` / ``:raise`` / ``@raises`` / ``@raise`` tag line.

        With a colon after the tag, the text before it is the exception and
        the text after it the description; without one, the first word is
        the exception and the remainder the description.

        Returns:
            ``{"exception", "description"}``, or None when no exception
            name is present.
        """
        # Plural forms first: the singular tags are prefixes of them.
        for tag in (":raises", "@raises", ":raise", "@raise"):
            _, found, body = line.partition(tag)
            if found:
                break
        else:
            body = line

        body = body.strip()
        if not body:
            return None

        if ":" in body:
            exception, _, description = body.partition(":")
        else:
            words = body.split(None, 1)
            exception = words[0]
            description = words[1] if len(words) > 1 else ""

        exception = exception.strip()
        if not exception:
            return None
        return {"exception": exception, "description": description.strip()}
def parse_python_file(file_path: Path) -> Dict[str, Any]:
    """Parse a Python file and extract documentation.

    The file is read as UTF-8 first; if that fails with a decoding error it
    is read again as Latin-1.

    Args:
        file_path: Path of the Python file to read.

    Returns:
        The dictionary produced by ``PythonDocstringParser.parse`` with two
        extra keys: ``file_path`` (the path as a string) and ``language``
        (always ``"python"``).

    Raises:
        ValueError: If the file cannot be read or parsed; the original
            exception is attached as the cause.
    """
    parser = PythonDocstringParser()

    def _load(encoding: str) -> Dict[str, Any]:
        # Read, parse and annotate in one place so both encodings share it.
        with open(file_path, "r", encoding=encoding) as f:
            source_code = f.read()
        result = parser.parse(source_code)
        result["file_path"] = str(file_path)
        result["language"] = "python"
        return result

    try:
        return _load("utf-8")
    except UnicodeDecodeError:
        try:
            return _load("latin-1")
        except Exception as e:
            raise ValueError(f"Error reading Python file {file_path}: {e}") from e
    except Exception as e:
        raise ValueError(f"Error parsing Python file {file_path}: {e}") from e