Add parsers: Python, Go, and JavaScript docstring parsers
This commit is contained in:
364
doc2man/parsers/python.py
Normal file
364
doc2man/parsers/python.py
Normal file
@@ -0,0 +1,364 @@
|
|||||||
|
"""Python docstring parser for Doc2Man."""
|
||||||
|
|
||||||
|
import ast
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class PythonDocstringParser:
    """Extract documentation from Python source code via the ``ast`` module.

    Two docstring flavours are understood:

    * section headers ending in a colon (``Args:``, ``Arguments:``,
      ``Parameters:``, ``Returns:``, ``Raises:``, ``Example:``, ``Examples:``),
      matched case-insensitively;
    * field tags (``:param``, ``:parameter``, ``:return:``, ``:returns:``,
      ``:raises`` and the ``@param`` / ``@return`` / ``@raises`` spellings).

    Underlined (NumPy-style) section headers are not recognised because a
    header must carry its trailing colon.
    """

    # Section headers, compared against the stripped, lower-cased line.
    _ARGS_HEADERS = ("args:", "arguments:", "parameters:")
    _RETURNS_HEADER = "returns:"
    _RAISES_HEADER = "raises:"
    _EXAMPLES_HEADERS = ("example:", "examples:")
    _SECTION_HEADERS = frozenset(
        _ARGS_HEADERS + (_RETURNS_HEADER, _RAISES_HEADER) + _EXAMPLES_HEADERS
    )

    # Field tags. Longer spellings come first so that stripping the first
    # matching prefix always removes the complete tag.
    _PARAM_TAGS = (":parameter ", ":param ", "@param")
    _RETURN_TAGS = (":returns:", ":return:", "@returns", "@return")
    _RAISE_TAGS = (":raises", ":raise ", "@raises", "@raise ")
    _FIELD_TAGS = _PARAM_TAGS + _RETURN_TAGS + _RAISE_TAGS

    # Words accepted as a type in the legacy ``name: type description`` form.
    _LEGACY_TYPE_WORDS = frozenset({
        "int", "str", "float", "bool", "list", "dict", "tuple", "set", "any",
        "Optional", "List", "Dict", "str,int", "number",
    })

    def __init__(self):
        """Initialize the parser (it keeps no state)."""

    # ------------------------------------------------------------------
    # Source-level parsing
    # ------------------------------------------------------------------

    def parse(self, source_code: str) -> Dict[str, Any]:
        """Parse Python source code and extract its documentation.

        Args:
            source_code: The Python source code to parse.

        Returns:
            Dictionary with the keys ``title``, ``description``,
            ``module_docstring``, ``functions`` and ``classes``. When the
            source cannot be parsed the dictionary is returned with its
            initial empty values.
        """
        result: Dict[str, Any] = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
        }

        try:
            tree = ast.parse(source_code)
        except (SyntaxError, ValueError):
            # ValueError covers interpreters that reject NUL bytes in the
            # source with that exception type instead of SyntaxError.
            return result

        module_docstring = ast.get_docstring(tree)
        if module_docstring:
            result["module_docstring"] = module_docstring
            result["description"] = module_docstring
            result["title"] = self._extract_title(module_docstring)

        # Only top-level definitions are reported; nested ones are skipped.
        for node in ast.iter_child_nodes(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                func_doc = self._parse_function(node, source_code)
                if func_doc:
                    result["functions"].append(func_doc)
            elif isinstance(node, ast.ClassDef):
                class_doc = self._parse_class(node, source_code)
                if class_doc:
                    result["classes"].append(class_doc)

        return result

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the first line of *docstring*, or ``None`` if it is empty."""
        if not docstring:
            return None
        return docstring.strip().split("\n", 1)[0].strip()

    def _parse_function(self, node: ast.FunctionDef, source_code: str) -> Optional[Dict[str, Any]]:
        """Build the documentation record for a function or method node.

        Documented arguments come first, in docstring order. Every signature
        parameter that the docstring does not mention is appended with an
        empty type and description. ``source_code`` is accepted for interface
        compatibility and is not used.
        """
        func_info: Dict[str, Any] = {
            "name": node.name,
            "description": None,
            "args": [],
            "returns": None,
            "raises": [],
            "examples": [],
            "line_number": node.lineno,
        }

        docstring = ast.get_docstring(node)
        if docstring:
            parsed = self._parse_docstring(docstring)
            func_info["description"] = parsed.get("description")
            func_info["args"] = parsed.get("args", [])
            func_info["returns"] = parsed.get("returns")
            func_info["raises"] = parsed.get("raises", [])
            func_info["examples"] = parsed.get("examples", [])

        # Compare without leading stars so a documented "*args" matches the
        # signature's vararg; a set keeps the membership test O(1).
        documented = {str(entry.get("name", "")).lstrip("*") for entry in func_info["args"]}
        for display_name in self._signature_names(node):
            bare_name = display_name.lstrip("*")
            if bare_name in documented:
                continue
            documented.add(bare_name)
            func_info["args"].append({"name": display_name, "type": "", "description": ""})

        return func_info

    @staticmethod
    def _signature_names(node) -> List[str]:
        """List every parameter of *node* in signature order.

        Variadic parameters are rendered as ``*name`` / ``**name``.
        """
        spec = node.args
        # posonlyargs is missing on very old interpreters, hence getattr.
        names = [param.arg for param in getattr(spec, "posonlyargs", [])]
        names.extend(param.arg for param in spec.args)
        if spec.vararg is not None:
            names.append("*" + spec.vararg.arg)
        names.extend(param.arg for param in spec.kwonlyargs)
        if spec.kwarg is not None:
            names.append("**" + spec.kwarg.arg)
        return names

    def _parse_class(self, node: ast.ClassDef, source_code: str) -> Optional[Dict[str, Any]]:
        """Build the documentation record for a class node.

        The description is the first line of the class docstring. Methods
        whose name starts with an underscore are left out.
        """
        class_info: Dict[str, Any] = {
            "name": node.name,
            "description": None,
            "methods": [],
            "line_number": node.lineno,
        }

        docstring = ast.get_docstring(node)
        if docstring:
            class_info["description"] = docstring.strip().split("\n")[0]

        for item in node.body:
            if not isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if item.name.startswith("_"):
                continue
            method_doc = self._parse_function(item, source_code)
            if method_doc:
                class_info["methods"].append(method_doc)

        return class_info

    # ------------------------------------------------------------------
    # Docstring parsing
    # ------------------------------------------------------------------

    def _parse_docstring(self, docstring: str) -> Dict[str, Any]:
        """Split a docstring into description, args, returns, raises, examples.

        The description is the first non-blank line that is neither a section
        header nor a field tag. Sections may appear in any order; each one is
        consumed completely before scanning resumes after it.
        """
        result: Dict[str, Any] = {
            "description": "",
            "args": [],
            "returns": None,
            "raises": [],
            "examples": [],
        }

        if not docstring:
            return result

        lines = docstring.strip().split("\n")
        total = len(lines)
        index = 0
        while index < total:
            raw = lines[index]
            line = raw.strip()
            if not line:
                index += 1
                continue

            lowered = line.lower()
            header_indent = self._indent_of(raw)

            if lowered.startswith(self._ARGS_HEADERS):
                body, index = self._collect_section(lines, index + 1, header_indent)
                result["args"].extend(self._parse_arg_lines(body))

            elif lowered.startswith(self._RETURNS_HEADER):
                inline = line[len(self._RETURNS_HEADER):].strip()
                start = index + 1
                # With text on the header line itself, only lines indented
                # deeper than the header can be its continuation.
                body, _ = self._collect_section(
                    lines, start, header_indent + 1 if inline else header_indent
                )
                paragraph, consumed = self._first_paragraph(body)
                index = start + consumed
                returns = self._parse_returns(" ".join([inline] + paragraph).strip())
                if returns is not None:
                    result["returns"] = returns

            elif lowered.startswith(self._RAISES_HEADER):
                body, index = self._collect_section(lines, index + 1, header_indent)
                result["raises"].extend(self._parse_raise_lines(body))

            elif lowered.startswith(self._EXAMPLES_HEADERS):
                body, index = self._collect_section(lines, index + 1, header_indent)
                result["examples"].extend(self._parse_example_lines(body))

            elif line.startswith(self._PARAM_TAGS):
                text, index = self._join_continuation(lines, index)
                param = self._parse_param_tag(text)
                if param:
                    result["args"].append(param)

            elif line.startswith(self._RETURN_TAGS):
                text, index = self._join_continuation(lines, index)
                for tag in self._RETURN_TAGS:
                    if text.startswith(tag):
                        text = text[len(tag):]
                        break
                inline = text.strip()
                if not inline and index < total:
                    # Tolerate the description sitting on the next line at the
                    # same indentation, unless that line starts something new.
                    candidate = lines[index].strip()
                    if candidate and not self._starts_new_item(candidate):
                        inline = candidate
                        index += 1
                returns = self._parse_returns(inline)
                if returns is not None:
                    result["returns"] = returns

            elif line.startswith(self._RAISE_TAGS):
                text, index = self._join_continuation(lines, index)
                raise_info = self._parse_raise_tag(text)
                if raise_info:
                    result["raises"].append(raise_info)

            else:
                if not result["description"]:
                    result["description"] = line
                index += 1

        return result

    @staticmethod
    def _indent_of(raw: str) -> int:
        """Return the number of leading whitespace characters of *raw*."""
        return len(raw) - len(raw.lstrip())

    def _starts_new_item(self, text: str) -> bool:
        """Tell whether stripped *text* is a section header or a field tag."""
        return text.lower() in self._SECTION_HEADERS or text.startswith(self._FIELD_TAGS)

    def _collect_section(self, lines: List[str], start: int, min_indent: int = 0):
        """Gather the body of a section that begins at ``lines[start]``.

        The body ends at the first section header or field tag, at a line
        indented less than ``min_indent``, or at a line indented less than
        the first non-blank body line. Blank lines do not end the body.

        Returns:
            A ``(body_lines, next_index)`` tuple; ``next_index`` is the index
            of the first line that does not belong to the section.
        """
        body = []
        base_indent = None
        index = start
        while index < len(lines):
            raw = lines[index]
            text = raw.strip()
            if text:
                if self._starts_new_item(text):
                    break
                indent = self._indent_of(raw)
                if indent < min_indent:
                    break
                if base_indent is None:
                    base_indent = indent
                elif indent < base_indent:
                    break
            body.append(raw)
            index += 1
        return body, index

    @staticmethod
    def _first_paragraph(body: List[str]):
        """Return ``(texts, consumed)`` for the first paragraph of *body*.

        Leading blank lines are skipped. ``consumed`` counts the body lines
        read up to the end of that paragraph.
        """
        texts = []
        consumed = 0
        for raw in body:
            text = raw.strip()
            if not text and texts:
                break
            consumed += 1
            if text:
                texts.append(text)
        return texts, consumed

    def _join_continuation(self, lines: List[str], index: int):
        """Merge ``lines[index]`` with the more-indented lines that follow it.

        Returns:
            A ``(text, next_index)`` tuple with the pieces joined by single
            spaces.
        """
        first = lines[index]
        indent = self._indent_of(first)
        pieces = [first.strip()]
        following = index + 1
        while following < len(lines):
            raw = lines[following]
            if not raw.strip() or self._indent_of(raw) <= indent:
                break
            pieces.append(raw.strip())
            following += 1
        return " ".join(pieces), following

    @staticmethod
    def _split_entry_head(text: str):
        """Split ``head: rest`` at the first colon outside brackets.

        Colons inside ``(...)`` or ``[...]`` are ignored so that a type such
        as ``(:obj:`str`)`` stays attached to the head. Without such a colon
        the whole text is the head and the rest is empty.
        """
        depth = 0
        for position, char in enumerate(text):
            if char in "([":
                depth += 1
            elif char in ")]":
                depth = max(depth - 1, 0)
            elif char == ":" and depth == 0:
                return text[:position].strip(), text[position + 1:].strip()
        return text.strip(), ""

    @staticmethod
    def _split_name_and_type(head: str):
        """Split ``name (type)`` into ``(name, type)``; type may be empty."""
        head = head.strip()
        if head.endswith(")") and "(" in head:
            name, _, type_text = head[:-1].partition("(")
            return name.strip(), type_text.strip()
        return head, ""

    def _looks_like_name(self, head: str) -> bool:
        """Tell whether *head* can be an entry name (one token, sane start)."""
        name, _ = self._split_name_and_type(head)
        if len(name.split()) != 1:
            return False
        return name[0].isalnum() or name[0] in "_*"

    def _split_entries(self, body: List[str]) -> List[List[str]]:
        """Turn section body lines into ``[head, description]`` pairs.

        A line starts a new entry when it sits at the indentation of the
        first entry and its head is a single name-like token. Any other
        non-blank line is appended to the previous entry's description, or
        dropped when there is no previous entry.
        """
        entries: List[List[str]] = []
        base_indent = None
        for raw in body:
            text = raw.strip()
            if not text:
                continue
            indent = self._indent_of(raw)
            if base_indent is None:
                base_indent = indent
            if text.startswith(("- ", "* ")):
                # Bulleted entry: drop the list marker.
                text = text[2:].strip()
            head, rest = self._split_entry_head(text)
            if indent <= base_indent and self._looks_like_name(head):
                entries.append([head, rest])
            elif entries:
                entries[-1][1] = (entries[-1][1] + " " + text).strip()
        return entries

    def _parse_arg_lines(self, body: List[str]) -> List[Dict[str, str]]:
        """Convert the body lines of an Args section into argument records."""
        args = []
        for head, description in self._split_entries(body):
            name, arg_type = self._split_name_and_type(head)
            if not arg_type:
                # Legacy "name: type description" form: a leading known type
                # word is taken as the type.
                words = description.split(None, 1)
                if words and words[0] in self._LEGACY_TYPE_WORDS:
                    arg_type = words[0]
                    description = words[1].strip() if len(words) > 1 else ""
            args.append({"name": name, "type": arg_type, "description": description})
        return args

    def _parse_raise_lines(self, body: List[str]) -> List[Dict[str, str]]:
        """Convert the body lines of a Raises section into exception records."""
        return [
            {"exception": head, "description": description}
            for head, description in self._split_entries(body)
        ]

    def _parse_example_lines(self, body: List[str]) -> List[str]:
        """Convert the body lines of an Examples section into example text.

        Blank lines and lines starting with ``#`` are skipped. Collection
        starts at the first line that is indented or begins with ``>>>``;
        indentation relative to that line is kept. All collected lines form
        a single example.
        """
        collected = []
        base_indent = None
        for raw in body:
            stripped = raw.strip()
            if not stripped or stripped.startswith("#"):
                continue
            indent = self._indent_of(raw)
            if base_indent is None:
                if not (raw.startswith((" ", "\t")) or stripped.startswith(">>>")):
                    continue
                base_indent = indent
            collected.append(" " * max(indent - base_indent, 0) + stripped)
        return ["\n".join(collected)] if collected else []

    def _parse_args_section(self, lines: List[str], start: int) -> List[Dict[str, str]]:
        """Parse the Args section whose body starts at ``lines[start]``.

        Returns:
            One ``{"name", "type", "description"}`` dictionary per argument.
        """
        body, _ = self._collect_section(lines, start)
        return self._parse_arg_lines(body)

    def _parse_returns(self, line: str) -> Optional[Dict[str, str]]:
        """Parse a returns description.

        Returns:
            ``{"type": "", "description": text}`` with one leading colon
            removed from the text, or ``None`` when *line* is empty or blank.
        """
        if not line:
            return None
        text = line.strip()
        if not text:
            return None
        if text.startswith(":"):
            text = text[1:].strip()
        return {"type": "", "description": text}

    def _parse_raises_section(self, lines: List[str], start: int) -> List[Dict[str, str]]:
        """Parse the Raises section whose body starts at ``lines[start]``.

        Returns:
            One ``{"exception", "description"}`` dictionary per exception.
        """
        body, _ = self._collect_section(lines, start)
        return self._parse_raise_lines(body)

    def _parse_examples_section(self, lines: List[str], start: int) -> List[str]:
        """Parse the Examples section whose body starts at ``lines[start]``.

        Returns:
            A list holding the example text, or an empty list.
        """
        body, _ = self._collect_section(lines, start)
        return self._parse_example_lines(body)

    def _parse_param_tag(self, line: str) -> Optional[Dict[str, str]]:
        """Parse a single parameter tag line.

        Accepted shapes are ``:param name: text``, ``:param type name: text``
        (also with ``:parameter``), ``@param name text`` and
        ``@param {type} name text``.

        Returns:
            A ``{"name", "type", "description"}`` dictionary, or ``None``
            when no parameter name can be found.
        """
        text = line.strip()
        colon_style = False
        for prefix in (":parameter", ":param", "@parameter", "@param"):
            if text.startswith(prefix):
                colon_style = prefix.startswith(":")
                text = text[len(prefix):]
                break
        text = text.strip()
        if not text:
            return None

        param_type = ""
        if text.startswith("{"):
            closing = text.find("}")
            if closing != -1:
                param_type = text[1:closing].strip()
                text = text[closing + 1:].strip()

        if colon_style and ":" in text:
            header, _, description = text.partition(":")
            tokens = header.split()
            if not tokens:
                return None
            # The name is the last token; anything before it is the type.
            name = tokens[-1]
            if len(tokens) > 1 and not param_type:
                param_type = " ".join(tokens[:-1])
        else:
            tokens = text.split(None, 1)
            if not tokens:
                return None
            name = tokens[0].rstrip(":")
            description = tokens[1] if len(tokens) > 1 else ""

        description = description.strip()
        if description.startswith("- "):
            # "@param name - text" separator.
            description = description[2:].strip()
        if not name:
            return None
        return {"name": name, "type": param_type, "description": description}

    def _parse_raise_tag(self, line: str) -> Optional[Dict[str, str]]:
        """Parse a single raises tag line.

        Accepted shapes are ``:raises Exc: text`` and ``@raises Exc text``
        (also with the singular ``raise``).

        Returns:
            An ``{"exception", "description"}`` dictionary, or ``None`` when
            no exception name can be found.
        """
        text = line.strip()
        for prefix in (":raises", ":raise", "@raises", "@raise"):
            if text.startswith(prefix):
                text = text[len(prefix):]
                break
        # Also accept the ":raises: Exc" spelling.
        text = text.strip().lstrip(":").strip()
        if not text:
            return None

        exception, separator, description = text.partition(":")
        if not separator:
            # No colon: the first word is the exception, the rest describes it.
            exception, _, description = text.partition(" ")
        exception = exception.strip()
        if not exception:
            return None
        return {"exception": exception, "description": description.strip()}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_python_file(file_path: Path) -> Dict[str, Any]:
    """Parse a Python file and extract documentation.

    The file is read as UTF-8 first; if that fails with a decoding error it
    is read again as Latin-1.

    Args:
        file_path: Path of the Python file to read.

    Returns:
        The dictionary produced by ``PythonDocstringParser.parse`` with two
        extra keys: ``file_path`` (the path as a string) and ``language``
        (always ``"python"``).

    Raises:
        ValueError: If reading or parsing fails. The original exception is
            attached as the cause.
    """

    def _load(encoding: str) -> Dict[str, Any]:
        # Read, parse and annotate in one place so both encodings share it.
        with open(file_path, "r", encoding=encoding) as handle:
            source_code = handle.read()
        result = PythonDocstringParser().parse(source_code)
        result["file_path"] = str(file_path)
        result["language"] = "python"
        return result

    try:
        return _load("utf-8")
    except UnicodeDecodeError:
        try:
            return _load("latin-1")
        except Exception as e:
            raise ValueError(f"Error reading Python file {file_path}: {e}") from e
    except Exception as e:
        raise ValueError(f"Error parsing Python file {file_path}: {e}") from e
|
||||||
Reference in New Issue
Block a user