Add parsers: Python, Go, and JavaScript docstring parsers
This commit is contained in:
364
doc2man/parsers/python.py
Normal file
364
doc2man/parsers/python.py
Normal file
@@ -0,0 +1,364 @@
|
||||
"""Python docstring parser for Doc2Man."""
|
||||
|
||||
import ast
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class PythonDocstringParser:
    """Extract documentation from Python source code.

    Two docstring conventions are recognised:

    * Google-style sections (``Args:``, ``Returns:``, ``Raises:``,
      ``Example:`` / ``Examples:``), where the section body is delimited by
      indentation, and
    * reST / Epydoc / JSDoc-style field tags (``:param x:``, ``:type x:``,
      ``:returns:``, ``:rtype:``, ``:raises X:``, ``@param {type} x``).

    NumPy-style underlined section headers are not recognised by this parser.
    """

    # Headers that open an argument section (compared lower-cased).
    _ARG_HEADERS = ("args:", "arguments:", "parameters:")
    # Headers that open an examples section (compared lower-cased).
    _EXAMPLE_HEADERS = ("example:", "examples:")
    # A line that is exactly one of these ends the section body before it.
    _SECTION_HEADERS = frozenset({
        "args:", "arguments:", "parameters:", "returns:", "yields:",
        "raises:", "example:", "examples:", "note:", "notes:", "attributes:",
    })
    # Legacy ``name: type description`` form: a description whose first word
    # is one of these is split into a type and the remaining description.
    _BARE_TYPE_NAMES = frozenset({
        "int", "str", "float", "bool", "list", "dict", "tuple", "set", "any",
        "Optional", "List", "Dict", "str,int", "number",
    })

    def __init__(self):
        """Initialize the parser (it holds no state)."""

    # ------------------------------------------------------------------
    # Source-level parsing
    # ------------------------------------------------------------------

    def parse(self, source_code: str) -> Dict[str, Any]:
        """Parse Python source code and extract its documentation.

        Args:
            source_code: The Python source code to parse.

        Returns:
            A dictionary with the keys ``title``, ``description``,
            ``module_docstring``, ``functions`` and ``classes``. Only
            top-level functions and classes are collected. Source that does
            not compile yields the empty skeleton instead of raising.
        """
        result = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
        }

        try:
            tree = ast.parse(source_code)
        except SyntaxError:
            return result

        module_docstring = ast.get_docstring(tree)
        if module_docstring:
            result["module_docstring"] = module_docstring
            result["description"] = module_docstring
            result["title"] = self._extract_title(module_docstring)

        for node in ast.iter_child_nodes(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                func_doc = self._parse_function(node, source_code)
                if func_doc:
                    result["functions"].append(func_doc)
            elif isinstance(node, ast.ClassDef):
                class_doc = self._parse_class(node, source_code)
                if class_doc:
                    result["classes"].append(class_doc)

        return result

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the first line of ``docstring``, or None when it is empty."""
        if not docstring:
            return None
        first_line = docstring.strip().split("\n", 1)[0].strip()
        return first_line or None

    def _parse_function(self, node: ast.FunctionDef, source_code: str) -> Optional[Dict[str, Any]]:
        """Describe one function or method node.

        Args:
            node: The ``FunctionDef`` / ``AsyncFunctionDef`` node.
            source_code: Unused; kept for interface compatibility.

        Returns:
            A dictionary with ``name``, ``description``, ``args``,
            ``returns``, ``raises``, ``examples`` and ``line_number``.
            Parameters present in the signature but missing from the
            docstring are appended to ``args`` with empty type/description.
        """
        func_info = {
            "name": node.name,
            "description": None,
            "args": [],
            "returns": None,
            "raises": [],
            "examples": [],
            "line_number": node.lineno,
        }

        docstring = ast.get_docstring(node)
        if docstring:
            parsed = self._parse_docstring(docstring)
            func_info["description"] = parsed.get("description")
            func_info["args"] = parsed.get("args", [])
            func_info["returns"] = parsed.get("returns")
            func_info["raises"] = parsed.get("raises", [])
            func_info["examples"] = parsed.get("examples", [])

        # Docstrings may write ``*args`` or plain ``args``; compare on the
        # bare name so neither spelling produces a duplicate entry.
        documented = {(arg.get("name") or "").lstrip("*") for arg in func_info["args"]}
        for display_name in self._signature_arg_names(node.args):
            bare_name = display_name.lstrip("*")
            if bare_name in documented:
                continue
            documented.add(bare_name)
            func_info["args"].append({
                "name": display_name,
                "type": "",
                "description": "",
            })

        return func_info

    @staticmethod
    def _signature_arg_names(arguments: ast.arguments) -> List[str]:
        """List every parameter name of a signature in declaration order."""
        # ``posonlyargs`` only exists on Python 3.8+.
        names = [arg.arg for arg in getattr(arguments, "posonlyargs", [])]
        names.extend(arg.arg for arg in arguments.args)
        if arguments.vararg is not None:
            names.append("*" + arguments.vararg.arg)
        names.extend(arg.arg for arg in arguments.kwonlyargs)
        if arguments.kwarg is not None:
            names.append("**" + arguments.kwarg.arg)
        return names

    def _parse_class(self, node: ast.ClassDef, source_code: str) -> Optional[Dict[str, Any]]:
        """Describe one class node and its public methods.

        Args:
            node: The ``ClassDef`` node.
            source_code: Passed through to ``_parse_function``.

        Returns:
            A dictionary with ``name``, ``description`` (first docstring
            line), ``methods`` and ``line_number``. Methods whose name starts
            with an underscore are skipped.
        """
        class_info = {
            "name": node.name,
            "description": None,
            "methods": [],
            "line_number": node.lineno,
        }

        docstring = ast.get_docstring(node)
        if docstring:
            class_info["description"] = docstring.strip().split("\n")[0]

        for item in node.body:
            if not isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if item.name.startswith("_"):
                continue
            method_doc = self._parse_function(item, source_code)
            if method_doc:
                class_info["methods"].append(method_doc)

        return class_info

    # ------------------------------------------------------------------
    # Docstring parsing
    # ------------------------------------------------------------------

    def _parse_docstring(self, docstring: str) -> Dict[str, Any]:
        """Split a docstring into description, args, returns, raises, examples.

        Args:
            docstring: The docstring text (ideally already dedented, as
                returned by ``ast.get_docstring``).

        Returns:
            A dictionary with ``description`` (first free-text line),
            ``args``, ``returns``, ``raises`` and ``examples``.
        """
        result = {
            "description": "",
            "args": [],
            "returns": None,
            "raises": [],
            "examples": [],
        }
        if not docstring:
            return result

        lines = docstring.strip().splitlines()
        declared_types = {}  # name -> type, gathered from ``:type name:`` fields
        return_type = ""     # gathered from ``:rtype:``
        index = 0

        while index < len(lines):
            text = lines[index].strip()
            if not text:
                index += 1
                continue
            lowered = text.lower()

            if lowered.startswith(self._ARG_HEADERS):
                result["args"].extend(self._parse_args_section(lines, index + 1))
                index = self._section_end(lines, index + 1)

            elif lowered.startswith("returns:"):
                inline = text[len("returns:"):].strip()
                if inline:
                    # ``Returns: value`` written on the header line itself.
                    parsed_returns = self._parse_returns(inline)
                    index += 1
                else:
                    end = self._section_end(lines, index + 1)
                    body = " ".join(
                        raw.strip() for raw in lines[index + 1:end] if raw.strip()
                    )
                    parsed_returns = self._parse_returns(body)
                    index = end
                if parsed_returns is not None:
                    result["returns"] = parsed_returns

            elif lowered.startswith("raises:"):
                result["raises"].extend(self._parse_raises_section(lines, index + 1))
                index = self._section_end(lines, index + 1)

            elif lowered.startswith(self._EXAMPLE_HEADERS):
                result["examples"].extend(self._parse_examples_section(lines, index + 1))
                index = self._section_end(lines, index + 1, stop_at_blank=False)

            elif ":param " in text or ":parameter " in text or text.startswith("@param"):
                field, index = self._field_text(lines, index)
                param = self._parse_param_tag(field)
                if param:
                    result["args"].append(param)

            elif text.startswith((":type ", "@type ")):
                field, index = self._field_text(lines, index)
                # Both tags are six characters long including the space.
                name, has_colon, type_text = self._split_at_colon(field[6:])
                if has_colon and name and type_text:
                    declared_types[name] = type_text

            elif (":return:" in text or ":returns:" in text
                  or text.startswith("@return")):
                field, index = self._field_text(lines, index)
                body = self._after_tag(
                    field, (":returns:", ":return:", "@returns", "@return")
                )
                parsed_returns = self._parse_returns(body or "")
                if parsed_returns is not None:
                    result["returns"] = parsed_returns

            elif text.startswith((":rtype:", "@rtype")):
                field, index = self._field_text(lines, index)
                body = self._after_tag(field, (":rtype:", "@rtype")) or ""
                return_type = body.lstrip(":").strip()

            elif text.startswith((":raise", "@raise")):
                field, index = self._field_text(lines, index)
                raise_info = self._parse_raise_tag(field)
                if raise_info:
                    result["raises"].append(raise_info)

            else:
                # The first free-text line is the description; later
                # free-text lines are ignored.
                if not result["description"]:
                    result["description"] = text
                index += 1

        # ``:type:`` / ``:rtype:`` may appear before or after the field they
        # describe, so they are applied once everything has been read.
        for arg in result["args"]:
            if not arg["type"]:
                arg["type"] = declared_types.get(arg["name"], "")
        if return_type:
            if result["returns"] is None:
                result["returns"] = {"type": return_type, "description": ""}
            elif not result["returns"]["type"]:
                result["returns"]["type"] = return_type

        return result

    @staticmethod
    def _indent_of(raw: str) -> int:
        """Return the width of the leading whitespace of ``raw`` (tabs expanded)."""
        expanded = raw.expandtabs()
        return len(expanded) - len(expanded.lstrip())

    @staticmethod
    def _split_at_colon(text: str) -> tuple:
        """Split ``text`` at the first colon that is not inside brackets.

        Returns:
            ``(head, found, tail)`` with ``head`` and ``tail`` stripped;
            ``found`` is False (and ``tail`` empty) when no such colon exists.
        """
        depth = 0
        for position, char in enumerate(text):
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth = max(depth - 1, 0)
            elif char == ":" and depth == 0:
                return text[:position].strip(), True, text[position + 1:].strip()
        return text.strip(), False, ""

    @staticmethod
    def _after_tag(text: str, tags) -> Optional[str]:
        """Return the stripped text following the first tag found, else None.

        ``tags`` are tried in order, so longer spellings must come first.
        """
        for tag in tags:
            position = text.find(tag)
            if position != -1:
                return text[position + len(tag):].strip()
        return None

    def _field_text(self, lines: List[str], index: int) -> tuple:
        """Join a field line with its deeper-indented continuation lines.

        Returns:
            ``(text, next_index)`` where ``next_index`` is the first line not
            consumed.
        """
        base_indent = self._indent_of(lines[index])
        pieces = [lines[index].strip()]
        cursor = index + 1
        while cursor < len(lines):
            raw = lines[cursor]
            if not raw.strip() or self._indent_of(raw) <= base_indent:
                break
            pieces.append(raw.strip())
            cursor += 1
        return " ".join(pieces), cursor

    def _section_end(self, lines: List[str], start: int, stop_at_blank: bool = True) -> int:
        """Find where the section body beginning at ``start`` ends.

        The line before ``start`` is taken to be the section header. The body
        ends at the first line that is dedented back to the header's level,
        or at a line that is exactly a known section header. When the body is
        not indented relative to its header, a blank line also ends it
        (unless ``stop_at_blank`` is False).

        Returns:
            The index one past the last body line.
        """
        header_indent = self._indent_of(lines[start - 1]) if 0 < start <= len(lines) else -1
        body_indent = None

        for index in range(start, len(lines)):
            raw = lines[index]
            text = raw.strip()
            if not text:
                flat_body = body_indent is not None and body_indent <= header_indent
                if stop_at_blank and flat_body:
                    return index
                continue

            indent = self._indent_of(raw)
            is_header = text.lower() in self._SECTION_HEADERS
            if body_indent is None:
                if is_header:
                    return index  # the section is empty
                body_indent = indent
                continue
            if body_indent > header_indent and indent <= header_indent:
                return index
            if is_header and indent <= body_indent:
                return index

        return len(lines)

    def _section_entries(self, lines: List[str], start: int) -> List[str]:
        """Group a section body into entries, folding in continuation lines.

        A line indented deeper than the first body line continues the entry
        before it; every other non-blank line starts a new entry.
        """
        end = self._section_end(lines, start)
        entries = []
        entry_indent = None
        for raw in lines[start:end]:
            text = raw.strip()
            if not text:
                continue
            indent = self._indent_of(raw)
            if entry_indent is None:
                entry_indent = indent
            if entries and indent > entry_indent:
                entries[-1] = f"{entries[-1]} {text}"
            else:
                entries.append(text)
        return entries

    def _parse_args_section(self, lines: List[str], start: int) -> List[Dict[str, str]]:
        """Parse the body of an Args section.

        Accepts ``name: description``, ``name (type): description`` and the
        legacy ``name: type description`` form, optionally prefixed with a
        ``- `` or ``* `` bullet.

        Args:
            lines: All docstring lines.
            start: Index of the first line after the section header.

        Returns:
            One ``{"name", "type", "description"}`` dictionary per argument.
        """
        args = []
        for entry in self._section_entries(lines, start):
            if entry.startswith(("- ", "* ")):
                entry = entry[2:].strip()
            # Names may start with ``_`` or ``*`` (``_private``, ``*args``).
            if not entry or not (entry[0].isalnum() or entry[0] in "_*"):
                continue

            head, has_colon, rest = self._split_at_colon(entry)
            name, arg_type = head, ""
            if head.endswith(")") and "(" in head:
                name, _, type_text = head[:-1].partition("(")
                name = name.strip()
                arg_type = type_text.strip()

            if not has_colon and not name.lstrip("*").isidentifier():
                # Free text at entry level: an unindented wrapped description.
                if args:
                    args[-1]["description"] = f'{args[-1]["description"]} {entry}'.strip()
                continue
            if not name:
                continue

            description = rest
            if not arg_type and rest:
                words = rest.split(None, 1)
                if words[0] in self._BARE_TYPE_NAMES:
                    arg_type = words[0]
                    description = words[1].strip() if len(words) > 1 else ""

            args.append({"name": name, "type": arg_type, "description": description})

        return args

    def _parse_returns(self, line: str) -> Optional[Dict[str, str]]:
        """Build the returns entry from its description text.

        Returns:
            ``{"type": "", "description": text}``, or None when ``line`` is
            empty once whitespace and a leading colon are removed.
        """
        text = (line or "").strip()
        if text.startswith(":"):
            text = text[1:].strip()
        if not text:
            return None
        return {"type": "", "description": text}

    def _parse_raises_section(self, lines: List[str], start: int) -> List[Dict[str, str]]:
        """Parse the body of a Raises section.

        Args:
            lines: All docstring lines.
            start: Index of the first line after the section header.

        Returns:
            One ``{"exception", "description"}`` dictionary per entry.
        """
        raises = []
        for entry in self._section_entries(lines, start):
            if entry.startswith(("- ", "* ")):
                entry = entry[2:].strip()
            if not entry:
                continue

            head, has_colon, rest = self._split_at_colon(entry)
            if has_colon:
                exception, description = head, rest
            else:
                words = entry.split(None, 1)
                if len(words) > 1 and raises:
                    # Several words without a colon: an unindented wrapped
                    # description of the previous exception.
                    raises[-1]["description"] = f'{raises[-1]["description"]} {entry}'.strip()
                    continue
                exception = words[0]
                description = words[1].strip() if len(words) > 1 else ""

            if exception:
                raises.append({"exception": exception, "description": description})

        return raises

    def _parse_examples_section(self, lines: List[str], start: int) -> List[str]:
        """Parse the body of an Examples section.

        Blank lines and lines starting with ``#`` are dropped. The example
        starts at the first line that is indented or begins with ``>>>``.
        The common leading indentation is removed, so nested code keeps its
        relative indentation.

        Args:
            lines: All docstring lines.
            start: Index of the first line after the section header.

        Returns:
            A list holding the example text, or an empty list.
        """
        end = self._section_end(lines, start, stop_at_blank=False)
        kept = []
        for raw in lines[start:end]:
            text = raw.strip()
            if not text or text.startswith("#"):
                continue
            starts_example = raw[:1] in (" ", "\t") or text.startswith(">>>")
            if not kept and not starts_example:
                continue
            kept.append(raw.expandtabs().rstrip())

        if not kept:
            return []
        margin = min(self._indent_of(raw) for raw in kept)
        return ["\n".join(raw[margin:] for raw in kept)]

    def _parse_param_tag(self, line: str) -> Optional[Dict[str, str]]:
        """Parse one parameter field.

        Supported spellings: ``:param name: text``, ``:param type name: text``,
        ``:parameter name: text``, ``@param name: text``, ``@param name text``
        and ``@param {type} name text``.

        Returns:
            ``{"name", "type", "description"}``, or None when no parameter
            name can be found.
        """
        text = line.strip()
        body, field_style = text, False
        # Longer spellings first so ``:parameter`` is not cut at ``:param``.
        for tag in (":parameter", "@parameter", ":param", "@param"):
            position = text.find(tag)
            if position != -1:
                body = text[position + len(tag):]
                field_style = tag.startswith(":")
                break
        body = body.strip().lstrip(":").strip()
        if not body:
            return None

        param_type = ""
        if body.startswith("{"):
            closing = body.find("}")
            if closing != -1:
                param_type = body[1:closing].strip()
                body = body[closing + 1:].strip()

        head, has_colon, tail = self._split_at_colon(body)
        head_words = head.split()
        # ``@param`` text is only read as ``name: text`` when a single word
        # precedes the colon; otherwise the colon belongs to the description.
        if has_colon and head_words and (field_style or len(head_words) == 1):
            name = head_words[-1]
            if not param_type:
                param_type = " ".join(head_words[:-1])
            description = tail
        else:
            words = body.split(None, 1)
            if not words:
                return None
            name = words[0]
            description = words[1].strip() if len(words) > 1 else ""
            if description.startswith("- "):
                description = description[2:].strip()

        name = name.strip(":")
        if not name:
            return None
        return {"name": name, "type": param_type, "description": description}

    def _parse_raise_tag(self, line: str) -> Optional[Dict[str, str]]:
        """Parse one raises field.

        Supported spellings: ``:raises X: text``, ``:raise X: text``,
        ``@raises X: text`` and ``@raises X text``.

        Returns:
            ``{"exception", "description"}``, or None when no exception name
            can be found.
        """
        text = line.strip()
        body = text
        for tag in (":raises", "@raises", ":raise", "@raise"):
            position = text.find(tag)
            if position != -1:
                body = text[position + len(tag):]
                break
        body = body.strip().lstrip(":").strip()
        if not body:
            return None

        head, has_colon, tail = self._split_at_colon(body)
        if has_colon:
            exception, description = head, tail
        else:
            words = body.split(None, 1)
            exception = words[0]
            description = words[1].strip() if len(words) > 1 else ""

        if not exception:
            return None
        return {"exception": exception, "description": description}
|
||||
|
||||
|
||||
def parse_python_file(file_path: Path) -> Dict[str, Any]:
    """Read a Python file and extract its documentation.

    The file is decoded as UTF-8; if that fails it is re-read as latin-1,
    which accepts any byte sequence.

    Args:
        file_path: Location of the Python source file.

    Returns:
        The dictionary produced by ``PythonDocstringParser.parse`` with two
        extra keys: ``file_path`` (as a string) and ``language`` (``"python"``).

    Raises:
        ValueError: If the file cannot be read or parsed. The underlying
            exception is attached as ``__cause__``.
    """
    path = Path(file_path)
    try:
        try:
            source_code = path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            source_code = path.read_text(encoding="latin-1")
        result = PythonDocstringParser().parse(source_code)
    except Exception as exc:
        # Callers rely on ValueError as the single failure type, so every
        # failure is converted here while keeping the original as the cause.
        raise ValueError(f"Error parsing Python file {file_path}: {exc}") from exc

    result["file_path"] = str(file_path)
    result["language"] = "python"
    return result
|
||||
Reference in New Issue
Block a user