Add parsers: Python, Go, and JavaScript docstring parsers
This commit is contained in:
316
doc2man/parsers/javascript.py
Normal file
316
doc2man/parsers/javascript.py
Normal file
@@ -0,0 +1,316 @@
|
|||||||
|
"""JavaScript docstring parser for Doc2Man."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class JavaScriptDocstringParser:
    """Parser for JSDoc comments from JavaScript/TypeScript files.

    The parser works line by line with regular expressions; it does not
    build a syntax tree.  ``parse_content`` returns a dictionary with the
    keys ``title``, ``description``, ``module_docstring``, ``functions``,
    ``classes``, ``file_path`` and ``language``.
    """

    # Matches one complete ``/** ... */`` block; group 1 is the raw body.
    JSDOC_PATTERN = re.compile(
        r'/\*\*([^*]*(?:\*(?!/)[^*]*)*)\*/',
        re.DOTALL
    )

    # Kept only so existing references to the attribute keep working.  The
    # parser itself no longer uses it: its optional "name" group captures
    # the first word of any tag text (e.g. "The" in "@returns {T} The sum"),
    # which made the pre-split pieces impossible to reassemble correctly.
    TAG_PATTERN = re.compile(
        r'@(\w+)(?:\s*(\{[^\{\}]+\}))?(?:\s*(\S+))?(?:\s+(.*))?',
        re.DOTALL
    )

    # Splits a tag line into the tag name and the untouched remainder.
    _TAG_LINE_PATTERN = re.compile(r'@(\w+)\s*(.*)', re.DOTALL)

    # Applied to a @param tag after the type: "name - description".
    _PARAM_REST_PATTERN = re.compile(r'\s*(\S+)?\s*-?\s*(.*)', re.DOTALL)

    # Applied to a @returns tag after the type: "- description".
    _RETURNS_REST_PATTERN = re.compile(r'(?:\s*-)?\s*(.*)', re.DOTALL)

    # Declaration patterns, tried in order; group 1 is the declared name and
    # the first pattern that matches a line wins.
    # NOTE(review): the "export const/let NAME =" patterns also match
    # exported values that are not functions - confirm whether intended.
    _FUNCTION_PATTERNS = tuple(re.compile(p) for p in (
        r'export\s+async\s+function\s+(\w+)',
        r'export\s+function\s+(\w+)',
        r'export\s+const\s+(\w+)\s*=',
        r'export\s+let\s+(\w+)\s*=',
        r'async\s+function\s+(\w+)',
        r'^function\s+(\w+)',
        r'const\s+(\w+)\s*=\s*function',
        r'const\s+(\w+)\s*=\s*async',
        r'let\s+(\w+)\s*=\s*function',
        r'let\s+(\w+)\s*=\s*async',
        r'const\s+(\w+)\s*=\s*\(([^)]*)\)\s*=>',
        r'let\s+(\w+)\s*=\s*\(([^)]*)\)\s*=>',
    ))

    _EXPORT_CLASS_PATTERN = re.compile(r'export\s+class\s+(\w+)')
    _CLASS_PATTERN = re.compile(r'^class\s+(\w+)')

    def __init__(self):
        """Initialize the parser (it keeps no per-instance state)."""
        pass

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Read a JavaScript/TypeScript file and extract its documentation.

        The file is decoded as UTF-8; if that fails it is re-read as
        Latin-1.

        Args:
            file_path: Location of the source file.

        Returns:
            The dictionary produced by ``parse_content``.

        Raises:
            ValueError: If reading or parsing fails.  The original
                exception is attached as ``__cause__``.
        """
        try:
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except UnicodeDecodeError:
                with open(file_path, "r", encoding="latin-1") as f:
                    content = f.read()
            return self.parse_content(content, str(file_path))
        except Exception as e:
            raise ValueError(f"Error reading file {file_path}: {e}") from e

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Extract documentation from JavaScript/TypeScript source text.

        Args:
            content: Full source text.
            file_path: Stored verbatim under the ``file_path`` key.

        Returns:
            A dictionary with module-level documentation plus the lists of
            documented functions and classes found in ``content``.
        """
        result = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
            "file_path": file_path,
            "language": "javascript",
        }

        module_doc = self._extract_module_documentation(content)
        if module_doc:
            result["module_docstring"] = module_doc.get("description")
            result["description"] = module_doc.get("description")
            result["title"] = module_doc.get("title")

        result["functions"] = self._extract_functions(content)
        result["classes"] = self._extract_classes(content)

        return result

    def _extract_module_documentation(self, content: str) -> Optional[Dict[str, Any]]:
        """Return the parsed leading JSDoc block of the file, if any.

        Scanning stops at the first line starting with ``import `` or
        ``export ``.  Only the first ``/**`` line before that point is
        considered.  The returned dictionary is the parsed comment plus a
        ``title`` key holding the text of its ``@module`` tag (``None``
        when there is no such tag).
        """
        lines = content.split("\n")

        for index, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith(("import ", "export ")):
                break
            if stripped.startswith("/**"):
                comment = self._jsdoc_block_from_lines(lines, index)
                if not comment:
                    break
                parsed = self._parse_jsdoc_comment(comment)
                # parse_content reads "title"; the comment parser never
                # produces one, so derive it from @module when available.
                parsed["title"] = parsed["tags"].get("module") or None
                return parsed

        return None

    def _extract_jsdoc_block(self, content: str, start: int) -> Optional[str]:
        """Return the body of the JSDoc block opening on line ``start``.

        Args:
            content: Full source text.
            start: Zero-based index of the line expected to begin with
                ``/**``.

        Returns:
            The stripped text between ``/**`` and ``*/``, or ``None`` when
            ``start`` is out of range or the line does not open a block.
        """
        return self._jsdoc_block_from_lines(content.split("\n"), start)

    def _jsdoc_block_from_lines(self, lines: List[str], start: int) -> Optional[str]:
        """Same as ``_extract_jsdoc_block`` but on pre-split lines."""
        if start < 0 or start >= len(lines):
            return None

        opener = lines[start].strip()
        # "/**/" is an empty plain comment, not a JSDoc opener.
        if not opener.startswith("/**") or opener.startswith("/**/"):
            return None

        end = start
        # The closer may sit on the opening line ("/** text */"); only look
        # further down when it does not.
        if "*/" not in opener[3:]:
            end = start + 1
            while end < len(lines) and "*/" not in lines[end]:
                end += 1

        block = "\n".join(lines[start:end + 1])
        match = self.JSDOC_PATTERN.search(block)
        if match:
            return match.group(1).strip()

        return None

    def _find_preceding_jsdoc(self, lines: List[str], decl_index: int) -> Optional[str]:
        """Return the JSDoc block that directly precedes a declaration.

        Blank lines between the comment and the declaration are skipped.
        The nearest non-blank line above must end with ``*/``; from there
        the search walks upwards to the ``/**`` opener.  ``None`` is
        returned when the comment turns out to be a plain ``/* */`` block
        or no opener is found.
        """
        end = decl_index - 1
        while end >= 0 and not lines[end].strip():
            end -= 1
        if end < 0 or not lines[end].rstrip().endswith("*/"):
            return None

        for start in range(end, -1, -1):
            candidate = lines[start].strip()
            # A closer on an earlier line means the comment ending at
            # ``end`` has no opener of its own on the way up.
            if start != end and "*/" in candidate:
                return None
            if candidate.startswith("/**"):
                return self._jsdoc_block_from_lines(lines, start)
            if candidate.startswith("/*"):
                return None

        return None

    @staticmethod
    def _is_comment_line(stripped: str) -> bool:
        """Tell whether a stripped source line is (part of) a comment.

        Used to keep sample code inside comments from being reported as
        real declarations.  A line that opens and closes a block comment
        is not treated as a comment because code may follow the closer.
        """
        if stripped.startswith(("//", "*")):
            return True
        return stripped.startswith("/*") and "*/" not in stripped

    @staticmethod
    def _strip_comment_prefix(line: str) -> str:
        """Remove the leading ``*`` gutter (and one space) from a line.

        Indentation after that single space is kept so that example code
        retains its relative layout; trailing whitespace is dropped.
        """
        body = line.lstrip()
        if body.startswith("*"):
            body = body[1:]
            if body.startswith(" "):
                body = body[1:]
        return body.rstrip()

    def _parse_jsdoc_comment(self, comment: str) -> Dict[str, Any]:
        """Split a JSDoc body into description, params, returns and tags.

        Args:
            comment: Comment body without the ``/**`` and ``*/`` markers.

        Returns:
            A dictionary with the keys ``description`` (str), ``params``
            (list of dicts), ``returns`` (dict or ``None``), ``examples``
            (list of str) and ``tags`` (dict mapping any other tag name to
            its text; a repeated tag keeps its last occurrence).
        """
        result = {
            "description": "",
            "params": [],
            "returns": None,
            "examples": [],
            "tags": {},
        }

        description_lines = []
        current_tag = None
        current_tag_content = []

        for line in comment.split("\n"):
            text = self._strip_comment_prefix(line)
            stripped = text.strip()

            if stripped.startswith("@"):
                if current_tag:
                    self._add_tag_content(result, current_tag, current_tag_content)
                match = self._TAG_LINE_PATTERN.match(stripped)
                if match:
                    current_tag = match.group(1)
                    remainder = match.group(2).strip()
                    # Keep the raw remainder; the per-tag parsers split
                    # type, name and description themselves.
                    current_tag_content = [remainder] if remainder else []
                else:
                    current_tag = None
                    current_tag_content = []
            elif current_tag:
                # Example code keeps its indentation; other tags are
                # re-flowed into a single line later.
                keep_layout = current_tag == "example"
                current_tag_content.append(text if keep_layout else stripped)
            elif stripped:
                description_lines.append(stripped)

        if current_tag:
            self._add_tag_content(result, current_tag, current_tag_content)

        result["description"] = "\n".join(description_lines).strip()

        return result

    def _add_tag_content(self, result: Dict, tag: str, content: List[str]) -> None:
        """Store the collected lines of one tag in ``result``.

        Args:
            result: Dictionary being built by ``_parse_jsdoc_comment``.
            tag: Tag name without the leading ``@``.
            content: Lines belonging to the tag; may be empty for a bare
                tag such as ``@deprecated``.
        """
        combined = " ".join(part.strip() for part in content if part.strip())

        if tag == "param":
            param = self._parse_param_tag(combined)
            if param:
                result["params"].append(param)
        elif tag == "returns" or tag == "return":
            ret = self._parse_returns_tag(combined)
            if ret:
                result["returns"] = ret
        elif tag == "example":
            # Examples are code: keep one source line per output line.
            example = "\n".join(content).strip("\n").rstrip()
            if example:
                result["examples"].append(example)
        elif tag == "examples":
            for line in content:
                if line.strip():
                    result["examples"].append(line.strip())
        else:
            result["tags"][tag] = combined

    @staticmethod
    def _split_type(content: str) -> tuple:
        """Split a leading ``{type}`` expression off a tag's text.

        Braces are matched by depth so nested types such as
        ``{{a: number}}`` stay intact.

        Returns:
            ``(type_text, remainder)``; ``type_text`` is ``""`` when the
            text does not start with a balanced brace expression.
        """
        text = content.lstrip()
        if not text.startswith("{"):
            return "", text

        depth = 0
        for index, char in enumerate(text):
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return text[1:index], text[index + 1:]

        return "", text

    def _parse_param_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse the text of a ``@param`` tag.

        Args:
            content: Text after ``@param``, e.g. ``{string} name - Text``.

        Returns:
            A dictionary with ``name``, ``type`` and ``description``, or
            ``None`` when the text is empty or contains no name.
        """
        if not content:
            return None

        type_hint, remainder = self._split_type(content)
        match = self._PARAM_REST_PATTERN.match(remainder)
        name = (match.group(1) or "") if match else ""
        if not name:
            return None

        return {
            "name": name,
            "type": type_hint,
            "description": (match.group(2) or "").strip(),
        }

    def _parse_returns_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse the text of a ``@returns`` / ``@return`` tag.

        Args:
            content: Text after the tag, e.g. ``{number} The sum``.

        Returns:
            A dictionary with ``type`` and ``description``, or ``None``
            when the text is empty.
        """
        if not content:
            return None

        type_hint, remainder = self._split_type(content)
        match = self._RETURNS_REST_PATTERN.match(remainder)

        return {
            "type": type_hint,
            "description": (match.group(1) if match else "").strip(),
        }

    def _match_function_name(self, stripped: str) -> Optional[str]:
        """Return the name declared on a line, or ``None`` if none matches."""
        for pattern in self._FUNCTION_PATTERNS:
            match = pattern.search(stripped)
            if match:
                return match.group(1)
        return None

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Collect function declarations and their JSDoc documentation.

        Comment lines are ignored, and names starting with ``_`` are
        skipped.  Each entry has the keys ``name``, ``description``,
        ``args``, ``returns``, ``examples`` and ``line_number`` (1-based).
        """
        functions = []
        lines = content.split("\n")

        for line_num, line in enumerate(lines):
            stripped = line.strip()
            if self._is_comment_line(stripped):
                continue

            func_name = self._match_function_name(stripped)
            if func_name is None or func_name.startswith("_"):
                continue

            func_doc = {
                "name": func_name,
                "description": "",
                "args": [],
                "returns": None,
                "examples": [],
                "line_number": line_num + 1,
            }

            jsdoc = self._find_preceding_jsdoc(lines, line_num)
            if jsdoc:
                parsed = self._parse_jsdoc_comment(jsdoc)
                func_doc["description"] = parsed.get("description", "")
                func_doc["args"] = parsed.get("params", [])
                func_doc["returns"] = parsed.get("returns")
                func_doc["examples"] = parsed.get("examples", [])

            functions.append(func_doc)

        return functions

    def _extract_classes(self, content: str) -> List[Dict[str, Any]]:
        """Collect class declarations and their JSDoc descriptions.

        A class is recognised when a non-comment line contains
        ``export class NAME`` or starts with ``class NAME``.  Each entry
        has the keys ``name``, ``description``, ``methods`` (always an
        empty list here) and ``line_number`` (1-based).
        """
        classes = []
        lines = content.split("\n")

        for line_num, line in enumerate(lines):
            stripped = line.strip()
            if self._is_comment_line(stripped):
                continue

            match = (
                self._EXPORT_CLASS_PATTERN.search(stripped)
                or self._CLASS_PATTERN.match(stripped)
            )
            if not match:
                continue

            class_doc = {
                "name": match.group(1),
                "description": "",
                "methods": [],
                "line_number": line_num + 1,
            }

            jsdoc = self._find_preceding_jsdoc(lines, line_num)
            if jsdoc:
                parsed = self._parse_jsdoc_comment(jsdoc)
                class_doc["description"] = parsed.get("description", "")

            classes.append(class_doc)

        return classes
|
||||||
|
|
||||||
|
|
||||||
|
def parse_javascript_file(file_path: Path) -> Dict[str, Any]:
    """Convenience wrapper: parse one JavaScript/TypeScript file.

    Creates a fresh ``JavaScriptDocstringParser`` and returns whatever its
    ``parse_file`` method yields for ``file_path``.
    """
    return JavaScriptDocstringParser().parse_file(file_path)
|
||||||
Reference in New Issue
Block a user