Files
doc2man/doc2man/parsers/javascript.py
7000pctAUTO 3ac81c4290
Some checks failed
CI / build (push) Has been cancelled
CI / test (push) Has been cancelled
Add parsers: Python, Go, and JavaScript docstring parsers
2026-01-31 00:56:37 +00:00

317 lines
11 KiB
Python

"""JavaScript docstring parser for Doc2Man."""
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
class JavaScriptDocstringParser:
    """Parser for JSDoc comments from JavaScript/TypeScript files.

    The parser is line/regex based (it does not build a JavaScript AST). It
    recognises ``/** ... */`` blocks, attaches the block that ends directly
    above a function or class declaration to that declaration, and splits the
    block into a free-text description plus ``@tag`` entries.
    """

    # Captures the body of a ``/** ... */`` comment (without the delimiters).
    JSDOC_PATTERN = re.compile(
        r'/\*\*([^*]*(?:\*(?!/)[^*]*)*)\*/',
        re.DOTALL
    )
    # Recognises a tag line; only group 1 (the tag name) is relied upon, the
    # text after the tag name is handed to the tag-specific parsers verbatim.
    TAG_PATTERN = re.compile(
        r'@(\w+)(?:\s*(\{[^\{\}]+\}))?(?:\s*(\S+))?(?:\s+(.*))?',
        re.DOTALL
    )
    # Declaration shapes treated as "functions", tried in order; group 1 is
    # always the declared name. Compiled once instead of once per source line.
    FUNCTION_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'export\s+async\s+function\s+(\w+)',
            r'export\s+function\s+(\w+)',
            r'export\s+const\s+(\w+)\s*=',
            r'export\s+let\s+(\w+)\s*=',
            r'async\s+function\s+(\w+)',
            r'^function\s+(\w+)',
            r'const\s+(\w+)\s*=\s*function',
            r'const\s+(\w+)\s*=\s*async',
            r'let\s+(\w+)\s*=\s*function',
            r'let\s+(\w+)\s*=\s*async',
            r'const\s+(\w+)\s*=\s*\(([^)]*)\)\s*=>',
            r'let\s+(\w+)\s*=\s*\(([^)]*)\)\s*=>',
        )
    )
    EXPORT_CLASS_PATTERN = re.compile(r'export\s+class\s+(\w+)')
    CLASS_PATTERN = re.compile(r'^class\s+(\w+)')
    # Splits "name - description" (the dash is optional) once the leading
    # ``{type}`` has been removed from a @param tag.
    _PARAM_REST_PATTERN = re.compile(r'\s*(\S+)?\s*(?:-)?\s*(.*)', re.DOTALL)
    # Splits "- description" (the dash is optional) for a @returns tag.
    _RETURNS_REST_PATTERN = re.compile(r'(?:\s*-)?\s*(.*)', re.DOTALL)

    def __init__(self):
        """Initialize the parser (it keeps no per-instance state)."""

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a JavaScript/TypeScript file and extract documentation.

        The file is decoded as UTF-8; if that fails it is re-read as latin-1.

        Args:
            file_path: Path of the source file to read.

        Returns:
            The dictionary produced by :meth:`parse_content`.

        Raises:
            ValueError: If the file cannot be read or parsed. The original
                exception is attached as ``__cause__``.
        """
        try:
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except UnicodeDecodeError:
                with open(file_path, "r", encoding="latin-1") as f:
                    content = f.read()
            return self.parse_content(content, str(file_path))
        except Exception as e:
            raise ValueError(f"Error reading file {file_path}: {e}") from e

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse JavaScript/TypeScript content and extract documentation.

        Args:
            content: Full source text.
            file_path: Value stored under ``"file_path"`` in the result.

        Returns:
            A dict with the keys ``title``, ``description``,
            ``module_docstring``, ``functions``, ``classes``, ``file_path``
            and ``language`` (always ``"javascript"``).
        """
        result = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
            "file_path": file_path,
            "language": "javascript",
        }
        module_doc = self._extract_module_documentation(content)
        if module_doc:
            result["module_docstring"] = module_doc.get("description")
            result["description"] = module_doc.get("description")
            # The parsed comment has no "title" key, so this stays None.
            result["title"] = module_doc.get("title")
        result["functions"] = self._extract_functions(content)
        result["classes"] = self._extract_classes(content)
        return result

    def _extract_module_documentation(self, content: str) -> Optional[Dict[str, Any]]:
        """Extract module/file-level documentation.

        Only a ``/**`` block that opens before the first ``import``/``export``
        line is considered; the first such block is parsed and returned.
        """
        for i, line in enumerate(content.split("\n")):
            stripped = line.strip()
            if stripped.startswith(("import ", "export ")):
                break
            if stripped.startswith("/**"):
                comment = self._extract_jsdoc_block(content, i)
                if comment:
                    return self._parse_jsdoc_comment(comment)
                break
        return None

    def _extract_jsdoc_block(self, content: str, start: int) -> Optional[str]:
        """Extract a JSDoc block starting at the given line index.

        Args:
            content: Full source text.
            start: Zero-based index of the line expected to open with ``/**``.

        Returns:
            The comment body (delimiters removed, surrounding whitespace
            stripped), or ``None`` if ``start`` is out of range, the line does
            not open a JSDoc block, or no complete block is found.
        """
        lines = content.split("\n")
        if start >= len(lines):
            return None
        first = lines[start]
        if not first.strip().startswith("/**"):
            return None
        # A one-line comment closes on the opening line itself; only look for
        # the terminator after the "/**" so that "/**/" is not misread.
        if "*/" in first[first.index("/**") + 3:]:
            end = start
        else:
            end = start + 1
            while end < len(lines) and "*/" not in lines[end]:
                end += 1
        block = "\n".join(lines[start:end + 1])
        match = self.JSDOC_PATTERN.search(block)
        if match:
            return match.group(1).strip()
        return None

    def _find_preceding_jsdoc(self, lines: List[str], decl_index: int) -> Optional[str]:
        """Return the body of the JSDoc block that ends directly above a line.

        Blank lines between the comment and the declaration are skipped. The
        comment must end with ``*/`` on its last line and open with ``/**`` at
        the start of a line; plain ``/* ... */`` comments are ignored.

        Args:
            lines: The source split into lines.
            decl_index: Zero-based index of the declaration line.

        Returns:
            The stripped comment body, or ``None`` when no JSDoc block is
            attached to the declaration.
        """
        end = decl_index - 1
        while end >= 0 and not lines[end].strip():
            end -= 1
        if end < 0 or not lines[end].rstrip().endswith("*/"):
            return None
        for start in range(end, -1, -1):
            text = lines[start].strip()
            if start != end and "*/" in text:
                # Reached the end of an earlier comment: the terminator we
                # started from has no JSDoc opener of its own.
                return None
            if text.startswith("/**"):
                block = "\n".join(lines[start:end + 1])
                match = self.JSDOC_PATTERN.search(block)
                return match.group(1).strip() if match else None
            if text.startswith("/*"):
                return None
        return None

    def _parse_jsdoc_comment(self, comment: str) -> Dict[str, Any]:
        """Parse a JSDoc comment and extract its components.

        Args:
            comment: Comment body as returned by the block extractors; each
                line may still carry its leading ``*``.

        Returns:
            A dict with ``description`` (text before the first tag),
            ``params``, ``returns``, ``examples`` and ``tags`` (all other
            tags, keyed by tag name).
        """
        result = {
            "description": "",
            "params": [],
            "returns": None,
            "examples": [],
            "tags": {},
        }
        description_lines = []
        current_tag = None
        current_tag_content = []
        for line in comment.split("\n"):
            stripped = line.strip()
            if stripped.startswith("*"):
                stripped = stripped[1:].strip()
            if stripped.startswith("@"):
                if current_tag and current_tag_content:
                    self._add_tag_content(result, current_tag, current_tag_content)
                match = self.TAG_PATTERN.match(stripped)
                if match:
                    current_tag = match.group(1)
                    # Keep the text after the tag name verbatim so the
                    # tag-specific parsers see "{type} name - description".
                    remainder = stripped[match.end(1):].strip()
                    current_tag_content = [remainder] if remainder else []
                else:
                    current_tag = None
                    current_tag_content = []
            elif current_tag:
                # Continuation line of the tag currently being collected.
                current_tag_content.append(stripped)
            elif stripped:
                description_lines.append(stripped)
        if current_tag and current_tag_content:
            self._add_tag_content(result, current_tag, current_tag_content)
        result["description"] = "\n".join(description_lines).strip()
        return result

    def _add_tag_content(self, result: Dict, tag: str, content: List[str]) -> None:
        """Add parsed tag content to the result.

        Args:
            result: Dict being built by :meth:`_parse_jsdoc_comment`; mutated
                in place.
            tag: Tag name without the leading ``@``.
            content: Text lines collected for the tag.
        """
        combined = " ".join(content).strip()
        if tag == "param":
            param = self._parse_param_tag(combined)
            if param:
                result["params"].append(param)
        elif tag in ("returns", "return"):
            ret = self._parse_returns_tag(combined)
            if ret:
                result["returns"] = ret
        elif tag == "example":
            result["examples"].append(combined)
        elif tag == "examples":
            for line in content:
                if line.strip():
                    result["examples"].append(line.strip())
        else:
            result["tags"][tag] = combined

    @staticmethod
    def _split_leading_type(text: str) -> tuple:
        """Split a leading ``{type}`` expression off ``text``.

        Braces are matched by depth so record types such as
        ``{{a: number}}`` are kept whole.

        Returns:
            ``(type, rest)``; ``type`` is ``""`` and ``rest`` is ``text``
            unchanged when there is no non-empty, balanced leading type.
        """
        if not text.startswith("{"):
            return "", text
        depth = 0
        for index, char in enumerate(text):
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    inner = text[1:index]
                    if inner:
                        return inner, text[index + 1:]
                    break
        return "", text

    def _parse_param_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse the text of a @param tag (``{type} name - description``).

        Returns:
            A dict with ``name``, ``type`` and ``description``, or ``None``
            when the content is empty or carries no parameter name.
        """
        if not content:
            return None
        type_hint, rest = self._split_leading_type(content)
        match = self._PARAM_REST_PATTERN.match(rest)
        name = match.group(1) or ""
        if not name:
            return None
        return {
            "name": name,
            "type": type_hint,
            "description": (match.group(2) or "").strip(),
        }

    def _parse_returns_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse the text of a @returns tag (``{type} description``).

        Returns:
            A dict with ``type`` and ``description``, or ``None`` when the
            content is empty.
        """
        if not content:
            return None
        type_hint, rest = self._split_leading_type(content)
        match = self._RETURNS_REST_PATTERN.match(rest)
        return {
            "type": type_hint,
            "description": (match.group(1) or "").strip(),
        }

    def _match_function_name(self, stripped: str) -> Optional[str]:
        """Return the name declared on a stripped source line, if any."""
        for pattern in self.FUNCTION_PATTERNS:
            match = pattern.search(stripped)
            if match:
                return match.group(1)
        return None

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract function documentation from source.

        Every line matching one of ``FUNCTION_PATTERNS`` yields an entry,
        except names starting with ``_``. The JSDoc block ending directly
        above the declaration, if present, fills in the description,
        arguments, return value and examples.
        """
        functions = []
        lines = content.split("\n")
        for line_num, line in enumerate(lines):
            func_name = self._match_function_name(line.strip())
            if func_name is None or func_name.startswith("_"):
                continue
            func_doc = {
                "name": func_name,
                "description": "",
                "args": [],
                "returns": None,
                "examples": [],
                "line_number": line_num + 1,
            }
            jsdoc = self._find_preceding_jsdoc(lines, line_num)
            if jsdoc:
                parsed = self._parse_jsdoc_comment(jsdoc)
                func_doc["description"] = parsed.get("description", "")
                func_doc["args"] = parsed.get("params", [])
                func_doc["returns"] = parsed.get("returns")
                func_doc["examples"] = parsed.get("examples", [])
            functions.append(func_doc)
        return functions

    def _extract_classes(self, content: str) -> List[Dict[str, Any]]:
        """Extract class documentation from source.

        Matches ``export class Name`` anywhere on a line and ``class Name`` at
        the start of a line. ``methods`` is always an empty list; methods are
        not extracted.
        """
        classes = []
        lines = content.split("\n")
        for line_num, line in enumerate(lines):
            stripped = line.strip()
            match = (
                self.EXPORT_CLASS_PATTERN.search(stripped)
                or self.CLASS_PATTERN.match(stripped)
            )
            if not match:
                continue
            class_doc = {
                "name": match.group(1),
                "description": "",
                "methods": [],
                "line_number": line_num + 1,
            }
            jsdoc = self._find_preceding_jsdoc(lines, line_num)
            if jsdoc:
                parsed = self._parse_jsdoc_comment(jsdoc)
                class_doc["description"] = parsed.get("description", "")
            classes.append(class_doc)
        return classes
def parse_javascript_file(file_path: Path) -> Dict[str, Any]:
    """Extract documentation from a JavaScript/TypeScript source file.

    Convenience wrapper that builds a fresh ``JavaScriptDocstringParser`` and
    returns whatever its ``parse_file`` method produces for ``file_path``.
    """
    return JavaScriptDocstringParser().parse_file(file_path)