Add parsers: Python, Go, and JavaScript docstring parsers
This commit is contained in:
316
doc2man/parsers/javascript.py
Normal file
316
doc2man/parsers/javascript.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""JavaScript docstring parser for Doc2Man."""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class JavaScriptDocstringParser:
    """Parser for JSDoc comments from JavaScript/TypeScript files.

    The parser is regex- and line-based; it does not build a syntax tree.
    Declarations are recognised one source line at a time, and each one is
    paired with the JSDoc block that closes directly above it (blank lines
    and ``@decorator`` lines in between are tolerated).
    """

    # Captures the text between ``/**`` and the closing ``*/``.
    JSDOC_PATTERN = re.compile(
        r'/\*\*([^*]*(?:\*(?!/)[^*]*)*)\*/',
        re.DOTALL
    )

    # Kept unchanged for backward compatibility with external users of the
    # attribute. Tag lines are split with ``_TAG_LINE_PATTERN`` instead,
    # because this pattern's "name" group also swallows the first word of
    # tags that have no name (``@returns``, ``@example``).
    TAG_PATTERN = re.compile(
        r'@(\w+)(?:\s*(\{[^\{\}]+\}))?(?:\s*(\S+))?(?:\s+(.*))?',
        re.DOTALL
    )

    # Splits ``@tag rest of line`` into the tag name and its raw text.
    _TAG_LINE_PATTERN = re.compile(r'@(\w+)\s*(.*)', re.DOTALL)

    # ``{type} name - description``; every part is optional.
    _PARAM_PATTERN = re.compile(
        r'(?:\{([^}]+)\})?\s*(\S+)?\s*(?:-)?\s*(.*)', re.DOTALL
    )

    # ``{type} - description``; every part is optional.
    _RETURNS_PATTERN = re.compile(
        r'(?:\{([^}]+)\})?(?:\s*-)?\s*(.*)', re.DOTALL
    )

    # Tried in order against each stripped code line; group 1 is the name.
    _FUNCTION_PATTERNS = tuple(re.compile(p) for p in (
        r'export\s+(?:default\s+)?async\s+function\s+(\w+)',
        r'export\s+(?:default\s+)?function\s+(\w+)',
        r'export\s+const\s+(\w+)\s*=',
        r'export\s+let\s+(\w+)\s*=',
        r'async\s+function\s+(\w+)',
        r'^function\s+(\w+)',
        # ``\b`` stops ``= functionFactory()`` / ``= asyncHelper()`` from
        # being mistaken for function expressions.
        r'const\s+(\w+)\s*=\s*function\b',
        r'const\s+(\w+)\s*=\s*async\b',
        r'let\s+(\w+)\s*=\s*function\b',
        r'let\s+(\w+)\s*=\s*async\b',
        r'const\s+(\w+)\s*=\s*\(([^)]*)\)\s*=>',
        r'let\s+(\w+)\s*=\s*\(([^)]*)\)\s*=>',
    ))

    # Tried in order against each stripped code line; group 1 is the name.
    _CLASS_PATTERNS = (
        re.compile(r'export\s+(?:default\s+)?(?:abstract\s+)?class\s+(\w+)'),
        re.compile(r'^(?:abstract\s+)?class\s+(\w+)'),
    )

    def __init__(self):
        """Initialize the parser (it keeps no per-instance state)."""

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a JavaScript/TypeScript file and extract documentation.

        The file is read as UTF-8, falling back to Latin-1 when it is not
        valid UTF-8.

        Args:
            file_path: Path of the source file to read.

        Returns:
            The dictionary produced by :meth:`parse_content`.

        Raises:
            ValueError: If the file cannot be read or parsed; the original
                exception is attached as ``__cause__``.
        """
        try:
            try:
                content = self._read_text(file_path, "utf-8")
            except UnicodeDecodeError:
                content = self._read_text(file_path, "latin-1")
            return self.parse_content(content, str(file_path))
        except Exception as e:
            raise ValueError(f"Error reading file {file_path}: {e}") from e

    @staticmethod
    def _read_text(file_path: Path, encoding: str) -> str:
        """Return the whole text of ``file_path`` decoded with ``encoding``."""
        with open(file_path, "r", encoding=encoding) as f:
            return f.read()

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse JavaScript/TypeScript content and extract documentation.

        Args:
            content: Source text to analyse.
            file_path: Value stored verbatim under ``"file_path"``.

        Returns:
            A dict with the keys ``title``, ``description``,
            ``module_docstring``, ``functions``, ``classes``, ``file_path``
            and ``language``.
        """
        result = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
            "file_path": file_path,
            "language": "javascript",
        }

        module_doc = self._extract_module_documentation(content)
        if module_doc:
            result["module_docstring"] = module_doc.get("description")
            result["description"] = module_doc.get("description")
            # _parse_jsdoc_comment produces no "title" key, so this stays
            # None unless that method is extended.
            result["title"] = module_doc.get("title")

        result["functions"] = self._extract_functions(content)
        result["classes"] = self._extract_classes(content)

        return result

    def _extract_module_documentation(self, content: str) -> Optional[Dict[str, Any]]:
        """Extract module/file-level documentation.

        Scans from the top of the file and parses the first line that opens
        a JSDoc block. The scan stops at the first ``import``/``export``
        line.

        Returns:
            The parsed comment, or ``None`` when no non-empty block is found.
        """
        lines = content.split("\n")

        for index, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith(("import ", "export ")):
                break
            if stripped.startswith("/**"):
                comment = self._jsdoc_block_from_lines(lines, index)
                if comment:
                    return self._parse_jsdoc_comment(comment)
                break

        return None

    def _extract_jsdoc_block(self, content: str, start: int) -> Optional[str]:
        """Extract a JSDoc block starting at the given position.

        Args:
            content: Full source text.
            start: Zero-based index of the line expected to open the block.

        Returns:
            The text between ``/**`` and ``*/`` with surrounding whitespace
            removed, or ``None`` if line ``start`` does not open a block.
        """
        return self._jsdoc_block_from_lines(content.split("\n"), start)

    def _jsdoc_block_from_lines(self, lines: List[str], start: int) -> Optional[str]:
        """Return the body of the JSDoc block opening on ``lines[start]``."""
        if start < 0 or start >= len(lines):
            return None

        opener = lines[start].strip()
        if not opener.startswith("/**"):
            return None

        end = start
        # Only text after the opening "/**" can close the block on the same
        # line; otherwise scan forward for the first line containing "*/".
        if "*/" not in opener[3:]:
            end = start + 1
            while end < len(lines) and "*/" not in lines[end]:
                end += 1

        block = "\n".join(lines[start:end + 1])
        match = self.JSDOC_PATTERN.search(block)
        if match:
            return match.group(1).strip()

        return None

    def _find_preceding_jsdoc(self, lines: List[str], decl_index: int) -> Optional[Dict[str, Any]]:
        """Parse the JSDoc block that closes directly above a declaration.

        Blank lines and decorator lines (starting with ``@``) between the
        comment and the declaration are skipped.

        Returns:
            The parsed comment, or ``None`` when the nearest preceding
            content is not a non-empty ``/** ... */`` block.
        """
        end = decl_index - 1
        while end >= 0:
            text = lines[end].strip()
            if text and not text.startswith("@"):
                break
            end -= 1
        if end < 0 or not lines[end].rstrip().endswith("*/"):
            return None

        # Walk upwards to the line that opens this comment.
        start = end
        while start >= 0:
            text = lines[start].strip()
            if text.startswith("/*"):
                break
            if start != end and "*/" in text:
                # Hit the end of an earlier comment before finding an opener.
                return None
            start -= 1
        if start < 0:
            return None

        comment = self._jsdoc_block_from_lines(lines, start)
        if not comment:
            return None
        return self._parse_jsdoc_comment(comment)

    @staticmethod
    def _iter_code_lines(lines: List[str]) -> List[tuple]:
        """Return ``(index, stripped_line)`` pairs for lines outside comments.

        Lines inside ``/* ... */`` blocks, ``//`` lines and ``*``-prefixed
        continuation lines are dropped so that declaration-like text in
        documentation (for example ``@example`` code) is not matched.
        """
        code_lines = []
        in_block_comment = False
        for index, line in enumerate(lines):
            stripped = line.strip()
            if in_block_comment:
                if "*/" in stripped:
                    in_block_comment = False
                continue
            if stripped.startswith("/*") and "*/" not in stripped[2:]:
                in_block_comment = True
                continue
            if stripped.startswith(("//", "*")):
                continue
            code_lines.append((index, stripped))
        return code_lines

    def _parse_jsdoc_comment(self, comment: str) -> Dict[str, Any]:
        """Parse a JSDoc comment and extract its components.

        Text before the first ``@tag`` becomes the description. Each tag
        collects the rest of its line plus the following non-blank lines up
        to the next tag.

        Args:
            comment: Comment body without the ``/**`` and ``*/`` delimiters.

        Returns:
            A dict with ``description``, ``params``, ``returns``,
            ``examples`` and ``tags``.
        """
        result = {
            "description": "",
            "params": [],
            "returns": None,
            "examples": [],
            "tags": {},
        }

        description_lines = []
        current_tag = None
        current_tag_content = []

        for line in comment.split("\n"):
            stripped = line.strip()
            # Drop the decorative leading "*" of a comment line.
            if stripped.startswith("*"):
                stripped = stripped[1:].strip()

            if stripped.startswith("@"):
                if current_tag is not None:
                    self._add_tag_content(result, current_tag, current_tag_content)
                match = self._TAG_LINE_PATTERN.match(stripped)
                if match:
                    current_tag = match.group(1)
                    # Keep the raw text; the per-tag parsers split it into
                    # type, name and description themselves.
                    rest = match.group(2).strip()
                    current_tag_content = [rest] if rest else []
                else:
                    current_tag = None
                    current_tag_content = []
            elif current_tag is not None:
                if stripped:
                    current_tag_content.append(stripped)
            elif stripped:
                description_lines.append(stripped)

        if current_tag is not None:
            self._add_tag_content(result, current_tag, current_tag_content)

        result["description"] = "\n".join(description_lines).strip()

        return result

    def _add_tag_content(self, result: Dict, tag: str, content: List[str]) -> None:
        """Add parsed tag content to the result.

        Args:
            result: Dict being built by :meth:`_parse_jsdoc_comment`;
                modified in place.
            tag: Tag name without the leading ``@``.
            content: Raw text lines collected for the tag (may be empty).
        """
        combined = " ".join(content).strip()

        if tag == "param":
            param = self._parse_param_tag(combined)
            if param:
                result["params"].append(param)
        elif tag in ("returns", "return"):
            ret = self._parse_returns_tag(combined)
            if ret:
                result["returns"] = ret
        elif tag == "example":
            if combined:
                result["examples"].append(combined)
        elif tag == "examples":
            result["examples"].extend(
                line.strip() for line in content if line.strip()
            )
        else:
            # Any other tag, including bare ones such as @deprecated, which
            # are stored with an empty string.
            result["tags"][tag] = combined

    def _parse_param_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse a @param tag.

        Args:
            content: Tag text after ``@param``, for example
                ``"{number} a - First operand."``.

        Returns:
            A dict with ``name``, ``type`` and ``description``, or ``None``
            when the text is empty or contains no name.
        """
        if not content:
            return None

        param = {"name": "", "type": "", "description": ""}

        match = self._PARAM_PATTERN.match(content)
        if match:
            param["type"] = match.group(1) or ""
            param["name"] = match.group(2) or ""
            param["description"] = (match.group(3) or "").strip()

        if not param["name"]:
            return None

        return param

    def _parse_returns_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse a @returns tag.

        Args:
            content: Tag text after ``@returns``/``@return``.

        Returns:
            A dict with ``type`` and ``description``, or ``None`` when the
            text is empty.
        """
        if not content:
            return None

        ret = {"type": "", "description": ""}

        match = self._RETURNS_PATTERN.match(content)
        if match:
            ret["type"] = match.group(1) or ""
            ret["description"] = (match.group(2) or "").strip()

        return ret

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract function documentation from source.

        A line yields at most one entry. Names starting with ``_`` are
        skipped for the pattern that produced them.

        Returns:
            One dict per detected declaration with ``name``,
            ``description``, ``args``, ``returns``, ``examples`` and the
            1-based ``line_number``.
        """
        functions = []
        lines = content.split("\n")

        for line_num, stripped in self._iter_code_lines(lines):
            for pattern in self._FUNCTION_PATTERNS:
                match = pattern.search(stripped)
                if not match:
                    continue

                func_name = match.group(1)
                if func_name.startswith("_"):
                    continue

                func_doc = {
                    "name": func_name,
                    "description": "",
                    "args": [],
                    "returns": None,
                    "examples": [],
                    "line_number": line_num + 1,
                }

                parsed = self._find_preceding_jsdoc(lines, line_num)
                if parsed:
                    func_doc["description"] = parsed.get("description", "")
                    func_doc["args"] = parsed.get("params", [])
                    func_doc["returns"] = parsed.get("returns")
                    func_doc["examples"] = parsed.get("examples", [])

                functions.append(func_doc)
                break

        return functions

    def _extract_classes(self, content: str) -> List[Dict[str, Any]]:
        """Extract class documentation from source.

        Returns:
            One dict per detected class with ``name``, ``description``,
            ``methods`` (always an empty list here) and the 1-based
            ``line_number``.
        """
        classes = []
        lines = content.split("\n")

        for line_num, stripped in self._iter_code_lines(lines):
            match = None
            for pattern in self._CLASS_PATTERNS:
                match = pattern.search(stripped)
                if match:
                    break
            if not match:
                continue

            class_doc = {
                "name": match.group(1),
                "description": "",
                "methods": [],
                "line_number": line_num + 1,
            }

            parsed = self._find_preceding_jsdoc(lines, line_num)
            if parsed:
                class_doc["description"] = parsed.get("description", "")

            classes.append(class_doc)

        return classes
|
||||
|
||||
|
||||
def parse_javascript_file(file_path: Path) -> Dict[str, Any]:
    """Extract documentation from one JavaScript/TypeScript source file.

    Convenience wrapper that builds a fresh ``JavaScriptDocstringParser``
    and returns the result of its ``parse_file`` method for ``file_path``.
    """
    return JavaScriptDocstringParser().parse_file(file_path)
|
||||
Reference in New Issue
Block a user