"""JavaScript docstring parser for Doc2Man."""

import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


class JavaScriptDocstringParser:
    """Parser for JSDoc comments from JavaScript/TypeScript files.

    The parser is line/regex based (it is not a JavaScript parser): it finds
    declaration lines, then attaches the ``/** ... */`` comment that ends
    directly above each declaration.
    """

    JSDOC_PATTERN = re.compile(
        r'/\*\*([^*]*(?:\*(?!/)[^*]*)*)\*/',
        re.DOTALL
    )

    # Retained unchanged for backward compatibility with external users of
    # this attribute.  The parser itself no longer uses it, because splitting
    # the tag line into type/name/description here loses information that the
    # per-tag parsers need.
    TAG_PATTERN = re.compile(
        r'@(\w+)(?:\s*(\{[^\{\}]+\}))?(?:\s*(\S+))?(?:\s+(.*))?',
        re.DOTALL
    )

    # "@tag rest of line" -> (tag, rest of line)
    _TAG_LINE_PATTERN = re.compile(r'@(\w+)\s*(.*)', re.DOTALL)

    # "name - description"; the dash only counts as a separator when it is
    # followed by whitespace, so "-1 on failure" keeps its minus sign.
    _PARAM_REST_PATTERN = re.compile(
        r'\s*(\S+)?\s*(?:-(?=\s|$))?\s*(.*)', re.DOTALL
    )
    _RETURNS_REST_PATTERN = re.compile(
        r'\s*(?:-(?=\s|$))?\s*(.*)', re.DOTALL
    )

    # A '"use strict";' directive line.
    _DIRECTIVE_PATTERN = re.compile(r'''^(["'])use strict\1;?$''')

    # Declaration patterns, tried in order; group 1 is always the name.
    # NOTE: any ``export const/let/var NAME =`` is reported, even when the
    # right-hand side is not a function (behaviour inherited from the
    # original pattern list).
    _FUNCTION_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'\bexport\s+(?:default\s+)?(?:async\s+)?'
            r'function(?:\s*\*\s*|\s+)(\w+)',
            r'\bexport\s+(?:const|let|var)\s+(\w+)\s*=',
            r'\basync\s+function(?:\s*\*\s*|\s+)(\w+)',
            r'^function(?:\s*\*\s*|\s+)(\w+)',
            r'\b(?:const|let|var)\s+(\w+)\s*=\s*'
            r'(?:function\b|async\b|\([^)]*\)\s*=>|\w+\s*=>)',
        )
    )

    _CLASS_PATTERN = re.compile(
        r'(?:\bexport\s+(?:default\s+)?(?:abstract\s+)?|^(?:abstract\s+)?)'
        r'class\s+(\w+)'
    )

    def __init__(self) -> None:
        """Initialize the parser."""

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a JavaScript/TypeScript file and extract documentation.

        The file is decoded as UTF-8, falling back to Latin-1 when the bytes
        are not valid UTF-8.

        Args:
            file_path: Path of the source file.

        Returns:
            The dictionary produced by :meth:`parse_content`.

        Raises:
            ValueError: If the file cannot be read or parsed; the original
                exception is chained as ``__cause__``.
        """
        try:
            path = Path(file_path)
            try:
                content = path.read_text(encoding="utf-8")
            except UnicodeDecodeError:
                # Latin-1 maps every byte, so this decode cannot fail.
                content = path.read_text(encoding="latin-1")
            return self.parse_content(content, str(file_path))
        except Exception as e:
            raise ValueError(f"Error reading file {file_path}: {e}") from e

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse JavaScript/TypeScript content and extract documentation.

        Args:
            content: Full source text.
            file_path: Path recorded in the result (not opened).

        Returns:
            A dict with the keys ``title``, ``description``,
            ``module_docstring``, ``functions``, ``classes``, ``file_path``
            and ``language``.  ``title`` stays ``None`` unless the parsed
            module comment carries a ``title`` entry, which the JSDoc comment
            parser in this class does not currently produce.
        """
        # A byte-order mark would otherwise hide a leading "/**".
        if content.startswith("\ufeff"):
            content = content[1:]

        result: Dict[str, Any] = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
            "file_path": file_path,
            "language": "javascript",
        }

        module_doc = self._extract_module_documentation(content)
        if module_doc:
            result["module_docstring"] = module_doc.get("description")
            result["description"] = module_doc.get("description")
            result["title"] = module_doc.get("title")

        result["functions"] = self._extract_functions(content)
        result["classes"] = self._extract_classes(content)

        return result

    def _extract_module_documentation(self, content: str) -> Optional[Dict[str, Any]]:
        """Extract module/file-level documentation.

        The module comment is the first ``/** ... */`` block that appears
        before any code.  Blank lines, ``//`` comments, a shebang line, a
        ``"use strict"`` directive and plain ``/* ... */`` comments are
        skipped; any other line ends the search.

        Returns:
            The parsed comment, or ``None`` when no such comment exists.
        """
        lines = content.split("\n")
        index = 0

        while index < len(lines):
            stripped = lines[index].strip()

            if stripped.startswith("/**") and not stripped.startswith("/**/"):
                comment = self._jsdoc_block_from_lines(lines, index)
                return self._parse_jsdoc_comment(comment) if comment else None

            if stripped.startswith("/*"):
                # Plain block comment (e.g. a licence header): skip to its end.
                while index < len(lines) and "*/" not in lines[index]:
                    index += 1
            elif (
                stripped
                and not stripped.startswith(("//", "#!"))
                and not self._DIRECTIVE_PATTERN.match(stripped)
            ):
                # First real code line: nothing above it was a JSDoc block.
                break

            index += 1

        return None

    def _extract_jsdoc_block(self, content: str, start: int) -> Optional[str]:
        """Extract a JSDoc block starting at the given position.

        Args:
            content: Full source text.
            start: Zero-based index of the line expected to open the block.

        Returns:
            The comment text between ``/**`` and ``*/`` (stripped), or
            ``None`` if line ``start`` does not open a terminated JSDoc block.
        """
        return self._jsdoc_block_from_lines(content.split("\n"), start)

    def _jsdoc_block_from_lines(self, lines: List[str], start: int) -> Optional[str]:
        """Return the body of the JSDoc block opening on ``lines[start]``."""
        if start < 0 or start >= len(lines):
            return None

        first = lines[start]
        stripped = first.strip()
        # "/**/" is an empty plain comment, not a JSDoc opener.
        if not stripped.startswith("/**") or stripped.startswith("/**/"):
            return None

        if first.find("*/", first.find("/**") + 3) != -1:
            # One-line comment: "/** text */".
            end = start
        else:
            end = start + 1
            while end < len(lines) and "*/" not in lines[end]:
                end += 1

        block = "\n".join(lines[start:end + 1])
        match = self.JSDOC_PATTERN.search(block)
        return match.group(1).strip() if match else None

    def _find_preceding_jsdoc(self, lines: List[str], decl_index: int) -> Optional[str]:
        """Return the JSDoc body that ends directly above a declaration.

        Blank lines between the comment and the declaration are allowed.
        Returns ``None`` when the nearest non-blank line above is not the end
        of a ``/** ... */`` comment.
        """
        end = decl_index - 1
        while end >= 0 and not lines[end].strip():
            end -= 1
        if end < 0 or not lines[end].rstrip().endswith("*/"):
            return None

        for start in range(end, -1, -1):
            stripped = lines[start].strip()
            if stripped.startswith("/**"):
                return self._jsdoc_block_from_lines(lines, start)
            if "/*" in stripped:
                # The comment is a plain block comment, or opens mid-line.
                return None
        return None

    def _parse_jsdoc_comment(self, comment: str) -> Dict[str, Any]:
        """Parse a JSDoc comment and extract its components.

        Args:
            comment: Comment text without the ``/**`` and ``*/`` delimiters;
                leading ``*`` gutters on each line are removed here.

        Returns:
            A dict with ``description`` (str), ``params`` (list of dicts),
            ``returns`` (dict or ``None``), ``examples`` (list of str) and
            ``tags`` (tag name -> text for every other tag).
        """
        result: Dict[str, Any] = {
            "description": "",
            "params": [],
            "returns": None,
            "examples": [],
            "tags": {},
        }

        description_lines: List[str] = []
        current_tag: Optional[str] = None
        current_tag_content: List[str] = []

        for line in comment.split("\n"):
            stripped = line.strip()
            if stripped.startswith("*"):
                stripped = stripped[1:].strip()

            if stripped.startswith("@"):
                # A new tag closes the previous one, even if it had no text
                # (flag tags such as @deprecated).
                if current_tag:
                    self._add_tag_content(result, current_tag, current_tag_content)
                match = self._TAG_LINE_PATTERN.match(stripped)
                if match:
                    current_tag = match.group(1)
                    remainder = match.group(2).strip()
                    # Keep the raw text; the per-tag parsers split it.
                    current_tag_content = [remainder] if remainder else []
                else:
                    current_tag = None
                    current_tag_content = []
            elif current_tag:
                # Continuation line of the current tag.
                current_tag_content.append(stripped)
            elif stripped:
                description_lines.append(stripped)

        if current_tag:
            self._add_tag_content(result, current_tag, current_tag_content)

        result["description"] = "\n".join(description_lines).strip()

        return result

    def _add_tag_content(self, result: Dict, tag: str, content: List[str]) -> None:
        """Add parsed tag content to the result.

        ``param`` and ``return(s)`` are parsed into dicts, ``example`` keeps
        its line breaks, ``examples`` contributes one entry per non-blank
        line, and any other tag is stored in ``result["tags"]`` (a repeated
        tag overwrites the earlier value).
        """
        combined = " ".join(part for part in content if part).strip()

        if tag == "param":
            param = self._parse_param_tag(combined)
            if param:
                result["params"].append(param)
        elif tag in ("returns", "return"):
            ret = self._parse_returns_tag(combined)
            if ret:
                result["returns"] = ret
        elif tag == "example":
            # Examples are code: keep one source line per line.
            example = "\n".join(content).strip()
            if example:
                result["examples"].append(example)
        elif tag == "examples":
            for line in content:
                if line.strip():
                    result["examples"].append(line.strip())
        else:
            result["tags"][tag] = combined

    @staticmethod
    def _split_type(text: str) -> Tuple[str, str]:
        """Split a leading ``{type}`` expression off ``text``.

        Braces are balanced, so ``{{x: number}} opts`` yields the type
        ``{x: number}``.  Returns ``("", text)`` when ``text`` does not start
        with a balanced brace expression.
        """
        text = text.lstrip()
        if not text.startswith("{"):
            return "", text

        depth = 0
        for index, char in enumerate(text):
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return text[1:index].strip(), text[index + 1:]
        return "", text

    def _parse_param_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse a @param tag.

        Args:
            content: Tag text after ``@param``, e.g.
                ``{number} count - How many.``

        Returns:
            ``{"name", "type", "description"}``, or ``None`` when the text is
            empty or has no parameter name.
        """
        if not content:
            return None

        type_hint, rest = self._split_type(content)
        match = self._PARAM_REST_PATTERN.match(rest)
        name = (match.group(1) or "") if match else ""
        if not name:
            return None

        return {
            "name": name,
            "type": type_hint,
            "description": (match.group(2) or "").strip(),
        }

    def _parse_returns_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse a @returns tag.

        Args:
            content: Tag text after ``@returns``, e.g. ``{number} The sum.``

        Returns:
            ``{"type", "description"}``, or ``None`` when the text is empty.
        """
        if not content:
            return None

        type_hint, rest = self._split_type(content)
        match = self._RETURNS_REST_PATTERN.match(rest)
        description = (match.group(1) or "").strip() if match else ""

        return {"type": type_hint, "description": description}

    @staticmethod
    def _is_comment_line(stripped: str) -> bool:
        """Tell whether a stripped line is a ``//`` or comment-gutter line."""
        return stripped.startswith(("//", "*"))

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract function documentation from source.

        Each line is matched against the declaration patterns; names starting
        with ``_`` are treated as private and skipped.

        Returns:
            One dict per function with ``name``, ``description``, ``args``,
            ``returns``, ``examples`` and a 1-based ``line_number``.
        """
        functions: List[Dict[str, Any]] = []
        lines = content.split("\n")

        for line_num, line in enumerate(lines):
            stripped = line.strip()
            if not stripped or self._is_comment_line(stripped):
                continue

            match = None
            for pattern in self._FUNCTION_PATTERNS:
                match = pattern.search(stripped)
                if match:
                    break
            if not match:
                continue

            func_name = match.group(1)
            if func_name.startswith("_"):
                continue

            func_doc: Dict[str, Any] = {
                "name": func_name,
                "description": "",
                "args": [],
                "returns": None,
                "examples": [],
                "line_number": line_num + 1,
            }

            jsdoc = self._find_preceding_jsdoc(lines, line_num)
            if jsdoc:
                parsed = self._parse_jsdoc_comment(jsdoc)
                func_doc["description"] = parsed.get("description", "")
                func_doc["args"] = parsed.get("params", [])
                func_doc["returns"] = parsed.get("returns")
                func_doc["examples"] = parsed.get("examples", [])

            functions.append(func_doc)

        return functions

    def _extract_classes(self, content: str) -> List[Dict[str, Any]]:
        """Extract class documentation from source.

        Returns:
            One dict per class with ``name``, ``description``, ``methods``
            (always empty: methods are not extracted) and a 1-based
            ``line_number``.
        """
        classes: List[Dict[str, Any]] = []
        lines = content.split("\n")

        for line_num, line in enumerate(lines):
            stripped = line.strip()
            if not stripped or self._is_comment_line(stripped):
                continue

            match = self._CLASS_PATTERN.search(stripped)
            if not match:
                continue

            class_doc: Dict[str, Any] = {
                "name": match.group(1),
                "description": "",
                "methods": [],
                "line_number": line_num + 1,
            }

            jsdoc = self._find_preceding_jsdoc(lines, line_num)
            if jsdoc:
                parsed = self._parse_jsdoc_comment(jsdoc)
                class_doc["description"] = parsed.get("description", "")

            classes.append(class_doc)

        return classes


def parse_javascript_file(file_path: Path) -> Dict[str, Any]:
    """Parse a JavaScript/TypeScript file and extract documentation.

    Convenience wrapper around :meth:`JavaScriptDocstringParser.parse_file`.

    Raises:
        ValueError: If the file cannot be read or parsed.
    """
    parser = JavaScriptDocstringParser()
    return parser.parse_file(file_path)