Add parsers: Python, Go, and JavaScript docstring parsers

2026-01-31 00:56:37 +00:00
parent fb2fb893aa
commit 3ac81c4290
1 changed files with 316 additions and 0 deletions
--- a/doc2man/parsers/javascript.py
+++ b/doc2man/parsers/javascript.py
@@ -0,0 +1,316 @@
+"""JavaScript docstring parser for Doc2Man."""
+
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+
+class JavaScriptDocstringParser:
+    """Parser for JSDoc comments from JavaScript/TypeScript files.
+
+    The parser is line-oriented and regex-based; it does not build a syntax
+    tree. Declarations are recognised one source line at a time, and each
+    one is paired with the ``/** ... */`` block that directly precedes it.
+    """
+
+    # One complete ``/** ... */`` block; group 1 is the raw comment body.
+    JSDOC_PATTERN = re.compile(
+        r'/\*\*([^*]*(?:\*(?!/)[^*]*)*)\*/',
+        re.DOTALL
+    )
+
+    # A tag line such as ``@param {string} name - text``. This class only
+    # relies on group 1 (the tag name); the pattern itself is left unchanged
+    # in case code outside this class reads the other groups.
+    TAG_PATTERN = re.compile(
+        r'@(\w+)(?:\s*(\{[^\{\}]+\}))?(?:\s*(\S+))?(?:\s+(.*))?',
+        re.DOTALL
+    )
+
+    # Declaration forms treated as functions, tried in order with
+    # ``re.search``; group 1 is always the declared name. Compiled once here
+    # rather than being rebuilt for every source line.
+    _FUNCTION_PATTERNS = tuple(
+        re.compile(pattern)
+        for pattern in (
+            r'export\s+(?:default\s+)?(?:async\s+)?function\s+(\w+)',
+            r'export\s+(?:const|let|var)\s+(\w+)\s*=',
+            r'async\s+function\s+(\w+)',
+            r'^function\s+(\w+)',
+            r'\b(?:const|let|var)\s+(\w+)\s*=\s*(?:async|function)\b',
+            r'\b(?:const|let|var)\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>',
+        )
+    )
+
+    # ``class X``, ``export class X``, ``export default class X`` and the
+    # TypeScript ``abstract`` variants; group 1 is the class name.
+    _CLASS_PATTERN = re.compile(
+        r'(?:^|\bexport\s+(?:default\s+)?)(?:abstract\s+)?class\s+(\w+)'
+    )
+
+    # JSDoc synonyms that are handled identically.
+    _PARAM_TAGS = ("param", "arg", "argument")
+    _RETURN_TAGS = ("returns", "return")
+
+    def __init__(self):
+        """Initialize the parser (it keeps no per-instance state)."""
+
+    def parse_file(self, file_path: Path) -> Dict[str, Any]:
+        """Parse a JavaScript/TypeScript file and extract documentation.
+
+        Args:
+            file_path: Location of the source file.
+
+        Returns:
+            The dictionary produced by :meth:`parse_content`.
+
+        Raises:
+            ValueError: If the file cannot be read or parsed. The original
+                exception is chained as ``__cause__``.
+        """
+        try:
+            content = self._read_source(file_path)
+            return self.parse_content(content, str(file_path))
+        except Exception as e:
+            raise ValueError(f"Error reading file {file_path}: {e}") from e
+
+    @staticmethod
+    def _read_source(file_path: Path) -> str:
+        """Read *file_path* as UTF-8, falling back to Latin-1 on decode errors."""
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                return f.read()
+        except UnicodeDecodeError:
+            with open(file_path, "r", encoding="latin-1") as f:
+                return f.read()
+
+    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
+        """Parse JavaScript/TypeScript content and extract documentation.
+
+        Args:
+            content: Full source text.
+            file_path: Stored verbatim under ``"file_path"`` in the result.
+
+        Returns:
+            A dict with the keys ``title``, ``description``,
+            ``module_docstring``, ``functions``, ``classes``, ``file_path``
+            and ``language`` (always ``"javascript"``).
+        """
+        result: Dict[str, Any] = {
+            "title": None,
+            "description": None,
+            "module_docstring": None,
+            "functions": [],
+            "classes": [],
+            "file_path": file_path,
+            "language": "javascript",
+        }
+
+        module_doc = self._extract_module_documentation(content)
+        if module_doc:
+            result["module_docstring"] = module_doc.get("description")
+            result["description"] = module_doc.get("description")
+            result["title"] = module_doc.get("title")
+
+        result["functions"] = self._extract_functions(content)
+        result["classes"] = self._extract_classes(content)
+
+        return result
+
+    def _extract_module_documentation(self, content: str) -> Optional[Dict[str, Any]]:
+        """Extract module/file-level documentation.
+
+        The first line that opens a ``/**`` block is taken as the module
+        comment, unless an ``import``/``export`` statement comes first.
+
+        Returns:
+            The parsed comment (see :meth:`_parse_jsdoc_comment`) extended
+            with a ``"title"`` key taken from the ``@module`` tag (``None``
+            when absent), or ``None`` if no module comment is found.
+        """
+        for index, line in enumerate(content.split("\n")):
+            stripped = line.strip()
+            if stripped.startswith(("import ", "export ")):
+                break
+            if stripped.startswith("/**"):
+                comment = self._extract_jsdoc_block(content, index)
+                if not comment:
+                    break
+                parsed = self._parse_jsdoc_comment(comment)
+                tags = parsed["tags"]
+                parsed["title"] = tags.get("module") or None
+                if not parsed["description"]:
+                    # File-level comments often carry their text in a tag
+                    # rather than as free-form description.
+                    parsed["description"] = (
+                        tags.get("file")
+                        or tags.get("fileoverview")
+                        or tags.get("overview")
+                        or ""
+                    )
+                return parsed
+
+        return None
+
+    def _extract_jsdoc_block(self, content: str, start: int) -> Optional[str]:
+        """Extract the JSDoc block that opens on line index *start*.
+
+        Args:
+            content: Full source text.
+            start: Zero-based index of the line expected to begin with ``/**``.
+
+        Returns:
+            The stripped comment body, or ``None`` if *start* is out of
+            range, the line does not open a JSDoc block, or the block is
+            malformed or unterminated.
+        """
+        lines = content.split("\n")
+        if start < 0 or start >= len(lines):
+            return None
+        if not lines[start].strip().startswith("/**"):
+            return None
+
+        # Begin at the opening line so a one-line ``/** ... */`` comment does
+        # not pull in unrelated lines up to some later ``*/``.
+        end = start
+        while end < len(lines) and "*/" not in lines[end]:
+            end += 1
+
+        match = self.JSDOC_PATTERN.search("\n".join(lines[start:end + 1]))
+        return match.group(1).strip() if match else None
+
+    def _find_preceding_jsdoc(self, lines: List[str], decl_index: int) -> Optional[str]:
+        """Return the JSDoc body attached to the declaration on *decl_index*.
+
+        A comment is attached when it shares the declaration's line or when
+        only blank lines and single-line ``@decorator`` lines separate its
+        closing ``*/`` from the declaration.
+
+        Args:
+            lines: The source split into lines.
+            decl_index: Zero-based index of the declaration line.
+
+        Returns:
+            The stripped comment body, or ``None`` if the declaration has no
+            JSDoc comment.
+        """
+        own_line = lines[decl_index].strip()
+        if own_line.startswith("/**"):
+            inline = self.JSDOC_PATTERN.match(own_line)
+            if inline:
+                return inline.group(1).strip()
+
+        end = decl_index - 1
+        while end >= 0:
+            text = lines[end].strip()
+            if text and not text.startswith("@"):
+                break
+            end -= 1
+        if end < 0 or not lines[end].rstrip().endswith("*/"):
+            return None
+
+        # Walk up to the line that opens this comment.
+        start = end
+        while start >= 0:
+            text = lines[start].lstrip()
+            if text.startswith("/*"):
+                break
+            if start != end and "*/" in text:
+                # Ran into the end of an earlier comment: the opener of this
+                # one is not at the start of a line, so give up.
+                return None
+            start -= 1
+        else:
+            return None
+
+        # Plain ``/* ... */`` comments are not documentation.
+        if not lines[start].lstrip().startswith("/**"):
+            return None
+
+        match = self.JSDOC_PATTERN.search("\n".join(lines[start:end + 1]))
+        return match.group(1).strip() if match else None
+
+    @staticmethod
+    def _code_portion(line: str) -> str:
+        """Return the part of *line* that is code, or ``""`` for comment lines.
+
+        Lines starting with ``//`` or ``*`` (the usual continuation of a
+        block comment) are treated as comments. For a line starting with
+        ``/*`` only the text after the closing ``*/`` counts as code.
+        """
+        text = line.strip()
+        if text.startswith(("//", "*")):
+            return ""
+        if text.startswith("/*"):
+            close = text.find("*/", 2)
+            return text[close + 2:].strip() if close != -1 else ""
+        return text
+
+    def _parse_jsdoc_comment(self, comment: str) -> Dict[str, Any]:
+        """Parse a JSDoc comment body and extract its components.
+
+        Args:
+            comment: Comment text without the ``/**`` and ``*/`` delimiters;
+                a leading ``*`` on each line is removed.
+
+        Returns:
+            A dict with ``description`` (str), ``params`` (list of dicts),
+            ``returns`` (dict or ``None``), ``examples`` (list of str) and
+            ``tags`` (dict mapping every other tag name to its text).
+        """
+        result: Dict[str, Any] = {
+            "description": "",
+            "params": [],
+            "returns": None,
+            "examples": [],
+            "tags": {},
+        }
+
+        description_lines: List[str] = []
+        current_tag: Optional[str] = None
+        current_tag_content: List[str] = []
+
+        for line in comment.split("\n"):
+            stripped = line.strip()
+            if stripped.startswith("*"):
+                stripped = stripped[1:].strip()
+
+            if stripped.startswith("@"):
+                if current_tag:
+                    self._add_tag_content(result, current_tag, current_tag_content)
+                match = self.TAG_PATTERN.match(stripped)
+                if match:
+                    current_tag = match.group(1)
+                    # Keep the text after the tag name verbatim; the
+                    # tag-specific parsers split it into type/name/description.
+                    remainder = stripped[match.end(1):].strip()
+                    current_tag_content = [remainder] if remainder else []
+                else:
+                    current_tag = None
+                    current_tag_content = []
+            elif current_tag:
+                current_tag_content.append(stripped)
+            elif stripped:
+                description_lines.append(stripped)
+
+        if current_tag:
+            self._add_tag_content(result, current_tag, current_tag_content)
+
+        result["description"] = "\n".join(description_lines).strip()
+
+        return result
+
+    def _add_tag_content(self, result: Dict, tag: str, content: List[str]) -> None:
+        """Store one parsed tag in *result*.
+
+        Args:
+            result: The dict being built by :meth:`_parse_jsdoc_comment`.
+            tag: Tag name without the leading ``@``.
+            content: Text after the tag name followed by any continuation
+                lines; may be empty for tags such as ``@deprecated``.
+        """
+        combined = " ".join(piece for piece in content if piece).strip()
+
+        if tag in self._PARAM_TAGS:
+            param = self._parse_param_tag(combined)
+            if param:
+                result["params"].append(param)
+        elif tag in self._RETURN_TAGS:
+            ret = self._parse_returns_tag(combined)
+            if ret:
+                result["returns"] = ret
+        elif tag == "example":
+            # Examples are code, so their line structure is preserved.
+            example = "\n".join(content).strip()
+            if example:
+                result["examples"].append(example)
+        elif tag == "examples":
+            result["examples"].extend(
+                line.strip() for line in content if line.strip()
+            )
+        else:
+            result["tags"][tag] = combined
+
+    def _parse_param_tag(self, content: str) -> Optional[Dict[str, str]]:
+        """Parse the text of a @param tag (``{type} name - description``).
+
+        Returns:
+            A dict with ``name``, ``type`` and ``description``, or ``None``
+            when *content* is empty or contains no parameter name.
+        """
+        if not content:
+            return None
+
+        match = re.match(
+            r'(?:\{([^}]+)\})?\s*(\S+)?\s*(?:-)?\s*(.*)', content, re.DOTALL
+        )
+        if not match or not match.group(2):
+            return None
+
+        return {
+            "name": match.group(2),
+            "type": match.group(1) or "",
+            "description": (match.group(3) or "").strip(),
+        }
+
+    def _parse_returns_tag(self, content: str) -> Optional[Dict[str, str]]:
+        """Parse the text of a @returns tag (``{type} description``).
+
+        Returns:
+            A dict with ``type`` and ``description``, or ``None`` when
+            *content* is empty.
+        """
+        if not content:
+            return None
+
+        ret = {"type": "", "description": ""}
+        match = re.match(r'(?:\{([^}]+)\})?(?:\s*-)?\s*(.*)', content, re.DOTALL)
+        if match:
+            ret["type"] = match.group(1) or ""
+            ret["description"] = (match.group(2) or "").strip()
+
+        return ret
+
+    def _match_function_name(self, code: str) -> Optional[str]:
+        """Return the name declared by *code* if it looks like a function."""
+        for pattern in self._FUNCTION_PATTERNS:
+            match = pattern.search(code)
+            if match:
+                return match.group(1)
+        return None
+
+    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
+        """Extract function documentation from source.
+
+        Names starting with ``_`` are treated as private and skipped, and
+        comment lines are never scanned for declarations.
+
+        Returns:
+            One dict per function with ``name``, ``description``, ``args``,
+            ``returns``, ``examples`` and the 1-based ``line_number``.
+        """
+        functions: List[Dict[str, Any]] = []
+        lines = content.split("\n")
+
+        for line_num, line in enumerate(lines):
+            code = self._code_portion(line)
+            if not code:
+                continue
+
+            func_name = self._match_function_name(code)
+            if func_name is None or func_name.startswith("_"):
+                continue
+
+            func_doc: Dict[str, Any] = {
+                "name": func_name,
+                "description": "",
+                "args": [],
+                "returns": None,
+                "examples": [],
+                "line_number": line_num + 1,
+            }
+
+            jsdoc = self._find_preceding_jsdoc(lines, line_num)
+            if jsdoc:
+                parsed = self._parse_jsdoc_comment(jsdoc)
+                func_doc["description"] = parsed.get("description", "")
+                func_doc["args"] = parsed.get("params", [])
+                func_doc["returns"] = parsed.get("returns")
+                func_doc["examples"] = parsed.get("examples", [])
+
+            functions.append(func_doc)
+
+        return functions
+
+    def _extract_classes(self, content: str) -> List[Dict[str, Any]]:
+        """Extract class documentation from source.
+
+        Returns:
+            One dict per class with ``name``, ``description``, ``methods``
+            (always an empty list; methods are not extracted) and the
+            1-based ``line_number``.
+        """
+        classes: List[Dict[str, Any]] = []
+        lines = content.split("\n")
+
+        for line_num, line in enumerate(lines):
+            code = self._code_portion(line)
+            if not code:
+                continue
+
+            match = self._CLASS_PATTERN.search(code)
+            if not match:
+                continue
+
+            class_doc: Dict[str, Any] = {
+                "name": match.group(1),
+                "description": "",
+                "methods": [],
+                "line_number": line_num + 1,
+            }
+
+            jsdoc = self._find_preceding_jsdoc(lines, line_num)
+            if jsdoc:
+                parsed = self._parse_jsdoc_comment(jsdoc)
+                class_doc["description"] = parsed.get("description", "")
+
+            classes.append(class_doc)
+
+        return classes
+
+
+def parse_javascript_file(file_path: Path) -> Dict[str, Any]:
+    """Extract documentation from the JavaScript/TypeScript file at *file_path*.
+
+    Convenience wrapper that builds a fresh ``JavaScriptDocstringParser`` and
+    returns the result of its ``parse_file`` method.
+    """
+    return JavaScriptDocstringParser().parse_file(file_path)