Add parsers: Python, Go, and JavaScript docstring parsers

2026-01-31 00:56:36 +00:00
parent 903096e80c
commit fb2fb893aa
1 changed files with 292 additions and 0 deletions
--- a/doc2man/parsers/go.py
+++ b/doc2man/parsers/go.py
@@ -0,0 +1,292 @@
 """Go docstring parser for Doc2Man."""
 import re
 import subprocess
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 class GoDocstringParser:
    """Parser for Go documentation comments.

    Two entry points are offered: ``parse_file``/``parse_content`` scan Go
    source text line by line, and ``parse_go_doc`` shells out to
    ``go doc -all`` and parses its textual output.  Both return a dictionary
    with the keys ``title``, ``description``, ``package_docstring``,
    ``functions``, ``types``, ``file_path`` and ``language``.
    """

    # "func", an optional receiver in parentheses, then the function name.
    _FUNC_HEADER_RE = re.compile(r'func\s+(?:\([^)]*\)\s*)?(\w+)')
    # A struct/interface declaration starting a line, with optional
    # type parameters after the name.
    _TYPE_DECL_RE = re.compile(
        r'^[ \t]*type\s+(\w+)(?:\[[^\]]*\])?\s+(?:struct|interface)\s*\{',
        re.MULTILINE,
    )
    # Section headers printed by ``go doc -all``.
    _GO_DOC_SECTIONS = frozenset({"CONSTANTS", "VARIABLES", "FUNCTIONS", "TYPES"})
    # Keywords that start a type, so they can never be a parameter name.
    _TYPE_KEYWORDS = frozenset({"chan", "func", "map", "struct", "interface"})

    def __init__(self):
        """Initialize the parser; it keeps no per-instance state."""

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a Go source file and extract documentation.

        Args:
            file_path: Location of the Go source file (read as UTF-8).

        Returns:
            The documentation dictionary built by ``parse_content``.

        Raises:
            ValueError: If reading or parsing the file fails; the original
                exception is attached as ``__cause__``.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return self.parse_content(content, str(file_path))
        except Exception as e:
            raise ValueError(f"Error reading Go file {file_path}: {e}") from e

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse Go source content and extract documentation.

        Args:
            content: Go source text.
            file_path: Value stored under ``"file_path"`` in the result.

        Returns:
            Dictionary with package documentation, functions and types.
        """
        result = self._empty_result(file_path)
        package_doc = self._extract_package_documentation(content)
        if package_doc:
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)
        result["functions"] = self._extract_functions(content)
        result["types"] = self._extract_types(content)
        return result

    def _empty_result(self, file_path: str) -> Dict[str, Any]:
        """Return the result skeleton shared by both parsing entry points."""
        return {
            "title": None,
            "description": None,
            "package_docstring": None,
            "functions": [],
            "types": [],
            "file_path": file_path,
            "language": "go",
        }

    def _extract_package_documentation(self, content: str) -> Optional[str]:
        """Extract the comment attached to the ``package`` clause.

        Only the comment directly above the clause counts: a blank line (or
        any non-comment line) detaches everything collected so far, so a
        licence header separated by a blank line is not mistaken for the
        package documentation.  Both ``//`` and ``/* ... */`` comments are
        supported.

        Returns:
            The comment text, or ``None`` when there is no such comment or
            no ``package`` clause at all.
        """
        doc_lines: List[str] = []
        in_block = False
        for line in content.split("\n"):
            stripped = line.strip()
            if in_block:
                # Inside /* ... */: everything up to the terminator is text.
                text, closed, _ = stripped.partition("*/")
                in_block = not closed
                if text.strip():
                    doc_lines.append(text.strip())
                continue
            if stripped.startswith("package "):
                return "\n".join(doc_lines).strip() or None
            if stripped.startswith("//"):
                text = stripped[2:].strip()
                if text:
                    doc_lines.append(text)
            elif stripped.startswith("/*"):
                text, closed, _ = stripped[2:].partition("*/")
                in_block = not closed
                if text.strip():
                    doc_lines.append(text.strip())
            else:
                # Blank line or stray code: earlier comments are detached.
                doc_lines = []
        return None

    def _extract_title(self, docstring: str) -> Optional[str]:
        """Return the first line of ``docstring``, or ``None`` if it is empty."""
        if not docstring:
            return None
        return docstring.strip().split("\n")[0].strip()

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract function documentation from Go source.

        Every line whose stripped text starts with ``func `` yields one
        entry, in source order.  Lines inside a ``/* ... */`` comment that
        starts at the beginning of a line are skipped, so commented-out
        functions are not reported.  Only the first line of a signature is
        examined.
        """
        functions = []
        lines = content.split("\n")
        in_block = False
        for index, line in enumerate(lines):
            stripped = line.strip()
            if in_block:
                in_block = "*/" not in stripped
                continue
            if stripped.startswith("/*"):
                in_block = "*/" not in stripped[2:]
                continue
            if not stripped.startswith("func "):
                continue
            func_doc = self._build_function_doc(
                self._extract_func_name(stripped),
                stripped,
                self._get_leading_comment(lines, index),
                lines,
                index,
            )
            if func_doc:
                functions.append(func_doc)
        return functions

    def _extract_func_name(self, line: str) -> str:
        """Return the function name in ``line`` (receiver skipped), or ``""``."""
        match = self._FUNC_HEADER_RE.search(line)
        return match.group(1) if match else ""

    def _get_leading_comment(self, lines: List[str], line_num: int) -> List[str]:
        """Return the ``//`` comment lines directly above ``lines[line_num]``.

        The walk stops at the first line that is not a ``//`` comment,
        including a blank line, because a comment separated from a
        declaration by a blank line is not its doc comment.  Empty ``//``
        lines are skipped but do not end the comment.
        """
        comments = []
        for index in range(line_num - 1, -1, -1):
            text = lines[index].strip()
            if not text.startswith("//"):
                break
            clean = text[2:].strip()
            if clean:
                comments.append(clean)
        comments.reverse()
        return comments

    def _is_in_multiline_comment(self, lines: List[str], line_num: int) -> bool:
        """Check whether ``lines[line_num]`` begins inside a ``/* */`` comment.

        Markers are consumed left to right on each earlier line, so a line
        that both closes one comment and opens another is handled correctly.
        Markers inside string literals or ``//`` comments are not recognised
        as such.
        """
        in_comment = False
        for line in lines[:line_num]:
            position = 0
            while True:
                marker = "*/" if in_comment else "/*"
                found = line.find(marker, position)
                if found == -1:
                    break
                in_comment = not in_comment
                position = found + 2
        return in_comment

    def _build_function_doc(self, name: str, signature: str, comments: List[str], lines: List[str], start: int) -> Optional[Dict[str, Any]]:
        """Build a function documentation dictionary.

        Args:
            name: Function name; an empty name yields ``None``.
            signature: Signature text used for parameter/return extraction.
            comments: Description lines, joined with newlines.
            lines: Unused; kept for interface compatibility.
            start: Zero-based index of the declaration line.

        Returns:
            The function entry, or ``None`` when ``name`` is empty.
        """
        if not name:
            return None
        return {
            "name": name,
            "signature": signature,
            "description": "\n".join(comments) if comments else "",
            "args": self._extract_params_from_signature(signature),
            "returns": self._extract_returns_from_signature(signature),
            "line_number": start + 1,
        }

    def _find_closing(self, text: str, open_index: int) -> int:
        """Return the index of the bracket matching ``text[open_index]``, or -1."""
        opener = text[open_index]
        closer = {"(": ")", "[": "]", "{": "}"}[opener]
        depth = 0
        for index in range(open_index, len(text)):
            if text[index] == opener:
                depth += 1
            elif text[index] == closer:
                depth -= 1
                if depth == 0:
                    return index
        return -1

    def _split_signature(self, signature: str) -> Optional[List[str]]:
        """Split a signature into ``[parameter_text, text_after_parameters]``.

        The receiver and any type-parameter list are skipped so that the
        parenthesised group found is the real parameter list.  Returns
        ``None`` when no balanced parameter list is present.
        """
        header = self._FUNC_HEADER_RE.search(signature)
        position = header.end() if header else 0
        if signature[position:position + 1] == "[":
            # Generic function: step over the type-parameter list.
            closing = self._find_closing(signature, position)
            if closing != -1:
                position = closing + 1
        open_index = signature.find("(", position)
        if open_index == -1:
            return None
        close_index = self._find_closing(signature, open_index)
        if close_index == -1:
            return None
        return [signature[open_index + 1:close_index], signature[close_index + 1:]]

    def _split_top_level(self, text: str) -> List[str]:
        """Split ``text`` on commas that are not nested inside brackets."""
        parts = []
        depth = 0
        current = []
        for char in text:
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
            if char == "," and depth == 0:
                parts.append("".join(current))
                current = []
            else:
                current.append(char)
        parts.append("".join(current))
        return parts

    def _extract_params_from_signature(self, signature: str) -> List[Dict[str, str]]:
        """Extract named parameters from a function signature.

        Names that share a type (``a, b int``) each receive that type.
        Unnamed parameters and names starting with ``_`` are omitted.

        Returns:
            A list of ``{"name", "type", "description"}`` dictionaries.
        """
        split = self._split_signature(signature)
        if split is None:
            return []
        entries = []
        for part in self._split_top_level(split[0]):
            text = part.strip()
            if not text:
                continue
            tokens = text.split(None, 1)
            named = (
                len(tokens) == 2
                and tokens[0].isidentifier()
                and tokens[0] not in self._TYPE_KEYWORDS
            )
            if named:
                entries.append((tokens[0], " ".join(tokens[1].split())))
            else:
                entries.append((text, None))
        if not any(type_str is not None for _, type_str in entries):
            # Types only (or nothing at all): there are no names to report.
            return []
        params = []
        pending_type = None
        # Walk right to left so a bare name inherits the type that follows it.
        for name, type_str in reversed(entries):
            if type_str is not None:
                pending_type = type_str
            if pending_type is None or not name.isidentifier() or name.startswith("_"):
                continue
            params.append({"name": name, "type": pending_type, "description": ""})
        params.reverse()
        return params

    def _strip_function_body(self, tail: str) -> str:
        """Return the result-type text in ``tail``, dropping body and comment.

        ``tail`` is whatever follows the parameter list.  The first ``{``
        outside brackets that does not belong to an ``interface``/``struct``
        literal is taken as the start of the function body.
        """
        tail = tail.split("//", 1)[0]
        depth = 0
        index = 0
        while index < len(tail):
            char = tail[index]
            if char in "([":
                depth += 1
            elif char in ")]":
                depth -= 1
            elif char == "{":
                before = tail[:index].rstrip()
                if depth == 0 and not before.endswith(("interface", "struct")):
                    return before.strip()
                # Brace belongs to a type literal: jump to its closing brace.
                closing = self._find_closing(tail, index)
                if closing == -1:
                    break
                index = closing
            index += 1
        return tail.strip()

    def _extract_returns_from_signature(self, signature: str) -> Optional[Dict[str, str]]:
        """Extract the return type from a function signature.

        Returns:
            ``{"type", "description"}`` or ``None`` when the function
            declares no result or the signature cannot be parsed.
        """
        split = self._split_signature(signature)
        if split is None:
            return None
        return_type = self._strip_function_body(split[1])
        if not return_type:
            return None
        return {"type": return_type, "description": ""}

    def _extract_types(self, content: str) -> List[Dict[str, Any]]:
        """Extract struct and interface declarations with their doc comments.

        ``line_number`` is the 1-based line of the ``type`` keyword and
        ``description`` is the ``//`` comment directly above it.  ``fields``
        is always an empty list.
        """
        types = []
        lines = content.split("\n")
        for match in self._TYPE_DECL_RE.finditer(content):
            # The pattern starts with horizontal whitespace only, so the
            # match begins on the same line as the ``type`` keyword.
            line_index = content.count("\n", 0, match.start())
            types.append({
                "name": match.group(1),
                "description": "\n".join(self._get_leading_comment(lines, line_index)),
                "fields": [],
                "line_number": line_index + 1,
            })
        return types

    def _collect_indented_doc(self, lines: List[str], start: int) -> List[str]:
        """Collect the four-space-indented doc text that begins at ``start``.

        Blank lines are kept as paragraph breaks; collection stops at the
        first non-blank line that is not indented by four spaces.
        """
        collected = []
        for index in range(start, len(lines)):
            line = lines[index]
            if line.strip() and not line.startswith("    "):
                break
            collected.append(line.strip())
        text = "\n".join(collected).strip()
        return text.split("\n") if text else []

    def _parse_go_doc_output(self, output: str) -> Dict[str, Any]:
        """Parse the text printed by ``go doc -all``.

        The package description is the text between the ``package`` line and
        the first section header.  Each unindented ``func`` or ``type`` line
        after that becomes an entry whose description is the indented text
        following it.  ``line_number`` values refer to lines of ``output``,
        not to Go source files.
        """
        result = self._empty_result("")
        lines = output.split("\n")
        index = next(
            (i + 1 for i, line in enumerate(lines) if line.startswith("package ")),
            0,
        )
        summary = []
        while index < len(lines) and lines[index].strip() not in self._GO_DOC_SECTIONS:
            summary.append(lines[index].rstrip())
            index += 1
        package_doc = "\n".join(summary).strip()
        if package_doc:
            result["package_docstring"] = package_doc
            result["description"] = package_doc
            result["title"] = self._extract_title(package_doc)
        for position in range(index, len(lines)):
            line = lines[position]
            if line.startswith("func "):
                func_doc = self._build_function_doc(
                    self._extract_func_name(line),
                    line.strip(),
                    self._collect_indented_doc(lines, position + 1),
                    lines,
                    position,
                )
                if func_doc:
                    result["functions"].append(func_doc)
            elif line.startswith("type "):
                name_match = re.match(r'type\s+(\w+)', line)
                if not name_match:
                    continue
                end = position
                if line.rstrip().endswith("{"):
                    # Skip the printed body; its closing brace is unindented.
                    end = position + 1
                    while end < len(lines) and not lines[end].startswith("}"):
                        end += 1
                result["types"].append({
                    "name": name_match.group(1),
                    "description": "\n".join(self._collect_indented_doc(lines, end + 1)),
                    "fields": [],
                    "line_number": position + 1,
                })
        return result

    def parse_go_doc(self, package_path: str) -> Dict[str, Any]:
        """Parse Go documentation using the ``go doc`` command.

        Args:
            package_path: Package argument passed to ``go doc -all``.

        Returns:
            The documentation dictionary built from the command output.

        Raises:
            ValueError: If ``go`` is not installed, the command exceeds the
                30 second timeout, or it exits with a non-zero status.
        """
        try:
            result = subprocess.run(
                ["go", "doc", "-all", package_path],
                capture_output=True,
                text=True,
                timeout=30
            )
        except subprocess.TimeoutExpired as e:
            raise ValueError("go doc command timed out") from e
        except FileNotFoundError as e:
            raise ValueError("go command not found - ensure Go is installed") from e
        if result.returncode != 0:
            raise ValueError(f"go doc failed: {result.stderr}")
        return self._parse_go_doc_output(result.stdout)
 def parse_go_file(file_path: Path) -> Dict[str, Any]:
    """Extract documentation from the Go file at ``file_path``.

    Convenience wrapper that creates a ``GoDocstringParser`` and returns
    the result of its ``parse_file`` method.
    """
    return GoDocstringParser().parse_file(file_path)