# doc2man/doc2man/parsers/javascript.py

"""JavaScript docstring parser for Doc2Man."""

import re
from pathlib import Path
from typing import Any, Dict, List, Optional


class JavaScriptDocstringParser:
    """Parser for JSDoc comments from JavaScript/TypeScript files.

    The parser is line/regex based (it does not build a JavaScript AST). It
    recognises ``/** ... */`` blocks, attaches the block that ends directly
    above a function or class declaration to that declaration, and breaks
    the block into a description, ``@param``/``@returns``/``@example``
    entries and a mapping of any remaining tags.
    """

    JSDOC_PATTERN = re.compile(
        r'/\*\*([^*]*(?:\*(?!/)[^*]*)*)\*/',
        re.DOTALL
    )

    # Kept unchanged for backward compatibility with code that may reference
    # it; tag lines are now split with _TAG_LINE_PATTERN instead.
    TAG_PATTERN = re.compile(
        r'@(\w+)(?:\s*(\{[^\{\}]+\}))?(?:\s*(\S+))?(?:\s+(.*))?',
        re.DOTALL
    )

    # "@tag rest of the line" -> (tag, rest of the line).
    _TAG_LINE_PATTERN = re.compile(r'@(\w+)\s*(.*)')

    # The "- " separator JSDoc allows between a name/type and its description.
    # Requiring whitespace keeps descriptions such as "-1 on failure" intact.
    _LEADING_DASH_PATTERN = re.compile(r'^-(?:\s+|$)')

    # Declaration patterns, tried in order; group(1) is the declared name.
    _FUNCTION_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'export\s+default\s+(?:async\s+)?function\s+(\w+)',
            r'export\s+async\s+function\s+(\w+)',
            r'export\s+function\s+(\w+)',
            r'export\s+const\s+(\w+)\s*=',
            r'export\s+let\s+(\w+)\s*=',
            r'async\s+function\s+(\w+)',
            r'^function\s+(\w+)',
            r'const\s+(\w+)\s*=\s*function',
            r'const\s+(\w+)\s*=\s*async',
            r'let\s+(\w+)\s*=\s*function',
            r'let\s+(\w+)\s*=\s*async',
            r'const\s+(\w+)\s*=\s*\(([^)]*)\)\s*=>',
            r'let\s+(\w+)\s*=\s*\(([^)]*)\)\s*=>',
        )
    )

    # The lookahead stops "export default class extends Base" from being
    # reported as a class named "extends".
    _CLASS_PATTERNS = (
        re.compile(
            r'export\s+(?:default\s+)?(?:abstract\s+)?class\s+(?!extends\b)(\w+)'
        ),
        re.compile(r'^(?:abstract\s+)?class\s+(?!extends\b)(\w+)'),
    )

    def __init__(self):
        """Initialize the parser."""

    def parse_file(self, file_path: Path) -> Dict[str, Any]:
        """Parse a JavaScript/TypeScript file and extract documentation.

        The file is decoded as UTF-8, falling back to Latin-1 when the bytes
        are not valid UTF-8.

        Args:
            file_path: Path of the file to read.

        Returns:
            The dictionary produced by :meth:`parse_content`.

        Raises:
            ValueError: If reading or parsing fails; the original exception
                is attached as ``__cause__``.
        """
        try:
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except UnicodeDecodeError:
                with open(file_path, "r", encoding="latin-1") as f:
                    content = f.read()
            return self.parse_content(content, str(file_path))
        except Exception as e:
            raise ValueError(f"Error reading file {file_path}: {e}") from e

    def parse_content(self, content: str, file_path: str = "") -> Dict[str, Any]:
        """Parse JavaScript/TypeScript content and extract documentation.

        Args:
            content: Source text to analyse.
            file_path: Value stored verbatim under ``"file_path"``.

        Returns:
            A dict with the keys ``title``, ``description``,
            ``module_docstring``, ``functions``, ``classes``, ``file_path``
            and ``language`` (always ``"javascript"``).
        """
        result = {
            "title": None,
            "description": None,
            "module_docstring": None,
            "functions": [],
            "classes": [],
            "file_path": file_path,
            "language": "javascript",
        }

        module_doc = self._extract_module_documentation(content)
        if module_doc:
            result["module_docstring"] = module_doc.get("description")
            result["description"] = module_doc.get("description")
            result["title"] = module_doc.get("title")

        result["functions"] = self._extract_functions(content)
        result["classes"] = self._extract_classes(content)

        return result

    def _extract_module_documentation(self, content: str) -> Optional[Dict[str, Any]]:
        """Extract module/file-level documentation.

        The first ``/**`` block that appears before any ``import``/``export``
        line is treated as the module documentation. The returned dict is the
        parsed comment plus a ``"title"`` entry: the ``@module`` tag value if
        present, otherwise the first description line, otherwise ``None``.
        """
        for index, line in enumerate(content.split("\n")):
            text = line.strip()
            if text.startswith(("import ", "export ")):
                break
            if text.startswith("/**"):
                comment = self._extract_jsdoc_block(content, index)
                if not comment:
                    break
                doc = self._parse_jsdoc_comment(comment)
                description = doc["description"]
                first_line = description.split("\n", 1)[0] if description else ""
                doc["title"] = doc["tags"].get("module") or first_line or None
                return doc

        return None

    def _extract_jsdoc_block(self, content: str, start: int) -> Optional[str]:
        """Extract the JSDoc block that opens on line index ``start``.

        Args:
            content: Full source text.
            start: Zero-based index of the line expected to begin with ``/**``.

        Returns:
            The stripped text between ``/**`` and ``*/``, or ``None`` when
            ``start`` is out of range, the line does not open a JSDoc block,
            or the block is never closed.
        """
        lines = content.split("\n")
        if not 0 <= start < len(lines):
            return None
        if not lines[start].strip().startswith("/**"):
            return None

        # Begin at the opening line itself so "/** text */" on a single line
        # ends there instead of running on to a later comment's "*/".
        end = start
        while end < len(lines) and "*/" not in lines[end]:
            end += 1

        match = self.JSDOC_PATTERN.search("\n".join(lines[start:end + 1]))
        if match:
            return match.group(1).strip()

        return None

    def _find_preceding_jsdoc(self, lines: List[str], decl_index: int) -> Optional[str]:
        """Return the JSDoc text that ends directly above a declaration.

        Blank lines between the comment and the declaration are allowed.
        ``None`` is returned when the nearest non-blank line above is not the
        end of a ``/** ... */`` block.
        """
        end = decl_index - 1
        while end >= 0 and not lines[end].strip():
            end -= 1
        if end < 0 or not lines[end].rstrip().endswith("*/"):
            return None

        start = end
        while start >= 0:
            text = lines[start].strip()
            if text.startswith("/*"):
                break
            # Another comment's terminator means the opener we are looking
            # for is not at the start of a line; give up rather than guess.
            if start != end and "*/" in text:
                return None
            start -= 1
        else:
            return None

        # Plain "/* ... */" comments are not documentation.
        if not lines[start].strip().startswith("/**"):
            return None

        match = self.JSDOC_PATTERN.search("\n".join(lines[start:end + 1]))
        if match:
            return match.group(1).strip()
        return None

    @staticmethod
    def _iter_code_lines(lines: List[str]) -> List[Any]:
        """Return ``(index, stripped_text)`` pairs for lines that hold code.

        Lines inside block comments and ``//`` comment lines are left out so
        that code quoted in documentation (for example under ``@example``) is
        not mistaken for a real declaration. Only block comments that open at
        the start of a line are tracked.
        """
        code_lines = []
        in_block_comment = False
        for index, raw in enumerate(lines):
            text = raw.strip()
            if in_block_comment:
                closing = text.find("*/")
                if closing == -1:
                    continue
                in_block_comment = False
                text = text[closing + 2:].strip()
            while text.startswith("/*"):
                # Search from offset 2 so the "*" of "/*" cannot double as
                # the "*" of the terminator ("/*/" does not close a comment).
                closing = text.find("*/", 2)
                if closing == -1:
                    in_block_comment = True
                    text = ""
                    break
                text = text[closing + 2:].strip()
            if not text or text.startswith("//"):
                continue
            code_lines.append((index, text))
        return code_lines

    def _parse_jsdoc_comment(self, comment: str) -> Dict[str, Any]:
        """Parse a JSDoc comment and extract its components.

        Args:
            comment: Comment text without the ``/**`` and ``*/`` delimiters;
                a leading ``*`` on each line is removed.

        Returns:
            A dict with ``description`` (str), ``params`` (list of dicts),
            ``returns`` (dict or ``None``), ``examples`` (list of str) and
            ``tags`` (mapping of every other tag name to its text; a tag
            without text maps to ``""``).
        """
        result = {
            "description": "",
            "params": [],
            "returns": None,
            "examples": [],
            "tags": {},
        }

        description_lines = []
        current_tag = None
        current_tag_content = []

        for line in comment.split("\n"):
            text = line.strip()
            if text.startswith("*"):
                text = text[1:].strip()

            if text.startswith("@"):
                if current_tag is not None:
                    self._add_tag_content(result, current_tag, current_tag_content)
                match = self._TAG_LINE_PATTERN.match(text)
                if match:
                    current_tag = match.group(1)
                    # Keep the raw remainder; the per-tag parsers split out
                    # the type, name and description themselves.
                    remainder = match.group(2).strip()
                    current_tag_content = [remainder] if remainder else []
                else:
                    current_tag = None
                    current_tag_content = []
            elif current_tag is not None:
                # Continuation line of the tag currently being collected.
                current_tag_content.append(text)
            elif text:
                description_lines.append(text)

        if current_tag is not None:
            self._add_tag_content(result, current_tag, current_tag_content)

        result["description"] = "\n".join(description_lines).strip()

        return result

    def _add_tag_content(self, result: Dict, tag: str, content: List[str]) -> None:
        """Add parsed tag content to the result.

        Args:
            result: Dict being built by :meth:`_parse_jsdoc_comment`; it is
                modified in place.
            tag: Tag name without the leading ``@``.
            content: Lines of text that belong to the tag.
        """
        combined = " ".join(part for part in content if part).strip()

        if tag in ("param", "arg", "argument"):
            param = self._parse_param_tag(combined)
            if param:
                result["params"].append(param)
        elif tag in ("returns", "return"):
            ret = self._parse_returns_tag(combined)
            if ret:
                result["returns"] = ret
        elif tag == "example":
            # Examples are code, so line breaks are preserved.
            example = "\n".join(content).strip()
            if example:
                result["examples"].append(example)
        elif tag == "examples":
            # Each non-empty line is treated as a separate example.
            result["examples"].extend(
                line.strip() for line in content if line.strip()
            )
        else:
            result["tags"][tag] = combined

    @staticmethod
    def _split_leading_type(text: str) -> List[str]:
        """Split a leading ``{type}`` expression off ``text``.

        Braces are matched by depth so nested record types such as
        ``{{a: number}}`` stay in one piece.

        Returns:
            ``[type, rest]``; ``type`` is ``""`` when ``text`` does not begin
            with a balanced brace expression.
        """
        text = text.lstrip()
        if not text.startswith("{"):
            return ["", text]

        depth = 0
        for pos, char in enumerate(text):
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return [text[1:pos].strip(), text[pos + 1:].lstrip()]
        # Unbalanced braces: treat the whole text as untyped.
        return ["", text]

    def _parse_param_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse the text of a @param tag.

        Args:
            content: Text following ``@param``, e.g.
                ``"{number} count - How many."``.

        Returns:
            A dict with ``name``, ``type`` and ``description``, or ``None``
            when no parameter name is present.
        """
        if not content:
            return None

        type_hint, rest = self._split_leading_type(content)
        parts = rest.split(None, 1)
        if not parts:
            return None

        description = parts[1] if len(parts) > 1 else ""
        description = self._LEADING_DASH_PATTERN.sub("", description, count=1)

        return {
            "name": parts[0],
            "type": type_hint,
            "description": description.strip(),
        }

    def _parse_returns_tag(self, content: str) -> Optional[Dict[str, str]]:
        """Parse the text of a @returns tag.

        Args:
            content: Text following ``@returns``/``@return``.

        Returns:
            A dict with ``type`` and ``description``, or ``None`` when
            ``content`` is empty.
        """
        if not content:
            return None

        type_hint, rest = self._split_leading_type(content)
        description = self._LEADING_DASH_PATTERN.sub("", rest.strip(), count=1)

        return {"type": type_hint, "description": description.strip()}

    def _extract_functions(self, content: str) -> List[Dict[str, Any]]:
        """Extract function documentation from source.

        Every code line is tested against the declaration patterns; names
        that start with ``_`` are skipped. The JSDoc block directly above a
        declaration, if any, fills in the description, arguments, return
        value and examples.

        Returns:
            One dict per declaration with the keys ``name``, ``description``,
            ``args``, ``returns``, ``examples`` and ``line_number``
            (1-based).
        """
        functions = []
        lines = content.split("\n")

        for index, text in self._iter_code_lines(lines):
            for pattern in self._FUNCTION_PATTERNS:
                match = pattern.search(text)
                if not match:
                    continue
                func_name = match.group(1)
                if func_name.startswith("_"):
                    continue

                func_doc = {
                    "name": func_name,
                    "description": "",
                    "args": [],
                    "returns": None,
                    "examples": [],
                    "line_number": index + 1,
                }

                jsdoc = self._find_preceding_jsdoc(lines, index)
                if jsdoc:
                    parsed = self._parse_jsdoc_comment(jsdoc)
                    func_doc["description"] = parsed.get("description", "")
                    func_doc["args"] = parsed.get("params", [])
                    func_doc["returns"] = parsed.get("returns")
                    func_doc["examples"] = parsed.get("examples", [])

                functions.append(func_doc)
                break

        return functions

    def _extract_classes(self, content: str) -> List[Dict[str, Any]]:
        """Extract class documentation from source.

        Returns:
            One dict per class declaration with the keys ``name``,
            ``description``, ``methods`` (always an empty list; methods are
            not extracted) and ``line_number`` (1-based).
        """
        classes = []
        lines = content.split("\n")

        for index, text in self._iter_code_lines(lines):
            match = None
            for pattern in self._CLASS_PATTERNS:
                match = pattern.search(text)
                if match:
                    break
            if not match:
                continue

            class_doc = {
                "name": match.group(1),
                "description": "",
                "methods": [],
                "line_number": index + 1,
            }

            jsdoc = self._find_preceding_jsdoc(lines, index)
            if jsdoc:
                parsed = self._parse_jsdoc_comment(jsdoc)
                class_doc["description"] = parsed.get("description", "")

            classes.append(class_doc)

        return classes


def parse_javascript_file(file_path: Path) -> Dict[str, Any]:
    """Convenience wrapper: parse one JavaScript/TypeScript file.

    Creates a fresh ``JavaScriptDocstringParser`` and returns the result of
    its ``parse_file`` method for ``file_path``.
    """
    return JavaScriptDocstringParser().parse_file(file_path)