code-doc-cli/.code_doc_cli/parsers/go_parser.py

"""Go parser using regex patterns."""

import re
from typing import Optional, List
from .base import Parser, DocElement, ElementType, Parameter


class GoParser(Parser):
    """Parser for Go source files."""

    EXTENSIONS = [".go"]

    def __init__(self, file_path: str) -> None:
        """Create a parser for ``file_path`` by delegating to the base ``Parser``."""
        super().__init__(file_path)

    def get_language_name(self) -> str:
        return "go"

    @classmethod
    def supports_file(cls, file_path: str) -> bool:
        ext = cls._get_extension(file_path)
        return ext in cls.EXTENSIONS

    @staticmethod
    def _get_extension(file_path: str) -> str:
        import os
        return os.path.splitext(file_path)[1].lower()

    def parse(self) -> list[DocElement]:
        """Parse Go file and extract documentation elements."""
        self.content = self._read_content()
        self.elements = []

        self._parse_package_docstring()
        self._parse_functions()
        self._parse_types()
        self._parse_constants()
        self._parse_variables()

        return self.elements

    def _parse_package_docstring(self) -> None:
        """Parse package documentation."""
        lines = self.content.split("\n")
        package_name = ""
        docstring_lines = []

        for i, line in enumerate(lines):
            stripped = line.strip()

            if stripped.startswith("package "):
                package_name = stripped.split()[1]

                for j in range(i - 1, -1, -1):
                    prev_line = lines[j].strip()
                    if prev_line.startswith("//") and not prev_line.startswith("///"):
                        comment = prev_line[2:].strip()
                        if comment.startswith(" "):
                            comment = comment[1:]
                        docstring_lines.insert(0, comment)
                    else:
                        break

                if docstring_lines:
                    docstring = " ".join(docstring_lines)
                    elem = DocElement(
                        name=package_name,
                        element_type=ElementType.MODULE,
                        description=docstring,
                        full_docstring=docstring,
                        source_file=self.file_path,
                    )
                    self.elements.append(elem)
                break

    def _parse_functions(self) -> None:
        """Parse top-level function and method declarations.

        A declaration must start in column 0 with ``func``, optionally followed
        by a receiver, then the name, an optional type-parameter list and the
        parameter list. The parameter list is delimited by matching
        parentheses so that func-typed parameters are kept intact, and the
        text between the closing parenthesis and the opening brace of the body
        is reported as the return type.

        One ``FUNCTION`` element per declaration is appended to
        ``self.elements``. Declarations without a body are skipped.
        """
        header_re = re.compile(
            r"^func\s+"
            r"(?:\([^)]*\)\s*)?"  # optional method receiver
            r"(\w+)\s*"  # function name
            r"(?:\[(?:[^\[\]]|\[[^\[\]]*\])*\]\s*)?"  # optional type parameters
            r"\(",  # start of the parameter list
            re.MULTILINE,
        )
        content = self.content

        for match in header_re.finditer(content):
            name = match.group(1)

            # Locate the parenthesis that closes the parameter list.
            open_idx = match.end() - 1
            depth = 0
            close_idx = -1
            for idx in range(open_idx, len(content)):
                char = content[idx]
                if char == "(":
                    depth += 1
                elif char == ")":
                    depth -= 1
                    if depth == 0:
                        close_idx = idx
                        break
            if close_idx == -1:
                continue

            # The body starts at the next "{"; an empty "{}" that directly
            # follows "interface" or "struct" belongs to the result type.
            brace_idx = content.find("{", close_idx)
            while (
                brace_idx != -1
                and content.startswith("{}", brace_idx)
                and content[close_idx + 1:brace_idx].rstrip().endswith(("interface", "struct"))
            ):
                brace_idx = content.find("{", brace_idx + 2)
            if brace_idx == -1:
                continue

            params_str = content[open_idx + 1:close_idx]
            return_type = content[close_idx + 1:brace_idx].strip()

            # A bodiless declaration would otherwise swallow the next
            # top-level declaration while searching for "{".
            if re.search(r"\n\s*(?:func|type|var|const|import)\b", return_type):
                continue

            params = self._parse_go_params(params_str)
            docstring = self._find_godoc_before(match.start())

            elem = DocElement(
                name=name,
                element_type=ElementType.FUNCTION,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                parameters=params,
                return_type=self._clean_type(return_type) if return_type else None,
                return_description=self._extract_godoc_tag(docstring, "return"),
                raises=self._extract_godoc_panics(docstring),
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(name),
            )
            self.elements.append(elem)

    def _parse_types(self) -> None:
        """Parse top-level struct and interface type declarations.

        All structs are collected first, then all interfaces. The body of each
        declaration is delimited by matching braces, so nested braces such as
        an ``interface{}`` field type do not cut it short, and an optional
        type-parameter list after the name is accepted. Declarations whose
        closing brace cannot be found are skipped.
        """
        kinds = (
            ("struct", ElementType.STRUCT),
            ("interface", ElementType.INTERFACE),
        )

        for keyword, elem_type in kinds:
            header_re = re.compile(
                rf"^type\s+(\w+)(?:\s*\[[^\n]*?\])?\s+{keyword}\s*\{{",
                re.MULTILINE,
            )
            for match in header_re.finditer(self.content):
                body = self._extract_braced_body(match.end() - 1)
                if body is None:
                    continue

                name = match.group(1)
                docstring = self._find_godoc_before(match.start())

                elem = DocElement(
                    name=name,
                    element_type=elem_type,
                    description=self._extract_summary(docstring),
                    full_docstring=docstring,
                    source_file=self.file_path,
                    line_number=self._get_line_number(match.start()),
                    visibility=self._get_visibility(name),
                )

                if elem_type == ElementType.STRUCT:
                    elem.attributes = self._parse_struct_fields(body)
                elif elem_type == ElementType.INTERFACE:
                    elem.attributes = self._parse_interface_methods(body)

                self.elements.append(elem)

    def _extract_braced_body(self, open_index: int) -> Optional[str]:
        """Return the text between the ``{`` at ``open_index`` and its matching ``}``.

        Braces inside ``//`` comments and back-quoted strings (struct tags) are
        ignored. Returns ``None`` when no matching brace exists.
        """
        text = self.content
        depth = 0
        index = open_index
        while index < len(text):
            if text.startswith("//", index):
                newline = text.find("\n", index)
                if newline == -1:
                    break
                index = newline
                continue
            char = text[index]
            if char == "`":
                closing = text.find("`", index + 1)
                if closing == -1:
                    break
                index = closing + 1
                continue
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return text[open_index + 1:index]
            index += 1
        return None

    def _parse_struct_fields(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]:
        """Parse struct fields."""
        fields = []
        for line in body.split("\n"):
            line = line.strip()
            if not line or line.startswith("//"):
                continue

            match = re.match(r"(\w+)\s+([^\s;]+)(?:\s*`([^`]+)`)?", line)
            if match:
                field_name = match.group(1)
                field_type = match.group(2).strip()
                tags = match.group(3)
                desc = f"Tag: {tags}" if tags else None
                fields.append((field_name, field_type, desc))

        return fields

    def _parse_interface_methods(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]:
        """Parse interface methods."""
        methods = []
        for line in body.split("\n"):
            line = line.strip()
            if not line or line.startswith("//"):
                continue

            match = re.match(r"(\w+)\s*\(([^)]*)\)\s*(?:\[[^\]]*\])?\s*([^{]+)", line)
            if match:
                method_name = match.group(1)
                params = match.group(2)
                type_param = match.group(3)
                returns = match.group(4)
                signature = f"({params}) {type_param if type_param else ''} {returns}".strip()
                methods.append((method_name, signature, None))

        return methods

    def _parse_constants(self) -> None:
        """Parse constant declarations."""
        const_pattern = r"^const\s+(?:\(([^)]*)\)|(\w+)\s*=\s*(.+?)(?:\s*$|\s*,\s*))"
        for match in re.finditer(const_pattern, self.content, re.MULTILINE):
            groups = match.groups()
            if groups[0]:
                const_group = groups[0]
                for line in const_group.split("\n"):
                    line = line.strip()
                    if not line or line.startswith("//"):
                        continue
                    const_match = re.match(r"(\w+)\s*=\s*(.+)", line)
                    if const_match:
                        self._create_const_element(const_match.group(1), const_match.group(2))
            else:
                self._create_const_element(groups[1], groups[2])

    def _create_const_element(self, name: str, value: str) -> None:
        """Append a ``CONSTANT`` element for ``name`` to ``self.elements``.

        The declaration is located in ``self.content`` by trying, in order:
        ``const NAME`` at the start of a line, ``NAME`` at the start of an
        indented line (a member of a ``const ( ... )`` group), and finally
        ``NAME`` as a whole word anywhere. When nothing is found the element
        gets an empty docstring and line number 0.

        Args:
            name: Name of the constant.
            value: Source text of the constant's value; accepted for interface
                compatibility but not stored on the element.
        """
        escaped = re.escape(name)
        candidates = (
            rf"^const[ \t]+{escaped}\b",
            rf"^[ \t]+{escaped}\b",
            rf"\b{escaped}\b",
        )
        pos = -1
        for candidate in candidates:
            found = re.search(candidate, self.content, re.MULTILINE)
            if found:
                pos = found.start()
                break

        docstring = self._find_godoc_before(pos) if pos != -1 else ""
        elem = DocElement(
            name=name,
            element_type=ElementType.CONSTANT,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            source_file=self.file_path,
            line_number=self._get_line_number(pos) if pos != -1 else 0,
            visibility=self._get_visibility(name),
        )
        self.elements.append(elem)

    def _parse_variables(self) -> None:
        """Parse variable declarations."""
        var_pattern = r"^var\s+(?:\(([^)]*)\)|(\w+)\s*=\s*(.+?)(?:\s*$|\s*,\s*))"
        for match in re.finditer(var_pattern, self.content, re.MULTILINE):
            groups = match.groups()
            if groups[0]:
                var_group = groups[0]
                for line in var_group.split("\n"):
                    line = line.strip()
                    if not line or line.startswith("//"):
                        continue
                    var_match = re.match(r"(\w+)(?:\s+\w+)?(?:\s*=\s*(.+))?", line)
                    if var_match:
                        self._create_var_element(var_match.group(1), var_match.group(2))
            else:
                self._create_var_element(groups[1], groups[2])

    def _create_var_element(self, name: str, value: Optional[str]) -> None:
        """Append a ``VARIABLE`` element for ``name`` to ``self.elements``.

        The declaration is located in ``self.content`` by trying, in order:
        ``var NAME`` at the start of a line, ``NAME`` at the start of an
        indented line (a member of a ``var ( ... )`` group), and finally
        ``NAME`` as a whole word anywhere. When nothing is found the element
        gets an empty docstring and line number 0.

        Args:
            name: Name of the variable.
            value: Source text of the initialiser, if any; accepted for
                interface compatibility but not stored on the element.
        """
        escaped = re.escape(name)
        candidates = (
            rf"^var[ \t]+{escaped}\b",
            rf"^[ \t]+{escaped}\b",
            rf"\b{escaped}\b",
        )
        pos = -1
        for candidate in candidates:
            found = re.search(candidate, self.content, re.MULTILINE)
            if found:
                pos = found.start()
                break

        docstring = self._find_godoc_before(pos) if pos != -1 else ""
        elem = DocElement(
            name=name,
            element_type=ElementType.VARIABLE,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            source_file=self.file_path,
            line_number=self._get_line_number(pos) if pos != -1 else 0,
            visibility=self._get_visibility(name),
        )
        self.elements.append(elem)

    def _find_godoc_before(self, position: int) -> str:
        """Find GoDoc comment before a position."""
        search_text = self.content[:position]
        lines = search_text.split("\n")

        docstring_lines = []
        for line in reversed(lines):
            stripped = line.strip()
            if stripped.startswith("//"):
                comment = stripped[2:].strip()
                if comment.startswith(" "):
                    comment = comment[1:]
                docstring_lines.insert(0, comment)
            elif docstring_lines:
                break

        return "\n".join(docstring_lines)

    def _extract_summary(self, docstring: str) -> str:
        """Extract first line as summary."""
        if not docstring:
            return ""
        lines = docstring.strip().split("\n")
        return lines[0].strip() if lines else ""

    def _extract_godoc_tag(self, docstring: str, tag: str) -> Optional[str]:
        """Extract value of a specific GoDoc tag."""
        pattern = rf"{tag}\s+(.+?)(?:\n\n|\Z)"
        match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1).strip()
        return None

    def _extract_godoc_panics(self, docstring: str) -> list[tuple[str, str]]:
        """Extract panic information from GoDoc."""
        panics = []
        pattern = r"panics?\s+(.+?)(?:\n\n|\Z)"
        for match in re.finditer(pattern, docstring, re.DOTALL | re.IGNORECASE):
            content = match.group(1).strip()
            if ":" in content:
                parts = content.split(":", 1)
                panics.append((parts[0].strip(), parts[1].strip()))
            else:
                panics.append(("panic", content))
        return panics

    def _parse_go_params(self, params_str: str) -> list[Parameter]:
        """Parse the text of a Go parameter list into ``Parameter`` objects.

        The list is split with ``_split_go_params``. An entry of the form
        ``name type`` is split at its first whitespace, so types containing
        spaces stay whole. An entry that is only a name (the ``a`` in
        ``a, b int``) takes the type of the next typed entry. Unnamed
        parameter lists (types only) produce no parameters.

        Args:
            params_str: Text between the parentheses of the parameter list.

        Returns:
            One ``Parameter`` per named parameter, in declaration order.
        """
        params = []
        if not params_str or not params_str.strip():
            return params

        # A first token that is one of these starts a type, not a name.
        type_keywords = {"chan", "func", "map", "struct", "interface"}
        pending = []  # names still waiting for their shared type

        for group in self._split_go_params(params_str):
            pieces = group.split(None, 1)
            if not pieces:
                continue
            if len(pieces) == 1:
                pending.append(pieces[0])
                continue

            name, param_type = pieces
            if not name.isidentifier() or name in type_keywords:
                # Unnamed parameter whose type contains whitespace.
                pending.clear()
                continue

            type_hint = self._clean_type(param_type)
            for param_name in (*pending, name):
                if param_name.isidentifier():
                    params.append(Parameter(name=param_name, type_hint=type_hint))
            pending.clear()

        return params

    def _split_go_params(self, params_str: str) -> list[str]:
        """Split parameter groups respecting nested types."""
        parts = []
        current = ""
        depth = 0
        for char in params_str:
            if char == "[":
                depth += 1
                current += char
            elif char == "]":
                depth -= 1
                current += char
            elif char == "," and depth == 0:
                parts.append(current)
                current = ""
            else:
                current += char
        if current.strip():
            parts.append(current)
        return parts

    def _clean_type(self, type_str: Optional[str]) -> Optional[str]:
        """Clean type string."""
        if not type_str:
            return None
        return type_str.strip()

    def _get_visibility(self, name: str) -> str:
        """Determine visibility based on name."""
        if name and name[0].isupper():
            return "public"
        return "private"

    def _get_line_number(self, position: int) -> int:
        """Get line number from position."""
        return self.content[:position].count("\n") + 1