"""Go parser using regex patterns."""

import os
import re
from typing import Callable, Iterator, List, Optional

from .base import Parser, DocElement, ElementType, Parameter


class GoParser(Parser):
    """Parser for Go source files.

    The parser is heuristic: declarations are located with regular
    expressions anchored at the start of a line (so only top-level
    declarations are seen) and bracketed regions are delimited with a small
    scanner that skips Go string, rune and comment syntax.

    Known limitations: ``/* ... */`` doc comments, grouped ``type ( ... )``
    declarations and named types other than structs/interfaces are not
    extracted.
    """

    EXTENSIONS = [".go"]

    _BRACKET_PAIRS = {"(": ")", "[": "]", "{": "}"}

    # Tool directives such as ``//go:generate`` or ``//nolint:foo`` are not
    # part of a doc comment and must not leak into extracted documentation.
    _DIRECTIVE_RE = re.compile(r"//[a-z0-9]+:[a-z0-9]")

    # ``func``, optional receiver, name, optional type-parameter list and the
    # opening parenthesis of the parameter list (the match ends on that "(").
    _FUNC_HEAD_RE = re.compile(
        r"^func\s+(?:\([^)]*\)\s*)?(\w+)\s*(?:\[[^\]]*\])?\s*\(",
        re.MULTILINE,
    )

    # Result text ending in ``interface``/``struct``: a following "{}" belongs
    # to the type literal, not to the function body.
    _EMPTY_TYPE_RE = re.compile(r"\b(?:interface|struct)\s*$")

    # One or more comma-separated identifiers at the start of a declaration.
    _DECL_NAMES_RE = re.compile(r"[^\W\d]\w*(?:\s*,\s*[^\W\d]\w*)*")

    # Struct field: names, type, optional back-quoted tag, optional comment.
    _FIELD_RE = re.compile(
        r"([^\W\d]\w*(?:\s*,\s*[^\W\d]\w*)*)\s+(.+?)\s*"
        r"(?:`([^`]*)`)?\s*(?://.*)?$"
    )

    # Interface method name followed by the opening parenthesis.
    _METHOD_HEAD_RE = re.compile(r"([^\W\d]\w*)\s*\(")

    def __init__(self, file_path: str):
        super().__init__(file_path)

    def get_language_name(self) -> str:
        """Return the language identifier handled by this parser."""
        return "go"

    @classmethod
    def supports_file(cls, file_path: str) -> bool:
        """Return True when *file_path* has one of the supported extensions."""
        return cls._get_extension(file_path) in cls.EXTENSIONS

    @staticmethod
    def _get_extension(file_path: str) -> str:
        """Return the lower-cased extension of *file_path*, dot included."""
        return os.path.splitext(file_path)[1].lower()

    def parse(self) -> list[DocElement]:
        """Parse the Go file and return the extracted documentation elements.

        Elements are appended in this order: package, functions, types
        (structs, then interfaces), constants, variables.
        """
        self.content = self._read_content()
        self.elements = []

        self._parse_package_docstring()
        self._parse_functions()
        self._parse_types()
        self._parse_constants()
        self._parse_variables()

        return self.elements

    # ------------------------------------------------------------------
    # Declaration parsers
    # ------------------------------------------------------------------

    def _parse_package_docstring(self) -> None:
        """Record the package as a MODULE element when it has a doc comment.

        Only the first ``package`` clause is considered. The doc comment is
        the run of ``//`` lines directly above it; its lines are joined with
        single spaces. A package without such a comment yields no element.
        """
        lines = self.content.split("\n")

        for index, line in enumerate(lines):
            stripped = line.strip()
            if not stripped.startswith("package "):
                continue

            package_name = stripped.split()[1]

            doc_lines = []
            for previous in reversed(lines[:index]):
                comment_line = previous.strip()
                if (
                    not comment_line.startswith("//")
                    or comment_line.startswith("///")
                ):
                    break
                if self._DIRECTIVE_RE.match(comment_line):
                    continue
                doc_lines.append(comment_line[2:].strip())
            doc_lines.reverse()

            if doc_lines:
                docstring = " ".join(doc_lines)
                self.elements.append(
                    DocElement(
                        name=package_name,
                        element_type=ElementType.MODULE,
                        description=docstring,
                        full_docstring=docstring,
                        source_file=self.file_path,
                    )
                )
            # Only the first package clause matters.
            break

    def _parse_functions(self) -> None:
        """Parse top-level function and method declarations.

        The parameter list is delimited with the bracket scanner so nested
        parentheses (function-typed parameters) do not truncate it, and the
        text between the parameter list and the body brace is recorded as
        the return type. Declarations without a body are skipped.
        """
        for match in self._FUNC_HEAD_RE.finditer(self.content):
            params_open = match.end() - 1
            params_close = self._find_closing(self.content, params_open)
            if params_close == -1:
                continue

            body_open = self._find_body_brace(params_close + 1)
            if body_open == -1:
                continue

            name = match.group(1)
            params_str = self.content[params_open + 1:params_close]
            raw_return = self.content[params_close + 1:body_open]
            docstring = self._find_godoc_before(match.start())

            elem = DocElement(
                name=name,
                element_type=ElementType.FUNCTION,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                parameters=self._parse_go_params(params_str),
                return_type=self._clean_type(raw_return),
                return_description=self._extract_godoc_tag(docstring, "return"),
                raises=self._extract_godoc_panics(docstring),
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(name),
            )
            self.elements.append(elem)

    def _find_body_brace(self, start: int) -> int:
        """Return the index of a function's body ``{`` at or after *start*.

        An empty ``{}`` directly following ``interface`` or ``struct`` is
        part of the result type (``interface{}`` / ``struct{}``) and is
        skipped. Returns -1 when no brace is found.
        """
        cursor = start
        while True:
            brace = self.content.find("{", cursor)
            if brace == -1:
                return -1
            if self.content.startswith("{}", brace) and self._EMPTY_TYPE_RE.search(
                self.content[start:brace]
            ):
                cursor = brace + 2
                continue
            return brace

    def _parse_types(self) -> None:
        """Parse top-level struct and interface type definitions.

        All structs are emitted first, then all interfaces. Bodies are
        delimited by brace matching so nested ``struct{...}`` or
        ``interface{}`` members do not cut the body short.
        """
        kinds = (
            ("struct", ElementType.STRUCT),
            ("interface", ElementType.INTERFACE),
        )

        for keyword, elem_type in kinds:
            # Optional "[...]" after the name accepts generic type parameters.
            pattern = rf"^type\s+(\w+)(?:\s*\[[^\]]*\])?\s+{keyword}\s*\{{"
            for match in re.finditer(pattern, self.content, re.MULTILINE):
                body_open = match.end() - 1
                body_close = self._find_closing(self.content, body_open)
                if body_close == -1:
                    continue

                name = match.group(1)
                body = self.content[body_open + 1:body_close]
                docstring = self._find_godoc_before(match.start())

                elem = DocElement(
                    name=name,
                    element_type=elem_type,
                    description=self._extract_summary(docstring),
                    full_docstring=docstring,
                    source_file=self.file_path,
                    line_number=self._get_line_number(match.start()),
                    visibility=self._get_visibility(name),
                )

                if elem_type == ElementType.STRUCT:
                    elem.attributes = self._parse_struct_fields(body)
                elif elem_type == ElementType.INTERFACE:
                    elem.attributes = self._parse_interface_methods(body)

                self.elements.append(elem)

    def _parse_struct_fields(
        self, body: str
    ) -> list[tuple[str, Optional[str], Optional[str]]]:
        """Parse the named fields of a struct body.

        Returns one ``(name, type, description)`` tuple per field name; the
        description is ``"Tag: <tag>"`` when the field has a struct tag and
        ``None`` otherwise. Embedded (unnamed) fields and the members of
        nested struct literals are not reported.
        """
        fields = []
        for _, line in self._top_level_lines(body):
            match = self._FIELD_RE.match(line)
            if match is None:
                continue

            field_type = match.group(2).strip()
            tags = match.group(3)
            desc = f"Tag: {tags}" if tags else None
            # "a, b int" declares two fields sharing one type.
            for field_name in match.group(1).split(","):
                fields.append((field_name.strip(), field_type, desc))

        return fields

    def _parse_interface_methods(
        self, body: str
    ) -> list[tuple[str, Optional[str], Optional[str]]]:
        """Parse the methods declared in an interface body.

        Returns one ``(name, signature, None)`` tuple per method, where the
        signature is ``"(params) results"`` (results omitted when the method
        has none). Embedded interfaces, type-set terms and signatures that
        span several lines are skipped.
        """
        methods = []
        for _, line in self._top_level_lines(body):
            head = self._METHOD_HEAD_RE.match(line)
            if head is None:
                continue

            open_index = head.end() - 1
            close_index = self._find_closing(line, open_index)
            if close_index == -1:
                continue

            params = line[open_index + 1:close_index]
            # Drop a trailing line comment from the result list.
            returns = line[close_index + 1:].split("//", 1)[0].strip()
            signature = f"({params}) {returns}".strip()
            methods.append((head.group(1), signature, None))

        return methods

    def _parse_constants(self) -> None:
        """Parse top-level ``const`` declarations, single and grouped."""
        self._parse_value_declarations("const", self._create_const_element)

    def _create_const_element(
        self, name: str, value: str, position: Optional[int] = None
    ) -> None:
        """Append a CONSTANT element for *name*.

        Args:
            name: Constant identifier.
            value: Initialiser text; accepted for interface compatibility
                and not stored on the element.
            position: Offset of the declaration in ``self.content``. When
                omitted, the first ``"const <name>"`` occurrence is used.
        """
        if position is None:
            position = self.content.find(f"const {name}")
        found = position >= 0

        docstring = self._find_godoc_before(position) if found else ""
        elem = DocElement(
            name=name,
            element_type=ElementType.CONSTANT,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            source_file=self.file_path,
            line_number=self._get_line_number(position) if found else 0,
            visibility=self._get_visibility(name),
        )
        self.elements.append(elem)

    def _parse_variables(self) -> None:
        """Parse top-level ``var`` declarations, single and grouped."""
        self._parse_value_declarations("var", self._create_var_element)

    def _create_var_element(
        self, name: str, value: Optional[str], position: Optional[int] = None
    ) -> None:
        """Append a VARIABLE element for *name*.

        Args:
            name: Variable identifier.
            value: Initialiser text, if any; accepted for interface
                compatibility and not stored on the element.
            position: Offset of the declaration in ``self.content``. When
                omitted, ``"var <name>"`` and then the bare name are searched
                for, first occurrence winning.
        """
        if position is None:
            position = self.content.find(f"var {name}")
            if position == -1:
                position = self.content.find(name)
        found = position >= 0

        docstring = self._find_godoc_before(position) if found else ""
        elem = DocElement(
            name=name,
            element_type=ElementType.VARIABLE,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            source_file=self.file_path,
            line_number=self._get_line_number(position) if found else 0,
            visibility=self._get_visibility(name),
        )
        self.elements.append(elem)

    def _parse_value_declarations(
        self,
        keyword: str,
        create: Callable[[str, Optional[str], Optional[int]], None],
    ) -> None:
        """Find ``const``/``var`` declarations and pass each name to *create*.

        Handles both ``keyword name ...`` and grouped ``keyword ( ... )``
        forms, typed and untyped, with or without an initialiser (so iota
        continuation lines are included). Every name is reported with the
        offset of its own line so its doc comment and line number can be
        resolved. The blank identifier ``_`` is ignored.
        """
        pattern = rf"^{keyword}(?:\s*(\()|[ \t]+([^\W\d]\w*))"
        for match in re.finditer(pattern, self.content, re.MULTILINE):
            if match.group(1):
                open_index = match.start(1)
                close_index = self._find_closing(self.content, open_index)
                if close_index == -1:
                    continue
                body_start = open_index + 1
                group_body = self.content[body_start:close_index]
                declarations = [
                    (body_start + offset, line)
                    for offset, line in self._top_level_lines(group_body)
                ]
            else:
                line_end = self.content.find("\n", match.end())
                if line_end == -1:
                    line_end = len(self.content)
                declarations = [
                    (match.start(), self.content[match.start(2):line_end])
                ]

            for position, line in declarations:
                names, value = self._split_declaration(line)
                for name in names:
                    if name != "_":
                        create(name, value, position)

    @classmethod
    def _split_declaration(cls, line: str) -> tuple[list[str], Optional[str]]:
        """Split ``"a, b T = expr"`` into ``(["a", "b"], "expr")``.

        Returns ``([], None)`` when the line does not start with an
        identifier, and a ``None`` value when there is no ``=``.
        """
        names_match = cls._DECL_NAMES_RE.match(line)
        if names_match is None:
            return [], None

        names = [part.strip() for part in names_match.group(0).split(",")]
        _, has_value, value = line.partition("=")
        return names, (value.strip() or None) if has_value else None

    # ------------------------------------------------------------------
    # Text scanning helpers
    # ------------------------------------------------------------------

    @classmethod
    def _find_closing(cls, text: str, open_index: int) -> int:
        """Return the index of the bracket closing the one at *open_index*.

        Only brackets of the same kind as the opener are counted. Interpreted
        strings, raw strings, rune literals and comments are skipped so
        brackets inside them are ignored. Returns -1 when *open_index* is not
        an opening bracket or no matching bracket exists.
        """
        if not 0 <= open_index < len(text):
            return -1
        opener = text[open_index]
        closer = cls._BRACKET_PAIRS.get(opener)
        if closer is None:
            return -1

        depth = 0
        index = open_index
        length = len(text)
        while index < length:
            char = text[index]
            if char == '"' or char == "'":
                # Skip to the matching quote, honouring backslash escapes;
                # stop at end of line so an unterminated literal cannot
                # swallow the rest of the file.
                index += 1
                while index < length and text[index] not in (char, "\n"):
                    if text[index] == "\\":
                        index += 1
                    index += 1
            elif char == "`":
                end = text.find("`", index + 1)
                if end == -1:
                    return -1
                index = end
            elif text.startswith("//", index):
                end = text.find("\n", index)
                if end == -1:
                    return -1
                index = end
            elif text.startswith("/*", index):
                end = text.find("*/", index + 2)
                if end == -1:
                    return -1
                index = end + 1
            elif char == opener:
                depth += 1
            elif char == closer:
                depth -= 1
                if depth == 0:
                    return index
            index += 1
        return -1

    @staticmethod
    def _top_level_lines(body: str) -> Iterator[tuple[int, str]]:
        """Yield ``(offset, stripped_line)`` for lines at bracket depth zero.

        Blank lines and ``//`` comment lines are skipped, as are lines that
        start inside a bracketed region opened on an earlier line (multi-line
        literals, nested struct bodies). *offset* is the index within *body*
        of the line's first non-blank character.
        """
        depth = 0
        offset = 0
        for raw in body.split("\n"):
            line_offset = offset
            offset += len(raw) + 1

            starts_at_top = depth == 0
            # Ignore brackets that only appear in a trailing comment.
            code = raw.split("//", 1)[0]
            opened = sum(code.count(char) for char in "([{")
            closed = sum(code.count(char) for char in ")]}")
            depth = max(depth + opened - closed, 0)

            line = raw.strip()
            if starts_at_top and line and not line.startswith("//"):
                yield line_offset + len(raw) - len(raw.lstrip()), line

    def _find_godoc_before(self, position: int) -> str:
        """Return the doc comment immediately above the line at *position*.

        Only a run of ``//`` lines directly adjacent to the declaration
        counts; a blank or code line ends the comment. Directive lines such
        as ``//go:generate`` are skipped. Returns ``""`` when there is no
        such comment or *position* is not a valid positive offset.
        """
        if position <= 0:
            return ""

        line_start = self.content.rfind("\n", 0, position) + 1
        preceding = self.content[:line_start].split("\n")
        # The text after the last newline is always empty here.
        preceding.pop()

        collected = []
        for line in reversed(preceding):
            stripped = line.strip()
            if not stripped.startswith("//"):
                break
            if self._DIRECTIVE_RE.match(stripped):
                continue
            collected.append(stripped[2:].strip())
        collected.reverse()

        return "\n".join(collected).strip("\n")

    # ------------------------------------------------------------------
    # Docstring helpers
    # ------------------------------------------------------------------

    def _extract_summary(self, docstring: str) -> str:
        """Return the first line of *docstring*, or ``""`` when it is empty."""
        if not docstring:
            return ""
        return docstring.strip().split("\n")[0].strip()

    def _extract_godoc_tag(self, docstring: str, tag: str) -> Optional[str]:
        """Return the text following the word *tag* in *docstring*.

        The match is case-insensitive and runs up to the next blank line or
        the end of the docstring. Returns ``None`` when *tag* is absent.
        """
        pattern = rf"\b{re.escape(tag)}\s+(.+?)(?:\n\n|\Z)"
        match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1).strip()
        return None

    def _extract_godoc_panics(self, docstring: str) -> list[tuple[str, str]]:
        """Extract panic notes from *docstring*.

        Each occurrence of ``panic``/``panics`` followed by text yields a
        ``(label, description)`` tuple: the text is split on its first colon
        when it has one, otherwise the label is ``"panic"``.
        """
        panics = []
        pattern = r"\bpanics?\s+(.+?)(?:\n\n|\Z)"
        for match in re.finditer(pattern, docstring, re.DOTALL | re.IGNORECASE):
            content = match.group(1).strip()
            if ":" in content:
                label, description = content.split(":", 1)
                panics.append((label.strip(), description.strip()))
            else:
                panics.append(("panic", content))
        return panics

    # ------------------------------------------------------------------
    # Parameter helpers
    # ------------------------------------------------------------------

    def _parse_go_params(self, params_str: str) -> list[Parameter]:
        """Parse a Go parameter list into ``Parameter`` objects.

        Names that share a type (``a, b int``) each receive that type, and
        the type may contain spaces (``ch chan int``, ``args ...string``).
        Entries with no following typed name, i.e. unnamed parameters, are
        skipped because a bare token cannot be told apart from a name.
        """
        params = []
        if not params_str.strip():
            return params

        # Names seen so far that are still waiting for their shared type.
        pending = []
        for group in self._split_go_params(params_str):
            pieces = group.split(None, 1)
            if not pieces:
                continue
            if len(pieces) == 1:
                pending.append(pieces[0])
                continue

            name, raw_type = pieces
            param_type = self._clean_type(raw_type)
            for shared_name in pending:
                params.append(Parameter(name=shared_name, type_hint=param_type))
            pending = []
            params.append(Parameter(name=name, type_hint=param_type))

        return params

    def _split_go_params(self, params_str: str) -> list[str]:
        """Split *params_str* on commas that are not nested inside brackets.

        Parentheses, square brackets and braces all count as nesting, so
        ``m map[K]V`` and ``f func(a, b int)`` each stay in one piece. A
        trailing piece containing only whitespace is dropped.
        """
        parts = []
        current = []
        depth = 0
        for char in params_str:
            if char == "," and depth == 0:
                parts.append("".join(current))
                current = []
                continue
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
            current.append(char)

        tail = "".join(current)
        if tail.strip():
            parts.append(tail)
        return parts

    def _clean_type(self, type_str: Optional[str]) -> Optional[str]:
        """Return *type_str* stripped, or ``None`` when nothing remains."""
        if not type_str:
            return None
        return type_str.strip() or None

    def _get_visibility(self, name: str) -> str:
        """Return ``"public"`` for names starting upper-case, else ``"private"``."""
        if name and name[0].isupper():
            return "public"
        return "private"

    def _get_line_number(self, position: int) -> int:
        """Return the 1-based line number containing offset *position*."""
        return self.content[:position].count("\n") + 1