"""Go parser using regex patterns.""" import re from typing import Optional, List from .base import Parser, DocElement, ElementType, Parameter class GoParser(Parser): """Parser for Go source files.""" EXTENSIONS = [".go"] def __init__(self, file_path: str): super().__init__(file_path) def get_language_name(self) -> str: return "go" @classmethod def supports_file(cls, file_path: str) -> bool: ext = cls._get_extension(file_path) return ext in cls.EXTENSIONS @staticmethod def _get_extension(file_path: str) -> str: import os return os.path.splitext(file_path)[1].lower() def parse(self) -> list[DocElement]: """Parse Go file and extract documentation elements.""" self.content = self._read_content() self.elements = [] self._parse_package_docstring() self._parse_functions() self._parse_types() self._parse_constants() self._parse_variables() return self.elements def _parse_package_docstring(self) -> None: """Parse package documentation.""" lines = self.content.split("\n") package_name = "" docstring_lines = [] for i, line in enumerate(lines): stripped = line.strip() if stripped.startswith("package "): package_name = stripped.split()[1] for j in range(i - 1, -1, -1): prev_line = lines[j].strip() if prev_line.startswith("//") and not prev_line.startswith("///"): comment = prev_line[2:].strip() if comment.startswith(" "): comment = comment[1:] docstring_lines.insert(0, comment) else: break if docstring_lines: docstring = " ".join(docstring_lines) elem = DocElement( name=package_name, element_type=ElementType.MODULE, description=docstring, full_docstring=docstring, source_file=self.file_path, ) self.elements.append(elem) break def _parse_functions(self) -> None: """Parse function definitions.""" pattern = r"^func\s+(?:\([^)]*\)\s*)?(\w+)\s*\(([^)]*)\)\s*(?:\[[^\]]*\])?\s*(?:([^{]*))?\s*\{" for match in re.finditer(pattern, self.content, re.MULTILINE): groups = match.groups() name = groups[0] params_str = groups[1] if len(groups) > 1 else "" type_param = groups[2] if len(groups) > 2 else None return_type = groups[3] if len(groups) > 3 else None params = self._parse_go_params(params_str) docstring = self._find_godoc_before(match.start()) elem = DocElement( name=name, element_type=ElementType.FUNCTION, description=self._extract_summary(docstring), full_docstring=docstring, parameters=params, return_type=self._clean_type(return_type) if return_type else None, return_description=self._extract_godoc_tag(docstring, "return"), raises=self._extract_godoc_panics(docstring), source_file=self.file_path, line_number=self._get_line_number(match.start()), visibility=self._get_visibility(name), ) self.elements.append(elem) def _parse_types(self) -> None: """Parse type definitions (structs and interfaces).""" patterns = [ (r"^type\s+(\w+)\s+struct\s*\{([^}]*)\}", ElementType.STRUCT), (r"^type\s+(\w+)\s+interface\s*\{([^}]*)\}", ElementType.INTERFACE), ] for pattern, elem_type in patterns: for match in re.finditer(pattern, self.content, re.MULTILINE): name = match.group(1) body = match.group(2) docstring = self._find_godoc_before(match.start()) elem = DocElement( name=name, element_type=elem_type, description=self._extract_summary(docstring), full_docstring=docstring, source_file=self.file_path, line_number=self._get_line_number(match.start()), visibility=self._get_visibility(name), ) if elem_type == ElementType.STRUCT: elem.attributes = self._parse_struct_fields(body) elif elem_type == ElementType.INTERFACE: elem.attributes = self._parse_interface_methods(body) self.elements.append(elem) def _parse_struct_fields(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]: """Parse struct fields.""" fields = [] for line in body.split("\n"): line = line.strip() if not line or line.startswith("//"): continue match = re.match(r"(\w+)\s+([^\s;]+)(?:\s*`([^`]+)`)?", line) if match: field_name = match.group(1) field_type = match.group(2).strip() tags = match.group(3) desc = f"Tag: {tags}" if tags else None fields.append((field_name, field_type, desc)) return fields def _parse_interface_methods(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]: """Parse interface methods.""" methods = [] for line in body.split("\n"): line = line.strip() if not line or line.startswith("//"): continue match = re.match(r"(\w+)\s*\(([^)]*)\)\s*(?:\[[^\]]*\])?\s*([^{]+)", line) if match: method_name = match.group(1) params = match.group(2) type_param = match.group(3) returns = match.group(4) signature = f"({params}) {type_param if type_param else ''} {returns}".strip() methods.append((method_name, signature, None)) return methods def _parse_constants(self) -> None: """Parse constant declarations.""" const_pattern = r"^const\s+(?:\(([^)]*)\)|(\w+)\s*=\s*(.+?)(?:\s*$|\s*,\s*))" for match in re.finditer(const_pattern, self.content, re.MULTILINE): groups = match.groups() if groups[0]: const_group = groups[0] for line in const_group.split("\n"): line = line.strip() if not line or line.startswith("//"): continue const_match = re.match(r"(\w+)\s*=\s*(.+)", line) if const_match: self._create_const_element(const_match.group(1), const_match.group(2)) else: self._create_const_element(groups[1], groups[2]) def _create_const_element(self, name: str, value: str) -> None: """Create a constant documentation element.""" docstring = self._find_godoc_before(self.content.find(f"const {name}")) elem = DocElement( name=name, element_type=ElementType.CONSTANT, description=self._extract_summary(docstring), full_docstring=docstring, source_file=self.file_path, line_number=self._get_line_number(self.content.find(f"const {name}")), visibility=self._get_visibility(name), ) self.elements.append(elem) def _parse_variables(self) -> None: """Parse variable declarations.""" var_pattern = r"^var\s+(?:\(([^)]*)\)|(\w+)\s*=\s*(.+?)(?:\s*$|\s*,\s*))" for match in re.finditer(var_pattern, self.content, re.MULTILINE): groups = match.groups() if groups[0]: var_group = groups[0] for line in var_group.split("\n"): line = line.strip() if not line or line.startswith("//"): continue var_match = re.match(r"(\w+)(?:\s+\w+)?(?:\s*=\s*(.+))?", line) if var_match: self._create_var_element(var_match.group(1), var_match.group(2)) else: self._create_var_element(groups[1], groups[2]) def _create_var_element(self, name: str, value: Optional[str]) -> None: """Create a variable documentation element.""" pos = self.content.find(f"var {name}") if pos == -1: pos = self.content.find(name) docstring = self._find_godoc_before(pos) if pos != -1 else "" elem = DocElement( name=name, element_type=ElementType.VARIABLE, description=self._extract_summary(docstring), full_docstring=docstring, source_file=self.file_path, line_number=self._get_line_number(pos) if pos != -1 else 0, visibility=self._get_visibility(name), ) self.elements.append(elem) def _find_godoc_before(self, position: int) -> str: """Find GoDoc comment before a position.""" search_text = self.content[:position] lines = search_text.split("\n") docstring_lines = [] for line in reversed(lines): stripped = line.strip() if stripped.startswith("//"): comment = stripped[2:].strip() if comment.startswith(" "): comment = comment[1:] docstring_lines.insert(0, comment) elif docstring_lines: break return "\n".join(docstring_lines) def _extract_summary(self, docstring: str) -> str: """Extract first line as summary.""" if not docstring: return "" lines = docstring.strip().split("\n") return lines[0].strip() if lines else "" def _extract_godoc_tag(self, docstring: str, tag: str) -> Optional[str]: """Extract value of a specific GoDoc tag.""" pattern = rf"{tag}\s+(.+?)(?:\n\n|\Z)" match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE) if match: return match.group(1).strip() return None def _extract_godoc_panics(self, docstring: str) -> list[tuple[str, str]]: """Extract panic information from GoDoc.""" panics = [] pattern = r"panics?\s+(.+?)(?:\n\n|\Z)" for match in re.finditer(pattern, docstring, re.DOTALL | re.IGNORECASE): content = match.group(1).strip() if ":" in content: parts = content.split(":", 1) panics.append((parts[0].strip(), parts[1].strip())) else: panics.append(("panic", content)) return panics def _parse_go_params(self, params_str: str) -> list[Parameter]: """Parse Go function parameters.""" params = [] if not params_str.strip(): return params param_groups = self._split_go_params(params_str) for group in param_groups: group = group.strip() if not group: continue parts = group.rsplit(None, 1) if len(parts) == 2: param_names = [p.strip() for p in parts[0].split(",")] param_type = parts[1] for name in param_names: params.append(Parameter(name=name, type_hint=self._clean_type(param_type))) return params def _split_go_params(self, params_str: str) -> list[str]: """Split parameter groups respecting nested types.""" parts = [] current = "" depth = 0 for char in params_str: if char == "[": depth += 1 current += char elif char == "]": depth -= 1 current += char elif char == "," and depth == 0: parts.append(current) current = "" else: current += char if current.strip(): parts.append(current) return parts def _clean_type(self, type_str: Optional[str]) -> Optional[str]: """Clean type string.""" if not type_str: return None return type_str.strip() def _get_visibility(self, name: str) -> str: """Determine visibility based on name.""" if name and name[0].isupper(): return "public" return "private" def _get_line_number(self, position: int) -> int: """Get line number from position.""" return self.content[:position].count("\n") + 1