This commit is contained in:
343
.code_doc_cli/parsers/go_parser.py
Normal file
343
.code_doc_cli/parsers/go_parser.py
Normal file
@@ -0,0 +1,343 @@
|
||||
"""Go parser using regex patterns."""
|
||||
|
||||
import re
|
||||
from typing import Optional, List
|
||||
from .base import Parser, DocElement, ElementType, Parameter
|
||||
|
||||
|
||||
class GoParser(Parser):
|
||||
"""Parser for Go source files."""
|
||||
|
||||
EXTENSIONS = [".go"]
|
||||
|
||||
def __init__(self, file_path: str):
    """Create a parser bound to the Go source file at ``file_path``.

    All set-up is delegated to the base ``Parser`` initializer.
    """
    super().__init__(file_path)
|
||||
|
||||
def get_language_name(self) -> str:
    """Return the identifier of the language handled by this parser."""
    language = "go"
    return language
|
||||
|
||||
@classmethod
def supports_file(cls, file_path: str) -> bool:
    """Tell whether the extension of ``file_path`` is listed in ``EXTENSIONS``."""
    return cls._get_extension(file_path) in cls.EXTENSIONS
|
||||
|
||||
@staticmethod
|
||||
def _get_extension(file_path: str) -> str:
|
||||
import os
|
||||
return os.path.splitext(file_path)[1].lower()
|
||||
|
||||
def parse(self) -> list[DocElement]:
    """Read the Go file and collect the documentation elements found in it.

    The content is loaded with ``_read_content``, ``self.elements`` is reset,
    and the extraction passes then run in a fixed order: package comment,
    functions, types, constants, variables.

    Returns:
        The ``self.elements`` list filled by the passes.
    """
    self.content = self._read_content()
    self.elements = []

    extraction_passes = (
        self._parse_package_docstring,
        self._parse_functions,
        self._parse_types,
        self._parse_constants,
        self._parse_variables,
    )
    for run_pass in extraction_passes:
        run_pass()

    return self.elements
|
||||
|
||||
def _parse_package_docstring(self) -> None:
|
||||
"""Parse package documentation."""
|
||||
lines = self.content.split("\n")
|
||||
package_name = ""
|
||||
docstring_lines = []
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
stripped = line.strip()
|
||||
|
||||
if stripped.startswith("package "):
|
||||
package_name = stripped.split()[1]
|
||||
|
||||
for j in range(i - 1, -1, -1):
|
||||
prev_line = lines[j].strip()
|
||||
if prev_line.startswith("//") and not prev_line.startswith("///"):
|
||||
comment = prev_line[2:].strip()
|
||||
if comment.startswith(" "):
|
||||
comment = comment[1:]
|
||||
docstring_lines.insert(0, comment)
|
||||
else:
|
||||
break
|
||||
|
||||
if docstring_lines:
|
||||
docstring = " ".join(docstring_lines)
|
||||
elem = DocElement(
|
||||
name=package_name,
|
||||
element_type=ElementType.MODULE,
|
||||
description=docstring,
|
||||
full_docstring=docstring,
|
||||
source_file=self.file_path,
|
||||
)
|
||||
self.elements.append(elem)
|
||||
break
|
||||
|
||||
def _parse_functions(self) -> None:
    """Collect functions and methods declared at line start as ``FUNCTION`` elements.

    A declaration is recognised when ``func`` begins a line and an opening
    brace follows the signature. An optional receiver and an optional
    type-parameter list (which Go places between the name and the
    parameters) are skipped; the name, the parameter list and the result
    type are captured. One element is appended to ``self.elements`` per
    match, in source order.
    """
    signature = re.compile(
        r"^func\s+"
        r"(?:\([^)]*\)\s*)?"    # optional method receiver
        r"(\w+)\s*"             # function name
        r"(?:\[[^\]]*\]\s*)?"   # optional type parameters, e.g. [T any]
        r"\(([^)]*)\)\s*"       # parameter list
        r"([^{]*?)\s*\{",       # result type(s); empty when nothing is returned
        re.MULTILINE,
    )

    for match in signature.finditer(self.content):
        # Exactly three groups exist; the third one is the result type.
        name, params_str, return_type = match.groups()
        docstring = self._find_godoc_before(match.start())

        self.elements.append(
            DocElement(
                name=name,
                element_type=ElementType.FUNCTION,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                parameters=self._parse_go_params(params_str),
                # An empty capture means the function returns nothing.
                return_type=self._clean_type(return_type) or None,
                return_description=self._extract_godoc_tag(docstring, "return"),
                raises=self._extract_godoc_panics(docstring),
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(name),
            )
        )
|
||||
|
||||
def _parse_types(self) -> None:
    """Collect ``struct`` and ``interface`` type declarations.

    Headers of the form ``type Name struct {`` / ``type Name interface {``
    (optionally with a type-parameter list right after the name) are located
    at line start. The body is taken up to the *matching* closing brace, so
    nested braces such as ``interface{}`` or an inline ``struct { ... }`` do
    not cut it short. Declarations whose closing brace cannot be found are
    skipped. All structs are appended before all interfaces.
    """
    text = self.content

    def closing_brace(open_at: int) -> int:
        """Return the index of the brace closing the one at ``open_at``, or -1."""
        depth = 0
        i = open_at
        end = len(text)
        while i < end:
            ch = text[i]
            if ch == "`":
                # Raw strings (struct tags) may contain braces; skip them whole.
                i = text.find("`", i + 1)
                if i == -1:
                    return -1
            elif ch == '"':
                # Skip an interpreted string, honouring backslash escapes.
                i += 1
                while i < end and text[i] not in '"\n':
                    i += 2 if text[i] == "\\" else 1
            elif text.startswith("//", i):
                i = text.find("\n", i)
                if i == -1:
                    return -1
            elif text.startswith("/*", i):
                i = text.find("*/", i + 2)
                if i == -1:
                    return -1
                i += 1
            elif ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    return i
            i += 1
        return -1

    kinds = (
        ("struct", ElementType.STRUCT, self._parse_struct_fields),
        ("interface", ElementType.INTERFACE, self._parse_interface_methods),
    )

    for keyword, elem_type, parse_members in kinds:
        header = re.compile(
            rf"^type\s+(\w+)(?:\[[^\]]+\])?\s+{keyword}\s*\{{", re.MULTILINE
        )
        for match in header.finditer(text):
            body_start = match.end()
            body_end = closing_brace(body_start - 1)
            if body_end == -1:
                continue

            name = match.group(1)
            docstring = self._find_godoc_before(match.start())

            elem = DocElement(
                name=name,
                element_type=elem_type,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(name),
            )
            elem.attributes = parse_members(text[body_start:body_end])
            self.elements.append(elem)
|
||||
|
||||
def _parse_struct_fields(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]:
|
||||
"""Parse struct fields."""
|
||||
fields = []
|
||||
for line in body.split("\n"):
|
||||
line = line.strip()
|
||||
if not line or line.startswith("//"):
|
||||
continue
|
||||
|
||||
match = re.match(r"(\w+)\s+([^\s;]+)(?:\s*`([^`]+)`)?", line)
|
||||
if match:
|
||||
field_name = match.group(1)
|
||||
field_type = match.group(2).strip()
|
||||
tags = match.group(3)
|
||||
desc = f"Tag: {tags}" if tags else None
|
||||
fields.append((field_name, field_type, desc))
|
||||
|
||||
return fields
|
||||
|
||||
def _parse_interface_methods(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]:
|
||||
"""Parse interface methods."""
|
||||
methods = []
|
||||
for line in body.split("\n"):
|
||||
line = line.strip()
|
||||
if not line or line.startswith("//"):
|
||||
continue
|
||||
|
||||
match = re.match(r"(\w+)\s*\(([^)]*)\)\s*(?:\[[^\]]*\])?\s*([^{]+)", line)
|
||||
if match:
|
||||
method_name = match.group(1)
|
||||
params = match.group(2)
|
||||
type_param = match.group(3)
|
||||
returns = match.group(4)
|
||||
signature = f"({params}) {type_param if type_param else ''} {returns}".strip()
|
||||
methods.append((method_name, signature, None))
|
||||
|
||||
return methods
|
||||
|
||||
def _parse_constants(self) -> None:
|
||||
"""Parse constant declarations."""
|
||||
const_pattern = r"^const\s+(?:\(([^)]*)\)|(\w+)\s*=\s*(.+?)(?:\s*$|\s*,\s*))"
|
||||
for match in re.finditer(const_pattern, self.content, re.MULTILINE):
|
||||
groups = match.groups()
|
||||
if groups[0]:
|
||||
const_group = groups[0]
|
||||
for line in const_group.split("\n"):
|
||||
line = line.strip()
|
||||
if not line or line.startswith("//"):
|
||||
continue
|
||||
const_match = re.match(r"(\w+)\s*=\s*(.+)", line)
|
||||
if const_match:
|
||||
self._create_const_element(const_match.group(1), const_match.group(2))
|
||||
else:
|
||||
self._create_const_element(groups[1], groups[2])
|
||||
|
||||
def _create_const_element(self, name: str, value: str) -> None:
    """Append a ``CONSTANT`` element for the constant called ``name``.

    The declaration is looked up first as ``const <name>`` at the start of a
    line and, failing that, as an indented line beginning with ``<name>``
    (the layout used inside a ``const ( ... )`` group). Both look-ups match
    the whole identifier only. When neither succeeds the element is still
    added, with an empty docstring and line number 0.

    Args:
        name: Identifier of the constant.
        value: Source text of the constant's value. Accepted for interface
            compatibility; it is not stored on the element.
    """
    identifier = re.escape(name)
    declaration = re.search(
        rf"^const[ \t]+{identifier}\b", self.content, re.MULTILINE
    ) or re.search(rf"^[ \t]+{identifier}\b", self.content, re.MULTILINE)

    if declaration:
        position = declaration.start()
        docstring = self._find_godoc_before(position)
        line_number = self._get_line_number(position)
    else:
        # Never feed a "not found" index into the slicing helpers.
        docstring = ""
        line_number = 0

    self.elements.append(
        DocElement(
            name=name,
            element_type=ElementType.CONSTANT,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            source_file=self.file_path,
            line_number=line_number,
            visibility=self._get_visibility(name),
        )
    )
|
||||
|
||||
def _parse_variables(self) -> None:
|
||||
"""Parse variable declarations."""
|
||||
var_pattern = r"^var\s+(?:\(([^)]*)\)|(\w+)\s*=\s*(.+?)(?:\s*$|\s*,\s*))"
|
||||
for match in re.finditer(var_pattern, self.content, re.MULTILINE):
|
||||
groups = match.groups()
|
||||
if groups[0]:
|
||||
var_group = groups[0]
|
||||
for line in var_group.split("\n"):
|
||||
line = line.strip()
|
||||
if not line or line.startswith("//"):
|
||||
continue
|
||||
var_match = re.match(r"(\w+)(?:\s+\w+)?(?:\s*=\s*(.+))?", line)
|
||||
if var_match:
|
||||
self._create_var_element(var_match.group(1), var_match.group(2))
|
||||
else:
|
||||
self._create_var_element(groups[1], groups[2])
|
||||
|
||||
def _create_var_element(self, name: str, value: Optional[str]) -> None:
    """Append a ``VARIABLE`` element for the variable called ``name``.

    The declaration is looked up first as ``var <name>`` at the start of a
    line and, failing that, as an indented line beginning with ``<name>``
    (the layout used inside a ``var ( ... )`` group). Both look-ups match
    the whole identifier only, so ``count`` is not confused with
    ``counter``. When neither succeeds the element is still added, with an
    empty docstring and line number 0.

    Args:
        name: Identifier of the variable.
        value: Source text of the initial value, if any. Accepted for
            interface compatibility; it is not stored on the element.
    """
    identifier = re.escape(name)
    declaration = re.search(
        rf"^var[ \t]+{identifier}\b", self.content, re.MULTILINE
    ) or re.search(rf"^[ \t]+{identifier}\b", self.content, re.MULTILINE)

    if declaration:
        position = declaration.start()
        docstring = self._find_godoc_before(position)
        line_number = self._get_line_number(position)
    else:
        docstring = ""
        line_number = 0

    self.elements.append(
        DocElement(
            name=name,
            element_type=ElementType.VARIABLE,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            source_file=self.file_path,
            line_number=line_number,
            visibility=self._get_visibility(name),
        )
    )
|
||||
|
||||
def _find_godoc_before(self, position: int) -> str:
|
||||
"""Find GoDoc comment before a position."""
|
||||
search_text = self.content[:position]
|
||||
lines = search_text.split("\n")
|
||||
|
||||
docstring_lines = []
|
||||
for line in reversed(lines):
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("//"):
|
||||
comment = stripped[2:].strip()
|
||||
if comment.startswith(" "):
|
||||
comment = comment[1:]
|
||||
docstring_lines.insert(0, comment)
|
||||
elif docstring_lines:
|
||||
break
|
||||
|
||||
return "\n".join(docstring_lines)
|
||||
|
||||
def _extract_summary(self, docstring: str) -> str:
|
||||
"""Extract first line as summary."""
|
||||
if not docstring:
|
||||
return ""
|
||||
lines = docstring.strip().split("\n")
|
||||
return lines[0].strip() if lines else ""
|
||||
|
||||
def _extract_godoc_tag(self, docstring: str, tag: str) -> Optional[str]:
|
||||
"""Extract value of a specific GoDoc tag."""
|
||||
pattern = rf"{tag}\s+(.+?)(?:\n\n|\Z)"
|
||||
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
return None
|
||||
|
||||
def _extract_godoc_panics(self, docstring: str) -> list[tuple[str, str]]:
|
||||
"""Extract panic information from GoDoc."""
|
||||
panics = []
|
||||
pattern = r"panics?\s+(.+?)(?:\n\n|\Z)"
|
||||
for match in re.finditer(pattern, docstring, re.DOTALL | re.IGNORECASE):
|
||||
content = match.group(1).strip()
|
||||
if ":" in content:
|
||||
parts = content.split(":", 1)
|
||||
panics.append((parts[0].strip(), parts[1].strip()))
|
||||
else:
|
||||
panics.append(("panic", content))
|
||||
return panics
|
||||
|
||||
def _parse_go_params(self, params_str: str) -> list[Parameter]:
    """Turn the text of a Go parameter list into ``Parameter`` objects.

    The list is split with ``_split_go_params``. In each piece the first
    word is the parameter name and everything after it is the type, so
    multi-word types such as ``chan int`` stay whole. A piece consisting of
    a single word is a name that shares the type of the next typed piece
    (``a, b int`` gives two ``int`` parameters). Single words that are never
    followed by a typed piece (an unnamed parameter list) are dropped.

    Args:
        params_str: Text between the parentheses of a signature.

    Returns:
        One ``Parameter`` per named parameter, in declaration order.
    """
    params = []
    if not params_str.strip():
        return params

    awaiting_type = []  # names seen so far whose shared type comes later
    for group in self._split_go_params(params_str):
        pieces = group.split(None, 1)
        if not pieces:
            continue
        if len(pieces) == 1:
            awaiting_type.append(pieces[0])
            continue

        name, raw_type = pieces
        type_hint = self._clean_type(raw_type)
        for param_name in (*awaiting_type, name):
            params.append(Parameter(name=param_name, type_hint=type_hint))
        awaiting_type.clear()

    return params
|
||||
|
||||
def _split_go_params(self, params_str: str) -> list[str]:
|
||||
"""Split parameter groups respecting nested types."""
|
||||
parts = []
|
||||
current = ""
|
||||
depth = 0
|
||||
for char in params_str:
|
||||
if char == "[":
|
||||
depth += 1
|
||||
current += char
|
||||
elif char == "]":
|
||||
depth -= 1
|
||||
current += char
|
||||
elif char == "," and depth == 0:
|
||||
parts.append(current)
|
||||
current = ""
|
||||
else:
|
||||
current += char
|
||||
if current.strip():
|
||||
parts.append(current)
|
||||
return parts
|
||||
|
||||
def _clean_type(self, type_str: Optional[str]) -> Optional[str]:
|
||||
"""Clean type string."""
|
||||
if not type_str:
|
||||
return None
|
||||
return type_str.strip()
|
||||
|
||||
def _get_visibility(self, name: str) -> str:
|
||||
"""Determine visibility based on name."""
|
||||
if name and name[0].isupper():
|
||||
return "public"
|
||||
return "private"
|
||||
|
||||
def _get_line_number(self, position: int) -> int:
|
||||
"""Get line number from position."""
|
||||
return self.content[:position].count("\n") + 1
|
||||
Reference in New Issue
Block a user