Files
7000pctAUTO 24b299a6dc
Some checks failed
CI / test (push) Has been cancelled
Add TypeScript/Go parsers and utils modules
2026-01-29 16:53:38 +00:00

344 lines
13 KiB
Python

"""Go parser using regex patterns."""
import re
from typing import Optional, List
from .base import Parser, DocElement, ElementType, Parameter
class GoParser(Parser):
    """Parser for Go source files.

    Extracts documentation elements from Go source text using regular
    expressions rather than a full Go grammar.
    """

    # File extensions (lower-case, leading dot included) accepted by
    # ``supports_file``.
    EXTENSIONS = [".go"]
def __init__(self, file_path: str) -> None:
    """Create a parser for the Go file at ``file_path``.

    All initialisation is delegated to the base ``Parser`` initializer.
    """
    super().__init__(file_path)
def get_language_name(self) -> str:
    """Return the identifier of the language handled by this parser (``"go"``)."""
    return "go"
@classmethod
def supports_file(cls, file_path: str) -> bool:
    """Tell whether ``file_path`` carries one of the extensions in ``EXTENSIONS``.

    The comparison uses the lower-cased extension produced by
    ``_get_extension``.
    """
    return cls._get_extension(file_path) in cls.EXTENSIONS
@staticmethod
def _get_extension(file_path: str) -> str:
import os
return os.path.splitext(file_path)[1].lower()
def parse(self) -> list[DocElement]:
    """Read the Go file and collect its documentation elements.

    The source text is loaded into ``self.content`` and ``self.elements`` is
    reset before the extraction passes run in a fixed order: package
    comment, functions, types, constants, variables.

    Returns:
        The populated ``self.elements`` list.
    """
    self.content = self._read_content()
    self.elements = []
    extraction_passes = (
        self._parse_package_docstring,
        self._parse_functions,
        self._parse_types,
        self._parse_constants,
        self._parse_variables,
    )
    for run_pass in extraction_passes:
        run_pass()
    return self.elements
def _parse_package_docstring(self) -> None:
"""Parse package documentation."""
lines = self.content.split("\n")
package_name = ""
docstring_lines = []
for i, line in enumerate(lines):
stripped = line.strip()
if stripped.startswith("package "):
package_name = stripped.split()[1]
for j in range(i - 1, -1, -1):
prev_line = lines[j].strip()
if prev_line.startswith("//") and not prev_line.startswith("///"):
comment = prev_line[2:].strip()
if comment.startswith(" "):
comment = comment[1:]
docstring_lines.insert(0, comment)
else:
break
if docstring_lines:
docstring = " ".join(docstring_lines)
elem = DocElement(
name=package_name,
element_type=ElementType.MODULE,
description=docstring,
full_docstring=docstring,
source_file=self.file_path,
)
self.elements.append(elem)
break
def _parse_functions(self) -> None:
    """Parse top-level function and method declarations.

    Every ``func`` declaration starting at the beginning of a line is turned
    into a FUNCTION element carrying its parameters, result type, doc
    comment, line number and visibility.

    Known limitations of the regex approach: a parameter list containing
    ``)`` (for example a func-typed parameter) and a result type containing
    ``{`` (for example ``interface{}``) are not captured completely.
    """
    pattern = (
        r"^func\s+"
        r"(?:\([^)]*\)\s*)?"      # optional method receiver
        r"(\w+)\s*"               # function name
        r"(?:\[[^\]]*\]\s*)?"     # optional type parameter list (generics)
        r"\(([^)]*)\)\s*"         # parameter list
        r"([^{]*?)\s*\{"          # result type(s), up to the opening brace
    )
    for match in re.finditer(pattern, self.content, re.MULTILINE):
        # The pattern has exactly three capture groups; the result type is
        # the third one.
        name, params_str, result = match.groups()
        docstring = self._find_godoc_before(match.start())
        elem = DocElement(
            name=name,
            element_type=ElementType.FUNCTION,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            parameters=self._parse_go_params(params_str),
            return_type=self._clean_type(result) or None,
            return_description=self._extract_godoc_tag(docstring, "return"),
            raises=self._extract_godoc_panics(docstring),
            source_file=self.file_path,
            line_number=self._get_line_number(match.start()),
            visibility=self._get_visibility(name),
        )
        self.elements.append(elem)
def _parse_types(self) -> None:
    """Parse top-level struct and interface type declarations.

    All structs are emitted first, then all interfaces. The body of each
    declaration is found by matching braces, so field or method types that
    themselves contain braces (``interface{}``, ``struct{}``, anonymous
    structs) do not cut the body short. Generic declarations such as
    ``type Set[T comparable] struct`` are recognised as well.

    Braces inside comments or struct tags are counted like any other brace.
    """
    source = self.content

    def braced_body(start: int) -> Optional[str]:
        """Return the text from ``start`` up to the brace closing the one opened just before it."""
        depth = 1
        for idx in range(start, len(source)):
            char = source[idx]
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return source[start:idx]
        return None

    kinds = (
        ("struct", ElementType.STRUCT),
        ("interface", ElementType.INTERFACE),
    )
    for keyword, elem_type in kinds:
        header = re.compile(
            rf"^type\s+(\w+)\s*(?:\[[^\]]*\])?\s+{keyword}\s*\{{",
            re.MULTILINE,
        )
        for match in header.finditer(source):
            body = braced_body(match.end())
            if body is None:
                # Unterminated declaration: nothing reliable to report.
                continue
            name = match.group(1)
            docstring = self._find_godoc_before(match.start())
            elem = DocElement(
                name=name,
                element_type=elem_type,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(name),
            )
            if elem_type == ElementType.STRUCT:
                elem.attributes = self._parse_struct_fields(body)
            elif elem_type == ElementType.INTERFACE:
                elem.attributes = self._parse_interface_methods(body)
            self.elements.append(elem)
def _parse_struct_fields(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]:
"""Parse struct fields."""
fields = []
for line in body.split("\n"):
line = line.strip()
if not line or line.startswith("//"):
continue
match = re.match(r"(\w+)\s+([^\s;]+)(?:\s*`([^`]+)`)?", line)
if match:
field_name = match.group(1)
field_type = match.group(2).strip()
tags = match.group(3)
desc = f"Tag: {tags}" if tags else None
fields.append((field_name, field_type, desc))
return fields
def _parse_interface_methods(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]:
"""Parse interface methods."""
methods = []
for line in body.split("\n"):
line = line.strip()
if not line or line.startswith("//"):
continue
match = re.match(r"(\w+)\s*\(([^)]*)\)\s*(?:\[[^\]]*\])?\s*([^{]+)", line)
if match:
method_name = match.group(1)
params = match.group(2)
type_param = match.group(3)
returns = match.group(4)
signature = f"({params}) {type_param if type_param else ''} {returns}".strip()
methods.append((method_name, signature, None))
return methods
def _parse_constants(self) -> None:
"""Parse constant declarations."""
const_pattern = r"^const\s+(?:\(([^)]*)\)|(\w+)\s*=\s*(.+?)(?:\s*$|\s*,\s*))"
for match in re.finditer(const_pattern, self.content, re.MULTILINE):
groups = match.groups()
if groups[0]:
const_group = groups[0]
for line in const_group.split("\n"):
line = line.strip()
if not line or line.startswith("//"):
continue
const_match = re.match(r"(\w+)\s*=\s*(.+)", line)
if const_match:
self._create_const_element(const_match.group(1), const_match.group(2))
else:
self._create_const_element(groups[1], groups[2])
def _create_const_element(self, name: str, value: str) -> None:
    """Append a CONSTANT element for ``name`` to ``self.elements``.

    The declaration is located by looking, in order, for ``const <name>`` at
    the start of a line, for a line that starts with ``<name>`` (the form
    used inside a ``const ( ... )`` group), and finally for ``<name>`` as a
    whole word anywhere. When none is found the element gets an empty
    docstring and line number 0.

    Args:
        name: Constant identifier.
        value: Declared value text; accepted but not stored on the element.
    """
    escaped = re.escape(name)
    search_order = (
        rf"^const\s+{escaped}\b",
        rf"^[ \t]*{escaped}\b",
        rf"\b{escaped}\b",
    )
    position = -1
    for candidate in search_order:
        found = re.search(candidate, self.content, re.MULTILINE)
        if found:
            position = found.start()
            break

    # Never pass -1 on: it would be read as "one before the end of file".
    if position != -1:
        docstring = self._find_godoc_before(position)
        line_number = self._get_line_number(position)
    else:
        docstring = ""
        line_number = 0

    elem = DocElement(
        name=name,
        element_type=ElementType.CONSTANT,
        description=self._extract_summary(docstring),
        full_docstring=docstring,
        source_file=self.file_path,
        line_number=line_number,
        visibility=self._get_visibility(name),
    )
    self.elements.append(elem)
def _parse_variables(self) -> None:
"""Parse variable declarations."""
var_pattern = r"^var\s+(?:\(([^)]*)\)|(\w+)\s*=\s*(.+?)(?:\s*$|\s*,\s*))"
for match in re.finditer(var_pattern, self.content, re.MULTILINE):
groups = match.groups()
if groups[0]:
var_group = groups[0]
for line in var_group.split("\n"):
line = line.strip()
if not line or line.startswith("//"):
continue
var_match = re.match(r"(\w+)(?:\s+\w+)?(?:\s*=\s*(.+))?", line)
if var_match:
self._create_var_element(var_match.group(1), var_match.group(2))
else:
self._create_var_element(groups[1], groups[2])
def _create_var_element(self, name: str, value: Optional[str]) -> None:
    """Append a VARIABLE element for ``name`` to ``self.elements``.

    The declaration is located by looking, in order, for ``var <name>`` at
    the start of a line, for a line that starts with ``<name>`` (the form
    used inside a ``var ( ... )`` group), and finally for ``<name>`` as a
    whole word anywhere. Whole-word matching keeps ``var mu`` from landing
    on ``var mutex``. When nothing is found the element gets an empty
    docstring and line number 0.

    Args:
        name: Variable identifier.
        value: Declared value text, if any; accepted but not stored on the
            element.
    """
    escaped = re.escape(name)
    search_order = (
        rf"^var\s+{escaped}\b",
        rf"^[ \t]*{escaped}\b",
        rf"\b{escaped}\b",
    )
    pos = -1
    for candidate in search_order:
        found = re.search(candidate, self.content, re.MULTILINE)
        if found:
            pos = found.start()
            break

    docstring = self._find_godoc_before(pos) if pos != -1 else ""
    elem = DocElement(
        name=name,
        element_type=ElementType.VARIABLE,
        description=self._extract_summary(docstring),
        full_docstring=docstring,
        source_file=self.file_path,
        line_number=self._get_line_number(pos) if pos != -1 else 0,
        visibility=self._get_visibility(name),
    )
    self.elements.append(elem)
def _find_godoc_before(self, position: int) -> str:
"""Find GoDoc comment before a position."""
search_text = self.content[:position]
lines = search_text.split("\n")
docstring_lines = []
for line in reversed(lines):
stripped = line.strip()
if stripped.startswith("//"):
comment = stripped[2:].strip()
if comment.startswith(" "):
comment = comment[1:]
docstring_lines.insert(0, comment)
elif docstring_lines:
break
return "\n".join(docstring_lines)
def _extract_summary(self, docstring: str) -> str:
"""Extract first line as summary."""
if not docstring:
return ""
lines = docstring.strip().split("\n")
return lines[0].strip() if lines else ""
def _extract_godoc_tag(self, docstring: str, tag: str) -> Optional[str]:
"""Extract value of a specific GoDoc tag."""
pattern = rf"{tag}\s+(.+?)(?:\n\n|\Z)"
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
if match:
return match.group(1).strip()
return None
def _extract_godoc_panics(self, docstring: str) -> list[tuple[str, str]]:
"""Extract panic information from GoDoc."""
panics = []
pattern = r"panics?\s+(.+?)(?:\n\n|\Z)"
for match in re.finditer(pattern, docstring, re.DOTALL | re.IGNORECASE):
content = match.group(1).strip()
if ":" in content:
parts = content.split(":", 1)
panics.append((parts[0].strip(), parts[1].strip()))
else:
panics.append(("panic", content))
return panics
def _parse_go_params(self, params_str: str) -> list[Parameter]:
    """Parse a Go parameter list into ``Parameter`` objects.

    Go lets consecutive parameters share one type (``a, b int``). A
    comma-separated piece holding a single token is therefore kept as a
    pending name and receives the type of the next piece that has one. The
    first token of a piece is the name and the remainder is the type, so
    types containing spaces (``<-chan int``) stay intact.

    Pieces left without a type at the end (an all-unnamed list such as
    ``int, string``) produce no parameters.

    Args:
        params_str: Text between the parentheses of a parameter list.

    Returns:
        The parameters in declaration order.
    """
    params = []
    if not params_str.strip():
        return params

    pending_names = []
    for group in self._split_go_params(params_str):
        pieces = group.strip().split(None, 1)
        if not pieces:
            continue
        if len(pieces) == 1:
            # Name only so far; its type comes with a later piece.
            pending_names.append(pieces[0])
            continue

        name, raw_type = pieces
        type_hint = self._clean_type(raw_type)
        for param_name in (*pending_names, name):
            params.append(Parameter(name=param_name, type_hint=type_hint))
        pending_names = []
    return params
def _split_go_params(self, params_str: str) -> list[str]:
"""Split parameter groups respecting nested types."""
parts = []
current = ""
depth = 0
for char in params_str:
if char == "[":
depth += 1
current += char
elif char == "]":
depth -= 1
current += char
elif char == "," and depth == 0:
parts.append(current)
current = ""
else:
current += char
if current.strip():
parts.append(current)
return parts
def _clean_type(self, type_str: Optional[str]) -> Optional[str]:
"""Clean type string."""
if not type_str:
return None
return type_str.strip()
def _get_visibility(self, name: str) -> str:
"""Determine visibility based on name."""
if name and name[0].isupper():
return "public"
return "private"
def _get_line_number(self, position: int) -> int:
"""Get line number from position."""
return self.content[:position].count("\n") + 1