Add parsers module
Some checks failed
CI / test (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-02-02 02:38:29 +00:00
parent c289a898eb
commit f9683136be

186
src/parsers/go.py Normal file
View File

@@ -0,0 +1,186 @@
from pathlib import Path
import re
from src.parsers.base import BaseParser, ParserResult, Entity, EntityType
class GoParser(BaseParser):
    """Line-oriented, regex-based extractor for Go source files.

    Produces ``Entity`` records for functions, methods, structs and
    interfaces, plus the list of imported package paths. This is a heuristic
    scanner rather than a full Go parser: declarations are recognised one
    line at a time, so a signature spread over several lines is not detected.
    """

    SUPPORTED_EXTENSIONS = [".go"]

    _IDENT = r'[a-zA-Z_][a-zA-Z0-9_]*'

    # The 25 reserved words of Go; none of them can be a parameter name or a
    # callable, even when directly followed by "(" (e.g. ``import (``).
    _GO_KEYWORDS = frozenset({
        "break", "case", "chan", "const", "continue", "default", "defer",
        "else", "fallthrough", "for", "func", "go", "goto", "if", "import",
        "interface", "map", "package", "range", "return", "select", "struct",
        "switch", "type", "var",
    })

    # Identifiers that are never reported by _extract_function_calls.
    _NON_CALL_NAMES = _GO_KEYWORDS | {"panic", "print", "println", "printf"}

    # func [(receiver)] Name [[type params]] (params)
    _FUNC_DECL_RE = re.compile(
        r'func\s+'
        r'(?:\(([^)]*)\)\s*)?'          # optional method receiver
        r'(' + _IDENT + r')\s*'         # function / method name
        r'(?:\[[^\]]*\]\s*)?'           # optional type parameter list
        r'\(([^)]*)\)'                  # parameter list
    )
    # Locates the declared name inside "func Name(" / "func (r T) Name(".
    _DECL_NAME_RE = re.compile(
        r'\bfunc\s+(?:\([^)]*\)\s*)?(' + _IDENT + r')\s*\('
    )
    # Any identifier immediately followed by an opening parenthesis.
    _CALL_RE = re.compile(r'\b(' + _IDENT + r')\s*\(')

    _STRUCT_RE = re.compile(
        r'type\s+(' + _IDENT + r')(?:\[[^\]]*\])?\s+struct\s*\{'
    )
    _INTERFACE_RE = re.compile(
        r'type\s+(' + _IDENT + r')(?:\[[^\]]*\])?\s+interface\s*\{'
    )

    _IMPORT_OPEN_RE = re.compile(r'import\s*\(')
    # Optional alias ("name", "_" or ".") followed by the quoted path.
    _IMPORT_SPEC_RE = re.compile(r'(?:[\w.]+\s+)?"([^"]+)"')
    _IMPORT_SINGLE_RE = re.compile(r'import\s+(?:[\w.]+\s+)?"([^"]+)"')

    def __init__(self):
        """Create a parser; no per-instance state is kept.

        NOTE(review): ``BaseParser.__init__`` is not invoked here, exactly as
        before -- confirm the base class needs no initialisation.
        """

    def parse(self, file_path: Path, content: str) -> ParserResult:
        """Parse Go source text into a ``ParserResult``.

        Args:
            file_path: Path recorded on the result and on every entity.
            content: Full text of the Go file.

        Returns:
            A result with ``entities`` and ``imports`` filled in. Any
            exception raised during extraction is recorded in
            ``result.errors`` as ``"Parse error: ..."`` instead of
            propagating, so the caller always receives a result object.
        """
        result = ParserResult(file_path=file_path, language="go")
        try:
            result.entities = self.extract_entities(content, file_path)
            result.imports = self.extract_imports(content)
        except Exception as e:
            # Boundary handler: one unparsable file must not abort a batch.
            result.errors.append(f"Parse error: {e}")
        return result

    def extract_entities(self, content: str, file_path: Path) -> list[Entity]:
        """Return all functions, then structs, then interfaces in *content*."""
        entities = []
        entities.extend(self._extract_functions(content, file_path))
        entities.extend(self._extract_structs(content, file_path))
        entities.extend(self._extract_interfaces(content, file_path))
        return entities

    def _extract_functions(self, content: str, file_path: Path) -> list[Entity]:
        """Extract top-level functions and methods.

        A declaration is recognised when a (whitespace-stripped) line starts
        with ``func``, optionally followed by a receiver, then a name and a
        single-line parameter list. Methods are reported with the same entity
        type as functions and carry an extra ``"receiver"`` attribute holding
        the receiver text; plain functions only carry ``"parameters"``.
        """
        functions = []
        lines = content.split('\n')
        for index, raw_line in enumerate(lines):
            match = self._FUNC_DECL_RE.match(raw_line.strip())
            if match is None:
                continue
            receiver, func_name, params = match.groups()
            end_line = self._find_braces_end(lines, index)
            code = '\n'.join(lines[index:end_line])
            attributes = {"parameters": self._parse_go_params(params)}
            if receiver is not None:
                attributes["receiver"] = receiver.strip()
            functions.append(
                Entity(
                    name=func_name,
                    entity_type=EntityType.FUNCTION,
                    file_path=file_path,
                    start_line=index + 1,  # entities use 1-based line numbers
                    end_line=end_line,
                    code=code,
                    attributes=attributes,
                    calls=self._extract_function_calls(code),
                )
            )
        return functions

    def _extract_structs(self, content: str, file_path: Path) -> list[Entity]:
        """Extract ``type Name struct {`` declarations as CLASS entities."""
        return self._extract_type_blocks(content, file_path, self._STRUCT_RE)

    def _extract_interfaces(self, content: str, file_path: Path) -> list[Entity]:
        """Extract ``type Name interface {`` declarations as CLASS entities."""
        return self._extract_type_blocks(content, file_path, self._INTERFACE_RE)

    def _extract_type_blocks(self, content, file_path, pattern) -> list[Entity]:
        """Build one CLASS entity per line matching *pattern*.

        *pattern* must capture the type name in group 1 and match through the
        opening brace, so that the brace scan starts on the declaration line.
        """
        found = []
        lines = content.split('\n')
        for index, raw_line in enumerate(lines):
            match = pattern.match(raw_line.strip())
            if match is None:
                continue
            end_line = self._find_braces_end(lines, index)
            found.append(
                Entity(
                    name=match.group(1),
                    entity_type=EntityType.CLASS,
                    file_path=file_path,
                    start_line=index + 1,
                    end_line=end_line,
                    code='\n'.join(lines[index:end_line]),
                )
            )
        return found

    def _parse_go_params(self, params: str) -> list[str]:
        """Return the parameter names found in a Go parameter list.

        Go requires that either every parameter is named or none is, so one
        ``name type`` pair proves that single-token entries are names sharing
        a later type (``a, b int`` yields ``["a", "b"]``). A list made only
        of types (``int, string`` or ``chan int``) yields ``[]``.
        """
        keywords = self._GO_KEYWORDS

        def is_name(token: str) -> bool:
            # Rejects type syntax such as "chan", "*T", "[]byte", "...int".
            return token.isidentifier() and token not in keywords

        entries = [entry.split() for entry in params.split(',')]
        entries = [tokens for tokens in entries if tokens]
        if not any(len(tokens) >= 2 and is_name(tokens[0]) for tokens in entries):
            return []
        return [tokens[0] for tokens in entries if is_name(tokens[0])]

    def _extract_function_calls(self, code: str) -> list[str]:
        """Return the distinct callee names in *code*, in first-seen order.

        Every identifier directly followed by ``(`` counts as a call, except
        Go keywords, the names ``panic``/``print``/``println``/``printf``,
        and the name being introduced by a ``func`` declaration. For a
        qualified call such as ``fmt.Println(x)`` only ``Println`` is
        reported. Text inside strings and comments is not filtered out.
        """
        # Offsets of declared names, so "func main()" is not a call to main.
        declared_at = {m.start(1) for m in self._DECL_NAME_RE.finditer(code)}
        calls = [
            m.group(1)
            for m in self._CALL_RE.finditer(code)
            if m.start(1) not in declared_at
            and m.group(1) not in self._NON_CALL_NAMES
        ]
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return list(dict.fromkeys(calls))

    def _find_braces_end(self, lines: list[str], start_index: int) -> int:
        """Find where the brace block opened at ``lines[start_index]`` ends.

        Braces inside ``//`` and ``/* */`` comments, interpreted strings,
        rune literals and backtick raw strings are ignored.

        Returns:
            The 1-based number of the line holding the matching ``}``, which
            is also the exclusive end index for slicing *lines*. If the block
            is never closed, ``len(lines)`` is returned.
        """
        depth = 0
        in_raw_string = False      # backtick strings may span lines
        in_block_comment = False   # so may /* ... */ comments
        for i in range(start_index, len(lines)):
            line = lines[i]
            quote = None  # "..." and '...' literals cannot cross a newline
            j = 0
            while j < len(line):
                ch = line[j]
                if in_block_comment:
                    if line.startswith('*/', j):
                        in_block_comment = False
                        j += 2
                        continue
                elif in_raw_string:
                    if ch == '`':
                        in_raw_string = False
                elif quote is not None:
                    if ch == '\\':
                        j += 2  # skip the escaped character as well
                        continue
                    if ch == quote:
                        quote = None
                elif line.startswith('//', j):
                    break  # rest of the line is a comment
                elif line.startswith('/*', j):
                    in_block_comment = True
                    j += 2
                    continue
                elif ch == '`':
                    in_raw_string = True
                elif ch == '"' or ch == "'":
                    quote = ch
                elif ch == '{':
                    depth += 1
                elif ch == '}':
                    depth -= 1
                    if depth == 0:
                        return i + 1
                j += 1
        return len(lines)

    def extract_imports(self, content: str) -> list[str]:
        """Return every imported package path, in file order.

        Handles single-line imports (with or without an alias, ``_`` or
        ``.``) and any number of parenthesised ``import ( ... )`` groups.
        Commented-out specs inside a group are skipped. Specs written on the
        same line as the opening ``import (`` are not picked up.
        """
        imports = []
        in_block = False
        for raw_line in content.split('\n'):
            text = raw_line.strip()
            if in_block:
                if text.startswith(')'):
                    # Keep scanning: a file may hold several import groups.
                    in_block = False
                    continue
                spec = self._IMPORT_SPEC_RE.match(text)
                if spec:
                    imports.append(spec.group(1))
                continue
            if self._IMPORT_OPEN_RE.match(text):
                in_block = True
                continue
            single = self._IMPORT_SINGLE_RE.match(text)
            if single:
                imports.append(single.group(1))
        return imports

    def extract_calls(self, content: str) -> list[str]:
        """Return the distinct callee names found anywhere in *content*."""
        return self._extract_function_calls(content)