# Provenance: patch f9683136be48ca588ae9a58cdad7cdfc63c7b2d4 ("Add parsers
# module"), which creates src/parsers/go.py.
from pathlib import Path
import re

from src.parsers.base import BaseParser, ParserResult, Entity, EntityType


class GoParser(BaseParser):
    """Regex-based, line-oriented extractor for Go source text.

    The parser does not build a syntax tree.  It scans the file line by line
    for top-level ``func``, ``type ... struct`` and ``type ... interface``
    headers, uses brace matching to find where each declaration ends, and
    collects import paths and called names with regular expressions.
    """

    SUPPORTED_EXTENSIONS = [".go"]

    # ``func [receiver] Name[type params](params)`` at the start of a
    # stripped line.  Group 1 is the name, group 2 the raw parameter list.
    _FUNC_RE = re.compile(
        r'^func\s+'
        r'(?:\([^)]*\)\s*)?'              # optional method receiver
        r'([a-zA-Z_][a-zA-Z0-9_]*)\s*'    # function / method name
        r'(?:\[.*?\]\s*)?'                # optional type parameter list
        r'\(([^)]*)\)'                    # parameter list
    )
    # ``type Name[type params] struct {`` / ``... interface {``.
    _STRUCT_RE = re.compile(
        r'type\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:\[.*?\])?\s+struct\s*\{'
    )
    _INTERFACE_RE = re.compile(
        r'type\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:\[.*?\])?\s+interface\s*\{'
    )

    # A named declaration header; rewritten to a bare ``func`` before call
    # scanning so that the declared name is not mistaken for a call.
    _FUNC_DECL_RE = re.compile(
        r'\bfunc\s+(?:\([^)]*\)\s*)?[a-zA-Z_][a-zA-Z0-9_]*\s*(?=[\[(])'
    )
    # Any identifier directly followed by an opening parenthesis.
    _CALL_RE = re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(')
    # Go keywords (they may be followed by "(" without being calls) plus the
    # print/panic helpers the original implementation already ignored.
    _NON_CALL_NAMES = frozenset({
        'break', 'case', 'chan', 'const', 'continue', 'default', 'defer',
        'else', 'fallthrough', 'for', 'func', 'go', 'goto', 'if', 'import',
        'interface', 'map', 'package', 'range', 'return', 'select', 'struct',
        'switch', 'type', 'var',
        'panic', 'print', 'println', 'printf',
    })

    _IMPORT_BLOCK_RE = re.compile(r'import\s*\(')
    # Single-line import with an optional alias, ``_`` or ``.`` qualifier.
    _IMPORT_SINGLE_RE = re.compile(r'import\s+(?:[\w.]+\s+)?"([^"]+)"')
    _IMPORT_PATH_RE = re.compile(r'"([^"]+)"')

    def __init__(self):
        # Kept as in the original: the base-class initialiser is not invoked.
        # NOTE(review): confirm BaseParser needs no initialisation.
        pass

    def parse(self, file_path: Path, content: str) -> ParserResult:
        """Parse *content* and return a ``ParserResult`` for *file_path*.

        Any exception raised while extracting entities or imports is caught
        and recorded in ``result.errors`` as ``"Parse error: ..."``; the
        (possibly partially filled) result is still returned.
        """
        result = ParserResult(file_path=file_path, language="go")
        try:
            result.entities = self.extract_entities(content, file_path)
            result.imports = self.extract_imports(content)
        except Exception as e:
            # Boundary of the parser: report the failure instead of raising.
            result.errors.append(f"Parse error: {e}")
        return result

    def extract_entities(self, content: str, file_path: Path) -> list[Entity]:
        """Return all functions, then structs, then interfaces in *content*."""
        return [
            *self._extract_functions(content, file_path),
            *self._extract_structs(content, file_path),
            *self._extract_interfaces(content, file_path),
        ]

    def _extract_functions(self, content: str, file_path: Path) -> list[Entity]:
        """Return one FUNCTION entity per ``func`` declaration.

        Plain functions, methods with a receiver and generic functions are
        recognised, provided the whole parameter list sits on the ``func``
        line.  ``start_line``/``end_line`` are 1-based and inclusive.
        """
        functions = []
        lines = content.split('\n')

        for index, raw_line in enumerate(lines):
            match = self._FUNC_RE.match(raw_line.strip())
            if match is None:
                continue
            func_name, params = match.groups()

            end_line = self._find_braces_end(lines, index)
            code = '\n'.join(lines[index:end_line])

            functions.append(Entity(
                name=func_name,
                entity_type=EntityType.FUNCTION,
                file_path=file_path,
                start_line=index + 1,
                end_line=end_line,
                code=code,
                attributes={"parameters": self._parse_go_params(params)},
                calls=self._extract_function_calls(code),
            ))

        return functions

    def _extract_structs(self, content: str, file_path: Path) -> list[Entity]:
        """Return one CLASS entity per ``type Name struct {`` declaration."""
        return self._extract_type_decls(content, file_path, self._STRUCT_RE)

    def _extract_interfaces(self, content: str, file_path: Path) -> list[Entity]:
        """Return one CLASS entity per ``type Name interface {`` declaration."""
        return self._extract_type_decls(content, file_path, self._INTERFACE_RE)

    def _extract_type_decls(self, content: str, file_path: Path,
                            pattern: re.Pattern) -> list[Entity]:
        """Shared scan for struct/interface headers matching *pattern*.

        *pattern* must capture the type name in group 1 and is matched
        against each stripped line.
        """
        found = []
        lines = content.split('\n')

        for index, raw_line in enumerate(lines):
            match = pattern.match(raw_line.strip())
            if match is None:
                continue

            end_line = self._find_braces_end(lines, index)
            found.append(Entity(
                name=match.group(1),
                entity_type=EntityType.CLASS,
                file_path=file_path,
                start_line=index + 1,
                end_line=end_line,
                code='\n'.join(lines[index:end_line]),
            ))

        return found

    def _parse_go_params(self, params: str) -> list[str]:
        """Return the parameter names from a raw Go parameter list.

        Go parameter lists are either fully named or fully unnamed.  If no
        comma-separated piece has at least two tokens the list is treated as
        unnamed (types only) and ``[]`` is returned.  Otherwise the first
        token of every piece is a name, which also covers grouped
        parameters: ``"a, b int"`` yields ``["a", "b"]``.
        """
        pieces = [tokens for tokens in
                  (piece.split() for piece in params.split(','))
                  if tokens]
        if not any(len(tokens) >= 2 for tokens in pieces):
            return []
        return [tokens[0] for tokens in pieces]

    def _extract_function_calls(self, code: str) -> list[str]:
        """Return the distinct names that appear called in *code*.

        A "call" is any identifier immediately followed by ``(``, so for
        ``pkg.Func(x)`` the reported name is ``Func``.  Nested calls are
        found, names introduced by ``func`` declarations are not reported
        unless they are also called elsewhere, and Go keywords plus a few
        print/panic helpers are ignored.  Names are returned in
        first-appearance order.  Text inside strings and comments is not
        filtered out.
        """
        # Drop declared names so "func main(" does not count as a call.
        body = self._FUNC_DECL_RE.sub('func', code)
        names = (match.group(1) for match in self._CALL_RE.finditer(body))
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return list(dict.fromkeys(
            name for name in names if name not in self._NON_CALL_NAMES
        ))

    def _find_braces_end(self, lines: list[str], start_index: int) -> int:
        """Return the 1-based line number on which the brace block closes.

        Scanning starts at ``lines[start_index]`` and counts ``{``/``}``
        until the nesting depth returns to zero.  Braces inside ``//`` and
        ``/* */`` comments, double-quoted strings, rune literals and
        backtick raw strings are ignored.  Returns ``len(lines)`` when no
        balancing brace is found.
        """
        depth = 0
        in_block_comment = False
        in_raw_string = False

        for index in range(start_index, len(lines)):
            line = lines[index]
            pos = 0
            while pos < len(line):
                # Block comments and raw strings may span several lines.
                if in_block_comment:
                    close = line.find('*/', pos)
                    if close == -1:
                        break
                    in_block_comment = False
                    pos = close + 2
                    continue
                if in_raw_string:
                    close = line.find('`', pos)
                    if close == -1:
                        break
                    in_raw_string = False
                    pos = close + 1
                    continue

                if line.startswith('//', pos):
                    break  # rest of the line is a comment
                if line.startswith('/*', pos):
                    in_block_comment = True
                    pos += 2
                    continue

                char = line[pos]
                if char == '`':
                    in_raw_string = True
                elif char in ('"', "'"):
                    # Interpreted strings and runes cannot span lines.
                    pos = self._skip_quoted(line, pos)
                    continue
                elif char == '{':
                    depth += 1
                elif char == '}':
                    depth -= 1
                    if depth == 0:
                        return index + 1
                pos += 1

        return len(lines)

    @staticmethod
    def _skip_quoted(line: str, start: int) -> int:
        """Return the index just past the quoted literal opening at *start*.

        Backslash escapes are skipped as a unit; an unterminated literal
        consumes the rest of the line.
        """
        quote = line[start]
        pos = start + 1
        while pos < len(line):
            if line[pos] == '\\':
                pos += 2
                continue
            if line[pos] == quote:
                return pos + 1
            pos += 1
        return len(line)

    def extract_imports(self, content: str) -> list[str]:
        """Return the imported package paths in order of appearance.

        Handles single-line imports (optionally with an alias, ``_`` or
        ``.``) and any number of parenthesised import blocks.  Lines inside
        a block that start with ``//`` are skipped.
        """
        imports: list[str] = []
        in_block = False

        for raw_line in content.split('\n'):
            line = raw_line.strip()

            if in_block:
                if line.startswith(')'):
                    in_block = False
                elif not line.startswith('//'):
                    spec = self._IMPORT_PATH_RE.search(line)
                    if spec:
                        imports.append(spec.group(1))
                continue

            header = self._IMPORT_BLOCK_RE.match(line)
            if header:
                # Handle specs (and a possible ")") on the header line
                # itself, ignoring any trailing line comment.
                rest = line[header.end():].split('//', 1)[0]
                imports.extend(self._IMPORT_PATH_RE.findall(rest))
                in_block = ')' not in rest
                continue

            single = self._IMPORT_SINGLE_RE.match(line)
            if single:
                imports.append(single.group(1))

        return imports

    def extract_calls(self, content: str) -> list[str]:
        """Return the distinct called names found anywhere in *content*."""
        return self._extract_function_calls(content)