"""Regex-based parser for JavaScript source files.

Origin: ``src/parsers/javascript.py`` as added by the patch "Add parsers
module" (commit c289a898eb0363c8988f6bbaac859a37bd413c39).

The parser is deliberately lightweight: it works line by line with regular
expressions and a small brace scanner instead of building a syntax tree.
Known limitations: declarations whose header spans several lines, multi-line
``import`` statements and regex literals containing braces or quotes are not
recognised.
"""
from pathlib import Path
import re

from src.parsers.base import BaseParser, ParserResult, Entity, EntityType


_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
_EXPORT = r'(?:export\s+(?:default\s+)?)?'

# ``function name(params)``; optionally exported, async and/or a generator.
_FUNCTION_RE = re.compile(
    _EXPORT
    + r'(?:async\s+)?function(?:\s*\*\s*|\s+)(' + _NAME + r')\s*\(([^)]*)\)\s*\{?'
)
# ``const name = (params) =>``; optionally exported and/or async.
_ARROW_RE = re.compile(
    _EXPORT
    + r'(?:const|let|var)\s+(' + _NAME + r')\s*=\s*(?:async\s+)?\(([^)]*)\)\s*=>'
)
# ``class Name [extends Base]`` followed by ``{`` or the end of the line.
_CLASS_RE = re.compile(
    _EXPORT + r'class\s+(' + _NAME + r')(?:\s+extends\s+[^\s{]+)?\s*(\{|$)'
)
# ``[static|async|get|set] name(params) [{]`` inside a class body.
_METHOD_RE = re.compile(
    r'\s*(?:(?:static|async|get|set)\s+)*(' + _NAME + r')\s*\(([^)]*)\)\s*(\{)?'
)
# ``import ... from 'module'`` and bare ``import 'module'``.
_IMPORT_RE = re.compile(r"import\s+(?:.+?\s+from\s+)?['\"]([^'\"]+)['\"]")
# ``const x = require('module')``, also with let/var and destructuring.
_REQUIRE_RE = re.compile(
    r"(?:const|let|var)\s+(?:" + _NAME + r"|\{[^}]*\})\s*=\s*"
    r"require\(\s*['\"]([^'\"]+)['\"]\s*\)"
)
# A call followed by ``;`` and a call that ends the examined code.
_CALL_PATTERNS = (
    re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\([^)]*\)\s*;'),
    re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\([^)]*\)\s*$'),
)

# Words that look like ``name(...)`` but are control flow, not methods/calls.
_NOT_METHODS = frozenset({'if', 'while', 'for', 'switch', 'try', 'catch'})
_NOT_CALLS = frozenset(
    {'if', 'while', 'for', 'return', 'throw', 'new', 'console'}
)


def _split_parameters(params: str) -> list[str]:
    """Split a raw parameter list on commas, dropping empty entries."""
    return [part.strip() for part in params.split(',') if part.strip()]


def _indent_width(raw_line: str) -> int:
    """Return the number of leading whitespace characters of *raw_line*."""
    return len(raw_line) - len(raw_line.lstrip())


def _opens_block(lines: list[str], index: int, match: "re.Match[str]") -> bool:
    """Tell whether a matched method header really starts a ``{`` block.

    The header qualifies when the brace is on the header line itself, or when
    nothing but a ``//`` comment follows the parameter list and the next
    non-blank line starts with ``{``. This rejects call statements such as
    ``doThing(x);`` that otherwise look like a method header.
    """
    if match.group(3):
        return True
    remainder = lines[index][match.end():].strip()
    if remainder and not remainder.startswith('//'):
        return False
    for following in range(index + 1, len(lines)):
        candidate = lines[following].strip()
        if candidate:
            return candidate.startswith('{')
    return False


class JavaScriptParser(BaseParser):
    """Extract functions, classes, methods, calls and imports from JavaScript."""

    SUPPORTED_EXTENSIONS = [".js", ".jsx", ".mjs", ".cjs"]

    def __init__(self):
        self._use_simple_parsing = True

    def parse(self, file_path: Path, content: str) -> ParserResult:
        """Parse *content* and return the entities and imports found in it.

        Any exception raised during extraction is recorded in
        ``result.errors`` instead of propagating; whatever was extracted
        before the failure stays on the result.
        """
        result = ParserResult(file_path=file_path, language="javascript")
        try:
            result.entities = self.extract_entities(content, file_path)
            result.imports = self.extract_imports(content)
        except Exception as e:
            result.errors.append(f"Parse error: {e!s}")
        return result

    def extract_entities(self, content: str, file_path: Path) -> list[Entity]:
        """Return all function entities followed by all class entities."""
        entities = []
        entities.extend(self._extract_functions(content, file_path))
        entities.extend(self._extract_classes(content, file_path))
        return entities

    def _build_callable(
        self,
        name: str,
        entity_type: EntityType,
        params: str,
        file_path: Path,
        lines: list[str],
        start_index: int,
        end_index: int,
        line_offset: int = 0,
    ) -> Entity:
        """Create the entity for a function or method.

        Args:
            name: Identifier of the function or method.
            entity_type: ``EntityType.FUNCTION`` or ``EntityType.METHOD``.
            params: Raw text found between the parentheses of the header.
            file_path: File the entity belongs to.
            lines: Lines that *start_index* and *end_index* refer to.
            start_index: 0-based index of the header line in *lines*.
            end_index: 0-based exclusive end index of the entity in *lines*.
            line_offset: Number of file lines that precede ``lines[0]``.
        """
        code = '\n'.join(lines[start_index:end_index])
        return Entity(
            name=name,
            entity_type=entity_type,
            file_path=file_path,
            start_line=line_offset + start_index + 1,
            end_line=line_offset + end_index,
            code=code,
            attributes={"parameters": _split_parameters(params)},
            calls=self._extract_function_calls(code),
        )

    def _extract_functions(self, content: str, file_path: Path) -> list[Entity]:
        """Find ``function`` declarations and arrow functions bound to a name.

        Every line is examined, so nested functions are reported as well.
        """
        functions = []
        lines = content.split('\n')

        for index, raw_line in enumerate(lines):
            line = raw_line.strip()
            indent = _indent_width(raw_line)

            match = _FUNCTION_RE.match(line)
            if match:
                # Scan from the end of the parameter list so that braces in
                # default values or destructuring are not taken for the body.
                end_index = self._find_braces_end(
                    lines, index, indent + match.end(2)
                )
            else:
                match = _ARROW_RE.match(line)
                if match is None:
                    continue
                if line[match.end():].lstrip().startswith('{'):
                    # Block body: runs up to the matching closing brace.
                    end_index = self._find_braces_end(
                        lines, index, indent + match.end()
                    )
                else:
                    end_index = self._find_expression_end(lines, index)

            functions.append(
                self._build_callable(
                    match.group(1),
                    EntityType.FUNCTION,
                    match.group(2),
                    file_path,
                    lines,
                    index,
                    end_index,
                )
            )

        return functions

    def _extract_classes(self, content: str, file_path: Path) -> list[Entity]:
        """Find class declarations and attach their methods as children."""
        classes = []
        lines = content.split('\n')

        for index, raw_line in enumerate(lines):
            match = _CLASS_RE.match(raw_line.strip())
            if match is None:
                continue

            start_line = index + 1
            # Start scanning where the class body is expected to open.
            end_index = self._find_braces_end(
                lines, index, _indent_width(raw_line) + match.start(2)
            )
            class_body = '\n'.join(lines[index + 1:end_index])

            classes.append(
                Entity(
                    name=match.group(1),
                    entity_type=EntityType.CLASS,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_index,
                    code='\n'.join(lines[index:end_index]),
                    children=self._extract_methods(
                        class_body, file_path, start_line
                    ),
                )
            )

        return classes

    def _extract_methods(self, content: str, file_path: Path, base_line: int) -> list[Entity]:
        """Find the methods declared in a class body.

        Args:
            content: The lines following the class header, joined by ``\\n``.
            file_path: File the class belongs to.
            base_line: 1-based line number of the class header; the first
                line of *content* is therefore file line ``base_line + 1``.

        Returns:
            One ``EntityType.METHOD`` entity per method, in source order.
        """
        methods = []
        lines = content.split('\n')
        index = 0

        while index < len(lines):
            match = _METHOD_RE.match(lines[index])
            if (
                match is None
                or match.group(1) in _NOT_METHODS
                or not _opens_block(lines, index, match)
            ):
                index += 1
                continue

            end_index = self._find_braces_end(lines, index, match.end(2))
            methods.append(
                self._build_callable(
                    match.group(1),
                    EntityType.METHOD,
                    match.group(2),
                    file_path,
                    lines,
                    index,
                    end_index,
                    line_offset=base_line,
                )
            )
            # Skip the body so statements inside it are not taken for methods.
            index = max(end_index, index + 1)

        return methods

    def _extract_function_calls(self, code: str) -> list[str]:
        """Return the distinct called names found in *code*.

        Only calls directly followed by ``;`` and a call that ends *code* are
        detected. Names are returned in first-seen order (statement calls
        first) so that the result is reproducible between runs.
        """
        seen = {}
        for pattern in _CALL_PATTERNS:
            for match in pattern.finditer(code):
                name = match.group(1)
                if name not in _NOT_CALLS:
                    seen[name] = None
        return list(seen)

    def _find_braces_end(self, lines: list[str], start_index: int, start_col: int = 0) -> int:
        """Locate the end of the brace-delimited block starting at a line.

        Braces inside string literals, template literals, ``//`` comments and
        ``/* */`` comments are ignored.

        Args:
            lines: All lines being scanned.
            start_index: 0-based index of the line where scanning starts.
            start_col: Column of ``lines[start_index]`` at which to start;
                used to skip braces that belong to a parameter list.

        Returns:
            The 0-based index just past the line on which the brace depth
            returns to zero, or ``len(lines)`` if it never does.
        """
        depth = 0
        quote = None  # delimiter of the string being scanned, if any
        in_block_comment = False

        for i in range(start_index, len(lines)):
            line = lines[i]
            length = len(line)
            j = start_col if i == start_index else 0

            while j < length:
                char = line[j]
                if in_block_comment:
                    if line.startswith('*/', j):
                        in_block_comment = False
                        j += 2
                    else:
                        j += 1
                    continue
                if quote is not None:
                    if char == '\\':
                        j += 2  # skip the escaped character
                        continue
                    if char == quote:
                        quote = None
                    j += 1
                    continue
                if line.startswith('//', j):
                    break  # the rest of the line is a comment
                if line.startswith('/*', j):
                    in_block_comment = True
                    j += 2
                    continue
                if char in ('"', "'", '`'):
                    quote = char
                elif char == '{':
                    depth += 1
                elif char == '}':
                    depth -= 1
                    if depth == 0:
                        return i + 1
                j += 1

            # Only template literals may continue on the next line.
            if quote in ('"', "'"):
                quote = None

        return len(lines)

    def _find_expression_end(self, lines: list[str], start_index: int) -> int:
        """Return the index of the next line holding code after *start_index*.

        Blank lines and lines starting with ``//`` or ``*`` are skipped;
        ``len(lines)`` is returned when no such line exists.
        """
        for i in range(start_index + 1, len(lines)):
            line = lines[i].strip()
            if line and not line.startswith(('//', '*')):
                return i
        return len(lines)

    def extract_imports(self, content: str) -> list[str]:
        """Return the module specifiers imported by *content*, in source order.

        Recognises single-line ES ``import`` statements (named, default,
        namespace and side-effect forms) and ``require`` calls assigned with
        ``const``, ``let`` or ``var``.
        """
        imports = []

        for raw_line in content.split('\n'):
            line = raw_line.strip()

            import_match = _IMPORT_RE.match(line)
            if import_match:
                imports.append(import_match.group(1))

            require_match = _REQUIRE_RE.match(line)
            if require_match:
                imports.append(require_match.group(1))

        return imports

    def extract_calls(self, content: str) -> list[str]:
        """Return the distinct called names found anywhere in *content*."""
        return self._extract_function_calls(content)