Add parsers module
This commit is contained in:
219
src/parsers/javascript.py
Normal file
219
src/parsers/javascript.py
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
|
||||||
|
from src.parsers.base import BaseParser, ParserResult, Entity, EntityType
|
||||||
|
|
||||||
|
|
||||||
|
class JavaScriptParser(BaseParser):
    """Heuristic, regex-based parser for JavaScript source files.

    The parser works line by line: it recognises function declarations,
    arrow functions bound to ``const``/``let``/``var``, classes and their
    methods, and module imports. It is not a real JavaScript parser, so
    unusual formatting (regex literals containing braces, headers split
    over several lines, ...) may be reported imprecisely.

    All line numbers stored on entities are 1-based and inclusive.
    """

    SUPPORTED_EXTENSIONS = [".js", ".jsx", ".mjs", ".cjs"]

    # Names that syntactically look like a method header (``name(...)``)
    # but are control-flow keywords.
    _NON_METHOD_NAMES = frozenset(
        {"if", "while", "for", "switch", "try", "catch"}
    )

    # Names that syntactically look like a call but are not user functions.
    _NON_CALL_NAMES = frozenset(
        {
            "if", "while", "for", "switch", "catch", "return", "throw",
            "new", "function", "typeof", "console",
        }
    )

    # JavaScript identifiers may contain ``$`` in addition to word chars.
    _IDENT = r"[A-Za-z_$][A-Za-z0-9_$]*"

    # ``function name(params)``, optionally exported / async / generator.
    _FUNCTION_DECL_RE = re.compile(
        r"(?:export\s+(?:default\s+)?)?(?:async\s+)?"
        r"function(?:\s*\*\s*|\s+)(" + _IDENT + r")\s*\(([^)]*)\)"
    )

    # ``const name = (params) =>`` or ``const name = param =>``.
    _ARROW_RE = re.compile(
        r"(?:export\s+)?(?:const|let|var)\s+(" + _IDENT + r")\s*=\s*"
        r"(?:async\b\s*)?(?:\(([^)]*)\)|(" + _IDENT + r"))\s*=>"
    )

    # ``class Name`` with an optional ``extends`` clause; the opening brace
    # may be on the same line or on a following one.
    _CLASS_RE = re.compile(
        r"(?:export\s+(?:default\s+)?)?class\s+(" + _IDENT + r")"
        r"(?:\s+extends\s+[^{]+?)?\s*(?:\{|$)"
    )

    # Method header inside a class body, with optional modifiers.
    _METHOD_RE = re.compile(
        r"\s*(?:static\s+)?(?:async\s+)?(?:\*\s*)?(?:(?:get|set)\s+)?"
        r"(" + _IDENT + r")\s*\(([^)]*)\)"
    )

    # A call is recognised when it is followed by ``;`` or ends a line.
    _CALL_RES = (
        re.compile(r"(?<![A-Za-z0-9_$])(" + _IDENT + r")\s*\([^)]*\)\s*;"),
        re.compile(
            r"(?<![A-Za-z0-9_$])(" + _IDENT + r")\s*\([^)]*\)[ \t\r]*$",
            re.MULTILINE,
        ),
    )

    # Text immediately preceding a name that marks it as a declaration.
    _DECL_PREFIX_RE = re.compile(r"\bfunction\s*\*?\s*$")

    # ``import ... from 'module'`` and the side-effect form ``import 'module'``.
    _IMPORT_RE = re.compile(
        r"import\b\s*(?:[^'\"]*?\bfrom\s*)?['\"]([^'\"]+)['\"]"
    )

    # Last line of a multi-line import (``} from 'module'``) or a re-export
    # (``export ... from 'module'``).
    _FROM_TAIL_RE = re.compile(
        r"(?:\}|export\b)[^'\"]*?\bfrom\s*['\"]([^'\"]+)['\"]"
    )

    # CommonJS ``require('module')`` anywhere on a line.
    _REQUIRE_RE = re.compile(
        r"(?<![A-Za-z0-9_$.])require\s*\(\s*['\"]([^'\"]+)['\"]\s*\)"
    )

    def __init__(self):
        """Create a parser instance."""
        # Flag kept from the original implementation; nothing in this class
        # reads it.
        self._use_simple_parsing = True

    def parse(self, file_path: Path, content: str) -> ParserResult:
        """Parse one JavaScript file.

        Args:
            file_path: Location of the file; stored on the result and on
                every extracted entity.
            content: Full text of the file.

        Returns:
            A ``ParserResult`` with ``entities`` and ``imports`` filled in.
            If extraction raises, the message is appended to
            ``result.errors`` as ``"Parse error: ..."`` and whatever was
            extracted before the failure is kept.
        """
        result = ParserResult(file_path=file_path, language="javascript")
        try:
            result.entities = self.extract_entities(content, file_path)
            result.imports = self.extract_imports(content)
        except Exception as exc:
            # Deliberate best-effort boundary: one bad file is reported on
            # the result instead of propagating to the caller.
            result.errors.append(f"Parse error: {exc}")
        return result

    def extract_entities(self, content: str, file_path: Path) -> list[Entity]:
        """Return all functions followed by all classes found in *content*."""
        return [
            *self._extract_functions(content, file_path),
            *self._extract_classes(content, file_path),
        ]

    @staticmethod
    def _split_parameters(params: str) -> list[str]:
        """Split a raw parameter list on commas, dropping empty entries."""
        return [part.strip() for part in params.split(",") if part.strip()]

    def _build_callable_entity(
        self,
        name: str,
        params: str,
        entity_type,
        file_path: Path,
        code: str,
        start_line: int,
        end_line: int,
    ) -> Entity:
        """Build the ``Entity`` shared by functions, arrows and methods."""
        return Entity(
            name=name,
            entity_type=entity_type,
            file_path=file_path,
            start_line=start_line,
            end_line=end_line,
            code=code,
            attributes={"parameters": self._split_parameters(params)},
            calls=self._extract_function_calls(code),
        )

    def _extract_functions(self, content: str, file_path: Path) -> list[Entity]:
        """Extract function declarations and named arrow functions.

        Args:
            content: Full text of the file.
            file_path: Path stored on each entity.

        Returns:
            One ``EntityType.FUNCTION`` entity per recognised definition,
            in source order.
        """
        functions = []
        lines = content.split("\n")

        for index, raw_line in enumerate(lines):
            stripped = raw_line.strip()
            # Column of ``stripped[0]`` inside ``raw_line``; used to turn
            # match offsets into offsets within the unstripped line.
            indent = len(raw_line) - len(raw_line.lstrip())

            decl = self._FUNCTION_DECL_RE.match(stripped)
            if decl:
                # Start brace matching after the parameter list so braces
                # in destructured/default parameters do not end the body.
                end_line = self._find_braces_end(
                    lines, index, indent + decl.end()
                )
                functions.append(
                    self._build_callable_entity(
                        decl.group(1),
                        decl.group(2),
                        EntityType.FUNCTION,
                        file_path,
                        "\n".join(lines[index:end_line]),
                        index + 1,
                        end_line,
                    )
                )
                continue

            arrow = self._ARROW_RE.match(stripped)
            if not arrow:
                continue

            params = arrow.group(2)
            if params is None:
                # Single parameter written without parentheses.
                params = arrow.group(3)

            body_col = indent + arrow.end()
            body_start = raw_line[body_col:].lstrip()
            if body_start.startswith(("{", "({")):
                # Block body (or parenthesised object literal): the
                # definition ends at the matching closing brace.
                end_line = self._find_braces_end(lines, index, body_col)
            else:
                end_line = self._find_expression_end(lines, index)

            functions.append(
                self._build_callable_entity(
                    arrow.group(1),
                    params,
                    EntityType.FUNCTION,
                    file_path,
                    "\n".join(lines[index:end_line]),
                    index + 1,
                    end_line,
                )
            )

        return functions

    def _extract_classes(self, content: str, file_path: Path) -> list[Entity]:
        """Extract class declarations together with their methods.

        Args:
            content: Full text of the file.
            file_path: Path stored on each entity.

        Returns:
            One ``EntityType.CLASS`` entity per class, with its methods in
            ``children``.
        """
        classes = []
        lines = content.split("\n")

        for index, raw_line in enumerate(lines):
            header = self._CLASS_RE.match(raw_line.strip())
            if not header:
                continue

            start_line = index + 1
            end_line = self._find_braces_end(lines, index)

            # Everything after the header line up to and including the
            # line holding the closing brace.
            class_body = "\n".join(lines[index + 1:end_line])

            classes.append(
                Entity(
                    name=header.group(1),
                    entity_type=EntityType.CLASS,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    code="\n".join(lines[index:end_line]),
                    children=self._extract_methods(
                        class_body, file_path, start_line
                    ),
                )
            )

        return classes

    def _extract_methods(
        self, content: str, file_path: Path, base_line: int
    ) -> list[Entity]:
        """Extract the methods of a single class body.

        Args:
            content: Class body, starting with the line right after the
                class header line.
            file_path: Path stored on each entity.
            base_line: 1-based file line number of the class header line;
                used to convert local indices to file line numbers.

        Returns:
            One ``EntityType.METHOD`` entity per recognised method.
        """
        methods = []
        lines = content.split("\n")

        idx = 0
        while idx < len(lines):
            header = self._METHOD_RE.match(lines[idx])
            if not header or header.group(1) in self._NON_METHOD_NAMES:
                idx += 1
                continue

            # Index (within ``lines``) one past the closing brace line.
            local_end = self._find_braces_end(lines, idx, header.end())

            methods.append(
                self._build_callable_entity(
                    header.group(1),
                    header.group(2),
                    EntityType.METHOD,
                    file_path,
                    # Slice with the local index, not the file line number.
                    "\n".join(lines[idx:local_end]),
                    # ``lines[0]`` is file line ``base_line + 1``.
                    base_line + idx + 1,
                    base_line + local_end,
                )
            )

            # Skip the method body so call statements inside it are not
            # mistaken for further method headers.
            idx = max(local_end, idx + 1)

        return methods

    def _extract_function_calls(self, code: str) -> list[str]:
        """Return the names of functions that appear to be called in *code*.

        A call is recognised when ``name(...)`` is followed by ``;`` or is
        the last thing on a line. Keywords are ignored, as is the name in a
        ``function name(...)`` declaration.

        Returns:
            Unique names in order of first appearance.
        """
        hits = sorted(
            (match.start(1), match.group(1))
            for pattern in self._CALL_RES
            for match in pattern.finditer(code)
        )

        seen = {}
        for start, name in hits:
            if name in self._NON_CALL_NAMES:
                continue
            # A short look-back is enough to spot ``function`` / ``function*``.
            if self._DECL_PREFIX_RE.search(code[max(0, start - 32):start]):
                continue
            seen.setdefault(name, None)
        # dict keys keep insertion order, giving a deterministic result.
        return list(seen)

    def _find_braces_end(
        self, lines: list[str], start_index: int, start_col: int = 0
    ) -> int:
        """Find where the brace block opened at or after a position closes.

        Braces inside string literals, template literals, ``//`` comments
        and ``/* */`` comments are ignored. Regular-expression literals are
        not recognised.

        Args:
            lines: All lines being scanned.
            start_index: Index of the line on which scanning starts.
            start_col: Column on the first line at which scanning starts.

        Returns:
            The index one past the line containing the closing brace that
            brings the nesting depth back to zero (equivalently, that
            line's 1-based number), or ``len(lines)`` when no such brace
            exists.
        """
        depth = 0
        quote = None  # Active string delimiter, or None outside strings.
        in_block_comment = False

        for idx in range(start_index, len(lines)):
            text = lines[idx]
            pos = start_col if idx == start_index else 0

            while pos < len(text):
                char = text[pos]

                if in_block_comment:
                    if text.startswith("*/", pos):
                        in_block_comment = False
                        pos += 2
                    else:
                        pos += 1
                    continue

                if quote is not None:
                    if char == "\\":
                        pos += 2  # Skip the escaped character as well.
                        continue
                    if char == quote:
                        quote = None
                    pos += 1
                    continue

                if text.startswith("//", pos):
                    break  # Rest of the line is a comment.
                if text.startswith("/*", pos):
                    in_block_comment = True
                    pos += 2
                    continue

                if char in "\"'`":
                    quote = char
                elif char == "{":
                    depth += 1
                elif char == "}":
                    depth -= 1
                    if depth == 0:
                        return idx + 1
                pos += 1

            # Only template literals may span lines; dropping an unclosed
            # quote here keeps a stray apostrophe from hiding later braces.
            if quote in ('"', "'"):
                quote = None

        return len(lines)

    def _find_expression_end(self, lines: list[str], start_index: int) -> int:
        """Find the end of a definition that has no brace-delimited body.

        Args:
            lines: All lines being scanned.
            start_index: Index of the line holding the definition.

        Returns:
            The index of the first later line that is neither blank nor
            starts with ``//`` or ``*``, or ``len(lines)`` if there is none.
        """
        for idx in range(start_index + 1, len(lines)):
            text = lines[idx].strip()
            if text and not text.startswith(("//", "*")):
                return idx
        return len(lines)

    def extract_imports(self, content: str) -> list[str]:
        """Return the module specifiers imported by *content*.

        Recognised forms, one per line: ES ``import`` statements (default,
        named, namespace and side-effect), the closing line of a multi-line
        import (``} from '...'``), re-exports (``export ... from '...'``)
        and CommonJS ``require('...')`` calls.

        Returns:
            Module specifiers in source order; duplicates are kept.
        """
        imports = []

        for raw_line in content.split("\n"):
            line = raw_line.strip()
            if not line or line.startswith(("//", "*", "/*")):
                continue

            es_match = self._IMPORT_RE.match(line) or self._FROM_TAIL_RE.match(line)
            if es_match:
                imports.append(es_match.group(1))

            imports.extend(
                match.group(1) for match in self._REQUIRE_RE.finditer(line)
            )

        return imports

    def extract_calls(self, content: str) -> list[str]:
        """Return the names of functions called anywhere in *content*."""
        return self._extract_function_calls(content)
|
||||||
Reference in New Issue
Block a user