Add parsers module
This commit is contained in:
170
src/parsers/python.py
Normal file
170
src/parsers/python.py
Normal file
@@ -0,0 +1,170 @@
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
from src.parsers.base import BaseParser, ParserResult, Entity, EntityType
|
||||
|
||||
|
||||
class PythonParser(BaseParser):
    """Regex-based, line-oriented extractor for Python source text.

    The parser never builds a syntax tree: it scans the text line by line
    for ``def`` / ``class`` headers and uses indentation to decide where
    each block ends.  Consequences of that heuristic:

    * only headers whose signature fits on a single line are recognised;
    * a parameter list containing ``)`` (e.g. a call used as a default
      value) is not recognised;
    * text inside strings and comments is scanned like code.
    """

    SUPPORTED_EXTENSIONS = [".py", ".pyi"]

    # Header of a (possibly ``async``) function, with an optional return
    # annotation.  Groups: 1 = leading whitespace, 2 = name, 3 = raw params.
    _DEF_RE = re.compile(
        r'^(\s*)(?:async\s+)?def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]*)\)'
        r'\s*(?:->.*?)?:'
    )
    # Header of a class.  Groups: 1 = leading whitespace, 2 = name, 3 = bases.
    _CLASS_RE = re.compile(
        r'^(\s*)class\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*(\([^)]*\))?\s*:'
    )
    # ``name(``, optionally preceded by ``def`` / ``class`` (group 1) so that
    # definitions can be told apart from calls.  Group 2 is the name.
    _CALL_RE = re.compile(
        r'(?:\b(def|class)\s+)?\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\('
    )
    _FROM_IMPORT_RE = re.compile(r'from\s+([\w.]+)\s+import')

    # Names that may be followed by "(" without being a call of interest:
    # every Python keyword, plus the builtins this parser chooses to ignore.
    _NON_CALL_NAMES = frozenset({
        'False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await',
        'break', 'class', 'continue', 'def', 'del', 'elif', 'else', 'except',
        'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is',
        'lambda', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return', 'try',
        'while', 'with', 'yield',
        'print', 'len', 'str', 'int', 'list', 'dict', 'range', 'open', 'super',
    })

    def __init__(self):
        """Create a parser instance."""
        # Set here but not read anywhere else in this class.
        self._use_simple_parsing = True

    def parse(self, file_path: Path, content: str) -> ParserResult:
        """Parse ``content`` and return its entities and imports.

        Args:
            file_path: Path recorded on the result and on every entity.
            content: Full text of the Python file.

        Returns:
            A ``ParserResult``.  This method does not raise on extraction
            failure: the exception text is appended to ``result.errors``
            and whatever was extracted before the failure is kept.
        """
        result = ParserResult(file_path=file_path, language="python")
        try:
            result.entities = self.extract_entities(content, file_path)
            result.imports = self.extract_imports(content)
        except Exception as exc:
            # Deliberate catch-all at the parser boundary: one unparsable
            # file is reported through the result instead of propagating.
            result.errors.append(f"Parse error: {exc}")
        return result

    def extract_entities(self, content: str, file_path: Path) -> list[Entity]:
        """Return all function entities followed by all class entities.

        Functions are matched at any indentation, so methods and nested
        functions appear in the flat function list as well as (for methods)
        in their class's ``children``.
        """
        return [
            *self._extract_functions(content, file_path),
            *self._extract_classes(content, file_path),
        ]

    def _extract_functions(self, content: str, file_path: Path) -> list[Entity]:
        """Build a ``FUNCTION`` entity for every ``def`` header in ``content``.

        ``start_line`` and ``end_line`` are 1-based and inclusive.
        """
        lines = content.split('\n')
        functions = []
        for index, line in enumerate(lines):
            match = self._DEF_RE.match(line)
            if match is None:
                continue
            indent, name, params = match.groups()
            # Exclusive 0-based end index == inclusive 1-based last line.
            end_line = self._find_block_end(lines, index, indent)
            code = '\n'.join(lines[index:end_line])
            functions.append(Entity(
                name=name,
                entity_type=EntityType.FUNCTION,
                file_path=file_path,
                start_line=index + 1,
                end_line=end_line,
                code=code,
                attributes={"parameters": self._split_parameters(params)},
                calls=self._extract_function_calls(code),
            ))
        return functions

    def _extract_classes(self, content: str, file_path: Path) -> list[Entity]:
        """Build a ``CLASS`` entity for every ``class`` header in ``content``.

        Each entity's ``children`` holds the methods found in the class
        body, with line numbers expressed relative to the whole file.
        """
        lines = content.split('\n')
        classes = []
        for index, line in enumerate(lines):
            match = self._CLASS_RE.match(line)
            if match is None:
                continue
            indent, name = match.group(1), match.group(2)
            start_line = index + 1
            end_line = self._find_block_end(lines, index, indent)
            body = '\n'.join(lines[index + 1:end_line])
            # Body line 0 is file line ``start_line + 1``, so the class's own
            # 1-based line number is exactly the offset the helper needs.
            # One extra space of indent is enough to mean "inside the class".
            methods = self._extract_methods_from_content(
                body, file_path, indent + ' ', line_offset=start_line
            )
            classes.append(Entity(
                name=name,
                entity_type=EntityType.CLASS,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                code='\n'.join(lines[index:end_line]),
                children=methods,
            ))
        return classes

    def _extract_methods_from_content(
        self,
        content: str,
        file_path: Path,
        base_indent: str,
        line_offset: int = 0,
    ) -> list[Entity]:
        """Build a ``METHOD`` entity for every ``def`` inside a class body.

        Args:
            content: Text of the class body (header line excluded).
            file_path: Path recorded on every entity.
            base_indent: Whitespace prefix a ``def`` line must start with to
                be considered; deeper indentation is accepted too, so
                functions nested inside methods are also returned.
            line_offset: Number of file lines that precede ``content``.
                With the default of 0 the line numbers are relative to
                ``content`` itself.

        Returns:
            Entities whose ``start_line`` / ``end_line`` are 1-based and
            inclusive, shifted by ``line_offset``.
        """
        lines = content.split('\n')
        methods = []
        for index, line in enumerate(lines):
            match = self._DEF_RE.match(line)
            if match is None or not match.group(1).startswith(base_indent):
                continue
            indent, name, params = match.groups()
            end = self._find_block_end(lines, index, indent)
            code = '\n'.join(lines[index:end])
            methods.append(Entity(
                name=name,
                entity_type=EntityType.METHOD,
                file_path=file_path,
                start_line=line_offset + index + 1,
                end_line=line_offset + end,
                code=code,
                attributes={"parameters": self._split_parameters(params)},
                calls=self._extract_function_calls(code),
            ))
        return methods

    @staticmethod
    def _split_parameters(params: str) -> list[str]:
        """Split a raw parameter list on top-level commas.

        Commas nested inside brackets (``x: dict[str, int]``) do not split.
        Commas inside string defaults are not recognised and still split.
        """
        pieces = []
        current = []
        depth = 0
        for char in params:
            if char in '([{':
                depth += 1
            elif char in ')]}':
                depth -= 1
            if char == ',' and depth <= 0:
                pieces.append(''.join(current))
                current = []
            else:
                current.append(char)
        pieces.append(''.join(current))
        return [piece.strip() for piece in pieces if piece.strip()]

    def _extract_function_calls(self, code: str) -> list[str]:
        """Return the distinct names that appear as ``name(`` in ``code``.

        Names introduced by ``def`` / ``class`` are definitions, not calls,
        and are skipped, as are keywords and the ignored builtins.  The
        result keeps first-occurrence order so it is stable across runs.
        """
        seen = {}
        for match in self._CALL_RE.finditer(code):
            if match.group(1):
                continue  # "def name(" / "class Name(" is not a call
            name = match.group(2)
            if name not in self._NON_CALL_NAMES:
                seen[name] = None
        return list(seen)

    def _find_block_end(self, lines: list[str], start_index: int, indent: str) -> int:
        """Return the exclusive end index of the block opened at ``start_index``.

        The block consists of the header line plus every following line
        indented deeper than ``indent``.  Blank lines and dedented comment
        lines do not close the block, but they are only included when a
        deeper-indented line follows them, so trailing blanks and comments
        are left out.

        Args:
            lines: All lines of the text being scanned.
            start_index: Index of the ``def`` / ``class`` header line.
            indent: Leading whitespace of the header line.

        Returns:
            Index one past the last line of the block; always at least
            ``start_index + 1``.
        """
        header_width = len(indent)
        end = start_index + 1
        for index in range(start_index + 1, len(lines)):
            line = lines[index]
            stripped = line.strip()
            if not stripped:
                continue
            if len(line) - len(line.lstrip()) > header_width:
                end = index + 1
                continue
            if stripped.startswith('#'):
                continue
            break  # first real statement at or above the header's indent
        return end

    def extract_imports(self, content: str) -> list[str]:
        """Return the module names imported by ``content``, in source order.

        ``import a.b as c, d`` yields ``a.b`` and ``d``; ``from x.y import z``
        yields ``x.y``.  Trailing ``#`` comments are ignored.  Duplicates
        are kept.
        """
        imports = []
        for raw_line in content.split('\n'):
            line = raw_line.split('#', 1)[0].strip()
            if line.startswith('import '):
                for part in line[len('import '):].split(','):
                    module = part.split(' as ')[0].strip()
                    if module:
                        imports.append(module)
            elif line.startswith('from '):
                match = self._FROM_IMPORT_RE.match(line)
                if match:
                    imports.append(match.group(1))
        return imports

    def extract_calls(self, content: str) -> list[str]:
        """Return the distinct call names found anywhere in ``content``."""
        return self._extract_function_calls(content)
|
||||
Reference in New Issue
Block a user