Initial upload: CLI Explain Fix project with CI/CD workflow
This commit is contained in:
248
src/cli_explain_fix/parser.py
Normal file
248
src/cli_explain_fix/parser.py
Normal file
@@ -0,0 +1,248 @@
|
||||
"""Error pattern parsing module."""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, List, Dict, Any
|
||||
|
||||
|
||||
@dataclass
class ParsedError:
    """Structured description of one error extracted from raw tool output.

    Only ``error_type``, ``message`` and ``language`` are required; the
    location fields default to ``None`` and ``stack_frames`` to a fresh empty
    list per instance.
    """

    # Mandatory classification of the error.
    error_type: str
    message: str
    language: str
    # Optional source location.
    file_name: Optional[str] = None
    line_number: Optional[int] = None
    column_number: Optional[int] = None
    # One dictionary per stack frame.
    stack_frames: List[Dict[str, Any]] = field(default_factory=list)
    # The text this record was built from.
    raw_input: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields meant for JSON output as a plain dictionary.

        ``raw_input`` is not part of the result.  Values are referenced, not
        copied, so ``stack_frames`` in the result is the instance's own list.
        """
        exported_fields = (
            'error_type',
            'message',
            'language',
            'file_name',
            'line_number',
            'column_number',
            'stack_frames',
        )
        return {name: getattr(self, name) for name in exported_fields}
|
||||
|
||||
|
||||
class ErrorParser:
    """Turn raw error output into structured ``ParsedError`` records.

    ``parse`` first decides which language or tool produced the text (see
    ``detect_language``), then tries that language's regular expressions in
    order and hands the first match to the paired ``_parse_*`` method.  Text
    that nothing recognises comes back as an ``UnknownError`` whose message is
    the stripped input.
    """

    # Last line of a Python traceback: an optionally module-qualified name
    # ending in ``Error``/``Exception`` (or a bare ``Exception``), followed by
    # an optional ``: message``.  The message is optional because Python
    # prints e.g. a bare ``AssertionError`` when the exception has no text.
    _PY_EXCEPTION_LINE = (
        r'(?P<exception>[A-Za-z_][\w.]*(?:Error|Exception)|Exception)'
        r'(?:: (?P<message>.+))?$'
    )

    PYTHON_TRACEBACK_PATTERN = re.compile(
        r'^(?:Traceback \(most recent call last\):\n)?'
        r'(?P<frame>[ \t]+File "([^"]+)", line (?P<line>\d+), in .+\n'
        r'[ \t]+.+\n)?'
        + _PY_EXCEPTION_LINE,
        re.MULTILINE
    )

    PYTHON_SIMPLE_PATTERN = re.compile(
        r'^' + _PY_EXCEPTION_LINE,
        re.MULTILINE
    )

    # One traceback frame.  ``, in <function>`` is absent for SyntaxError
    # frames and the source line is absent for code without a file (e.g.
    # ``<stdin>``), so both are optional.  The lookahead stops the optional
    # source line from swallowing the next ``File "..."`` frame header; the
    # ``[ \t]`` alternative prevents backtracking from sidestepping it.
    _PY_FRAME_PATTERN = re.compile(
        r'File "(?P<file>[^"\n]+)", line (?P<line>\d+)'
        r'(?:, in (?P<function>[^\n]+))?'
        r'(?:\n[ \t]+(?![ \t]|File ")(?P<code>[^\n]+))?'
    )

    JS_ERROR_PATTERN = re.compile(
        r'^(?:TypeError|ReferenceError|SyntaxError|RangeError|Error): (?P<message>.+)$',
        re.MULTILINE
    )

    # The newline after the ``panic:`` line is optional so that a single-line
    # panic (no goroutine dump, no trailing newline) still matches.
    GO_PANIC_PATTERN = re.compile(
        r'^panic: (?P<message>.+)$\n?'
        r'(?:\n.*goroutine \d+ \[.*\]:\n)?'
        r'(?P<frame>.*\n)*',
        re.MULTILINE
    )

    # Rust 1.73+ layout: location on the header line, message on the next.
    RUST_PANIC_LOCATION_PATTERN = re.compile(
        r"^thread .* panicked at (?P<file>[^\n]+?):(?P<line>\d+):(?P<column>\d+):\n"
        r"(?P<message>.+)$",
        re.MULTILINE
    )

    # Pre-1.73 layout: single-quoted message followed by the location.
    RUST_PANIC_LEGACY_PATTERN = re.compile(
        r"^thread .* panicked at '(?P<message>.+)', "
        r"(?P<file>[^\n]+?):(?P<line>\d+):(?P<column>\d+)$",
        re.MULTILINE
    )

    # Original double-quoted form, kept as the last Rust fallback.
    RUST_PANIC_PATTERN = re.compile(
        r'^thread .* panicked at .*"(?P<message>.+)"',
        re.MULTILINE
    )

    GENERIC_CLI_PATTERN = re.compile(
        r'^(?:error|Error|ERROR)(?::?)\s*(?P<message>.+)$',
        re.MULTILINE
    )

    # ``(?:[\w.]+\.)?`` accepts module-qualified names such as
    # ``json.decoder.JSONDecodeError`` / ``yaml.scanner.ScannerError``.
    JSON_PARSE_PATTERN = re.compile(
        r'^(?:(?:[\w.]+\.)?JSONDecodeError|Expecting value|syntax error): (?P<message>.+)$',
        re.MULTILINE
    )

    YAML_PARSE_PATTERN = re.compile(
        r'^(?:[\w.]+\.)?(?:YAMLError|ParserError|ScannerError): (?P<message>.+)$',
        re.MULTILINE
    )

    # Ordered (regex, language) hints for ``detect_language``; first hit wins.
    # The Rust hint precedes the Go ``panic:`` hint so that a Rust panic whose
    # message happens to contain "panic:" is not classified as Go.
    _LANGUAGE_HINTS = (
        (re.compile(r'Traceback \(most recent call last\):|File "[^"\n]+", line \d+'), 'python'),
        (re.compile(r'thread .* panicked at'), 'rust'),
        (re.compile(r'panic:'), 'go'),
        (re.compile(r'goroutine \d+'), 'go'),
        (re.compile(r'JSONDecodeError|Expecting value'), 'json'),
        (re.compile(r'YAMLError|ParserError|ScannerError'), 'yaml'),
        (re.compile(r'^(?:TypeError|ReferenceError|SyntaxError|RangeError):', re.MULTILINE), 'javascript'),
        (re.compile(r'[A-Za-z]+Error:'), 'python'),
        (re.compile(r'^(?:error|Error|ERROR):', re.MULTILINE), 'cli'),
    )

    def __init__(self) -> None:
        """Build the per-language table of ``(pattern, handler)`` pairs."""
        self.language_patterns = {
            'python': [
                (self.PYTHON_TRACEBACK_PATTERN, self._parse_python_traceback),
                (self.PYTHON_SIMPLE_PATTERN, self._parse_python_simple),
            ],
            'javascript': [
                (self.JS_ERROR_PATTERN, self._parse_js_error),
            ],
            'go': [
                (self.GO_PANIC_PATTERN, self._parse_go_panic),
            ],
            'rust': [
                # Most specific layouts first; the quoted fallback comes last.
                (self.RUST_PANIC_LOCATION_PATTERN, self._parse_rust_panic),
                (self.RUST_PANIC_LEGACY_PATTERN, self._parse_rust_panic),
                (self.RUST_PANIC_PATTERN, self._parse_rust_panic),
            ],
            'json': [
                (self.JSON_PARSE_PATTERN, self._parse_json_error),
            ],
            'yaml': [
                (self.YAML_PARSE_PATTERN, self._parse_yaml_error),
            ],
        }

    def detect_language(self, input_text: str) -> str:
        """Guess which language or tool produced ``input_text``.

        Args:
            input_text: Raw error output.

        Returns:
            One of ``'python'``, ``'rust'``, ``'go'``, ``'json'``, ``'yaml'``,
            ``'javascript'``, ``'cli'`` or ``'unknown'``.
        """
        text = input_text.strip()
        for hint, language in self._LANGUAGE_HINTS:
            if hint.search(text):
                return language
        return 'unknown'

    def parse(self, input_text: str, language: Optional[str] = None) -> "ParsedError":
        """Parse error text and extract structured information.

        Args:
            input_text: Raw error output.
            language: Language key to use; detected from the text when ``None``.

        Returns:
            The record built by the first matching pattern for the language,
            a ``GenericError`` for ``'cli'`` text, or an ``UnknownError`` whose
            message is the stripped input (or a placeholder when it is blank).
        """
        if language is None:
            language = self.detect_language(input_text)

        for pattern, handler in self.language_patterns.get(language, ()):
            match = pattern.search(input_text)
            if match:
                return handler(match, input_text, language)

        if language == 'cli':
            match = self.GENERIC_CLI_PATTERN.search(input_text)
            if match:
                return ParsedError(
                    error_type='GenericError',
                    message=match.group('message').strip(),
                    language='cli',
                    raw_input=input_text,
                )

        stripped = input_text.strip()
        return ParsedError(
            error_type='UnknownError',
            message=stripped or 'Unknown error occurred',
            language=language,
            raw_input=input_text,
        )

    def _parse_python_traceback(self, match: re.Match, input_text: str, language: str) -> "ParsedError":
        """Parse Python traceback format.

        The reported exception is the last exception line in the text and the
        reported location is the last frame, so both describe the same (final)
        error even when the traceback is chained.
        """
        exception_lines = list(self.PYTHON_SIMPLE_PATTERN.finditer(input_text))
        final = exception_lines[-1] if exception_lines else match

        stack_frames = []
        for frame in self._PY_FRAME_PATTERN.finditer(input_text):
            function = frame.group('function')
            code = frame.group('code')
            stack_frames.append({
                'file': frame.group('file'),
                'line': int(frame.group('line')),
                'function': function.strip() if function else None,
                'code': code.strip() if code else None,
            })

        file_name = None
        line_number = None
        if stack_frames:
            innermost = stack_frames[-1]
            file_name = innermost['file']
            line_number = innermost['line']

        return ParsedError(
            error_type=final.group('exception'),
            # Exceptions raised without text have no ``: message`` part.
            message=final.group('message') or '',
            language=language,
            file_name=file_name,
            line_number=line_number,
            stack_frames=stack_frames,
            raw_input=input_text,
        )

    def _parse_python_simple(self, match: re.Match, input_text: str, language: str) -> "ParsedError":
        """Parse a lone Python ``ExceptionName: message`` line."""
        return ParsedError(
            error_type=match.group('exception'),
            message=match.group('message') or '',
            language=language,
            raw_input=input_text,
        )

    def _parse_js_error(self, match: re.Match, input_text: str, language: str) -> "ParsedError":
        """Parse JavaScript error format; the type is the text before the first colon."""
        head, colon, _ = match.group(0).partition(':')
        return ParsedError(
            error_type=head if colon else 'Error',
            message=match.group('message'),
            language=language,
            raw_input=input_text,
        )

    def _parse_go_panic(self, match: re.Match, input_text: str, language: str) -> "ParsedError":
        """Parse Go panic format."""
        return ParsedError(
            error_type='panic',
            message=match.group('message'),
            language=language,
            raw_input=input_text,
        )

    def _parse_rust_panic(self, match: re.Match, input_text: str, language: str) -> "ParsedError":
        """Parse Rust panic format.

        File, line and column are filled in when the matching pattern defines
        those groups; the quoted fallback pattern does not, so they stay
        ``None`` for it.
        """
        found = match.groupdict()
        line = found.get('line')
        column = found.get('column')
        return ParsedError(
            error_type='panic',
            message=match.group('message'),
            language=language,
            file_name=found.get('file'),
            line_number=int(line) if line else None,
            column_number=int(column) if column else None,
            raw_input=input_text,
        )

    def _parse_json_error(self, match: re.Match, input_text: str, language: str) -> "ParsedError":
        """Parse JSON parse error format."""
        return ParsedError(
            error_type='JSONParseError',
            message=match.group('message'),
            language=language,
            raw_input=input_text,
        )

    def _parse_yaml_error(self, match: re.Match, input_text: str, language: str) -> "ParsedError":
        """Parse YAML parse error format."""
        return ParsedError(
            error_type='YAMLParseError',
            message=match.group('message'),
            language=language,
            raw_input=input_text,
        )
|
||||
Reference in New Issue
Block a user