Add validators, TUI, and file watcher modules
Some checks failed
CI / test (push) Has been cancelled
Some checks failed
CI / test (push) Has been cancelled
This commit is contained in:
243
src/validators/validator.py
Normal file
243
src/validators/validator.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""Validator module for data format validation."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, List, Optional
|
||||
import json
|
||||
import yaml
|
||||
import tomlkit
|
||||
import pandas as pd
|
||||
from io import StringIO
|
||||
|
||||
|
||||
@dataclass
class ValidationError:
    """A single validation problem, optionally tied to a source position.

    Every field defaults to ``None`` so an error can be built from whatever
    details the underlying parser happens to expose.
    """

    # Line on which the problem was reported, when known.
    line: Optional[int] = None
    # Column on which the problem was reported, when known.
    column: Optional[int] = None
    # Human-readable description of the problem.
    message: Optional[str] = None
    # Short machine-readable category, e.g. 'syntax_error'.
    error_type: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Outcome of running a validator over a piece of content."""

    # Whether the content was accepted.
    valid: bool
    # Name of the format that was checked, e.g. 'json'.
    format_type: str
    # Problems that were found; each instance gets its own empty list
    # unless one is supplied.
    errors: List[ValidationError] = field(default_factory=list)
    # Parsed payload supplied by the validator; None unless provided.
    data: Any = None
|
||||
|
||||
|
||||
class Validator(ABC):
    """Common interface shared by the format-specific validators."""

    # Identifier of the handled format; concrete subclasses override it.
    format_name: str = None

    @abstractmethod
    def validate(self, content: str) -> ValidationResult:
        """Check the syntax of ``content``.

        Args:
            content: Text to be checked.

        Returns:
            A ValidationResult carrying the verdict and any errors.
        """

    def validate_file(self, filepath: str) -> ValidationResult:
        """Read a UTF-8 text file and validate what it contains.

        Args:
            filepath: Location of the file to read.

        Returns:
            The ValidationResult produced by ``validate`` for the file text.
        """
        with open(filepath, encoding='utf-8') as handle:
            text = handle.read()
        return self.validate(text)
|
||||
|
||||
|
||||
class JSONValidator(Validator):
    """Validator for JSON format."""

    format_name = 'json'

    def validate(self, content: str) -> ValidationResult:
        """Validate JSON syntax.

        Args:
            content: JSON string to validate

        Returns:
            ValidationResult with ``valid=True`` and the decoded value in
            ``data`` on success; otherwise ``valid=False`` with a single
            'syntax_error' entry carrying the decoder's line and column.
        """
        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            # JSONDecodeError always carries lineno/colno, so no hasattr()
            # guard is needed before reading them.
            error = ValidationError(
                line=e.lineno,
                column=e.colno,
                message=str(e),
                error_type='syntax_error',
            )
            return ValidationResult(
                valid=False, format_type='json', errors=[error]
            )
        return ValidationResult(valid=True, format_type='json', data=data)
|
||||
|
||||
|
||||
class YAMLValidator(Validator):
    """Validator for YAML format."""

    format_name = 'yaml'

    def validate(self, content: str) -> ValidationResult:
        """Validate YAML syntax.

        Args:
            content: YAML string to validate

        Returns:
            ValidationResult with ``valid=True`` and the loaded value in
            ``data`` on success; otherwise ``valid=False`` with a single
            'syntax_error' entry. The entry's line and column are one-based
            when the parser supplied a position, and ``None`` otherwise.
        """
        try:
            data = yaml.safe_load(content)
        except yaml.YAMLError as e:
            # PyYAML exposes the error position through ``problem_mark``
            # (zero-based ``line`` / ``column``), not through ``line`` /
            # ``column`` attributes on the exception itself. Errors raised
            # without a mark leave the location unset.
            mark = getattr(e, 'problem_mark', None)
            error = ValidationError(
                line=mark.line + 1 if mark is not None else None,
                column=mark.column + 1 if mark is not None else None,
                message=str(e),
                error_type='syntax_error',
            )
            return ValidationResult(
                valid=False, format_type='yaml', errors=[error]
            )
        return ValidationResult(valid=True, format_type='yaml', data=data)
|
||||
|
||||
|
||||
class TOMLValidator(Validator):
    """Checks that text is syntactically valid TOML."""

    format_name = 'toml'

    def validate(self, content: str) -> ValidationResult:
        """Parse ``content`` as TOML and report the outcome.

        Args:
            content: TOML text to check.

        Returns:
            A ValidationResult holding the parsed document on success, or a
            single 'syntax_error' entry when parsing fails.
        """
        try:
            document = tomlkit.parse(content)
        except tomlkit.exceptions.ParseError as exc:
            problem = ValidationError(
                line=getattr(exc, 'line', None),
                column=getattr(exc, 'col', None),
                message=str(exc),
                error_type='syntax_error',
            )
            return ValidationResult(
                valid=False, format_type='toml', errors=[problem]
            )
        return ValidationResult(valid=True, format_type='toml', data=document)
|
||||
|
||||
|
||||
class CSVValidator(Validator):
    """Checks that text can be read as CSV."""

    format_name = 'csv'

    def validate(self, content: str) -> ValidationResult:
        """Read ``content`` as CSV and report the outcome.

        Args:
            content: CSV text to check.

        Returns:
            A ValidationResult holding the rows as a list of record dicts on
            success, or a single 'empty_data' / 'parse_error' entry when the
            text cannot be read.
        """
        try:
            frame = pd.read_csv(StringIO(content))
            records = frame.to_dict(orient='records')
            return ValidationResult(valid=True, format_type='csv', data=records)
        except pd.errors.EmptyDataError:
            problem = ValidationError(
                message="CSV file is empty",
                error_type='empty_data',
            )
        except pd.errors.ParserError as exc:
            problem = ValidationError(
                message=str(exc),
                error_type='parse_error',
            )
        # Only the two handled failure paths reach this point.
        return ValidationResult(valid=False, format_type='csv', errors=[problem])
|
||||
|
||||
|
||||
def get_validator(format_name: str) -> Validator:
    """Build the validator that handles the named format.

    Args:
        format_name: Format identifier (json, yaml, toml, csv); matched
            case-insensitively.

    Returns:
        A new Validator instance for the format.

    Raises:
        ValueError: If format is not supported
    """
    registry = {
        'json': JSONValidator,
        'yaml': YAMLValidator,
        'toml': TOMLValidator,
        'csv': CSVValidator,
    }

    validator_cls = registry.get(format_name.lower())
    if validator_cls is None:
        raise ValueError(f"Unsupported format: {format_name}")
    return validator_cls()
|
||||
|
||||
|
||||
def validate_data(content: str, format_name: str) -> ValidationResult:
    """Validate ``content`` with the validator for ``format_name``.

    Args:
        content: Text to check.
        format_name: Identifier of the format the text is expected to be in.

    Returns:
        The ValidationResult produced by the selected validator.
    """
    return get_validator(format_name).validate(content)
|
||||
|
||||
|
||||
def detect_format(filepath: str) -> Optional[str]:
    """Map a file's extension to a supported format name.

    Args:
        filepath: Path whose extension is inspected (case-insensitively).

    Returns:
        'json', 'yaml', 'toml' or 'csv' for a recognised extension,
        otherwise None.
    """
    import os

    by_extension = {
        '.json': 'json',
        '.yaml': 'yaml',
        '.yml': 'yaml',
        '.toml': 'toml',
        '.csv': 'csv',
    }
    _, extension = os.path.splitext(filepath)
    return by_extension.get(extension.lower())
|
||||
Reference in New Issue
Block a user