# Provenance (recovered from the git patch this module was shipped in):
#   commit:  27cc5818e4cc19383b002acda848377517ebadb8
#   author:  7000pctAUTO
#   date:    Sun, 1 Feb 2026 19:02:45 +0000
#   subject: Add validators, TUI, and file watcher modules
#   path:    src/validators/validator.py (new file)
"""Validator module for data format validation.

Provides one ``Validator`` subclass per supported text format (JSON, YAML,
TOML, CSV) plus small helpers to pick a validator by name, validate a string
in one call, and guess a format from a file extension.
"""

import json
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from io import StringIO
from typing import Any, List, Optional

import pandas as pd
import tomlkit
import yaml


@dataclass
class ValidationError:
    """Represents a validation error with location information.

    Note that this is a plain data record, not an exception class; it is
    collected in ``ValidationResult.errors`` rather than raised.
    """

    # Position of the problem as reported by the underlying parser, or None
    # when the parser gives no position.
    line: Optional[int] = None
    column: Optional[int] = None
    # Human-readable description (the parser exception rendered with str()).
    message: Optional[str] = None
    # Short machine-readable category, e.g. 'syntax_error', 'parse_error',
    # 'empty_data'.
    error_type: Optional[str] = None


@dataclass
class ValidationResult:
    """Result of a validation operation."""

    # True when the content parsed without error.
    valid: bool
    # Name of the format that was checked ('json', 'yaml', 'toml', 'csv').
    format_type: str
    # Problems found; empty when ``valid`` is True.
    errors: List[ValidationError] = field(default_factory=list)
    # Parsed payload on success, None on failure.
    data: Any = None


class Validator(ABC):
    """Abstract base class for format validators.

    Subclasses set ``format_name`` and implement ``validate``.
    """

    format_name: Optional[str] = None

    @abstractmethod
    def validate(self, content: str) -> ValidationResult:
        """Validate content syntax.

        Args:
            content: String content to validate

        Returns:
            ValidationResult with validation status and errors
        """

    def validate_file(self, filepath: str) -> ValidationResult:
        """Validate file content.

        The file is read as UTF-8 text and handed to ``validate``. Errors
        from opening or decoding the file are not caught here and propagate
        to the caller.

        Args:
            filepath: Path to file

        Returns:
            ValidationResult with validation status and errors
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        return self.validate(content)

    def _success(self, data: Any) -> ValidationResult:
        """Build the result for content that parsed cleanly."""
        return ValidationResult(
            valid=True, format_type=self.format_name, data=data
        )

    def _failure(self, *errors: ValidationError) -> ValidationResult:
        """Build the result for content that failed to parse."""
        return ValidationResult(
            valid=False, format_type=self.format_name, errors=list(errors)
        )


class JSONValidator(Validator):
    """Validator for JSON format."""

    format_name = 'json'

    def validate(self, content: str) -> ValidationResult:
        """Validate JSON syntax.

        Args:
            content: JSON string to validate

        Returns:
            ValidationResult with validation status and errors; on success
            ``data`` holds the decoded JSON value.
        """
        try:
            data = json.loads(content)
        except json.JSONDecodeError as exc:
            # JSONDecodeError always carries lineno/colno, so no attribute
            # probing is needed.
            return self._failure(ValidationError(
                line=exc.lineno,
                column=exc.colno,
                message=str(exc),
                error_type='syntax_error',
            ))
        return self._success(data)


class YAMLValidator(Validator):
    """Validator for YAML format."""

    format_name = 'yaml'

    def validate(self, content: str) -> ValidationResult:
        """Validate YAML syntax.

        Args:
            content: YAML string to validate

        Returns:
            ValidationResult with validation status and errors; on success
            ``data`` holds the value returned by ``yaml.safe_load``.
        """
        try:
            data = yaml.safe_load(content)
        except yaml.YAMLError as exc:
            # PyYAML keeps the error position on ``problem_mark`` (0-based),
            # not directly on the exception. Errors without a mark yield no
            # position. Convert to 1-based to match the JSON validator.
            mark = getattr(exc, 'problem_mark', None)
            line = mark.line + 1 if mark is not None else None
            column = mark.column + 1 if mark is not None else None
            return self._failure(ValidationError(
                line=line,
                column=column,
                message=str(exc),
                error_type='syntax_error',
            ))
        return self._success(data)


class TOMLValidator(Validator):
    """Validator for TOML format."""

    format_name = 'toml'

    def validate(self, content: str) -> ValidationResult:
        """Validate TOML syntax.

        Args:
            content: TOML string to validate

        Returns:
            ValidationResult with validation status and errors; on success
            ``data`` holds the document returned by ``tomlkit.parse``.
        """
        try:
            data = tomlkit.parse(content)
        except (
            tomlkit.exceptions.ParseError,
            # Safety net for tomlkit errors that are not ParseError, so they
            # are reported as invalid input instead of escaping.
            tomlkit.exceptions.TOMLKitError,
        ) as exc:
            return self._failure(ValidationError(
                # Only some tomlkit errors expose a position.
                line=getattr(exc, 'line', None),
                column=getattr(exc, 'col', None),
                message=str(exc),
                error_type='syntax_error',
            ))
        return self._success(data)


class CSVValidator(Validator):
    """Validator for CSV format."""

    format_name = 'csv'

    def validate(self, content: str) -> ValidationResult:
        """Validate CSV syntax.

        Args:
            content: CSV string to validate

        Returns:
            ValidationResult with validation status and errors; on success
            ``data`` is a list with one dict per row (column name -> value).
        """
        try:
            frame = pd.read_csv(StringIO(content))
        except pd.errors.EmptyDataError:
            return self._failure(ValidationError(
                message="CSV file is empty",
                error_type='empty_data',
            ))
        except pd.errors.ParserError as exc:
            return self._failure(ValidationError(
                message=str(exc),
                error_type='parse_error',
            ))
        return self._success(frame.to_dict(orient='records'))


# Format name -> validator class. Built once instead of on every lookup.
_VALIDATOR_CLASSES = {
    'json': JSONValidator,
    'yaml': YAMLValidator,
    'toml': TOMLValidator,
    'csv': CSVValidator,
}

# File extension (lower-case, with leading dot) -> format name.
_EXTENSION_FORMATS = {
    '.json': 'json',
    '.yaml': 'yaml',
    '.yml': 'yaml',
    '.toml': 'toml',
    '.csv': 'csv',
}


def get_validator(format_name: str) -> Validator:
    """Get validator instance for specified format.

    The lookup is case-insensitive.

    Args:
        format_name: Format identifier (json, yaml, toml, csv)

    Returns:
        Validator instance for the format

    Raises:
        ValueError: If format is not supported (including a non-string
            ``format_name`` such as the None returned by ``detect_format``)
    """
    # Guard first so a None/non-string argument produces the documented
    # ValueError rather than an AttributeError from ``.lower()``.
    if not isinstance(format_name, str):
        raise ValueError(f"Unsupported format: {format_name}")

    validator_cls = _VALIDATOR_CLASSES.get(format_name.lower())
    if validator_cls is None:
        raise ValueError(f"Unsupported format: {format_name}")

    return validator_cls()


def validate_data(content: str, format_name: str) -> ValidationResult:
    """Validate data content in specified format.

    Args:
        content: String content to validate
        format_name: Format of the content

    Returns:
        ValidationResult with validation status and errors

    Raises:
        ValueError: If format is not supported
    """
    return get_validator(format_name).validate(content)


def detect_format(filepath: str) -> Optional[str]:
    """Detect format from file extension.

    Only the extension is inspected (case-insensitively); the file is not
    opened.

    Args:
        filepath: Path to file

    Returns:
        Detected format name or None
    """
    ext = os.path.splitext(filepath)[1].lower()
    return _EXTENSION_FORMATS.get(ext)