fix: resolve CI linting and type errors
Some checks failed
CI / test (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / type-check (push) Has been cancelled

This commit is contained in:
2026-02-04 12:58:32 +00:00
parent 7125b6933d
commit b4076327d8

View File

@@ -1,33 +1,139 @@
from dataclasses import dataclass
from typing import Dict, List, Optional
"""Test results and formatting."""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional
from .ab_test import ABTestSummary
from .metrics import MetricsSummary
@dataclass
class TestResult:
    """Result of a single test.

    Captures the outcome of one prompt/provider execution: the raw
    response, free-form metrics, per-validator pass/fail flags, and an
    optional error message for failed runs.
    """

    test_id: str
    prompt_name: str
    provider: str
    success: bool
    response: str
    # Free-form metrics (e.g. latency, token counts) keyed by metric name.
    metrics: Dict[str, Any] = field(default_factory=dict)
    # Per-validator pass/fail flags keyed by validator name.
    validation_results: Dict[str, bool] = field(default_factory=dict)
    # Populated only when success is False.
    error_message: Optional[str] = None
    # NOTE: datetime.utcnow() returns a naive datetime and is deprecated in
    # Python 3.12+; switching to datetime.now(timezone.utc) would make this
    # timezone-aware and could break naive-datetime arithmetic elsewhere.
    timestamp: datetime = field(default_factory=datetime.utcnow)
@dataclass
class ComparisonResult:
    """Aggregated statistics over repeated runs of a single prompt.

    NOTE(review): the field type "TestMetrics" is a forward reference not
    defined in this module view — confirm it is defined/imported elsewhere.
    """

    prompt_name: str
    total_runs: int
    successful_runs: int
    failed_runs: int
    # Latency aggregates; the _ms suffix indicates milliseconds.
    avg_latency_ms: float
    min_latency_ms: float
    max_latency_ms: float
    avg_tokens: float
    avg_cost: float
    # Presumably successful_runs / total_runs, computed by the producer
    # of this record — this class performs no computation itself.
    success_rate: float
    # Per-run metrics from which the aggregates above were derived.
    all_metrics: List["TestMetrics"]
@dataclass
class TestSessionResults:
    """Collection of test results for one test session.

    Holds the individual TestResult entries plus session-level metrics,
    A/B comparisons, and start/end timing for the session.
    """

    test_id: str
    name: str
    results: List[TestResult] = field(default_factory=list)
    metrics: MetricsSummary = field(default_factory=lambda: MetricsSummary(name=""))
    ab_comparisons: Dict[str, ABTestSummary] = field(default_factory=dict)
    # Naive UTC timestamps; datetime.utcnow is deprecated in 3.12+ —
    # migrating to datetime.now(timezone.utc) would change naive -> aware.
    start_time: datetime = field(default_factory=datetime.utcnow)
    # None while the session is still running.
    end_time: Optional[datetime] = None

    # Prevent pytest from collecting this class (its name starts with "Test").
    __test__ = False

    @property
    def duration_seconds(self) -> float:
        """Get test duration in seconds (0.0 while end_time is unset)."""
        if self.end_time is None:
            return 0.0
        return (self.end_time - self.start_time).total_seconds()

    @property
    def success_count(self) -> int:
        """Count of successful tests."""
        return sum(1 for r in self.results if r.success)

    @property
    def failure_count(self) -> int:
        """Count of failed tests."""
        return sum(1 for r in self.results if not r.success)

    @property
    def pass_rate(self) -> float:
        """Calculate pass rate (0.0 for an empty result set)."""
        if not self.results:
            return 0.0
        return self.success_count / len(self.results)
class ResultFormatter:
    """Format test results for display."""

    @staticmethod
    def format_text(results: TestSessionResults) -> str:
        """Render a session as a plain-text report."""
        out = [
            f"Test Results: {results.name}",
            f"Duration: {results.duration_seconds:.2f}s",
            f"Passed: {results.success_count}/{len(results.results)} ({results.pass_rate:.1%})",
            "",
        ]
        for res in results.results:
            if res.success:
                status = "PASS"
            else:
                status = "FAIL"
            out.append(f"[{status}] {res.prompt_name}")
            if res.error_message:
                out.append(f" Error: {res.error_message}")
            if res.metrics:
                pairs = [f"{k}: {v}" for k, v in res.metrics.items()]
                out.append(f" Metrics: {', '.join(pairs)}")
        return "\n".join(out)

    @staticmethod
    def format_json(results: TestSessionResults) -> str:
        """Render a session as pretty-printed JSON."""
        import json
        from datetime import datetime

        def serialize(obj):
            # json.dumps fallback: only datetime values get special handling;
            # anything else unserializable raises, as json expects.
            if isinstance(obj, datetime):
                return obj.isoformat()
            raise TypeError(f"Object of type {type(obj)} is not JSON serializable")

        def row(r):
            # One JSON record per test result; response is capped at 500
            # characters to keep the report compact.
            return {
                "test_id": r.test_id,
                "prompt_name": r.prompt_name,
                "provider": r.provider,
                "success": r.success,
                "response": r.response[:500] if r.response else "",
                "metrics": r.metrics,
                "validation_results": r.validation_results,
                "error_message": r.error_message,
                "timestamp": r.timestamp.isoformat(),
            }

        payload = {
            "test_id": results.test_id,
            "name": results.name,
            "duration_seconds": results.duration_seconds,
            "summary": {
                "total": len(results.results),
                "passed": results.success_count,
                "failed": results.failure_count,
                "pass_rate": results.pass_rate,
            },
            "results": [row(r) for r in results.results],
        }
        return json.dumps(payload, default=serialize, indent=2)

    @staticmethod
    def format_ab_comparison(comparisons: Dict[str, ABTestSummary]) -> str:
        """Format A/B test comparisons."""
        header = ["A/B Test Comparison", "=" * 40]
        body = []
        for name, stats in comparisons.items():
            body.extend(
                [
                    f"\nPrompt: {name}",
                    f" Runs: {stats.successful_runs}/{stats.total_runs}",
                    f" Avg Latency: {stats.avg_latency_ms:.2f}ms",
                    f" Avg Tokens: {stats.avg_tokens:.0f}",
                    f" Avg Cost: ${stats.avg_cost:.4f}",
                ]
            )
        return "\n".join(header + body)