fix: resolve CI linting and type errors
Some checks failed
CI / test (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / type-check (push) Has been cancelled

This commit is contained in:
2026-02-04 12:58:32 +00:00
parent 7125b6933d
commit b4076327d8

View File

@@ -1,33 +1,139 @@
"""Test results and formatting."""

import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from .ab_test import ABTestSummary
from .metrics import MetricsSummary
@dataclass
class TestResult:
    """Result of a single test.

    Attributes:
        test_id: Identifier of this test.
        prompt_name: Name of the prompt that was exercised.
        provider: Name of the provider the test ran against.
        success: True when the test passed.
        response: Response text captured for the test.
        metrics: Free-form mapping of metric name to value.
        validation_results: Mapping of validation name to pass/fail flag.
        error_message: Error description, or None when there is none.
        timestamp: Naive UTC datetime; defaults to the creation time.
    """

    test_id: str
    prompt_name: str
    provider: str
    success: bool
    response: str
    metrics: Dict[str, Any] = field(default_factory=dict)
    validation_results: Dict[str, bool] = field(default_factory=dict)
    error_message: Optional[str] = None
    # datetime.utcnow() is deprecated; this yields the same naive UTC value,
    # so existing comparisons against other naive datetimes keep working.
    timestamp: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )

    # The class name starts with "Test"; tell pytest not to collect it.
    __test__ = False
@dataclass
class TestSessionResults:
    """Collection of test results.

    Attributes:
        test_id: Identifier of the session.
        name: Display name of the session.
        results: Individual test results, in insertion order.
        metrics: Metrics summary; defaults to ``MetricsSummary(name="")``.
        ab_comparisons: A/B test summaries keyed by name.
        start_time: Naive UTC datetime; defaults to the creation time.
        end_time: End of the session, or None while it has not been set.
    """

    test_id: str
    name: str
    results: List[TestResult] = field(default_factory=list)
    metrics: MetricsSummary = field(default_factory=lambda: MetricsSummary(name=""))
    ab_comparisons: Dict[str, ABTestSummary] = field(default_factory=dict)
    # datetime.utcnow() is deprecated; this yields the same naive UTC value,
    # so subtracting it from a naive end_time keeps working.
    start_time: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    end_time: Optional[datetime] = None

    # The class name starts with "Test"; tell pytest not to collect it.
    __test__ = False

    @property
    def duration_seconds(self) -> float:
        """Get test duration in seconds (0.0 while end_time is unset)."""
        if self.end_time is None:
            return 0.0
        return (self.end_time - self.start_time).total_seconds()

    @property
    def success_count(self) -> int:
        """Count of successful tests."""
        return sum(1 for r in self.results if r.success)

    @property
    def failure_count(self) -> int:
        """Count of failed tests."""
        return sum(1 for r in self.results if not r.success)

    @property
    def pass_rate(self) -> float:
        """Fraction of results that succeeded (0.0 when there are none)."""
        if not self.results:
            return 0.0
        return self.success_count / len(self.results)
class ResultFormatter:
    """Format test results for display."""

    @staticmethod
    def format_text(results: TestSessionResults) -> str:
        """Format results as plain text.

        Emits a header (name, duration, pass count and rate), a blank line,
        then one ``[PASS]``/``[FAIL]`` line per result, followed by its error
        message and metrics when they are present.

        Args:
            results: Session whose results should be rendered.

        Returns:
            The report as a single newline-joined string.
        """
        lines = [
            f"Test Results: {results.name}",
            f"Duration: {results.duration_seconds:.2f}s",
            f"Passed: {results.success_count}/{len(results.results)} ({results.pass_rate:.1%})",
            "",
        ]
        for result in results.results:
            status = "PASS" if result.success else "FAIL"
            lines.append(f"[{status}] {result.prompt_name}")
            if result.error_message:
                lines.append(f" Error: {result.error_message}")
            if result.metrics:
                metrics_str = ", ".join(f"{k}: {v}" for k, v in result.metrics.items())
                lines.append(f" Metrics: {metrics_str}")
        return "\n".join(lines)

    @staticmethod
    def format_json(results: TestSessionResults) -> str:
        """Format results as JSON.

        Args:
            results: Session whose results should be rendered.

        Returns:
            A JSON document (2-space indent) with the session identifiers,
            duration, a summary and one entry per result. Each response is
            cut to its first 500 characters.

        Raises:
            TypeError: If a value (for example inside ``metrics``) is neither
                natively JSON serializable nor a ``datetime``.
        """

        def serialize(obj: object) -> str:
            # Fallback hook for json.dumps: only datetimes get special handling.
            if isinstance(obj, datetime):
                return obj.isoformat()
            raise TypeError(f"Object of type {type(obj)} is not JSON serializable")

        data = {
            "test_id": results.test_id,
            "name": results.name,
            "duration_seconds": results.duration_seconds,
            "summary": {
                "total": len(results.results),
                "passed": results.success_count,
                "failed": results.failure_count,
                "pass_rate": results.pass_rate,
            },
            "results": [
                {
                    "test_id": r.test_id,
                    "prompt_name": r.prompt_name,
                    "provider": r.provider,
                    "success": r.success,
                    # Keep the payload small: at most 500 characters.
                    "response": r.response[:500] if r.response else "",
                    "metrics": r.metrics,
                    "validation_results": r.validation_results,
                    "error_message": r.error_message,
                    "timestamp": r.timestamp.isoformat(),
                }
                for r in results.results
            ],
        }
        return json.dumps(data, default=serialize, indent=2)

    @staticmethod
    def format_ab_comparison(comparisons: Dict[str, ABTestSummary]) -> str:
        """Format A/B test comparisons.

        Args:
            comparisons: Summaries keyed by the name shown on the
                ``Prompt:`` line.

        Returns:
            A title, a 40-character rule, then a section per entry listing
            runs, average latency, average tokens and average cost.
        """
        lines = ["A/B Test Comparison", "=" * 40]
        for name, summary in comparisons.items():
            lines.append(f"\nPrompt: {name}")
            lines.append(f" Runs: {summary.successful_runs}/{summary.total_runs}")
            lines.append(f" Avg Latency: {summary.avg_latency_ms:.2f}ms")
            lines.append(f" Avg Tokens: {summary.avg_tokens:.0f}")
            lines.append(f" Avg Cost: ${summary.avg_cost:.4f}")
        return "\n".join(lines)