From b4076327d8f983fd86e3250e5ab5ca18d6803107 Mon Sep 17 00:00:00 2001
From: 7000pctAUTO
Date: Wed, 4 Feb 2026 12:58:32 +0000
Subject: [PATCH] fix: resolve CI linting and type errors

---
 src/promptforge/testing/results.py | 150 ++++++++++++++++++++++++-----
 1 file changed, 128 insertions(+), 22 deletions(-)

diff --git a/src/promptforge/testing/results.py b/src/promptforge/testing/results.py
index 0b477dd..b008095 100644
--- a/src/promptforge/testing/results.py
+++ b/src/promptforge/testing/results.py
@@ -1,33 +1,139 @@
-from dataclasses import dataclass
-from typing import Dict, List, Optional
+"""Test results and formatting."""
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from .ab_test import ABTestSummary
+from .metrics import MetricsSummary
 
 
 @dataclass
 class TestResult:
+    """Result of a single test."""
+
+    test_id: str
+    prompt_name: str
+    provider: str
     success: bool
     response: str
-    metrics: "TestMetrics"
-    error: Optional[str] = None
+    metrics: Dict[str, Any] = field(default_factory=dict)
+    validation_results: Dict[str, bool] = field(default_factory=dict)
+    error_message: Optional[str] = None
+    timestamp: datetime = field(default_factory=datetime.utcnow)
 
 
 @dataclass
-class ComparisonResult:
-    prompt_name: str
-    total_runs: int
-    successful_runs: int
-    failed_runs: int
-    avg_latency_ms: float
-    min_latency_ms: float
-    max_latency_ms: float
-    avg_tokens: float
-    avg_cost: float
-    success_rate: float
-    all_metrics: List["TestMetrics"]
+class TestSessionResults:
+    """Collection of test results."""
 
-
-@dataclass
-class TestReport:
     test_id: str
-    timestamp: str
-    results: Dict[str, ComparisonResult]
-    summary: Dict[str, float]
+    name: str
+    results: List[TestResult] = field(default_factory=list)
+    metrics: MetricsSummary = field(default_factory=lambda: MetricsSummary(name=""))
+    ab_comparisons: Dict[str, ABTestSummary] = field(default_factory=dict)
+    start_time: datetime = field(default_factory=datetime.utcnow)
+    end_time: Optional[datetime] = None
+
+    __test__ = False
+
+    @property
+    def duration_seconds(self) -> float:
+        """Get test duration in seconds."""
+        if self.end_time is None:
+            return 0.0
+        return (self.end_time - self.start_time).total_seconds()
+
+    @property
+    def success_count(self) -> int:
+        """Count of successful tests."""
+        return sum(1 for r in self.results if r.success)
+
+    @property
+    def failure_count(self) -> int:
+        """Count of failed tests."""
+        return sum(1 for r in self.results if not r.success)
+
+    @property
+    def pass_rate(self) -> float:
+        """Calculate pass rate."""
+        if not self.results:
+            return 0.0
+        return self.success_count / len(self.results)
+
+
+class ResultFormatter:
+    """Format test results for display."""
+
+    @staticmethod
+    def format_text(results: TestSessionResults) -> str:
+        """Format results as plain text."""
+        lines = [
+            f"Test Results: {results.name}",
+            f"Duration: {results.duration_seconds:.2f}s",
+            f"Passed: {results.success_count}/{len(results.results)} ({results.pass_rate:.1%})",
+            "",
+        ]
+
+        for result in results.results:
+            status = "PASS" if result.success else "FAIL"
+            lines.append(f"[{status}] {result.prompt_name}")
+            if result.error_message:
+                lines.append(f"  Error: {result.error_message}")
+            if result.metrics:
+                metrics_str = ", ".join(f"{k}: {v}" for k, v in result.metrics.items())
+                lines.append(f"  Metrics: {metrics_str}")
+
+        return "\n".join(lines)
+
+    @staticmethod
+    def format_json(results: TestSessionResults) -> str:
+        """Format results as JSON."""
+        import json
+        from datetime import datetime
+
+        def serialize(obj):
+            if isinstance(obj, datetime):
+                return obj.isoformat()
+            raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
+
+        data = {
+            "test_id": results.test_id,
+            "name": results.name,
+            "duration_seconds": results.duration_seconds,
+            "summary": {
+                "total": len(results.results),
+                "passed": results.success_count,
+                "failed": results.failure_count,
+                "pass_rate": results.pass_rate,
+            },
+            "results": [
+                {
+                    "test_id": r.test_id,
+                    "prompt_name": r.prompt_name,
+                    "provider": r.provider,
+                    "success": r.success,
+                    "response": r.response[:500] if r.response else "",
+                    "metrics": r.metrics,
+                    "validation_results": r.validation_results,
+                    "error_message": r.error_message,
+                    "timestamp": r.timestamp.isoformat(),
+                }
+                for r in results.results
+            ],
+        }
+        return json.dumps(data, default=serialize, indent=2)
+
+    @staticmethod
+    def format_ab_comparison(comparisons: Dict[str, ABTestSummary]) -> str:
+        """Format A/B test comparisons."""
+        lines = ["A/B Test Comparison", "=" * 40]
+
+        for name, summary in comparisons.items():
+            lines.append(f"\nPrompt: {name}")
+            lines.append(f"  Runs: {summary.successful_runs}/{summary.total_runs}")
+            lines.append(f"  Avg Latency: {summary.avg_latency_ms:.2f}ms")
+            lines.append(f"  Avg Tokens: {summary.avg_tokens:.0f}")
+            lines.append(f"  Avg Cost: ${summary.avg_cost:.4f}")
+
+        return "\n".join(lines)
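
Usage note (reviewer sketch, not part of the patch): a minimal example of the
API this diff introduces, assuming the import path implied by the file
location (promptforge.testing.results) and that the session's default
MetricsSummary(name="") construction succeeds. The session object, prompt
names, and metric values below are hypothetical.

    # Hypothetical driver script; exercises TestSessionResults and
    # ResultFormatter exactly as defined in the patched module.
    from datetime import datetime

    from promptforge.testing.results import (
        ResultFormatter,
        TestResult,
        TestSessionResults,
    )

    session = TestSessionResults(test_id="run-001", name="smoke-suite")
    session.results.append(
        TestResult(
            test_id="t1",
            prompt_name="greeting-v2",   # hypothetical prompt
            provider="openai",
            success=True,
            response="Hello!",
            metrics={"latency_ms": 412.5, "tokens": 18},
        )
    )
    session.end_time = datetime.utcnow()  # closes the session so duration_seconds > 0

    print(ResultFormatter.format_text(session))  # plain-text summary with pass rate
    print(ResultFormatter.format_json(session))  # JSON with ISO-8601 timestamps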