"""Statistics generator for HTTP traffic analytics.""" import re from collections import Counter, defaultdict from dataclasses import dataclass from typing import Any from rich.table import Table from http_log_explorer.models import HTTPEntry @dataclass class TrafficStats: """Container for traffic statistics.""" total_requests: int endpoint_count: dict[str, int] method_distribution: dict[str, int] status_breakdown: dict[int, int] content_type_distribution: dict[str, int] response_time_stats: dict[str, float] hosts: dict[str, int] class StatsGenerator: """Generate statistics from HTTP entries.""" def __init__(self, entries: list[HTTPEntry]) -> None: """Initialize with HTTP entries. Args: entries: List of HTTPEntry objects """ self.entries = entries def generate(self) -> TrafficStats: """Generate all statistics. Returns: TrafficStats object with all computed statistics """ return TrafficStats( total_requests=len(self.entries), endpoint_count=self.endpoint_count(), method_distribution=self.method_distribution(), status_breakdown=self.status_breakdown(), content_type_distribution=self.content_type_distribution(), response_time_stats=self.response_time_stats(), hosts=self.hosts(), ) def endpoint_count(self) -> dict[str, int]: """Count requests per endpoint pattern. Returns: Dictionary mapping endpoint patterns to counts """ counter: Counter[str] = Counter() for entry in self.entries: endpoint = self._normalize_endpoint(entry.endpoint) counter[endpoint] += 1 return dict(counter.most_common()) def method_distribution(self) -> dict[str, int]: """Get distribution of HTTP methods. Returns: Dictionary mapping methods to counts """ counter = Counter(e.request.method for e in self.entries) return dict(counter) def status_breakdown(self) -> dict[int, int]: """Get breakdown of status codes. Returns: Dictionary mapping status codes to counts """ counter = Counter(e.response.status for e in self.entries) return dict(sorted(counter.items())) def content_type_distribution(self) -> dict[str, int]: """Get distribution of content types. Returns: Dictionary mapping content types to counts """ counter: Counter[str] = Counter() for entry in self.entries: ct = entry.content_type or "unknown" main_type = ct.split(";")[0].strip() counter[main_type] += 1 return dict(counter.most_common()) def response_time_stats(self) -> dict[str, float]: """Calculate response time statistics. Returns: Dictionary with min, max, avg, median response times in ms """ times = [e.duration_ms for e in self.entries if e.duration_ms is not None] if not times: return {"min": 0.0, "max": 0.0, "avg": 0.0, "median": 0.0, "p95": 0.0, "p99": 0.0} sorted_times = sorted(times) n = len(sorted_times) stats = { "min": float(sorted_times[0]), "max": float(sorted_times[-1]), "avg": float(sum(times) / n), "median": float(sorted_times[n // 2]), } p95_idx = int(n * 0.95) p99_idx = int(n * 0.99) stats["p95"] = float(sorted_times[min(p95_idx, n - 1)]) stats["p99"] = float(sorted_times[min(p99_idx, n - 1)]) return stats def hosts(self) -> dict[str, int]: """Get request count per host. Returns: Dictionary mapping hosts to counts """ counter = Counter(e.host for e in self.entries) return dict(counter.most_common()) def status_code_categories(self) -> dict[str, int]: """Get counts by status code category. Returns: Dictionary with 1xx, 2xx, 3xx, 4xx, 5xx counts """ categories: dict[str, int] = { "1xx informational": 0, "2xx success": 0, "3xx redirection": 0, "4xx client error": 0, "5xx server error": 0, } for entry in self.entries: status = entry.response.status if 100 <= status < 200: categories["1xx informational"] += 1 elif 200 <= status < 300: categories["2xx success"] += 1 elif 300 <= status < 400: categories["3xx redirection"] += 1 elif 400 <= status < 500: categories["4xx client error"] += 1 elif 500 <= status < 600: categories["5xx server error"] += 1 return categories def endpoint_patterns(self) -> dict[str, int]: """Extract common endpoint patterns with path parameters. Returns: Dictionary mapping patterns to counts """ patterns: dict[str, int] = defaultdict(int) for entry in self.entries: pattern = self._extract_pattern(entry.endpoint) patterns[pattern] += 1 return dict(sorted(patterns.items(), key=lambda x: x[1], reverse=True)) def _normalize_endpoint(self, endpoint: str) -> str: """Normalize endpoint by removing IDs and versions.""" cleaned = re.sub(r"/\d+", "/{id}", endpoint) cleaned = re.sub(r"/[a-f0-9-]{36}", "/{uuid}", cleaned) cleaned = re.sub(r"/v\d+(?:\.\d+)?", "", cleaned) return cleaned def _extract_pattern(self, endpoint: str) -> str: """Extract endpoint pattern with parameter placeholders.""" parts = endpoint.split("/") normalized_parts = [] for part in parts: if not part: normalized_parts.append("") elif part.isdigit(): normalized_parts.append("{id}") elif self._is_uuid(part): normalized_parts.append("{uuid}") elif self._is_hash(part): normalized_parts.append("{hash}") else: normalized_parts.append(part) return "/".join(normalized_parts) def _is_uuid(self, s: str) -> bool: """Check if string looks like a UUID.""" uuid_pattern = re.compile( r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$", re.IGNORECASE, ) return bool(uuid_pattern.match(s)) def _is_hash(self, s: str) -> bool: """Check if string looks like a hash.""" hash_pattern = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE) return bool(hash_pattern.match(s)) def render_table(self, stats: TrafficStats | None = None) -> Table: """Render statistics as a Rich table. Args: stats: Pre-generated stats, or None to generate new Returns: Rich Table object """ if stats is None: stats = self.generate() table = Table(title="Traffic Statistics") table.add_column("Metric", style="cyan") table.add_column("Value", style="green") table.add_row("Total Requests", str(stats.total_requests)) method_rows = [f"{m}: {c}" for m, c in sorted(stats.method_distribution.items())] table.add_row("Methods", ", ".join(method_rows) if method_rows else "N/A") status_rows = [f"{s}: {c}" for s, c in sorted(stats.status_breakdown.items())] table.add_row("Status Codes", ", ".join(status_rows) if status_rows else "N/A") rt = stats.response_time_stats if rt["avg"] > 0: table.add_row( "Response Time (avg)", f"{rt['avg']:.2f}ms", ) table.add_row( "Response Time (p95)", f"{rt['p95']:.2f}ms", ) top_endpoints = list(stats.endpoint_count.items())[:5] endpoint_rows = [f"{e}: {c}" for e, c in top_endpoints] table.add_row("Top Endpoints", ", ".join(endpoint_rows) if endpoint_rows else "N/A") return table def to_dict(self, stats: TrafficStats | None = None) -> dict[str, Any]: """Convert stats to dictionary. Args: stats: Pre-generated stats, or None to generate new Returns: Dictionary representation of stats """ if stats is None: stats = self.generate() return { "total_requests": stats.total_requests, "endpoint_count": stats.endpoint_count, "method_distribution": stats.method_distribution, "status_breakdown": stats.status_breakdown, "content_type_distribution": stats.content_type_distribution, "response_time_stats": stats.response_time_stats, "hosts": stats.hosts, "status_code_categories": self.status_code_categories(), }