Re-upload: CI infrastructure issue resolved, all tests verified passing

This commit is contained in:
Developer
2026-03-22 16:48:09 +00:00
parent 71bae33ea9
commit 24b94c12bc
165 changed files with 23945 additions and 436 deletions

View File

@@ -0,0 +1,277 @@
"""Statistics generator for HTTP traffic analytics."""
import re
from collections import Counter, defaultdict
from dataclasses import dataclass
from typing import Any
from rich.table import Table
from http_log_explorer.models import HTTPEntry
@dataclass
class TrafficStats:
    """Plain record bundling the statistics computed for a set of HTTP entries.

    ``StatsGenerator.generate`` fills every field from the matching
    ``StatsGenerator`` method of the same name.
    """

    # Number of entries the statistics were computed from.
    total_requests: int
    # Normalized endpoint pattern -> request count.
    endpoint_count: dict[str, int]
    # HTTP method -> request count.
    method_distribution: dict[str, int]
    # Response status code -> request count.
    status_breakdown: dict[int, int]
    # Content type (parameters stripped) -> request count.
    content_type_distribution: dict[str, int]
    # Named response-time aggregates ("min", "max", "avg", "median", "p95", "p99").
    response_time_stats: dict[str, float]
    # Host -> request count.
    hosts: dict[str, int]
class StatsGenerator:
    """Generate statistics from HTTP entries.

    Every statistic is recomputed from ``self.entries`` each time its method
    is called; nothing is cached between calls.
    """

    # Patterns are compiled once at class creation instead of on every call.
    _UUID_RE = re.compile(
        r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}",
        re.IGNORECASE,
    )
    _HASH_RE = re.compile(r"[a-f0-9]{32,}", re.IGNORECASE)

    # A path segment ends at the next "/", at a query/fragment marker, or at
    # the end of the string. Anchoring on this keeps the substitutions below
    # from rewriting only the prefix of a segment (e.g. "/2fa", "/v1beta").
    _SEGMENT_END = r"(?=[/?#]|$)"
    _UUID_SEGMENT_RE = re.compile(r"/" + _UUID_RE.pattern + _SEGMENT_END, re.IGNORECASE)
    _ID_SEGMENT_RE = re.compile(r"/\d+" + _SEGMENT_END)
    _VERSION_SEGMENT_RE = re.compile(r"/v\d+(?:\.\d+)?" + _SEGMENT_END)

    # Hundreds digit of a status code -> category label (insertion order is
    # the order of the keys in the returned dictionaries).
    _STATUS_CATEGORY_LABELS = {
        1: "1xx informational",
        2: "2xx success",
        3: "3xx redirection",
        4: "4xx client error",
        5: "5xx server error",
    }

    def __init__(self, entries: list[HTTPEntry]) -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects. The list is stored by
                reference, not copied.
        """
        self.entries = entries

    def generate(self) -> TrafficStats:
        """Generate all statistics.

        Returns:
            TrafficStats object with all computed statistics
        """
        return TrafficStats(
            total_requests=len(self.entries),
            endpoint_count=self.endpoint_count(),
            method_distribution=self.method_distribution(),
            status_breakdown=self.status_breakdown(),
            content_type_distribution=self.content_type_distribution(),
            response_time_stats=self.response_time_stats(),
            hosts=self.hosts(),
        )

    def endpoint_count(self) -> dict[str, int]:
        """Count requests per normalized endpoint.

        Returns:
            Dictionary mapping normalized endpoints to counts, most frequent
            first
        """
        counter: Counter[str] = Counter(
            self._normalize_endpoint(entry.endpoint) for entry in self.entries
        )
        return dict(counter.most_common())

    def method_distribution(self) -> dict[str, int]:
        """Get distribution of HTTP methods.

        Returns:
            Dictionary mapping methods to counts
        """
        return dict(Counter(entry.request.method for entry in self.entries))

    def status_breakdown(self) -> dict[int, int]:
        """Get breakdown of status codes.

        Returns:
            Dictionary mapping status codes to counts, ordered by status code
        """
        counter = Counter(entry.response.status for entry in self.entries)
        return dict(sorted(counter.items()))

    def content_type_distribution(self) -> dict[str, int]:
        """Get distribution of content types.

        Parameters after the first ``;`` are dropped. Entries whose content
        type is missing, or empty once the parameters are removed, are counted
        under ``"unknown"``.

        Returns:
            Dictionary mapping content types to counts, most frequent first
        """
        counter: Counter[str] = Counter()
        for entry in self.entries:
            raw = entry.content_type or "unknown"
            # A value such as "; charset=utf-8" leaves nothing before the ";";
            # count it as unknown rather than under an empty-string key.
            main_type = raw.split(";")[0].strip() or "unknown"
            counter[main_type] += 1
        return dict(counter.most_common())

    def response_time_stats(self) -> dict[str, float]:
        """Calculate response time statistics.

        Entries whose ``duration_ms`` is ``None`` are ignored. The median of
        an even number of samples is the mean of the two middle values. Each
        percentile is the sorted sample at index ``int(n * q)``, clamped to
        the last sample.

        Returns:
            Dictionary with "min", "max", "avg", "median", "p95" and "p99"
            response times in ms; all values are 0.0 when no entry has a
            duration
        """
        ordered = sorted(
            entry.duration_ms for entry in self.entries if entry.duration_ms is not None
        )
        if not ordered:
            return {"min": 0.0, "max": 0.0, "avg": 0.0, "median": 0.0, "p95": 0.0, "p99": 0.0}

        n = len(ordered)
        mid = n // 2
        if n % 2:
            median = ordered[mid]
        else:
            # Even count: there is no single middle sample, so average the two
            # around the centre instead of returning the upper one.
            median = (ordered[mid - 1] + ordered[mid]) / 2

        return {
            "min": float(ordered[0]),
            "max": float(ordered[-1]),
            "avg": float(sum(ordered) / n),
            "median": float(median),
            "p95": float(ordered[min(int(n * 0.95), n - 1)]),
            "p99": float(ordered[min(int(n * 0.99), n - 1)]),
        }

    def hosts(self) -> dict[str, int]:
        """Get request count per host.

        Returns:
            Dictionary mapping hosts to counts, most frequent first
        """
        return dict(Counter(entry.host for entry in self.entries).most_common())

    def status_code_categories(self) -> dict[str, int]:
        """Get counts by status code category.

        Status codes outside 100-599 are not counted in any category.

        Returns:
            Dictionary with 1xx, 2xx, 3xx, 4xx, 5xx counts
        """
        return self._categorize_statuses(
            Counter(entry.response.status for entry in self.entries)
        )

    def _categorize_statuses(self, status_counts: dict[int, int]) -> dict[str, int]:
        """Fold a status-code -> count mapping into the five category totals."""
        categories = {label: 0 for label in self._STATUS_CATEGORY_LABELS.values()}
        for status, count in status_counts.items():
            label = self._STATUS_CATEGORY_LABELS.get(status // 100)
            if label is not None:
                categories[label] += count
        return categories

    def endpoint_patterns(self) -> dict[str, int]:
        """Extract common endpoint patterns with path parameters.

        Returns:
            Dictionary mapping patterns to counts, most frequent first
        """
        counter: Counter[str] = Counter(
            self._extract_pattern(entry.endpoint) for entry in self.entries
        )
        return dict(counter.most_common())

    def _normalize_endpoint(self, endpoint: str) -> str:
        """Normalize endpoint by replacing IDs and removing version segments.

        Whole path segments only are rewritten: a UUID segment becomes
        ``{uuid}``, an all-digit segment becomes ``{id}``, and a version
        segment such as ``v1`` or ``v2.1`` is removed.
        """
        # UUIDs go first: a UUID may start with digits, and replacing those
        # digits with "{id}" would leave a string the UUID pattern can no
        # longer match.
        cleaned = self._UUID_SEGMENT_RE.sub("/{uuid}", endpoint)
        cleaned = self._ID_SEGMENT_RE.sub("/{id}", cleaned)
        cleaned = self._VERSION_SEGMENT_RE.sub("", cleaned)
        return cleaned

    def _extract_pattern(self, endpoint: str) -> str:
        """Extract endpoint pattern with parameter placeholders."""
        return "/".join(self._placeholder_for(part) for part in endpoint.split("/"))

    def _placeholder_for(self, part: str) -> str:
        """Return the placeholder for one path segment, or the segment itself."""
        if not part:
            # Empty segments (leading/trailing/double slashes) are preserved.
            return ""
        if part.isdigit():
            return "{id}"
        if self._is_uuid(part):
            return "{uuid}"
        if self._is_hash(part):
            return "{hash}"
        return part

    def _is_uuid(self, s: str) -> bool:
        """Check if string looks like a UUID (8-4-4-4-12 hex, any case)."""
        return self._UUID_RE.fullmatch(s) is not None

    def _is_hash(self, s: str) -> bool:
        """Check if string looks like a hash (32 or more hex characters)."""
        return self._HASH_RE.fullmatch(s) is not None

    def render_table(self, stats: TrafficStats | None = None) -> Table:
        """Render statistics as a Rich table.

        The response-time rows are only added when the average is above zero,
        and at most the first five endpoints of ``stats.endpoint_count`` are
        listed.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Rich Table object
        """
        if stats is None:
            stats = self.generate()

        table = Table(title="Traffic Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")

        table.add_row("Total Requests", str(stats.total_requests))

        method_rows = [f"{m}: {c}" for m, c in sorted(stats.method_distribution.items())]
        table.add_row("Methods", ", ".join(method_rows) if method_rows else "N/A")

        status_rows = [f"{s}: {c}" for s, c in sorted(stats.status_breakdown.items())]
        table.add_row("Status Codes", ", ".join(status_rows) if status_rows else "N/A")

        rt = stats.response_time_stats
        if rt["avg"] > 0:
            table.add_row("Response Time (avg)", f"{rt['avg']:.2f}ms")
            table.add_row("Response Time (p95)", f"{rt['p95']:.2f}ms")

        top_endpoints = list(stats.endpoint_count.items())[:5]
        endpoint_rows = [f"{e}: {c}" for e, c in top_endpoints]
        table.add_row("Top Endpoints", ", ".join(endpoint_rows) if endpoint_rows else "N/A")
        return table

    def to_dict(self, stats: TrafficStats | None = None) -> dict[str, Any]:
        """Convert stats to dictionary.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Dictionary representation of stats, plus a
            "status_code_categories" entry derived from the stats' status
            breakdown
        """
        if stats is None:
            stats = self.generate()
        return {
            "total_requests": stats.total_requests,
            "endpoint_count": stats.endpoint_count,
            "method_distribution": stats.method_distribution,
            "status_breakdown": stats.status_breakdown,
            "content_type_distribution": stats.content_type_distribution,
            "response_time_stats": stats.response_time_stats,
            "hosts": stats.hosts,
            # Derived from the same stats object as the other values so the
            # result stays consistent when pre-generated stats are passed in.
            "status_code_categories": self._categorize_statuses(stats.status_breakdown),
        }