Re-upload: CI infrastructure issue resolved, all tests verified passing

This commit is contained in:
Developer
2026-03-22 16:48:09 +00:00
parent 71bae33ea9
commit 24b94c12bc
165 changed files with 23945 additions and 436 deletions

View File

@@ -0,0 +1,7 @@
"""Analyzers for HTTP traffic."""
from http_log_explorer.analyzers.diff_engine import DiffEngine
from http_log_explorer.analyzers.stats_generator import StatsGenerator
from http_log_explorer.analyzers.traffic_analyzer import TrafficAnalyzer
__all__ = ["DiffEngine", "StatsGenerator", "TrafficAnalyzer"]

View File

@@ -0,0 +1,185 @@
"""Diff engine for comparing HTTP entries."""
import difflib
from http_log_explorer.models import DiffResult, HTTPEntry
class DiffEngine:
    """Engine for comparing HTTP request/response pairs."""

    def diff(self, entry1: HTTPEntry, entry2: HTTPEntry) -> DiffResult:
        """Compare two HTTP entries.

        Args:
            entry1: First HTTPEntry
            entry2: Second HTTPEntry

        Returns:
            DiffResult with URL/status change flags plus header and body diffs
        """
        result = DiffResult(
            entry1_id=entry1.id,
            entry2_id=entry2.id,
        )
        result.url_changed = entry1.request.url != entry2.request.url
        result.status_changed = entry1.response.status != entry2.response.status
        result.status1 = entry1.response.status
        result.status2 = entry2.response.status
        result.request_headers_diff = self.headers_diff(
            entry1.request.headers,
            entry2.request.headers,
        )
        result.response_headers_diff = self.headers_diff(
            entry1.response.headers,
            entry2.response.headers,
        )
        result.request_body_diff = self.body_diff(
            entry1.request.body,
            entry2.request.body,
        )
        result.response_body_diff = self.body_diff(
            entry1.response.body,
            entry2.response.body,
        )
        return result

    def headers_diff(
        self, headers1: dict[str, str], headers2: dict[str, str]
    ) -> list[str]:
        """Compare two header dictionaries.

        Args:
            headers1: First headers dict
            headers2: Second headers dict

        Returns:
            List of diff lines: "- key: value" for removed/changed-from,
            "+ key: value" for added/changed-to (keys in sorted order)
        """
        all_keys = set(headers1) | set(headers2)
        diff_lines: list[str] = []
        for key in sorted(all_keys):
            val1 = headers1.get(key)
            val2 = headers2.get(key)
            if val1 == val2:
                continue
            # For a changed header, emit the old value first, then the new.
            if val1 is not None:
                diff_lines.append(f"- {key}: {val1}")
            if val2 is not None:
                diff_lines.append(f"+ {key}: {val2}")
        return diff_lines

    def body_diff(
        self, body1: str | None, body2: str | None
    ) -> list[str]:
        """Compare two body strings.

        Args:
            body1: First body (may be None)
            body2: Second body (may be None)

        Returns:
            List of diff lines (unified format), uniformly newline-free
        """
        if body1 == body2:
            return []
        # Split WITHOUT keepends: we pass lineterm="" below, and mixing
        # keepends=True with lineterm="" leaves content lines carrying a
        # trailing "\n" while header/hunk lines have none, which makes
        # unified_diff_output's "\n".join() double-space the content.
        lines1 = (body1 or "").splitlines()
        lines2 = (body2 or "").splitlines()
        if not lines1 and not lines2:
            # Bodies differ only in None-vs-empty; nothing textual to show.
            return []
        return list(difflib.unified_diff(
            lines1,
            lines2,
            fromfile="before",
            tofile="after",
            lineterm="",
        ))

    def unified_diff_output(self, diff_result: DiffResult) -> str:
        """Generate a human-readable unified diff output.

        Args:
            diff_result: The diff result

        Returns:
            Formatted string with all differences, or a "no differences"
            message when every section is empty
        """
        lines: list[str] = []
        lines.append(f"=== Diff: {diff_result.entry1_id} vs {diff_result.entry2_id} ===")
        lines.append("")
        if diff_result.url_changed:
            # url_changed is a bool; echoing "True" added no information.
            lines.append("URL changed")
        if diff_result.status_changed:
            lines.append(f"Status: {diff_result.status1} -> {diff_result.status2}")
        if diff_result.request_headers_diff:
            lines.append("")
            lines.append("--- Request Headers ---")
            lines.extend(diff_result.request_headers_diff)
        if diff_result.request_body_diff:
            lines.append("")
            lines.append("--- Request Body ---")
            lines.extend(diff_result.request_body_diff)
        if diff_result.response_headers_diff:
            lines.append("")
            lines.append("--- Response Headers ---")
            lines.extend(diff_result.response_headers_diff)
        if diff_result.response_body_diff:
            lines.append("")
            lines.append("--- Response Body ---")
            lines.extend(diff_result.response_body_diff)
        if not self.has_differences(diff_result):
            lines.append("No differences found.")
        return "\n".join(lines)

    def has_differences(self, diff_result: DiffResult) -> bool:
        """Check if there are any differences.

        Args:
            diff_result: The diff result

        Returns:
            True if any flag is set or any diff section is non-empty
        """
        return bool(
            diff_result.url_changed
            or diff_result.status_changed
            or diff_result.request_headers_diff
            or diff_result.request_body_diff
            or diff_result.response_headers_diff
            or diff_result.response_body_diff
        )

View File

@@ -0,0 +1,277 @@
"""Statistics generator for HTTP traffic analytics."""
import re
from collections import Counter, defaultdict
from dataclasses import dataclass
from typing import Any
from rich.table import Table
from http_log_explorer.models import HTTPEntry
@dataclass
class TrafficStats:
    """Container for traffic statistics.

    Produced by StatsGenerator.generate(); all dictionaries are plain
    dicts so the object serializes cleanly.
    """
    # Total number of HTTP entries analyzed.
    total_requests: int
    # Normalized endpoint pattern -> request count, most common first.
    endpoint_count: dict[str, int]
    # HTTP method (e.g. "GET") -> request count.
    method_distribution: dict[str, int]
    # Status code -> request count, sorted by status code.
    status_breakdown: dict[int, int]
    # Main content type (";"-parameters stripped) -> request count.
    content_type_distribution: dict[str, int]
    # Keys: "min", "max", "avg", "median", "p95", "p99" — values in ms.
    response_time_stats: dict[str, float]
    # Host name -> request count, most common first.
    hosts: dict[str, int]
class StatsGenerator:
    """Generate statistics from HTTP entries."""

    # Compiled once at class-definition time; the per-segment helpers
    # below would otherwise recompile these patterns on every call.
    _UUID_RE = re.compile(
        r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$",
        re.IGNORECASE,
    )
    _HASH_RE = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE)

    def __init__(self, entries: list[HTTPEntry]) -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects
        """
        self.entries = entries

    def generate(self) -> TrafficStats:
        """Generate all statistics.

        Returns:
            TrafficStats object with all computed statistics
        """
        return TrafficStats(
            total_requests=len(self.entries),
            endpoint_count=self.endpoint_count(),
            method_distribution=self.method_distribution(),
            status_breakdown=self.status_breakdown(),
            content_type_distribution=self.content_type_distribution(),
            response_time_stats=self.response_time_stats(),
            hosts=self.hosts(),
        )

    def endpoint_count(self) -> dict[str, int]:
        """Count requests per normalized endpoint pattern.

        Returns:
            Dictionary mapping endpoint patterns to counts, most common first
        """
        counter: Counter[str] = Counter()
        for entry in self.entries:
            counter[self._normalize_endpoint(entry.endpoint)] += 1
        return dict(counter.most_common())

    def method_distribution(self) -> dict[str, int]:
        """Get distribution of HTTP methods.

        Returns:
            Dictionary mapping methods to counts
        """
        return dict(Counter(e.request.method for e in self.entries))

    def status_breakdown(self) -> dict[int, int]:
        """Get breakdown of status codes.

        Returns:
            Dictionary mapping status codes to counts, sorted by code
        """
        counter = Counter(e.response.status for e in self.entries)
        return dict(sorted(counter.items()))

    def content_type_distribution(self) -> dict[str, int]:
        """Get distribution of content types.

        Returns:
            Dictionary mapping main content types (parameters such as
            "; charset=utf-8" stripped) to counts, most common first
        """
        counter: Counter[str] = Counter()
        for entry in self.entries:
            ct = entry.content_type or "unknown"
            counter[ct.split(";")[0].strip()] += 1
        return dict(counter.most_common())

    def response_time_stats(self) -> dict[str, float]:
        """Calculate response time statistics.

        Entries without a duration are ignored.

        Returns:
            Dictionary with min, max, avg, median, p95, p99 in ms
            (all zeros when no entry has a duration)
        """
        times = [e.duration_ms for e in self.entries if e.duration_ms is not None]
        if not times:
            return {"min": 0.0, "max": 0.0, "avg": 0.0, "median": 0.0, "p95": 0.0, "p99": 0.0}
        sorted_times = sorted(times)
        n = len(sorted_times)
        stats = {
            "min": float(sorted_times[0]),
            "max": float(sorted_times[-1]),
            "avg": float(sum(times) / n),
            # Upper-middle element for even n (nearest-rank, not interpolated).
            "median": float(sorted_times[n // 2]),
        }
        # Nearest-rank percentiles, clamped so a full-length index is valid.
        p95_idx = int(n * 0.95)
        p99_idx = int(n * 0.99)
        stats["p95"] = float(sorted_times[min(p95_idx, n - 1)])
        stats["p99"] = float(sorted_times[min(p99_idx, n - 1)])
        return stats

    def hosts(self) -> dict[str, int]:
        """Get request count per host.

        Returns:
            Dictionary mapping hosts to counts, most common first
        """
        return dict(Counter(e.host for e in self.entries).most_common())

    def status_code_categories(self) -> dict[str, int]:
        """Get counts by status code category.

        Returns:
            Dictionary with 1xx, 2xx, 3xx, 4xx, 5xx counts (codes outside
            100-599 are not counted in any bucket)
        """
        categories: dict[str, int] = {
            "1xx informational": 0,
            "2xx success": 0,
            "3xx redirection": 0,
            "4xx client error": 0,
            "5xx server error": 0,
        }
        for entry in self.entries:
            status = entry.response.status
            if 100 <= status < 200:
                categories["1xx informational"] += 1
            elif 200 <= status < 300:
                categories["2xx success"] += 1
            elif 300 <= status < 400:
                categories["3xx redirection"] += 1
            elif 400 <= status < 500:
                categories["4xx client error"] += 1
            elif 500 <= status < 600:
                categories["5xx server error"] += 1
        return categories

    def endpoint_patterns(self) -> dict[str, int]:
        """Extract common endpoint patterns with path parameters.

        Returns:
            Dictionary mapping patterns to counts, highest count first
        """
        patterns: dict[str, int] = defaultdict(int)
        for entry in self.entries:
            patterns[self._extract_pattern(entry.endpoint)] += 1
        return dict(sorted(patterns.items(), key=lambda x: x[1], reverse=True))

    def _normalize_endpoint(self, endpoint: str) -> str:
        """Normalize endpoint by removing IDs, UUIDs, and version prefixes.

        Order matters: UUID segments are replaced FIRST. A UUID may begin
        with digits (e.g. /123e4567-e89b-...), and running the numeric-ID
        substitution first would mangle it into /{id}e4567-... so the UUID
        pattern could never match.
        """
        # NOTE: the 36-char class is deliberately loose (any hex/hyphen mix),
        # matching what callers historically got for well-formed UUID paths.
        cleaned = re.sub(r"/[a-f0-9-]{36}", "/{uuid}", endpoint)
        cleaned = re.sub(r"/v\d+(?:\.\d+)?", "", cleaned)
        cleaned = re.sub(r"/\d+", "/{id}", cleaned)
        return cleaned

    def _extract_pattern(self, endpoint: str) -> str:
        """Extract endpoint pattern with parameter placeholders per segment."""
        parts = endpoint.split("/")
        normalized_parts = []
        for part in parts:
            if not part:
                # Preserve empty segments so leading/double slashes survive.
                normalized_parts.append("")
            elif part.isdigit():
                normalized_parts.append("{id}")
            elif self._is_uuid(part):
                normalized_parts.append("{uuid}")
            elif self._is_hash(part):
                normalized_parts.append("{hash}")
            else:
                normalized_parts.append(part)
        return "/".join(normalized_parts)

    def _is_uuid(self, s: str) -> bool:
        """Check if string looks like a UUID (8-4-4-4-12 hex groups)."""
        return bool(self._UUID_RE.match(s))

    def _is_hash(self, s: str) -> bool:
        """Check if string looks like a hex hash (>= 32 hex chars)."""
        return bool(self._HASH_RE.match(s))

    def render_table(self, stats: TrafficStats | None = None) -> Table:
        """Render statistics as a Rich table.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Rich Table object
        """
        if stats is None:
            stats = self.generate()
        table = Table(title="Traffic Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
        table.add_row("Total Requests", str(stats.total_requests))
        method_rows = [f"{m}: {c}" for m, c in sorted(stats.method_distribution.items())]
        table.add_row("Methods", ", ".join(method_rows) if method_rows else "N/A")
        status_rows = [f"{s}: {c}" for s, c in sorted(stats.status_breakdown.items())]
        table.add_row("Status Codes", ", ".join(status_rows) if status_rows else "N/A")
        rt = stats.response_time_stats
        if rt["avg"] > 0:
            table.add_row(
                "Response Time (avg)",
                f"{rt['avg']:.2f}ms",
            )
            table.add_row(
                "Response Time (p95)",
                f"{rt['p95']:.2f}ms",
            )
        top_endpoints = list(stats.endpoint_count.items())[:5]
        endpoint_rows = [f"{e}: {c}" for e, c in top_endpoints]
        table.add_row("Top Endpoints", ", ".join(endpoint_rows) if endpoint_rows else "N/A")
        return table

    def to_dict(self, stats: TrafficStats | None = None) -> dict[str, Any]:
        """Convert stats to dictionary.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Dictionary representation of stats, including the
            status-code-category breakdown
        """
        if stats is None:
            stats = self.generate()
        return {
            "total_requests": stats.total_requests,
            "endpoint_count": stats.endpoint_count,
            "method_distribution": stats.method_distribution,
            "status_breakdown": stats.status_breakdown,
            "content_type_distribution": stats.content_type_distribution,
            "response_time_stats": stats.response_time_stats,
            "hosts": stats.hosts,
            "status_code_categories": self.status_code_categories(),
        }

View File

@@ -0,0 +1,196 @@
"""Traffic analyzer for filtering HTTP entries."""
import re
from collections.abc import Callable
from http_log_explorer.models import FilterCriteria, HTTPEntry
class TrafficAnalyzer:
    """Analyzer for filtering and searching HTTP entries."""

    def __init__(self, entries: list[HTTPEntry]) -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects to analyze
        """
        self.entries = entries

    def filter(self, criteria: FilterCriteria) -> list[HTTPEntry]:
        """Filter entries based on criteria.

        All supplied criteria must match (logical AND). Entries lacking a
        field a criterion needs (no timestamp, no duration, no body) are
        excluded by that criterion.

        Args:
            criteria: FilterCriteria object with filtering rules

        Returns:
            Filtered list of HTTPEntry objects (a copy of all entries when
            no criteria are set)
        """
        predicates: list[Callable[[HTTPEntry], bool]] = []
        # Each criterion value is bound to a local before being captured,
        # so the lambdas close over narrowed, non-None values.
        if criteria.methods:
            methods = criteria.methods
            predicates.append(lambda e: e.request.method in methods)
        if criteria.status_codes:
            codes = criteria.status_codes
            predicates.append(lambda e: e.response.status in codes)
        if criteria.url_pattern:
            pattern = re.compile(criteria.url_pattern)
            predicates.append(lambda e: bool(pattern.search(e.request.url)))
        if criteria.content_types:
            cts = criteria.content_types
            predicates.append(
                lambda e: bool(
                    e.content_type and any(ct in e.content_type for ct in cts)
                )
            )
        if criteria.start_time:
            start = criteria.start_time
            predicates.append(lambda e: e.timestamp is not None and e.timestamp >= start)
        if criteria.end_time:
            end = criteria.end_time
            predicates.append(lambda e: e.timestamp is not None and e.timestamp <= end)
        if criteria.min_response_time_ms is not None:
            min_ms = criteria.min_response_time_ms
            # `is not None`, not truthiness: a duration of 0 ms is a valid
            # measurement and must not be silently filtered out.
            predicates.append(
                lambda e: e.duration_ms is not None and e.duration_ms >= min_ms
            )
        if criteria.max_response_time_ms is not None:
            max_ms = criteria.max_response_time_ms
            predicates.append(
                lambda e: e.duration_ms is not None and e.duration_ms <= max_ms
            )
        if criteria.request_body_contains:
            req_needle = criteria.request_body_contains
            predicates.append(
                lambda e: e.request.body is not None and req_needle in e.request.body
            )
        if criteria.response_body_contains:
            resp_needle = criteria.response_body_contains
            predicates.append(
                lambda e: e.response.body is not None and resp_needle in e.response.body
            )
        if not predicates:
            return list(self.entries)
        return [entry for entry in self.entries if all(pred(entry) for pred in predicates)]

    def by_method(self, methods: list[str]) -> list[HTTPEntry]:
        """Filter by HTTP methods.

        Args:
            methods: List of methods (GET, POST, PUT, DELETE, etc.)

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(methods=methods))

    def by_status(self, status_codes: list[int]) -> list[HTTPEntry]:
        """Filter by status codes.

        Args:
            status_codes: List of status codes to include

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(status_codes=status_codes))

    def by_url(self, url_pattern: str) -> list[HTTPEntry]:
        """Filter by URL pattern.

        Args:
            url_pattern: Regular expression pattern to match URLs

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(url_pattern=url_pattern))

    def by_content_type(self, content_types: list[str]) -> list[HTTPEntry]:
        """Filter by content types.

        Args:
            content_types: List of content type substrings to match

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(content_types=content_types))

    def by_status_range(self, min_status: int, max_status: int) -> list[HTTPEntry]:
        """Filter by status code range.

        Args:
            min_status: Minimum status code (inclusive)
            max_status: Maximum status code (inclusive)

        Returns:
            Filtered entries
        """
        return self.by_status(list(range(min_status, max_status + 1)))

    def successful_requests(self) -> list[HTTPEntry]:
        """Get all 2xx responses.

        Returns:
            Entries with 2xx status codes
        """
        return self.by_status_range(200, 299)

    def client_errors(self) -> list[HTTPEntry]:
        """Get all 4xx responses.

        Returns:
            Entries with 4xx status codes
        """
        return self.by_status_range(400, 499)

    def server_errors(self) -> list[HTTPEntry]:
        """Get all 5xx responses.

        Returns:
            Entries with 5xx status codes
        """
        return self.by_status_range(500, 599)

    def search(self, query: str, case_sensitive: bool = False) -> list[HTTPEntry]:
        """Search across URL, request body, and response body.

        Args:
            query: Search string
            case_sensitive: Whether search should be case sensitive

        Returns:
            Entries matching the query in any of the three fields
        """
        search_query = query if case_sensitive else query.lower()

        def matches(entry: HTTPEntry) -> bool:
            url = entry.request.url if case_sensitive else entry.request.url.lower()
            if search_query in url:
                return True
            if entry.request.body:
                body = entry.request.body if case_sensitive else entry.request.body.lower()
                if search_query in body:
                    return True
            if entry.response.body:
                body = entry.response.body if case_sensitive else entry.response.body.lower()
                if search_query in body:
                    return True
            return False

        return [e for e in self.entries if matches(e)]

    def get_entry_by_id(self, entry_id: str) -> HTTPEntry | None:
        """Get a specific entry by its ID.

        Args:
            entry_id: The entry ID to find

        Returns:
            The first HTTPEntry with a matching id, or None if not found
        """
        for entry in self.entries:
            if entry.id == entry_id:
                return entry
        return None