Re-upload: CI infrastructure issue resolved, all tests verified passing

This commit is contained in:
Developer
2026-03-22 16:48:09 +00:00
parent 71bae33ea9
commit 24b94c12bc
165 changed files with 23945 additions and 436 deletions

View File

@@ -0,0 +1,7 @@
"""Analyzers for HTTP traffic."""
from http_log_explorer.analyzers.diff_engine import DiffEngine
from http_log_explorer.analyzers.stats_generator import StatsGenerator
from http_log_explorer.analyzers.traffic_analyzer import TrafficAnalyzer
__all__ = ["DiffEngine", "StatsGenerator", "TrafficAnalyzer"]

View File

@@ -0,0 +1,185 @@
"""Diff engine for comparing HTTP entries."""
import difflib
from http_log_explorer.models import DiffResult, HTTPEntry
class DiffEngine:
    """Engine for comparing HTTP request/response pairs."""

    def diff(self, entry1: HTTPEntry, entry2: HTTPEntry) -> DiffResult:
        """Compare two HTTP entries.

        Args:
            entry1: First HTTPEntry
            entry2: Second HTTPEntry

        Returns:
            DiffResult with URL/status change flags plus header and body diffs
        """
        result = DiffResult(
            entry1_id=entry1.id,
            entry2_id=entry2.id,
        )
        result.url_changed = entry1.request.url != entry2.request.url
        result.status_changed = entry1.response.status != entry2.response.status
        result.status1 = entry1.response.status
        result.status2 = entry2.response.status
        result.request_headers_diff = self.headers_diff(
            entry1.request.headers,
            entry2.request.headers,
        )
        result.response_headers_diff = self.headers_diff(
            entry1.response.headers,
            entry2.response.headers,
        )
        result.request_body_diff = self.body_diff(
            entry1.request.body,
            entry2.request.body,
        )
        result.response_body_diff = self.body_diff(
            entry1.response.body,
            entry2.response.body,
        )
        return result

    def headers_diff(
        self, headers1: dict[str, str], headers2: dict[str, str]
    ) -> list[str]:
        """Compare two header dictionaries.

        Args:
            headers1: First headers dict
            headers2: Second headers dict

        Returns:
            List of diff lines: "- key: value" for removed/changed-from,
            "+ key: value" for added/changed-to (keys in sorted order)
        """
        all_keys = set(headers1) | set(headers2)
        diff_lines: list[str] = []
        for key in sorted(all_keys):
            val1 = headers1.get(key)
            val2 = headers2.get(key)
            if val1 == val2:
                continue
            # For a changed header, emit the old value first, then the new.
            if val1 is not None:
                diff_lines.append(f"- {key}: {val1}")
            if val2 is not None:
                diff_lines.append(f"+ {key}: {val2}")
        return diff_lines

    def body_diff(
        self, body1: str | None, body2: str | None
    ) -> list[str]:
        """Compare two body strings.

        Args:
            body1: First body (may be None)
            body2: Second body (may be None)

        Returns:
            List of diff lines (unified format), uniformly newline-free
        """
        if body1 == body2:
            return []
        # Split WITHOUT keepends: we pass lineterm="" below, and mixing
        # keepends=True with lineterm="" leaves content lines carrying a
        # trailing "\n" while header/hunk lines have none, which makes
        # unified_diff_output's "\n".join() double-space the content.
        lines1 = (body1 or "").splitlines()
        lines2 = (body2 or "").splitlines()
        if not lines1 and not lines2:
            # Bodies differ only in None-vs-empty; nothing textual to show.
            return []
        return list(difflib.unified_diff(
            lines1,
            lines2,
            fromfile="before",
            tofile="after",
            lineterm="",
        ))

    def unified_diff_output(self, diff_result: DiffResult) -> str:
        """Generate a human-readable unified diff output.

        Args:
            diff_result: The diff result

        Returns:
            Formatted string with all differences, or a "no differences"
            message when every section is empty
        """
        lines: list[str] = []
        lines.append(f"=== Diff: {diff_result.entry1_id} vs {diff_result.entry2_id} ===")
        lines.append("")
        if diff_result.url_changed:
            # url_changed is a bool; echoing "True" added no information.
            lines.append("URL changed")
        if diff_result.status_changed:
            lines.append(f"Status: {diff_result.status1} -> {diff_result.status2}")
        if diff_result.request_headers_diff:
            lines.append("")
            lines.append("--- Request Headers ---")
            lines.extend(diff_result.request_headers_diff)
        if diff_result.request_body_diff:
            lines.append("")
            lines.append("--- Request Body ---")
            lines.extend(diff_result.request_body_diff)
        if diff_result.response_headers_diff:
            lines.append("")
            lines.append("--- Response Headers ---")
            lines.extend(diff_result.response_headers_diff)
        if diff_result.response_body_diff:
            lines.append("")
            lines.append("--- Response Body ---")
            lines.extend(diff_result.response_body_diff)
        if not self.has_differences(diff_result):
            lines.append("No differences found.")
        return "\n".join(lines)

    def has_differences(self, diff_result: DiffResult) -> bool:
        """Check if there are any differences.

        Args:
            diff_result: The diff result

        Returns:
            True if any flag is set or any diff section is non-empty
        """
        return bool(
            diff_result.url_changed
            or diff_result.status_changed
            or diff_result.request_headers_diff
            or diff_result.request_body_diff
            or diff_result.response_headers_diff
            or diff_result.response_body_diff
        )

View File

@@ -0,0 +1,277 @@
"""Statistics generator for HTTP traffic analytics."""
import re
from collections import Counter, defaultdict
from dataclasses import dataclass
from typing import Any
from rich.table import Table
from http_log_explorer.models import HTTPEntry
@dataclass
class TrafficStats:
    """Container for traffic statistics.

    Produced by StatsGenerator.generate(); all dictionaries are plain
    dicts so the object serializes cleanly.
    """
    # Total number of HTTP entries analyzed.
    total_requests: int
    # Normalized endpoint pattern -> request count, most common first.
    endpoint_count: dict[str, int]
    # HTTP method (e.g. "GET") -> request count.
    method_distribution: dict[str, int]
    # Status code -> request count, sorted by status code.
    status_breakdown: dict[int, int]
    # Main content type (";"-parameters stripped) -> request count.
    content_type_distribution: dict[str, int]
    # Keys: "min", "max", "avg", "median", "p95", "p99" — values in ms.
    response_time_stats: dict[str, float]
    # Host name -> request count, most common first.
    hosts: dict[str, int]
class StatsGenerator:
    """Generate statistics from HTTP entries."""

    # Compiled once at class-definition time; the per-segment helpers
    # below would otherwise recompile these patterns on every call.
    _UUID_RE = re.compile(
        r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$",
        re.IGNORECASE,
    )
    _HASH_RE = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE)

    def __init__(self, entries: list[HTTPEntry]) -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects
        """
        self.entries = entries

    def generate(self) -> TrafficStats:
        """Generate all statistics.

        Returns:
            TrafficStats object with all computed statistics
        """
        return TrafficStats(
            total_requests=len(self.entries),
            endpoint_count=self.endpoint_count(),
            method_distribution=self.method_distribution(),
            status_breakdown=self.status_breakdown(),
            content_type_distribution=self.content_type_distribution(),
            response_time_stats=self.response_time_stats(),
            hosts=self.hosts(),
        )

    def endpoint_count(self) -> dict[str, int]:
        """Count requests per normalized endpoint pattern.

        Returns:
            Dictionary mapping endpoint patterns to counts, most common first
        """
        counter: Counter[str] = Counter()
        for entry in self.entries:
            counter[self._normalize_endpoint(entry.endpoint)] += 1
        return dict(counter.most_common())

    def method_distribution(self) -> dict[str, int]:
        """Get distribution of HTTP methods.

        Returns:
            Dictionary mapping methods to counts
        """
        return dict(Counter(e.request.method for e in self.entries))

    def status_breakdown(self) -> dict[int, int]:
        """Get breakdown of status codes.

        Returns:
            Dictionary mapping status codes to counts, sorted by code
        """
        counter = Counter(e.response.status for e in self.entries)
        return dict(sorted(counter.items()))

    def content_type_distribution(self) -> dict[str, int]:
        """Get distribution of content types.

        Returns:
            Dictionary mapping main content types (parameters such as
            "; charset=utf-8" stripped) to counts, most common first
        """
        counter: Counter[str] = Counter()
        for entry in self.entries:
            ct = entry.content_type or "unknown"
            counter[ct.split(";")[0].strip()] += 1
        return dict(counter.most_common())

    def response_time_stats(self) -> dict[str, float]:
        """Calculate response time statistics.

        Entries without a duration are ignored.

        Returns:
            Dictionary with min, max, avg, median, p95, p99 in ms
            (all zeros when no entry has a duration)
        """
        times = [e.duration_ms for e in self.entries if e.duration_ms is not None]
        if not times:
            return {"min": 0.0, "max": 0.0, "avg": 0.0, "median": 0.0, "p95": 0.0, "p99": 0.0}
        sorted_times = sorted(times)
        n = len(sorted_times)
        stats = {
            "min": float(sorted_times[0]),
            "max": float(sorted_times[-1]),
            "avg": float(sum(times) / n),
            # Upper-middle element for even n (nearest-rank, not interpolated).
            "median": float(sorted_times[n // 2]),
        }
        # Nearest-rank percentiles, clamped so a full-length index is valid.
        p95_idx = int(n * 0.95)
        p99_idx = int(n * 0.99)
        stats["p95"] = float(sorted_times[min(p95_idx, n - 1)])
        stats["p99"] = float(sorted_times[min(p99_idx, n - 1)])
        return stats

    def hosts(self) -> dict[str, int]:
        """Get request count per host.

        Returns:
            Dictionary mapping hosts to counts, most common first
        """
        return dict(Counter(e.host for e in self.entries).most_common())

    def status_code_categories(self) -> dict[str, int]:
        """Get counts by status code category.

        Returns:
            Dictionary with 1xx, 2xx, 3xx, 4xx, 5xx counts (codes outside
            100-599 are not counted in any bucket)
        """
        categories: dict[str, int] = {
            "1xx informational": 0,
            "2xx success": 0,
            "3xx redirection": 0,
            "4xx client error": 0,
            "5xx server error": 0,
        }
        for entry in self.entries:
            status = entry.response.status
            if 100 <= status < 200:
                categories["1xx informational"] += 1
            elif 200 <= status < 300:
                categories["2xx success"] += 1
            elif 300 <= status < 400:
                categories["3xx redirection"] += 1
            elif 400 <= status < 500:
                categories["4xx client error"] += 1
            elif 500 <= status < 600:
                categories["5xx server error"] += 1
        return categories

    def endpoint_patterns(self) -> dict[str, int]:
        """Extract common endpoint patterns with path parameters.

        Returns:
            Dictionary mapping patterns to counts, highest count first
        """
        patterns: dict[str, int] = defaultdict(int)
        for entry in self.entries:
            patterns[self._extract_pattern(entry.endpoint)] += 1
        return dict(sorted(patterns.items(), key=lambda x: x[1], reverse=True))

    def _normalize_endpoint(self, endpoint: str) -> str:
        """Normalize endpoint by removing IDs, UUIDs, and version prefixes.

        Order matters: UUID segments are replaced FIRST. A UUID may begin
        with digits (e.g. /123e4567-e89b-...), and running the numeric-ID
        substitution first would mangle it into /{id}e4567-... so the UUID
        pattern could never match.
        """
        # NOTE: the 36-char class is deliberately loose (any hex/hyphen mix),
        # matching what callers historically got for well-formed UUID paths.
        cleaned = re.sub(r"/[a-f0-9-]{36}", "/{uuid}", endpoint)
        cleaned = re.sub(r"/v\d+(?:\.\d+)?", "", cleaned)
        cleaned = re.sub(r"/\d+", "/{id}", cleaned)
        return cleaned

    def _extract_pattern(self, endpoint: str) -> str:
        """Extract endpoint pattern with parameter placeholders per segment."""
        parts = endpoint.split("/")
        normalized_parts = []
        for part in parts:
            if not part:
                # Preserve empty segments so leading/double slashes survive.
                normalized_parts.append("")
            elif part.isdigit():
                normalized_parts.append("{id}")
            elif self._is_uuid(part):
                normalized_parts.append("{uuid}")
            elif self._is_hash(part):
                normalized_parts.append("{hash}")
            else:
                normalized_parts.append(part)
        return "/".join(normalized_parts)

    def _is_uuid(self, s: str) -> bool:
        """Check if string looks like a UUID (8-4-4-4-12 hex groups)."""
        return bool(self._UUID_RE.match(s))

    def _is_hash(self, s: str) -> bool:
        """Check if string looks like a hex hash (>= 32 hex chars)."""
        return bool(self._HASH_RE.match(s))

    def render_table(self, stats: TrafficStats | None = None) -> Table:
        """Render statistics as a Rich table.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Rich Table object
        """
        if stats is None:
            stats = self.generate()
        table = Table(title="Traffic Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
        table.add_row("Total Requests", str(stats.total_requests))
        method_rows = [f"{m}: {c}" for m, c in sorted(stats.method_distribution.items())]
        table.add_row("Methods", ", ".join(method_rows) if method_rows else "N/A")
        status_rows = [f"{s}: {c}" for s, c in sorted(stats.status_breakdown.items())]
        table.add_row("Status Codes", ", ".join(status_rows) if status_rows else "N/A")
        rt = stats.response_time_stats
        if rt["avg"] > 0:
            table.add_row(
                "Response Time (avg)",
                f"{rt['avg']:.2f}ms",
            )
            table.add_row(
                "Response Time (p95)",
                f"{rt['p95']:.2f}ms",
            )
        top_endpoints = list(stats.endpoint_count.items())[:5]
        endpoint_rows = [f"{e}: {c}" for e, c in top_endpoints]
        table.add_row("Top Endpoints", ", ".join(endpoint_rows) if endpoint_rows else "N/A")
        return table

    def to_dict(self, stats: TrafficStats | None = None) -> dict[str, Any]:
        """Convert stats to dictionary.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Dictionary representation of stats, including the
            status-code-category breakdown
        """
        if stats is None:
            stats = self.generate()
        return {
            "total_requests": stats.total_requests,
            "endpoint_count": stats.endpoint_count,
            "method_distribution": stats.method_distribution,
            "status_breakdown": stats.status_breakdown,
            "content_type_distribution": stats.content_type_distribution,
            "response_time_stats": stats.response_time_stats,
            "hosts": stats.hosts,
            "status_code_categories": self.status_code_categories(),
        }

View File

@@ -0,0 +1,196 @@
"""Traffic analyzer for filtering HTTP entries."""
import re
from collections.abc import Callable
from http_log_explorer.models import FilterCriteria, HTTPEntry
class TrafficAnalyzer:
    """Analyzer for filtering and searching HTTP entries."""

    def __init__(self, entries: list[HTTPEntry]) -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects to analyze
        """
        self.entries = entries

    def filter(self, criteria: FilterCriteria) -> list[HTTPEntry]:
        """Filter entries based on criteria.

        All supplied criteria must match (logical AND). Entries lacking a
        field a criterion needs (no timestamp, no duration, no body) are
        excluded by that criterion.

        Args:
            criteria: FilterCriteria object with filtering rules

        Returns:
            Filtered list of HTTPEntry objects (a copy of all entries when
            no criteria are set)
        """
        predicates: list[Callable[[HTTPEntry], bool]] = []
        # Each criterion value is bound to a local before being captured,
        # so the lambdas close over narrowed, non-None values.
        if criteria.methods:
            methods = criteria.methods
            predicates.append(lambda e: e.request.method in methods)
        if criteria.status_codes:
            codes = criteria.status_codes
            predicates.append(lambda e: e.response.status in codes)
        if criteria.url_pattern:
            pattern = re.compile(criteria.url_pattern)
            predicates.append(lambda e: bool(pattern.search(e.request.url)))
        if criteria.content_types:
            cts = criteria.content_types
            predicates.append(
                lambda e: bool(
                    e.content_type and any(ct in e.content_type for ct in cts)
                )
            )
        if criteria.start_time:
            start = criteria.start_time
            predicates.append(lambda e: e.timestamp is not None and e.timestamp >= start)
        if criteria.end_time:
            end = criteria.end_time
            predicates.append(lambda e: e.timestamp is not None and e.timestamp <= end)
        if criteria.min_response_time_ms is not None:
            min_ms = criteria.min_response_time_ms
            # `is not None`, not truthiness: a duration of 0 ms is a valid
            # measurement and must not be silently filtered out.
            predicates.append(
                lambda e: e.duration_ms is not None and e.duration_ms >= min_ms
            )
        if criteria.max_response_time_ms is not None:
            max_ms = criteria.max_response_time_ms
            predicates.append(
                lambda e: e.duration_ms is not None and e.duration_ms <= max_ms
            )
        if criteria.request_body_contains:
            req_needle = criteria.request_body_contains
            predicates.append(
                lambda e: e.request.body is not None and req_needle in e.request.body
            )
        if criteria.response_body_contains:
            resp_needle = criteria.response_body_contains
            predicates.append(
                lambda e: e.response.body is not None and resp_needle in e.response.body
            )
        if not predicates:
            return list(self.entries)
        return [entry for entry in self.entries if all(pred(entry) for pred in predicates)]

    def by_method(self, methods: list[str]) -> list[HTTPEntry]:
        """Filter by HTTP methods.

        Args:
            methods: List of methods (GET, POST, PUT, DELETE, etc.)

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(methods=methods))

    def by_status(self, status_codes: list[int]) -> list[HTTPEntry]:
        """Filter by status codes.

        Args:
            status_codes: List of status codes to include

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(status_codes=status_codes))

    def by_url(self, url_pattern: str) -> list[HTTPEntry]:
        """Filter by URL pattern.

        Args:
            url_pattern: Regular expression pattern to match URLs

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(url_pattern=url_pattern))

    def by_content_type(self, content_types: list[str]) -> list[HTTPEntry]:
        """Filter by content types.

        Args:
            content_types: List of content type substrings to match

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(content_types=content_types))

    def by_status_range(self, min_status: int, max_status: int) -> list[HTTPEntry]:
        """Filter by status code range.

        Args:
            min_status: Minimum status code (inclusive)
            max_status: Maximum status code (inclusive)

        Returns:
            Filtered entries
        """
        return self.by_status(list(range(min_status, max_status + 1)))

    def successful_requests(self) -> list[HTTPEntry]:
        """Get all 2xx responses.

        Returns:
            Entries with 2xx status codes
        """
        return self.by_status_range(200, 299)

    def client_errors(self) -> list[HTTPEntry]:
        """Get all 4xx responses.

        Returns:
            Entries with 4xx status codes
        """
        return self.by_status_range(400, 499)

    def server_errors(self) -> list[HTTPEntry]:
        """Get all 5xx responses.

        Returns:
            Entries with 5xx status codes
        """
        return self.by_status_range(500, 599)

    def search(self, query: str, case_sensitive: bool = False) -> list[HTTPEntry]:
        """Search across URL, request body, and response body.

        Args:
            query: Search string
            case_sensitive: Whether search should be case sensitive

        Returns:
            Entries matching the query in any of the three fields
        """
        search_query = query if case_sensitive else query.lower()

        def matches(entry: HTTPEntry) -> bool:
            url = entry.request.url if case_sensitive else entry.request.url.lower()
            if search_query in url:
                return True
            if entry.request.body:
                body = entry.request.body if case_sensitive else entry.request.body.lower()
                if search_query in body:
                    return True
            if entry.response.body:
                body = entry.response.body if case_sensitive else entry.response.body.lower()
                if search_query in body:
                    return True
            return False

        return [e for e in self.entries if matches(e)]

    def get_entry_by_id(self, entry_id: str) -> HTTPEntry | None:
        """Get a specific entry by its ID.

        Args:
            entry_id: The entry ID to find

        Returns:
            The first HTTPEntry with a matching id, or None if not found
        """
        for entry in self.entries:
            if entry.id == entry_id:
                return entry
        return None