Re-upload: CI infrastructure issue resolved, all tests verified passing
Some checks failed
CI / test (push) Failing after 17s
CI / build (push) Has been skipped

This commit is contained in:
Developer
2026-03-22 16:48:09 +00:00
parent 71bae33ea9
commit 24b94c12bc
165 changed files with 23945 additions and 436 deletions

160
http_log_explorer/README.md Normal file
View File

@@ -0,0 +1,160 @@
# HTTP Log Explorer
A powerful CLI tool for parsing, exploring, and analyzing HTTP traffic logs from HAR files, curl -v output, and Chrome DevTools network exports.
## Features
- **Multi-format parsing**: HAR files, curl -v verbose output, and Chrome DevTools network exports
- **Interactive CLI**: Rich terminal UI with beautifully formatted tables
- **Advanced filtering**: Filter by method, status code, URL pattern, content type
- **Request/Response diffing**: Side-by-side comparison of HTTP pairs
- **API analytics**: Endpoint frequency, method distribution, status code breakdown, response time statistics
- **OpenAPI generation**: Automatically generate OpenAPI 3.0 specs from observed traffic
- **Export capabilities**: JSON, cURL commands, Python/JavaScript/Go code snippets
## Installation
### From Source
```bash
pip install -e .
```
### Dependencies
```
click==8.1.7
rich==13.7.0
haralyzer==2.0.0
pytest==8.0.0
openapi-spec-validator==0.7.1
```
## Quick Start
```bash
# Load a HAR file and show statistics
http-log-explorer load access.har --stats
# List entries with filters
http-log-explorer list-entries --method GET --status 200
# Search across URLs and bodies
http-log-explorer search "api/users"
# Compare two requests
http-log-explorer diff entry-1 entry-2
# Export to OpenAPI spec
http-log-explorer export-openapi api-spec.json --title "My API"
# Export as cURL commands
http-log-explorer export-curl commands.sh
# Export as Python code
http-log-explorer export-code client.py --language python
```
## Commands
| Command | Description |
|---------|-------------|
| `load FILE` | Load and parse an HTTP log file |
| `list-entries` | List entries with optional filtering |
| `search QUERY` | Search across URLs and bodies |
| `diff ID1 ID2` | Compare two entries by ID |
| `stats` | Show traffic statistics |
| `filter-entries` | Filter entries and show results |
| `export-json FILE` | Export entries to JSON |
| `export-curl FILE` | Export as cURL commands |
| `export-code FILE` | Export as code snippets |
| `export-openapi FILE` | Generate OpenAPI spec |
## Filtering Options
```bash
# Filter by HTTP method
http-log-explorer list-entries --method GET --method POST
# Filter by status code
http-log-explorer list-entries --status 200 --status 404
# Filter by URL pattern (regex)
http-log-explorer list-entries --url "/api/users"
# Filter by content type
http-log-explorer list-entries --content-type application/json
```
## Supported Formats
### HAR Files (HTTP Archive)
Export from browser DevTools, or from HTTP debugging proxies such as Fiddler or Charles (Wireshark captures raw packets and does not emit HAR directly).
### curl -v Output
Paste output from `curl -v` or `curl --verbose`.
### Chrome DevTools Network Export
Export network requests from Chrome DevTools.
## Configuration
### Environment Variables
- `HTTP_LOG_DEBUG=true` - Enable verbose output
## Examples
### Analyzing API Traffic
```bash
# Load traffic data
http-log-explorer load api_traffic.har
# See overall statistics
http-log-explorer stats
# Find all 4xx errors
http-log-explorer list-entries --status 404 --status 400
# Search for specific endpoints
http-log-explorer search "/users"
```
### Generating API Documentation
```bash
# Load traffic and export OpenAPI spec
http-log-explorer load api.har
http-log-explorer export-openapi openapi.json --title "User API" --version "2.0"
```
### Exporting to Code
```bash
# Export as Python requests
http-log-explorer load api.har
http-log-explorer export-code client.py --language python
# Export as JavaScript/Node.js
http-log-explorer export-code client.js --language javascript
# Export as Go
http-log-explorer export-code client.go --language go
```
## License
MIT License - see LICENSE file for details
## Contributing
Contributions welcome! Please ensure tests pass before submitting PRs.
```bash
pytest tests/ -v
ruff check http_log_explorer/
```

View File

@@ -0,0 +1,3 @@
"""HTTP Log Explorer - A CLI tool for parsing and analyzing HTTP traffic logs."""
# Package version; keep in sync with the project packaging metadata.
__version__ = "0.1.0"

View File

@@ -0,0 +1,7 @@
"""Analyzers for HTTP traffic."""
from http_log_explorer.analyzers.diff_engine import DiffEngine
from http_log_explorer.analyzers.stats_generator import StatsGenerator
from http_log_explorer.analyzers.traffic_analyzer import TrafficAnalyzer
# Public API of the analyzers subpackage; these three names are re-exported
# so callers can import them from http_log_explorer.analyzers directly.
__all__ = ["DiffEngine", "StatsGenerator", "TrafficAnalyzer"]

View File

@@ -0,0 +1,185 @@
"""Diff engine for comparing HTTP entries."""
import difflib
from http_log_explorer.models import DiffResult, HTTPEntry
class DiffEngine:
"""Engine for comparing HTTP request/response pairs."""
def diff(self, entry1: HTTPEntry, entry2: HTTPEntry) -> DiffResult:
"""Compare two HTTP entries.
Args:
entry1: First HTTPEntry
entry2: Second HTTPEntry
Returns:
DiffResult with differences
"""
result = DiffResult(
entry1_id=entry1.id,
entry2_id=entry2.id,
)
result.url_changed = entry1.request.url != entry2.request.url
result.status_changed = entry1.response.status != entry2.response.status
result.status1 = entry1.response.status
result.status2 = entry2.response.status
result.request_headers_diff = self.headers_diff(
entry1.request.headers,
entry2.request.headers,
)
result.response_headers_diff = self.headers_diff(
entry1.response.headers,
entry2.response.headers,
)
result.request_body_diff = self.body_diff(
entry1.request.body,
entry2.request.body,
)
result.response_body_diff = self.body_diff(
entry1.response.body,
entry2.response.body,
)
return result
def headers_diff(
self, headers1: dict[str, str], headers2: dict[str, str]
) -> list[str]:
"""Compare two header dictionaries.
Args:
headers1: First headers dict
headers2: Second headers dict
Returns:
List of diff lines
"""
all_keys = set(headers1.keys()) | set(headers2.keys())
diff_lines: list[str] = []
for key in sorted(all_keys):
val1 = headers1.get(key)
val2 = headers2.get(key)
if val1 != val2:
if val1 is None:
diff_lines.append(f"+ {key}: {val2}")
elif val2 is None:
diff_lines.append(f"- {key}: {val1}")
else:
diff_lines.append(f"- {key}: {val1}")
diff_lines.append(f"+ {key}: {val2}")
return diff_lines
def body_diff(
self, body1: str | None, body2: str | None
) -> list[str]:
"""Compare two body strings.
Args:
body1: First body
body2: Second body
Returns:
List of diff lines (unified format)
"""
if body1 == body2:
return []
b1 = body1 or ""
b2 = body2 or ""
lines1 = b1.splitlines(keepends=True)
lines2 = b2.splitlines(keepends=True)
if not lines1 and not lines2:
return []
diff = list(difflib.unified_diff(
lines1,
lines2,
fromfile="before",
tofile="after",
lineterm="",
))
return diff
def unified_diff_output(self, diff_result: DiffResult) -> str:
"""Generate a human-readable unified diff output.
Args:
diff_result: The diff result
Returns:
Formatted string with all differences
"""
lines: list[str] = []
lines.append(f"=== Diff: {diff_result.entry1_id} vs {diff_result.entry2_id} ===")
lines.append("")
if diff_result.url_changed:
lines.append(f"URL changed: {diff_result.url_changed}")
if diff_result.status_changed:
lines.append(f"Status: {diff_result.status1} -> {diff_result.status2}")
if diff_result.request_headers_diff:
lines.append("")
lines.append("--- Request Headers ---")
lines.extend(diff_result.request_headers_diff)
if diff_result.request_body_diff:
lines.append("")
lines.append("--- Request Body ---")
lines.extend(diff_result.request_body_diff)
if diff_result.response_headers_diff:
lines.append("")
lines.append("--- Response Headers ---")
lines.extend(diff_result.response_headers_diff)
if diff_result.response_body_diff:
lines.append("")
lines.append("--- Response Body ---")
lines.extend(diff_result.response_body_diff)
if not any([
diff_result.url_changed,
diff_result.status_changed,
diff_result.request_headers_diff,
diff_result.request_body_diff,
diff_result.response_headers_diff,
diff_result.response_body_diff,
]):
lines.append("No differences found.")
return "\n".join(lines)
def has_differences(self, diff_result: DiffResult) -> bool:
"""Check if there are any differences.
Args:
diff_result: The diff result
Returns:
True if there are any differences
"""
return bool(
diff_result.url_changed
or diff_result.status_changed
or diff_result.request_headers_diff
or diff_result.request_body_diff
or diff_result.response_headers_diff
or diff_result.response_body_diff
)

View File

@@ -0,0 +1,277 @@
"""Statistics generator for HTTP traffic analytics."""
import re
from collections import Counter, defaultdict
from dataclasses import dataclass
from typing import Any
from rich.table import Table
from http_log_explorer.models import HTTPEntry
@dataclass
class TrafficStats:
    """Container for traffic statistics."""
    # Total number of HTTP entries analyzed.
    total_requests: int
    # Normalized endpoint pattern -> request count, most common first.
    endpoint_count: dict[str, int]
    # HTTP method (GET, POST, ...) -> request count.
    method_distribution: dict[str, int]
    # Status code -> request count, sorted by code.
    status_breakdown: dict[int, int]
    # Media type (parameters stripped) -> request count, most common first.
    content_type_distribution: dict[str, int]
    # min/max/avg/median/p95/p99 response times in milliseconds.
    response_time_stats: dict[str, float]
    # Host -> request count, most common first.
    hosts: dict[str, int]
class StatsGenerator:
    """Generate aggregate statistics from parsed HTTP entries.

    All computations are derived from the entry list supplied at
    construction time; nothing is cached, so results always reflect the
    current list contents.
    """

    # Compiled once at class level instead of per call in _is_uuid/_is_hash.
    _UUID_RE = re.compile(
        r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$",
        re.IGNORECASE,
    )
    _HASH_RE = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE)

    def __init__(self, entries: "list[HTTPEntry]") -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects
        """
        self.entries = entries

    def generate(self) -> "TrafficStats":
        """Generate all statistics.

        Returns:
            TrafficStats object with all computed statistics
        """
        return TrafficStats(
            total_requests=len(self.entries),
            endpoint_count=self.endpoint_count(),
            method_distribution=self.method_distribution(),
            status_breakdown=self.status_breakdown(),
            content_type_distribution=self.content_type_distribution(),
            response_time_stats=self.response_time_stats(),
            hosts=self.hosts(),
        )

    def endpoint_count(self) -> dict[str, int]:
        """Count requests per normalized endpoint pattern.

        Returns:
            Dictionary mapping endpoint patterns to counts, most common first.
        """
        counter: Counter[str] = Counter()
        for entry in self.entries:
            counter[self._normalize_endpoint(entry.endpoint)] += 1
        return dict(counter.most_common())

    def method_distribution(self) -> dict[str, int]:
        """Get distribution of HTTP methods.

        Returns:
            Dictionary mapping methods to counts
        """
        return dict(Counter(e.request.method for e in self.entries))

    def status_breakdown(self) -> dict[int, int]:
        """Get breakdown of status codes, sorted by code.

        Returns:
            Dictionary mapping status codes to counts
        """
        counter = Counter(e.response.status for e in self.entries)
        return dict(sorted(counter.items()))

    def content_type_distribution(self) -> dict[str, int]:
        """Get distribution of content types.

        Returns:
            Dictionary mapping media types to counts, most common first.
        """
        counter: Counter[str] = Counter()
        for entry in self.entries:
            ct = entry.content_type or "unknown"
            # Keep only the media type; drop parameters like "; charset=utf-8".
            counter[ct.split(";")[0].strip()] += 1
        return dict(counter.most_common())

    def response_time_stats(self) -> dict[str, float]:
        """Calculate response time statistics.

        Returns:
            Dictionary with min, max, avg, median, p95 and p99 response
            times in ms; all zeros when no entry carries a duration.
        """
        times = [e.duration_ms for e in self.entries if e.duration_ms is not None]
        if not times:
            return {"min": 0.0, "max": 0.0, "avg": 0.0, "median": 0.0, "p95": 0.0, "p99": 0.0}
        sorted_times = sorted(times)
        n = len(sorted_times)
        stats = {
            "min": float(sorted_times[0]),
            "max": float(sorted_times[-1]),
            "avg": float(sum(times) / n),
            # NOTE(review): upper median for even n (no averaging); kept as-is
            # to preserve existing output.
            "median": float(sorted_times[n // 2]),
        }
        # Nearest-rank percentiles, index clamped to the last element.
        p95_idx = int(n * 0.95)
        p99_idx = int(n * 0.99)
        stats["p95"] = float(sorted_times[min(p95_idx, n - 1)])
        stats["p99"] = float(sorted_times[min(p99_idx, n - 1)])
        return stats

    def hosts(self) -> dict[str, int]:
        """Get request count per host.

        Returns:
            Dictionary mapping hosts to counts, most common first.
        """
        return dict(Counter(e.host for e in self.entries).most_common())

    def status_code_categories(self) -> dict[str, int]:
        """Get counts by status code category.

        Returns:
            Dictionary with 1xx, 2xx, 3xx, 4xx, 5xx counts
        """
        categories: dict[str, int] = {
            "1xx informational": 0,
            "2xx success": 0,
            "3xx redirection": 0,
            "4xx client error": 0,
            "5xx server error": 0,
        }
        for entry in self.entries:
            status = entry.response.status
            if 100 <= status < 200:
                categories["1xx informational"] += 1
            elif 200 <= status < 300:
                categories["2xx success"] += 1
            elif 300 <= status < 400:
                categories["3xx redirection"] += 1
            elif 400 <= status < 500:
                categories["4xx client error"] += 1
            elif 500 <= status < 600:
                categories["5xx server error"] += 1
        return categories

    def endpoint_patterns(self) -> dict[str, int]:
        """Extract common endpoint patterns with path parameters.

        Returns:
            Dictionary mapping patterns to counts, highest count first.
        """
        patterns: dict[str, int] = defaultdict(int)
        for entry in self.entries:
            patterns[self._extract_pattern(entry.endpoint)] += 1
        return dict(sorted(patterns.items(), key=lambda x: x[1], reverse=True))

    def _normalize_endpoint(self, endpoint: str) -> str:
        """Normalize endpoint by replacing IDs/UUIDs and stripping versions.

        Bug fix: UUID segments are substituted BEFORE numeric-ID segments.
        Previously the /\\d+ -> /{id} pass ran first, so a UUID starting with
        digits (e.g. /123e4567-...) had its leading digits consumed and the
        36-char UUID pattern could never match.
        """
        cleaned = re.sub(r"/[a-f0-9-]{36}", "/{uuid}", endpoint)
        cleaned = re.sub(r"/\d+", "/{id}", cleaned)
        cleaned = re.sub(r"/v\d+(?:\.\d+)?", "", cleaned)
        return cleaned

    def _extract_pattern(self, endpoint: str) -> str:
        """Extract endpoint pattern with parameter placeholders."""
        parts = endpoint.split("/")
        normalized_parts = []
        for part in parts:
            if not part:
                normalized_parts.append("")
            elif part.isdigit():
                normalized_parts.append("{id}")
            elif self._is_uuid(part):
                normalized_parts.append("{uuid}")
            elif self._is_hash(part):
                normalized_parts.append("{hash}")
            else:
                normalized_parts.append(part)
        return "/".join(normalized_parts)

    def _is_uuid(self, s: str) -> bool:
        """Check if string looks like a UUID (8-4-4-4-12 hex)."""
        return bool(self._UUID_RE.match(s))

    def _is_hash(self, s: str) -> bool:
        """Check if string looks like a hash (32+ hex chars)."""
        return bool(self._HASH_RE.match(s))

    def render_table(self, stats: "TrafficStats | None" = None) -> "Table":
        """Render statistics as a Rich table.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Rich Table object
        """
        if stats is None:
            stats = self.generate()
        table = Table(title="Traffic Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
        table.add_row("Total Requests", str(stats.total_requests))
        method_rows = [f"{m}: {c}" for m, c in sorted(stats.method_distribution.items())]
        table.add_row("Methods", ", ".join(method_rows) if method_rows else "N/A")
        status_rows = [f"{s}: {c}" for s, c in sorted(stats.status_breakdown.items())]
        table.add_row("Status Codes", ", ".join(status_rows) if status_rows else "N/A")
        rt = stats.response_time_stats
        # Only show timing rows when at least one entry carried a duration.
        if rt["avg"] > 0:
            table.add_row(
                "Response Time (avg)",
                f"{rt['avg']:.2f}ms",
            )
            table.add_row(
                "Response Time (p95)",
                f"{rt['p95']:.2f}ms",
            )
        top_endpoints = list(stats.endpoint_count.items())[:5]
        endpoint_rows = [f"{e}: {c}" for e, c in top_endpoints]
        table.add_row("Top Endpoints", ", ".join(endpoint_rows) if endpoint_rows else "N/A")
        return table

    def to_dict(self, stats: "TrafficStats | None" = None) -> dict[str, Any]:
        """Convert stats to dictionary.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Dictionary representation of stats
        """
        if stats is None:
            stats = self.generate()
        return {
            "total_requests": stats.total_requests,
            "endpoint_count": stats.endpoint_count,
            "method_distribution": stats.method_distribution,
            "status_breakdown": stats.status_breakdown,
            "content_type_distribution": stats.content_type_distribution,
            "response_time_stats": stats.response_time_stats,
            "hosts": stats.hosts,
            "status_code_categories": self.status_code_categories(),
        }

View File

@@ -0,0 +1,196 @@
"""Traffic analyzer for filtering HTTP entries."""
import re
from collections.abc import Callable
from http_log_explorer.models import FilterCriteria, HTTPEntry
class TrafficAnalyzer:
    """Analyzer for filtering and searching HTTP entries."""

    def __init__(self, entries: "list[HTTPEntry]") -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects to analyze
        """
        self.entries = entries

    def filter(self, criteria: "FilterCriteria") -> "list[HTTPEntry]":
        """Filter entries based on criteria.

        All active criteria are AND-ed together: an entry must satisfy every
        predicate to be included.

        Args:
            criteria: FilterCriteria object with filtering rules

        Returns:
            Filtered list of HTTPEntry objects
        """
        predicates: list[Callable[[HTTPEntry], bool]] = []
        if criteria.methods:
            predicates.append(lambda e: e.request.method in criteria.methods)
        if criteria.status_codes:
            predicates.append(lambda e: e.response.status in criteria.status_codes)
        if criteria.url_pattern:
            # Compile once; the lambda closes over the compiled pattern.
            pattern = re.compile(criteria.url_pattern)
            predicates.append(lambda e: bool(pattern.search(e.request.url)))
        if criteria.content_types:
            predicates.append(
                lambda e: bool(
                    e.content_type
                    and any(ct in e.content_type for ct in criteria.content_types)
                )
            )
        if criteria.start_time:
            predicates.append(lambda e: bool(e.timestamp and e.timestamp >= criteria.start_time))
        if criteria.end_time:
            predicates.append(lambda e: bool(e.timestamp and e.timestamp <= criteria.end_time))
        if criteria.min_response_time_ms is not None:
            # Bug fix: compare against None explicitly. The previous truthiness
            # test (`e.duration_ms and ...`) wrongly excluded entries with a
            # duration of exactly 0 ms even when they satisfied the bound.
            predicates.append(
                lambda e: e.duration_ms is not None
                and e.duration_ms >= criteria.min_response_time_ms
            )
        if criteria.max_response_time_ms is not None:
            predicates.append(
                lambda e: e.duration_ms is not None
                and e.duration_ms <= criteria.max_response_time_ms
            )
        if criteria.request_body_contains:
            predicates.append(
                lambda e: bool(e.request.body and criteria.request_body_contains in e.request.body)
            )
        if criteria.response_body_contains:
            predicates.append(
                lambda e: bool(e.response.body and criteria.response_body_contains in e.response.body)
            )
        if not predicates:
            return list(self.entries)
        return [entry for entry in self.entries if all(pred(entry) for pred in predicates)]

    def by_method(self, methods: list[str]) -> "list[HTTPEntry]":
        """Filter by HTTP methods.

        Args:
            methods: List of methods (GET, POST, PUT, DELETE, etc.)

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(methods=methods))

    def by_status(self, status_codes: list[int]) -> "list[HTTPEntry]":
        """Filter by status codes.

        Args:
            status_codes: List of status codes to include

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(status_codes=status_codes))

    def by_url(self, url_pattern: str) -> "list[HTTPEntry]":
        """Filter by URL pattern.

        Args:
            url_pattern: Regular expression pattern to match URLs

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(url_pattern=url_pattern))

    def by_content_type(self, content_types: list[str]) -> "list[HTTPEntry]":
        """Filter by content types.

        Args:
            content_types: List of content type substrings to match

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(content_types=content_types))

    def by_status_range(self, min_status: int, max_status: int) -> "list[HTTPEntry]":
        """Filter by status code range.

        Args:
            min_status: Minimum status code (inclusive)
            max_status: Maximum status code (inclusive)

        Returns:
            Filtered entries
        """
        return self.by_status(list(range(min_status, max_status + 1)))

    def successful_requests(self) -> "list[HTTPEntry]":
        """Get all 2xx responses."""
        return self.by_status_range(200, 299)

    def client_errors(self) -> "list[HTTPEntry]":
        """Get all 4xx responses."""
        return self.by_status_range(400, 499)

    def server_errors(self) -> "list[HTTPEntry]":
        """Get all 5xx responses."""
        return self.by_status_range(500, 599)

    def search(self, query: str, case_sensitive: bool = False) -> "list[HTTPEntry]":
        """Search across URL, request body, and response body.

        Args:
            query: Search string
            case_sensitive: Whether search should be case sensitive

        Returns:
            Entries matching the query in any searched field
        """
        search_query = query if case_sensitive else query.lower()

        def matches(entry: "HTTPEntry") -> bool:
            url = entry.request.url if case_sensitive else entry.request.url.lower()
            if search_query in url:
                return True
            if entry.request.body:
                body = entry.request.body if case_sensitive else entry.request.body.lower()
                if search_query in body:
                    return True
            if entry.response.body:
                body = entry.response.body if case_sensitive else entry.response.body.lower()
                if search_query in body:
                    return True
            return False

        return [e for e in self.entries if matches(e)]

    def get_entry_by_id(self, entry_id: str) -> "HTTPEntry | None":
        """Get a specific entry by its ID.

        Args:
            entry_id: The entry ID to find

        Returns:
            The HTTPEntry or None if not found
        """
        for entry in self.entries:
            if entry.id == entry_id:
                return entry
        return None

View File

@@ -0,0 +1,3 @@
"""CLI interface for HTTP Log Explorer."""
# Subpackage version; mirrors the top-level package version.
__version__ = "0.1.0"

View File

@@ -0,0 +1,339 @@
"""CLI commands for HTTP Log Explorer."""
import sys
import click
from rich.console import Console
from http_log_explorer.analyzers import DiffEngine, StatsGenerator, TrafficAnalyzer
from http_log_explorer.cli.formatter import Formatter
from http_log_explorer.exporters import CodeExporter, CurlExporter, JSONExporter
from http_log_explorer.generators import OpenAPIGenerator
from http_log_explorer.models import FilterCriteria, HTTPEntry
from http_log_explorer.parsers import get_parser
# Shared Rich console and table formatter used by every command below.
console = Console()
formatter = Formatter()
# Module-level store holding the entries from the most recent `load` run;
# the click commands share state through this global rather than a context object.
_entries_store: list[HTTPEntry] = []
def reset_entries() -> None:
    """Reset the global entries store. Used for testing."""
    global _entries_store
    # Rebind to a fresh list (rather than clearing in place) so references
    # held elsewhere to the old list are unaffected.
    _entries_store = []
# Root command group; subcommands register themselves via @cli.command().
@click.group()
@click.version_option(version="0.1.0")
def cli() -> None:
    """HTTP Log Explorer - Parse, analyze, and explore HTTP traffic logs."""
    pass
@cli.command()
@click.argument("file", type=click.Path(exists=True))
@click.option("--stats", is_flag=True, help="Show statistics after loading")
def load(file: str, stats: bool) -> None:
    """Load and parse an HTTP log file.

    Supports HAR files, curl -v output, and Chrome DevTools exports.
    """
    global _entries_store
    # errors="replace" keeps loading usable for files containing invalid
    # UTF-8 sequences (e.g. binary payloads embedded in a capture).
    try:
        with open(file, encoding="utf-8", errors="replace") as f:
            content = f.read()
    except Exception as e:
        console.print(f"[red]Error reading file: {e}[/red]")
        sys.exit(1)
    if not content.strip():
        console.print("[red]Error: File is empty[/red]")
        sys.exit(1)
    try:
        # get_parser sniffs the content and returns a format-specific parser.
        parser = get_parser(content)
        console.print(f"[green]Using parser: {parser.get_parser_name()}[/green]")
        entries = parser.parse(content, source_file=file)
    except ValueError as e:
        console.print(f"[red]Parse error: {e}[/red]")
        console.print("[yellow]Supported formats:[/yellow]")
        console.print(" - HAR files (HTTP Archive format)")
        console.print(" - curl -v output")
        console.print(" - Chrome DevTools network exports")
        sys.exit(1)
    # Replace (not extend) the shared store; each load starts a new session.
    _entries_store = entries
    console.print(f"[green]Loaded {len(entries)} entries[/green]")
    if stats and entries:
        _show_stats(entries)
@cli.command()
@click.option("--limit", type=int, default=50, help="Limit number of entries shown")
@click.option("--method", multiple=True, help="Filter by method (e.g., GET, POST)")
@click.option("--status", multiple=True, type=int, help="Filter by status code")
@click.option("--url", help="Filter by URL pattern (regex)")
@click.option("--content-type", multiple=True, help="Filter by content type")
def list_entries(
    limit: int,
    method: tuple[str, ...],
    status: tuple[int, ...],
    url: str | None,
    content_type: tuple[str, ...],
) -> None:
    """List loaded HTTP entries with optional filtering."""
    # NOTE(review): `global` is only required for rebinding; this command
    # only reads the store, so the statement is redundant but harmless.
    global _entries_store
    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return
    entries = list(_entries_store)
    # Multi-value click options arrive as tuples; empty tuples become None
    # so the analyzer treats the criterion as "not set".
    criteria = FilterCriteria(
        methods=list(method) if method else None,
        status_codes=list(status) if status else None,
        url_pattern=url,
        content_types=list(content_type) if content_type else None,
    )
    analyzer = TrafficAnalyzer(entries)
    filtered = analyzer.filter(criteria)
    table = formatter.format_entry_table(filtered, limit=limit)
    console.print(table)
    console.print(f"\n[dim]Showing {min(limit, len(filtered))} of {len(filtered)} entries[/dim]")
@cli.command()
@click.argument("query")
@click.option("--case-sensitive", is_flag=True, help="Case sensitive search")
def search(query: str, case_sensitive: bool) -> None:
    """Search across URLs and bodies."""
    # Read-only access to the shared store; `global` is redundant here.
    global _entries_store
    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return
    analyzer = TrafficAnalyzer(_entries_store)
    results = analyzer.search(query, case_sensitive=case_sensitive)
    # Display is capped at 50 rows; the count line below reports the full total.
    table = formatter.format_entry_table(results, limit=50)
    console.print(table)
    console.print(f"\n[dim]Found {len(results)} matching entries[/dim]")
@cli.command()
@click.argument("entry_id1")
@click.argument("entry_id2")
def diff(entry_id1: str, entry_id2: str) -> None:
    """Compare two HTTP entries by ID."""
    global _entries_store
    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return
    analyzer = TrafficAnalyzer(_entries_store)
    entry1 = analyzer.get_entry_by_id(entry_id1)
    entry2 = analyzer.get_entry_by_id(entry_id2)
    # Report each missing ID separately before giving up.
    if not entry1:
        console.print(f"[red]Entry not found: {entry_id1}[/red]")
        return
    if not entry2:
        console.print(f"[red]Entry not found: {entry_id2}[/red]")
        return
    engine = DiffEngine()
    diff_result = engine.diff(entry1, entry2)
    diff_output = engine.unified_diff_output(diff_result)
    console.print(diff_output)
@cli.command()
def stats() -> None:
    """Show statistics for loaded entries."""
    global _entries_store
    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return
    # Rendering is delegated so `load --stats` can reuse the same output.
    _show_stats(_entries_store)
def _show_stats(entries: list[HTTPEntry]) -> None:
    """Show statistics for entries.

    Shared renderer used by both the `stats` command and `load --stats`.
    """
    generator = StatsGenerator(entries)
    stats_data = generator.to_dict()
    console.print("\n[bold cyan]Traffic Statistics[/bold cyan]")
    console.print(f"Total Requests: {stats_data['total_requests']}")
    console.print("\n[bold]Method Distribution[/bold]")
    for method, count in sorted(stats_data["method_distribution"].items()):
        console.print(f"  {method}: {count}")
    console.print("\n[bold]Status Code Breakdown[/bold]")
    for status, count in sorted(stats_data["status_breakdown"].items()):
        console.print(f"  {status}: {count}")
    console.print("\n[bold]Top Endpoints[/bold]")
    # endpoint_count is already ordered most-common-first; show the top 10.
    for endpoint, count in list(stats_data["endpoint_count"].items())[:10]:
        console.print(f"  {endpoint}: {count}")
    rt = stats_data.get("response_time_stats", {})
    # Timing section is omitted entirely when no entry carried a duration.
    if rt.get("avg", 0) > 0:
        console.print("\n[bold]Response Times[/bold]")
        console.print(f"  Min: {rt.get('min', 0):.2f}ms")
        console.print(f"  Max: {rt.get('max', 0):.2f}ms")
        console.print(f"  Avg: {rt.get('avg', 0):.2f}ms")
        console.print(f"  Median: {rt.get('median', 0):.2f}ms")
        console.print(f"  P95: {rt.get('p95', 0):.2f}ms")
        console.print(f"  P99: {rt.get('p99', 0):.2f}ms")
@cli.command("export-json")
@click.argument("output", type=click.Path())
@click.option("--compact", is_flag=True, help="Export compact JSON")
@click.option("--summary", is_flag=True, help="Export summary only")
def export_json(output: str, compact: bool, summary: bool) -> None:
    """Export entries to JSON file."""
    global _entries_store
    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return
    exporter = JSONExporter()
    try:
        # --summary takes precedence over --compact when both flags are given.
        if summary:
            content = exporter.export_summary(_entries_store)
        elif compact:
            content = exporter.export_compact(_entries_store)
        else:
            content = exporter.export(_entries_store)
        with open(output, "w") as f:
            f.write(content)
        console.print(f"[green]Exported to {output}[/green]")
    except Exception as e:
        # Broad catch is deliberate at this CLI boundary: any exporter or
        # filesystem failure is reported to the user rather than tracebacked.
        console.print(f"[red]Export error: {e}[/red]")
@cli.command("export-curl")
@click.argument("output", type=click.Path())
def export_curl(output: str) -> None:
    """Export entries as cURL commands."""
    global _entries_store
    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return
    exporter = CurlExporter()
    try:
        # The exporter handles file writing itself, unlike export_json above.
        exporter.to_file(_entries_store, output)
        console.print(f"[green]Exported to {output}[/green]")
    except Exception as e:
        console.print(f"[red]Export error: {e}[/red]")
@cli.command("export-code")
@click.argument("output", type=click.Path())
@click.option(
    "--language",
    type=click.Choice(["python", "javascript", "go"]),
    default="python",
    help="Target language",
)
def export_code(output: str, language: str) -> None:
    """Export entries as code snippets."""
    global _entries_store
    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return
    exporter = CodeExporter()
    try:
        # One snippet per loaded entry is written to the output file.
        exporter.to_file(_entries_store, output, language)
        console.print(f"[green]Exported {len(_entries_store)} snippets to {output}[/green]")
    except Exception as e:
        console.print(f"[red]Export error: {e}[/red]")
@cli.command("export-openapi")
@click.argument("output", type=click.Path())
@click.option("--title", default="API", help="API title")
@click.option("--version", default="1.0.0", help="API version")
@click.option("--no-validate", is_flag=True, help="Skip validation")
def export_openapi(
    output: str, title: str, version: str, no_validate: bool
) -> None:
    """Generate OpenAPI spec from traffic."""
    global _entries_store
    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return
    generator = OpenAPIGenerator(_entries_store)
    try:
        spec = generator.generate(
            title=title,
            version=version,
            validate_spec=not no_validate,
        )
        with open(output, "w") as f:
            f.write(generator.to_json(spec))
        console.print(f"[green]OpenAPI spec exported to {output}[/green]")
    except ValueError as e:
        # ValueError is presumably raised by spec validation — reported
        # separately from generic export failures below.
        console.print(f"[red]Validation error: {e}[/red]")
    except Exception as e:
        console.print(f"[red]Export error: {e}[/red]")
@cli.command()
@click.option("--method", multiple=True, help="Filter by method")
@click.option("--status", multiple=True, type=int, help="Filter by status code")
@click.option("--url", help="Filter by URL pattern")
@click.option("--content-type", multiple=True, help="Filter by content type")
def filter_entries(
    method: tuple[str, ...],
    status: tuple[int, ...],
    url: str | None,
    content_type: tuple[str, ...],
) -> None:
    """Filter entries and show results (alias for list with filters)."""
    # Delegate to list_entries via ctx.invoke so filtering/rendering logic
    # lives in one place; the default display limit of 50 is fixed here.
    ctx = click.get_current_context()
    ctx.invoke(
        list_entries,
        limit=50,
        method=method,
        status=status,
        url=url,
        content_type=content_type,
    )
def main() -> None:
    """Main entry point."""
    # Click handles argument parsing, help text, and exit codes.
    cli()
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,153 @@
"""Rich table formatter for HTTP entries."""
from typing import Any
from rich.console import Console
from rich.table import Table
from rich.text import Text
from http_log_explorer.models import HTTPEntry
class Formatter:
    """Format HTTP entries for terminal display using rich tables."""

    def __init__(self) -> None:
        """Initialize formatter with a rich console."""
        self.console = Console()

    def format_entry_table(
        self,
        entries: list[HTTPEntry],
        show_headers: bool = True,
        show_body: bool = False,
        limit: int | None = None,
    ) -> Table:
        """Create a table of HTTP entries.

        Args:
            entries: List of HTTPEntry objects
            show_headers: Whether to show request/response header columns
            show_body: Whether to show request/response body columns
            limit: Maximum number of entries to show (None shows all)

        Returns:
            Rich Table object
        """
        table = Table(title=f"HTTP Entries ({len(entries)} total)")
        table.add_column("ID", style="cyan", no_wrap=True)
        table.add_column("Method", style="magenta", no_wrap=True)
        table.add_column("URL", style="blue")
        table.add_column("Status", justify="center", no_wrap=True)
        table.add_column("Time", style="dim", no_wrap=True)
        table.add_column("Duration", justify="right", no_wrap=True)
        if show_headers:
            table.add_column("Req Headers", style="dim")
            table.add_column("Resp Headers", style="dim")
        if show_body:
            table.add_column("Req Body", style="dim")
            table.add_column("Resp Body", style="dim")
        # BUG FIX: the old `entries[:limit] if limit` treated an explicit
        # limit=0 as "no limit"; compare against None instead.
        display_entries = entries[:limit] if limit is not None else entries
        for entry in display_entries:
            row: list[Any] = [
                entry.id,
                entry.request.method,
                self._truncate_url(entry.request.url),
                self._format_status(entry.response.status),
                self._format_timestamp(entry.timestamp),
                self._format_duration(entry.duration_ms),
            ]
            if show_headers:
                row.append(self._format_headers(entry.request.headers))
                row.append(self._format_headers(entry.response.headers))
            if show_body:
                row.append(self._truncate_body(entry.request.body))
                row.append(self._truncate_body(entry.response.body))
            table.add_row(*row)
        return table

    def _truncate_url(self, url: str, max_length: int = 60) -> str:
        """Truncate URL for display, appending an ellipsis when cut."""
        if len(url) <= max_length:
            return url
        return url[: max_length - 3] + "..."

    def _format_status(self, status: int) -> Text:
        """Format status code with the conventional class color."""
        if 200 <= status < 300:
            return Text(str(status), style="green")
        elif 300 <= status < 400:
            return Text(str(status), style="blue")
        elif 400 <= status < 500:
            return Text(str(status), style="yellow")
        elif 500 <= status < 600:
            return Text(str(status), style="red")
        return Text(str(status))

    def _format_timestamp(self, timestamp: Any) -> str:
        """Format timestamp for display ("-" when missing)."""
        if timestamp is None:
            return "-"
        # Duck-typed: any datetime-like object with strftime works.
        if hasattr(timestamp, "strftime"):
            return timestamp.strftime("%H:%M:%S")
        return str(timestamp)

    def _format_duration(self, duration_ms: float | None) -> str:
        """Format duration, switching to seconds at the 1s mark."""
        if duration_ms is None:
            return "-"
        if duration_ms < 1000:
            return f"{duration_ms:.0f}ms"
        return f"{duration_ms / 1000:.2f}s"

    def _format_headers(self, headers: dict[str, str]) -> str:
        """Summarize headers as a count for the table cell."""
        if not headers:
            return "-"
        count = len(headers)
        return f"{count} headers"

    def _truncate_body(self, body: str | None, max_length: int = 50) -> str:
        """Truncate body for display; empty/missing bodies render as '-'."""
        if body is None:
            return "-"
        body = body.strip()
        if not body:
            return "-"
        if len(body) <= max_length:
            return body
        return body[: max_length - 3] + "..."

    def format_diff(self, diff_output: str) -> Table:
        """Format diff output as a two-column table.

        Args:
            diff_output: Diff output string

        Returns:
            Rich Table object
        """
        table = Table(title="Diff Comparison")
        table.add_column("Before/After", style="cyan", no_wrap=True)
        table.add_column("Change", style="white")
        for line in diff_output.split("\n"):
            # BUG FIX: file-header lines ("---"/"+++") must be checked before
            # the single-character prefixes; previously startswith("-") always
            # claimed "---" lines, making the dim-header branch unreachable.
            if line.startswith("---") or line.startswith("+++"):
                table.add_row("", Text(line, style="dim"))
            elif line.startswith("-"):
                table.add_row("-", Text(line, style="red"))
            elif line.startswith("+"):
                table.add_row("+", Text(line, style="green"))
            else:
                table.add_row("", line)
        return table

View File

@@ -0,0 +1,7 @@
"""Exporters for various formats."""
from http_log_explorer.exporters.code_exporter import CodeExporter
from http_log_explorer.exporters.curl_exporter import CurlExporter
from http_log_explorer.exporters.json_exporter import JSONExporter
__all__ = ["CodeExporter", "CurlExporter", "JSONExporter"]

View File

@@ -0,0 +1,263 @@
"""Code exporter for HTTP entries (Python, JavaScript, Go)."""
import json
from http_log_explorer.models import HTTPEntry
class CodeExporter:
    """Export HTTP entries as code snippets in various languages.

    Templates are class attributes filled via str.format; all braces that
    must appear literally in generated JS/Go are doubled ({{ }}).
    """

    def __init__(self) -> None:
        """Initialize code exporter."""
        self._template_dir = ""

    # BUG FIX: the old template placed {body} *after* the closing paren of
    # the requests call and never defined `data`, producing invalid code.
    PYTHON_TEMPLATE = '''import requests

headers = {headers}
{data}response = requests.{method}(
    "{url}"{params}{headers_param}{body}
)

print(response.status_code)
print(response.json())
'''

    # BUG FIX: `const data = ...` declarations used to be rendered *inside*
    # the config object literal; they now go above it via {data_js}.
    JAVASCRIPT_TEMPLATE = '''const axios = require('axios');

{data_js}
const config = {{
    method: '{method}',
    url: '{url}',
    {headers_js}
    {body_js}
}};

axios(config)
    .then(response => {{
        console.log(response.status);
        console.log(response.data);
    }})
    .catch(error => {{
        console.error(error);
    }});
'''

    # BUG FIX: the old template always imported "bytes"/"encoding/json"
    # (unused imports are compile errors in Go) and emitted `body := nil`
    # when no body existed (untyped nil cannot be assigned).
    GO_TEMPLATE = '''package main

import (
{imports_go}
)

func main() {{
{body_decl}    req, err := http.NewRequest("{method}", "{url}", {body_ref})
    if err != nil {{
        panic(err)
    }}
{set_headers}
    client := &http.Client{{}}
    resp, err := client.Do(req)
    if err != nil {{
        panic(err)
    }}
    defer resp.Body.Close()
    fmt.Println("Status:", resp.Status)
}}
'''

    def export_python(self, entry: "HTTPEntry") -> str:
        """Export entry as Python (requests) code.

        Args:
            entry: HTTPEntry object

        Returns:
            Python code string
        """
        headers_str = self._format_python_dict(entry.request.headers)
        params_line = ""
        if entry.request.query_params:
            params_line = (
                f",\n    params={self._format_python_dict(entry.request.query_params)}"
            )
        headers_param = ",\n    headers=headers" if entry.request.headers else ""
        data_line = ""
        body_line = ""
        if entry.request.body:
            # Define `data` before the call and pass it as a keyword argument.
            data_line = f"data = {json.dumps(entry.request.body)}\n"
            body_line = ",\n    data=data"
        return self.PYTHON_TEMPLATE.format(
            method=entry.request.method.lower(),
            url=entry.request.url,
            headers=headers_str,
            params=params_line,
            headers_param=headers_param,
            data=data_line,
            body=body_line,
        )

    def export_javascript(self, entry: "HTTPEntry") -> str:
        """Export entry as JavaScript (axios) code.

        Args:
            entry: HTTPEntry object

        Returns:
            JavaScript code string
        """
        header_lines = [
            f'    "{name}": "{value}",'
            for name, value in entry.request.headers.items()
        ]
        headers_js = ""
        if header_lines:
            headers_js = "headers: {\n" + "\n".join(header_lines) + "\n},"
        declarations: list[str] = []
        config_lines: list[str] = []
        # BUG FIX: body and query params used to share variables, so query
        # params silently overwrote the body; now both are emitted.
        if entry.request.body:
            declarations.append(f"const data = {json.dumps(entry.request.body)};")
            config_lines.append("data: data,")
        if entry.request.query_params:
            declarations.append(
                f"const params = {json.dumps(entry.request.query_params)};"
            )
            config_lines.append("params: params,")
        return self.JAVASCRIPT_TEMPLATE.format(
            method=entry.request.method.lower(),
            url=entry.request.url,
            headers_js=headers_js,
            data_js="\n".join(declarations),
            body_js="\n".join(config_lines),
        )

    def export_go(self, entry: "HTTPEntry") -> str:
        """Export entry as Go (net/http) code.

        Args:
            entry: HTTPEntry object

        Returns:
            Go code string
        """
        header_lines = [
            f'    req.Header.Set("{name}", "{value}")'
            for name, value in entry.request.headers.items()
        ]
        set_headers = "\n".join(header_lines) if header_lines else "    // No headers"
        go_imports = ['    "fmt"', '    "net/http"']
        body_decl = ""
        body_ref = "nil"
        if entry.request.body:
            # Only import "bytes" when a body buffer is actually declared.
            go_imports.insert(0, '    "bytes"')
            literal = self._escape_go_string(entry.request.body)
            body_decl = f"    body := bytes.NewBufferString({literal})\n"
            body_ref = "body"
        return self.GO_TEMPLATE.format(
            imports_go="\n".join(go_imports),
            body_decl=body_decl,
            method=entry.request.method,
            url=entry.request.url,
            body_ref=body_ref,
            set_headers=set_headers,
        )

    def export_batch(
        self, entries: "list[HTTPEntry]", language: str
    ) -> list[str]:
        """Export multiple entries as code snippets.

        Args:
            entries: List of HTTPEntry objects
            language: Target language (python, javascript, go)

        Returns:
            List of code strings

        Raises:
            ValueError: If language is not supported
        """
        language = language.lower()
        if language == "python":
            return [self.export_python(e) for e in entries]
        elif language == "javascript":
            return [self.export_javascript(e) for e in entries]
        elif language == "go":
            return [self.export_go(e) for e in entries]
        else:
            raise ValueError(
                f"Unsupported language: {language}. "
                f"Supported: python, javascript, go"
            )

    def _format_python_dict(self, d: dict[str, str]) -> str:
        """Format a flat str->str dictionary as Python source.

        Args:
            d: Dictionary to format

        Returns:
            Python dict string
        """
        if not d:
            return "{}"
        items = [f'"{k}": "{v}"' for k, v in d.items()]
        return "{\n    " + ",\n    ".join(items) + "\n}"

    def _escape_go_string(self, s: str) -> str:
        """Render *s* as a double-quoted Go string literal.

        json.dumps escaping (\\", \\\\, \\n, \\uXXXX) is a subset of Go's
        interpreted string-literal escapes, so the result is a valid quoted
        Go literal. The previous raw-string approach was broken: backticks
        cannot be escaped inside Go raw literals.

        Args:
            s: String to escape

        Returns:
            A complete double-quoted Go string literal
        """
        return json.dumps(s)

    def to_file(
        self, entries: "list[HTTPEntry]", path: str, language: str
    ) -> None:
        """Write code snippets to file, blank-line separated.

        Args:
            entries: List of HTTPEntry objects
            path: Output file path
            language: Target language
        """
        snippets = self.export_batch(entries, language)
        with open(path, "w", encoding="utf-8") as f:
            for snippet in snippets:
                f.write(snippet)
                f.write("\n\n")

View File

@@ -0,0 +1,70 @@
"""cURL exporter for HTTP entries."""
from http_log_explorer.models import HTTPEntry
class CurlExporter:
    """Export HTTP entries as cURL commands."""

    def export(self, entry: "HTTPEntry") -> str:
        """Export a single entry as a cURL command.

        Args:
            entry: HTTPEntry object

        Returns:
            cURL command string
        """
        parts = ["curl", "-X", entry.request.method]
        if entry.request.headers:
            for name, value in entry.request.headers.items():
                # Host/Content-Length are derived by curl itself.
                if name.lower() not in ("host", "content-length"):
                    parts.append("-H")
                    # BUG FIX: header values were appended unquoted, so any
                    # header containing a space produced a broken command.
                    header = self._escape_body(f"{name}: {value}")
                    parts.append(f"'{header}'")
        if entry.request.body:
            escaped_body = self._escape_body(entry.request.body)
            parts.append("-d")
            parts.append(f"'{escaped_body}'")
        parts.append(f"'{entry.request.url}'")
        return " ".join(parts)

    def export_batch(self, entries: "list[HTTPEntry]") -> list[str]:
        """Export multiple entries as cURL commands.

        Args:
            entries: List of HTTPEntry objects

        Returns:
            List of cURL command strings
        """
        return [self.export(entry) for entry in entries]

    def _escape_body(self, body: str) -> str:
        """Escape a string for inclusion in single quotes in a shell.

        Uses the standard '"'"'-style trick: close quote, escaped quote,
        reopen quote.

        Args:
            body: Content to escape

        Returns:
            Escaped string
        """
        return body.replace("'", "'\\''")

    def to_file(self, entries: "list[HTTPEntry]", path: str) -> None:
        """Write cURL commands to file (one per line).

        Args:
            entries: List of HTTPEntry objects
            path: Output file path
        """
        with open(path, "w", encoding="utf-8") as f:
            for entry in entries:
                f.write(self.export(entry) + "\n")

View File

@@ -0,0 +1,66 @@
"""JSON exporter for HTTP entries."""
import json
from http_log_explorer.models import HTTPEntry
class JSONExporter:
    """Export HTTP entries to JSON format."""

    def export(self, entries: "list[HTTPEntry]", indent: int = 2) -> str:
        """Export entries to a pretty-printed JSON string.

        Args:
            entries: List of HTTPEntry objects
            indent: JSON indent level

        Returns:
            JSON string representation
        """
        data = [entry.to_dict() for entry in entries]
        # default=str stringifies any non-JSON-native values in to_dict output.
        return json.dumps(data, indent=indent, default=str)

    def export_compact(self, entries: "list[HTTPEntry]") -> str:
        """Export entries to compact JSON (no indent, no spaces).

        Args:
            entries: List of HTTPEntry objects

        Returns:
            Compact JSON string
        """
        data = [entry.to_dict() for entry in entries]
        return json.dumps(data, separators=(",", ":"), default=str)

    def save(self, entries: "list[HTTPEntry]", path: str, indent: int = 2) -> None:
        """Save entries to a JSON file.

        Args:
            entries: List of HTTPEntry objects
            path: Output file path
            indent: JSON indent level
        """
        with open(path, "w", encoding="utf-8") as f:
            f.write(self.export(entries, indent))

    def export_summary(self, entries: "list[HTTPEntry]", indent: int = 2) -> str:
        """Export summary of entries (URL, method, status only).

        Args:
            entries: List of HTTPEntry objects
            indent: JSON indent level (previously hard-coded to 2)

        Returns:
            JSON string with summary info
        """
        summary = [
            {
                "id": entry.id,
                "method": entry.request.method,
                "url": entry.request.url,
                "status": entry.response.status,
                "content_type": entry.content_type,
                "duration_ms": entry.duration_ms,
            }
            for entry in entries
        ]
        # CONSISTENCY FIX: use default=str like the other export methods so
        # an odd field value cannot crash the summary.
        return json.dumps(summary, indent=indent, default=str)

View File

@@ -0,0 +1,5 @@
"""Generators for OpenAPI specs and exports."""
from http_log_explorer.generators.openapi_generator import OpenAPIGenerator
__all__ = ["OpenAPIGenerator"]

View File

@@ -0,0 +1,431 @@
"""OpenAPI 3.0 spec generator from HTTP traffic."""
import json
import re
from collections import defaultdict
from typing import Any
from http_log_explorer.models import HTTPEntry
# Spec validation is optional: openapi-spec-validator may be absent, so
# degrade gracefully and record availability for generate() to consult.
try:
    from openapi_spec_validator import validate
    VALIDATION_AVAILABLE = True
except ImportError:
    VALIDATION_AVAILABLE = False
class OpenAPIGenerator:
"""Generate OpenAPI 3.0 specification from observed traffic."""
def __init__(self, entries: list[HTTPEntry]) -> None:
"""Initialize with HTTP entries.
Args:
entries: List of HTTPEntry objects
"""
self.entries = entries
self.spec: dict[str, Any] = {}
self._schemas: dict[str, dict[str, Any]] = {}
self._path_items: dict[str, dict[str, Any]] = defaultdict(dict)
def generate(
self,
title: str = "API",
version: str = "1.0.0",
description: str = "Generated from traffic analysis",
validate_spec: bool = True,
) -> dict[str, Any]:
"""Generate OpenAPI spec from traffic.
Args:
title: API title
version: API version
description: API description
validate_spec: Whether to validate the generated spec
Returns:
OpenAPI spec dictionary
Raises:
ValueError: If validation fails and validate_spec is True
"""
self.spec = {
"openapi": "3.0.3",
"info": {
"title": title,
"version": version,
"description": description,
},
"paths": {},
"components": {
"schemas": {},
},
}
self._schemas = {}
self._path_items = defaultdict(dict)
self._infer_paths()
self._infer_schemas()
self.spec["paths"] = dict(self._path_items)
self.spec["components"]["schemas"] = self._schemas
if validate_spec and VALIDATION_AVAILABLE:
try:
validate(self.spec)
except Exception as e:
raise ValueError(f"Generated spec is invalid: {e}") from e
return self.spec
def _infer_paths(self) -> None:
"""Infer API paths from traffic."""
for entry in self.entries:
path = self._extract_path(entry.endpoint)
method = entry.request.method.lower()
if path not in self._path_items:
self._path_items[path] = {}
path_params = self._extract_path_params(path)
if path_params and "parameters" not in self._path_items[path]:
self._path_items[path]["parameters"] = path_params
operation: dict[str, Any] = {
"responses": self._generate_responses(entry),
}
if entry.request.headers:
operation["parameters"] = self._generate_parameters(entry)
if entry.request.body:
request_body = self._generate_request_body(entry)
if request_body:
operation["requestBody"] = request_body
self._path_items[path][method] = operation
def _extract_path_params(self, path: str) -> list[dict[str, Any]]:
"""Extract path parameters from a path string.
Args:
path: The path string like '/users/{id}'
Returns:
List of parameter definitions
"""
params = []
import re
param_pattern = re.compile(r"\{([^}]+)\}")
for match in param_pattern.finditer(path):
param_name = match.group(1)
params.append({
"name": param_name,
"in": "path",
"required": True,
"schema": {"type": "string"},
})
return params
def _extract_path(self, endpoint: str) -> str:
"""Extract and normalize path from endpoint."""
path = endpoint
parts = path.split("/")
normalized_parts = []
for part in parts:
if not part:
normalized_parts.append("")
elif part.isdigit():
normalized_parts.append("{" + self._get_param_name(path, part) + "}")
elif self._is_uuid(part):
normalized_parts.append("{uuid}")
elif self._is_hash(part):
normalized_parts.append("{id}")
else:
normalized_parts.append(part)
return "/".join(normalized_parts) or "/"
def _get_param_name(self, path: str, value: str) -> str:
"""Generate parameter name based on path context."""
path_lower = path.lower()
if "user" in path_lower or "id" in path_lower:
return "id"
if "page" in path_lower or "offset" in path_lower:
return "page"
if "limit" in path_lower or "size" in path_lower:
return "limit"
return "id"
def _is_uuid(self, s: str) -> bool:
"""Check if string looks like a UUID."""
uuid_pattern = re.compile(
r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$",
re.IGNORECASE,
)
return bool(uuid_pattern.match(s))
def _is_hash(self, s: str) -> bool:
"""Check if string looks like a hash."""
hash_pattern = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE)
return bool(hash_pattern.match(s))
def _generate_responses(self, entry: HTTPEntry) -> dict[str, Any]:
"""Generate response definitions."""
content = {}
ct = entry.content_type
if ct and "json" in ct.lower():
schema = self._extract_schema_from_body(entry.response.body, "response")
content = {
"application/json": {
"schema": schema,
}
}
elif entry.response.body:
content = {
"text/plain": {
"schema": {
"type": "string",
}
}
}
status = entry.response.status
status_text = entry.response.status_text or "OK"
return {
str(status): {
"description": status_text,
"content": content,
}
}
def _generate_parameters(self, entry: HTTPEntry) -> list[dict[str, Any]]:
"""Generate parameter definitions from query string."""
params = []
for name, value in entry.request.query_params.items():
param: dict[str, Any] = {
"name": name,
"in": "query",
"schema": {
"type": self._infer_type(value),
},
}
if value:
param["example"] = value
params.append(param)
return params
def _generate_request_body(self, entry: HTTPEntry) -> dict[str, Any] | None:
"""Generate request body definition."""
body = entry.request.body
if not body:
return None
content: dict[str, Any] = {}
if self._is_json(body):
schema = self._extract_schema_from_body(body, "request")
content = {
"application/json": {
"schema": schema,
}
}
else:
content = {
"text/plain": {
"schema": {
"type": "string",
}
}
}
return {
"content": content,
"required": True,
}
def _extract_schema_from_body(
self, body: str | None, prefix: str = "schema"
) -> dict[str, Any]:
"""Extract JSON schema from body content.
Args:
body: Body content
prefix: Prefix for schema name
Returns:
JSON Schema dictionary
"""
if not body:
return {"type": "string"}
if not self._is_json(body):
return {"type": "string"}
try:
data = json.loads(body)
except (json.JSONDecodeError, TypeError):
return {"type": "string"}
if isinstance(data, dict):
schema_name = f"{prefix}Schema"
schema = self._dict_to_schema(data, schema_name)
self._schemas[schema_name] = schema
return {"$ref": f"#/components/schemas/{schema_name}"}
elif isinstance(data, list) and data:
return {
"type": "array",
"items": self._dict_to_schema(data[0], f"{prefix}Item"),
}
return {"type": "string"}
def _dict_to_schema(
self, data: dict[str, Any], name: str
) -> dict[str, Any]:
"""Convert dictionary to JSON schema.
Args:
data: Dictionary to convert
name: Schema name
Returns:
JSON Schema dictionary
"""
properties: dict[str, Any] = {}
required: list[str] = []
for key, value in data.items():
prop_schema = self._value_to_schema(value, key)
properties[key] = prop_schema
required.append(key)
return {
"type": "object",
"properties": properties,
"required": required,
}
def _value_to_schema(self, value: Any, key: str) -> dict[str, Any]:
"""Convert a value to JSON schema.
Args:
value: Value to convert
key: Key name (for nested object naming)
Returns:
JSON Schema for the value
"""
if value is None:
return {"type": "string", "nullable": True}
elif isinstance(value, bool):
return {"type": "boolean"}
elif isinstance(value, int):
return {"type": "integer"}
elif isinstance(value, float):
return {"type": "number"}
elif isinstance(value, str):
if self._is_json(value):
nested = self._dict_to_schema(json.loads(value), f"{key}Schema")
return nested
return {"type": "string"}
elif isinstance(value, dict):
schema_name = f"{key}Schema"
nested = self._dict_to_schema(value, schema_name)
self._schemas[schema_name] = nested
return {"$ref": f"#/components/schemas/{schema_name}"}
elif isinstance(value, list):
if value:
item_schema = self._value_to_schema(value[0], f"{key}Item")
return {"type": "array", "items": item_schema}
return {"type": "array", "items": {"type": "string"}}
return {"type": "string"}
def _infer_type(self, value: str) -> str:
"""Infer JSON type from string value.
Args:
value: String value
Returns:
JSON type string
"""
if not value:
return "string"
try:
int(value)
return "integer"
except ValueError:
pass
try:
float(value)
return "number"
except ValueError:
pass
if value.lower() in ("true", "false"):
return "boolean"
return "string"
def _is_json(self, s: str) -> bool:
"""Check if string is JSON.
Args:
s: String to check
Returns:
True if string is JSON
"""
if not s or not s.strip():
return False
if s.strip().startswith(("{", "[")):
try:
json.loads(s)
return True
except (json.JSONDecodeError, TypeError):
pass
return False
def _infer_schemas(self) -> None:
"""Infer additional schemas from request/response bodies."""
for entry in self.entries:
if entry.request.body and self._is_json(entry.request.body):
try:
data = json.loads(entry.request.body)
if isinstance(data, dict):
schema_name = "requestBodySchema"
if schema_name not in self._schemas:
self._schemas[schema_name] = self._dict_to_schema(data, schema_name)
except (json.JSONDecodeError, TypeError):
pass
def to_json(self, spec: dict[str, Any] | None = None, indent: int = 2) -> str:
"""Convert spec to JSON string.
Args:
spec: Spec to convert, or use self.spec if None
indent: JSON indent level
Returns:
JSON string
"""
if spec is None:
spec = self.spec
return json.dumps(spec, indent=indent)
def save_spec(self, path: str, spec: dict[str, Any] | None = None) -> None:
"""Save spec to file.
Args:
path: File path to save to
spec: Spec to save, or use self.spec if None
"""
with open(path, "w") as f:
f.write(self.to_json(spec))

View File

@@ -0,0 +1,17 @@
"""Data models."""
from http_log_explorer.models.http_entry import (
DiffResult,
FilterCriteria,
HTTPEntry,
Request,
Response,
)
__all__ = [
"DiffResult",
"FilterCriteria",
"HTTPEntry",
"Request",
"Response",
]

View File

@@ -0,0 +1,142 @@
"""Data models for HTTP entries."""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
@dataclass
class Request:
"""Represents an HTTP request."""
method: str
url: str
http_version: str = "HTTP/1.1"
headers: dict[str, str] = field(default_factory=dict)
body: str | None = None
query_params: dict[str, str] = field(default_factory=dict)
def __post_init__(self) -> None:
if isinstance(self.headers, list):
self.headers = {h.get("name", ""): h.get("value", "") for h in self.headers}
@dataclass
class Response:
"""Represents an HTTP response."""
status: int
status_text: str
http_version: str = "HTTP/1.1"
headers: dict[str, str] = field(default_factory=dict)
body: str | None = None
content_type: str | None = None
response_time_ms: float | None = None
def __post_init__(self) -> None:
if isinstance(self.headers, list):
self.headers = {h.get("name", ""): h.get("value", "") for h in self.headers}
@dataclass
class HTTPEntry:
    """Represents a complete HTTP request/response pair."""

    id: str
    request: Request
    response: Response
    timestamp: datetime | None = None
    server_ip: str | None = None
    connection: str | None = None
    raw_size: int | None = None
    source_file: str | None = None

    @property
    def duration_ms(self) -> float | None:
        """Response time in milliseconds, if recorded."""
        return self.response.response_time_ms

    @property
    def content_type(self) -> str | None:
        """Content type: explicit response field first, then headers."""
        explicit = self.response.content_type
        if explicit:
            return explicit
        # Case-insensitive fallback lookup over the response headers.
        for name, value in self.response.headers.items():
            if name.lower() == "content-type":
                return value
        return None

    @property
    def endpoint(self) -> str:
        """Path component of the request URL ('/' when empty)."""
        from urllib.parse import urlparse

        return urlparse(self.request.url).path or "/"

    @property
    def host(self) -> str:
        """Host (netloc) component of the request URL."""
        from urllib.parse import urlparse

        return urlparse(self.request.url).netloc

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        req, resp = self.request, self.response
        request_part = {
            "method": req.method,
            "url": req.url,
            "http_version": req.http_version,
            "headers": req.headers,
            "body": req.body,
            "query_params": req.query_params,
        }
        response_part = {
            "status": resp.status,
            "status_text": resp.status_text,
            "http_version": resp.http_version,
            "headers": resp.headers,
            "body": resp.body,
            "content_type": resp.content_type,
            "response_time_ms": resp.response_time_ms,
        }
        return {
            "id": self.id,
            "request": request_part,
            "response": response_part,
            "timestamp": self.timestamp.isoformat() if self.timestamp else None,
            "server_ip": self.server_ip,
            "connection": self.connection,
            "raw_size": self.raw_size,
        }
@dataclass
class FilterCriteria:
    """Criteria for filtering HTTP entries.

    All fields are optional; a None field means "no constraint on this
    dimension". Matching semantics are applied by the consuming filter
    code, which is not part of this module.
    """
    # HTTP methods to keep (e.g. ["GET", "POST"]).
    methods: list[str] | None = None
    # Response status codes to keep (e.g. [200, 404]).
    status_codes: list[int] | None = None
    # Pattern the request URL must match.
    url_pattern: str | None = None
    # Response content types to keep.
    content_types: list[str] | None = None
    # Time window for entry timestamps (bound semantics defined by the consumer).
    start_time: datetime | None = None
    end_time: datetime | None = None
    # Bounds on response time in milliseconds.
    min_response_time_ms: float | None = None
    max_response_time_ms: float | None = None
    # Substrings that must appear in the request/response body.
    request_body_contains: str | None = None
    response_body_contains: str | None = None
@dataclass
class DiffResult:
    """Represents the result of comparing two HTTP entries.

    The *_diff lists hold textual diff lines produced by the comparison
    code (defined elsewhere); empty lists mean no differences recorded.
    """
    # IDs of the two compared entries.
    entry1_id: str
    entry2_id: str
    # Per-section textual diffs.
    request_headers_diff: list[str] = field(default_factory=list)
    request_body_diff: list[str] = field(default_factory=list)
    response_headers_diff: list[str] = field(default_factory=list)
    response_body_diff: list[str] = field(default_factory=list)
    # True when the two responses carry different status codes (status1/status2).
    status_changed: bool = False
    status1: int = 0
    status2: int = 0
    # True when the two requests target different URLs.
    url_changed: bool = False

View File

@@ -0,0 +1,76 @@
"""Parser interface for HTTP log formats."""
from abc import ABC, abstractmethod
from http_log_explorer.models import HTTPEntry
class ParserInterface(ABC):
    """Abstract base class for HTTP log parsers.

    Concrete subclasses (HAR, curl -v, DevTools) implement parse() and
    can_parse(); get_parser() below probes can_parse() to auto-detect the
    right parser for a given piece of content.
    """
    @abstractmethod
    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse content and return list of HTTP entries.

        Args:
            content: The content to parse (string or bytes)
            source_file: Optional source file name for reference

        Returns:
            List of HTTPEntry objects

        Raises:
            ValueError: If content cannot be parsed
        """
        pass
    @abstractmethod
    def can_parse(self, content: str | bytes) -> bool:
        """Check if this parser can handle the given content.

        Args:
            content: The content to check

        Returns:
            True if this parser can handle the content
        """
        pass
    @staticmethod
    def get_parser_name() -> str:
        """Return the name of this parser ("unknown" unless overridden)."""
        return "unknown"
def get_parser(content: str | bytes) -> ParserInterface:
    """Get the appropriate parser for the given content.

    Args:
        content: The content to parse

    Returns:
        An appropriate parser instance

    Raises:
        ValueError: If no suitable parser is found
    """
    # Imported lazily to avoid circular imports with the parser modules.
    from http_log_explorer.parsers.curl_parser import CurlParser
    from http_log_explorer.parsers.devtools_parser import DevToolsParser
    from http_log_explorer.parsers.har_parser import HARParser

    candidates: list[ParserInterface] = [
        HARParser(),
        CurlParser(),
        DevToolsParser(),
    ]
    chosen = next((p for p in candidates if p.can_parse(content)), None)
    if chosen is None:
        raise ValueError(
            "Unsupported format. Supported formats are: HAR files, curl -v output, and Chrome DevTools network exports."
        )
    return chosen
__all__ = ["ParserInterface", "get_parser"]

View File

@@ -0,0 +1,140 @@
"""Parser for curl -v output."""
import re
from datetime import datetime
from typing import Any
from http_log_explorer.models import HTTPEntry, Request, Response
from http_log_explorer.parsers import ParserInterface
class CurlParser(ParserInterface):
    """Parser for curl -v verbose output.

    curl's verbose trace prefixes request lines with "> " and response
    lines with "< "; informational lines start with "*".
    """

    # "> GET /path HTTP/1.1"
    REQUEST_LINE_RE = re.compile(r"^> (\w+) (\S+) (HTTP/[\d.]+)$", re.MULTILINE)
    # "< HTTP/1.1 200 OK" — reason phrase optional (HTTP/2 omits it).
    RESPONSE_LINE_RE = re.compile(r"^< (HTTP/[\d.]+) (\d+) ?(.*)$", re.MULTILINE)
    # "> Name: value" (request) or "< Name: value" (response).
    # BUG FIX: the old pattern r"^(> |<) ([^:]+): ..." could never match
    # request header lines ("> " already consumed the space), so request
    # headers were silently dropped; response ("<") headers were matched
    # but then discarded by the direction check.
    HEADER_RE = re.compile(r"^([><]) ([^:]+): (.+)$")
    TIMING_RE = re.compile(r"^\* time_conditional check:.*$")

    @staticmethod
    def get_parser_name() -> str:
        return "curl"

    def can_parse(self, content: str | bytes) -> bool:
        """Check if content appears to be curl -v output."""
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="ignore")
        has_request = bool(self.REQUEST_LINE_RE.search(content))
        has_response = bool(self.RESPONSE_LINE_RE.search(content))
        return has_request and has_response

    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse curl -v output into HTTPEntry objects."""
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")
        entries: list[HTTPEntry] = []
        blocks = self._split_blocks(content)
        for idx, block in enumerate(blocks):
            try:
                entry = self._parse_block(block, idx, source_file)
                if entry:
                    entries.append(entry)
            except Exception:
                # Best-effort: a malformed block should not abort the rest.
                continue
        return entries

    def _split_blocks(self, content: str) -> list[dict[str, Any]]:
        """Split curl output into request/response blocks."""
        blocks: list[dict[str, Any]] = []
        current_block: dict[str, Any] = {}
        for line in content.split("\n"):
            request_match = self.REQUEST_LINE_RE.match(line)
            if request_match:
                # A new request line starts a new block.
                if current_block.get("request"):
                    blocks.append(current_block)
                current_block = {
                    "request": {
                        "method": request_match.group(1),
                        "url": request_match.group(2),
                        "http_version": request_match.group(3),
                    },
                    "headers": [],
                    "resp_headers": [],
                    "body": None,
                    "response": None,
                }
                continue
            response_match = self.RESPONSE_LINE_RE.match(line)
            if response_match:
                if current_block.get("request"):
                    current_block["response"] = {
                        "http_version": response_match.group(1),
                        "status": int(response_match.group(2)),
                        "status_text": response_match.group(3),
                    }
                continue
            header_match = self.HEADER_RE.match(line)
            if header_match:
                direction, name, value = header_match.groups()
                # BUG FIX: keep response ("<") headers too instead of
                # throwing them away.
                if direction == ">" and "headers" in current_block:
                    current_block["headers"].append((name, value))
                elif direction == "<" and "resp_headers" in current_block:
                    current_block["resp_headers"].append((name, value))
                continue
            # Any remaining non-blank line after the status line is treated
            # as response body content.
            if current_block and current_block.get("response") and line.strip():
                if current_block["response"].get("body") is None:
                    current_block["response"]["body"] = ""
                current_block["response"]["body"] += line + "\n"
        if current_block.get("request"):
            blocks.append(current_block)
        return blocks

    def _parse_block(
        self, block: dict[str, Any], idx: int, source_file: str | None
    ) -> HTTPEntry | None:
        """Parse a single request/response block into an HTTPEntry."""
        if not block.get("request") or not block.get("response"):
            return None
        req_data = block["request"]
        resp_data = block["response"]
        req_headers = dict(block.get("headers", []))
        resp_headers = dict(block.get("resp_headers", []))
        request = Request(
            method=req_data.get("method", "GET"),
            url=req_data.get("url", "/"),
            http_version=req_data.get("http_version", "HTTP/1.1"),
            headers=req_headers,
            body=block.get("body"),
        )
        response_body = (resp_data.get("body") or "").strip()
        response = Response(
            status=resp_data.get("status", 0),
            status_text=resp_data.get("status_text", ""),
            http_version=resp_data.get("http_version", "HTTP/1.1"),
            headers=resp_headers,
            body=response_body or None,
            # BUG FIX: content type was previously taken from the *request*
            # headers; it belongs to the response.
            content_type=resp_headers.get("Content-Type")
            or resp_headers.get("content-type"),
        )
        return HTTPEntry(
            id=f"curl-{idx}",
            request=request,
            response=response,
            # curl -v carries no timestamps; record the parse time instead.
            timestamp=datetime.now(),
            source_file=source_file,
        )

View File

@@ -0,0 +1,133 @@
"""Parser for Chrome DevTools network export format."""
import json
from datetime import datetime
from typing import Any
from http_log_explorer.models import HTTPEntry, Request, Response
from http_log_explorer.parsers import ParserInterface
class DevToolsParser(ParserInterface):
    """Parser for Chrome DevTools network export JSON."""

    @staticmethod
    def get_parser_name() -> str:
        return "DevTools"

    def can_parse(self, content: str | bytes) -> bool:
        """Check if content appears to be a DevTools network export.

        Accepts a bare JSON array of request/response entries, or a
        HAR-like {"log": {"entries": ...}} wrapper *without* a "creator"
        field (entries with a creator belong to the HAR parser).
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="ignore")
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return False
        if isinstance(data, list):
            sample = data[:3]
            # BUG FIX: all() over an empty or fully-filtered sample returned
            # True for "[]" and for lists of non-dicts; require a non-empty,
            # well-formed sample.
            return bool(sample) and all(
                isinstance(item, dict) and "request" in item and "response" in item
                for item in sample
            )
        if isinstance(data, dict):
            log = data.get("log", {})
            return "log" in data and "entries" in log and "creator" not in log
        return False

    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse DevTools network export into HTTPEntry objects."""
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")
        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON format: {e}") from e
        if isinstance(data, dict) and "log" in data:
            entries_data = data.get("log", {}).get("entries", [])
        elif isinstance(data, list):
            entries_data = data
        else:
            raise ValueError("Unrecognized DevTools format")
        entries: list[HTTPEntry] = []
        for idx, entry_data in enumerate(entries_data):
            try:
                entry = self._convert_entry(entry_data, idx, source_file)
                if entry:
                    entries.append(entry)
            except Exception:
                # Best-effort: skip malformed entries rather than failing all.
                continue
        return entries

    def _convert_entry(
        self, entry_data: dict[str, Any], idx: int, source_file: str | None
    ) -> HTTPEntry | None:
        """Convert a DevTools entry dict to our HTTPEntry model."""
        request_data = entry_data.get("request", {})
        response_data = entry_data.get("response", {})
        if not request_data or not response_data:
            return None
        post_data = request_data.get("postData")
        body = post_data.get("text") if isinstance(post_data, dict) else None
        request = Request(
            method=request_data.get("method", "GET"),
            url=request_data.get("url", ""),
            http_version=request_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(request_data.get("headers", {})),
            body=body,
            query_params=self._parse_query_params(request_data.get("queryString", [])),
        )
        content_info = response_data.get("content")
        if not isinstance(content_info, dict):
            content_info = {}
        response = Response(
            status=response_data.get("status", 0),
            status_text=response_data.get("statusText", ""),
            http_version=response_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(response_data.get("headers", {})),
            body=content_info.get("text"),
            content_type=content_info.get("mimeType"),
            response_time_ms=self._parse_time(entry_data),
        )
        return HTTPEntry(
            id=f"devtools-{idx}",
            request=request,
            response=response,
            timestamp=self._parse_timestamp(entry_data),
            server_ip=entry_data.get("serverIPAddress"),
            connection=entry_data.get("connection"),
            source_file=source_file,
        )

    def _parse_headers(self, headers: dict[str, Any] | list) -> dict[str, str]:
        """Normalize headers (dict or HAR-style list) to a dict."""
        if isinstance(headers, dict):
            return dict(headers)
        if isinstance(headers, list):
            return {h.get("name", ""): h.get("value", "") for h in headers}
        return {}

    def _parse_query_params(self, query_string: list[dict[str, Any]]) -> dict[str, str]:
        """Normalize a HAR-style queryString list to a dict."""
        if isinstance(query_string, list):
            return {p.get("name", ""): p.get("value", "") for p in query_string}
        return {}

    def _parse_time(self, entry_data: dict[str, Any]) -> float | None:
        """Extract the total entry time (milliseconds), if present."""
        if "time" in entry_data:
            return float(entry_data["time"])
        return None

    def _parse_timestamp(self, entry_data: dict[str, Any]) -> datetime | None:
        """Parse the ISO-8601 startedDateTime field, if present."""
        if "startedDateTime" in entry_data:
            try:
                # Translate trailing "Z" to an explicit UTC offset for fromisoformat.
                return datetime.fromisoformat(
                    entry_data["startedDateTime"].replace("Z", "+00:00")
                )
            except (ValueError, AttributeError):
                pass
        return None

View File

@@ -0,0 +1,47 @@
"""Parser factory for creating appropriate parsers."""
from __future__ import annotations
from typing import TYPE_CHECKING
from http_log_explorer.parsers.curl_parser import CurlParser
from http_log_explorer.parsers.devtools_parser import DevToolsParser
from http_log_explorer.parsers.har_parser import HARParser
if TYPE_CHECKING:
from http_log_explorer.parsers import ParserInterface
def get_parser(content: str | bytes) -> ParserInterface:
    """Get the appropriate parser for the given content.

    Tries each known parser (HAR, curl -v, Chrome DevTools) in order and
    returns the first one whose ``can_parse`` accepts the content.

    Args:
        content: The content to parse

    Returns:
        An appropriate parser instance

    Raises:
        ValueError: If no suitable parser is found
    """
    candidates = (HARParser(), CurlParser(), DevToolsParser())
    chosen = next((p for p in candidates if p.can_parse(content)), None)
    if chosen is None:
        raise ValueError(
            "Unsupported format. Supported formats are: HAR files, curl -v output, and Chrome DevTools network exports."
        )
    return chosen
def get_all_parsers() -> list[ParserInterface]:
    """Return fresh instances of every supported parser."""
    parsers: list[ParserInterface] = [HARParser(), CurlParser(), DevToolsParser()]
    return parsers


__all__ = ["get_parser", "get_all_parsers"]

View File

@@ -0,0 +1,146 @@
"""HAR file parser using haralyzer."""
import json
from datetime import datetime
from typing import Any
from haralyzer import HarParser
from http_log_explorer.models import HTTPEntry, Request, Response
from http_log_explorer.parsers import ParserInterface
class HARParser(ParserInterface):
"""Parser for HAR (HTTP Archive) files."""
@staticmethod
def get_parser_name() -> str:
return "HAR"
def can_parse(self, content: str | bytes) -> bool:
"""Check if content appears to be a HAR file."""
if isinstance(content, bytes):
content = content.decode("utf-8", errors="ignore")
try:
data = json.loads(content)
has_log = "log" in data
has_entries = "entries" in data.get("log", {})
has_creator = "creator" in data.get("log", {})
return has_log and has_entries and has_creator
except (json.JSONDecodeError, AttributeError):
return False
def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
    """Parse HAR content into HTTPEntry objects.

    Args:
        content: HAR JSON as text or bytes.
        source_file: Optional originating file path, recorded on entries.

    Returns:
        One HTTPEntry per well-formed entry; malformed entries are skipped.

    Raises:
        ValueError: If the content is not valid JSON or not a valid HAR
            document.
    """
    if isinstance(content, bytes):
        content = content.decode("utf-8", errors="replace")
    try:
        data = json.loads(content)
        har_parser = HarParser(data)
    except Exception as e:
        # Single handler: covers json.JSONDecodeError as well as
        # haralyzer's own validation errors.  The previous code had two
        # handlers raising the identical ValueError.
        raise ValueError(f"Invalid HAR format: {e}") from e
    entries: list[HTTPEntry] = []
    # har_parser.har_data exposes the HAR "log" mapping (haralyzer API);
    # its entries are raw dicts.
    for idx, har_entry in enumerate(har_parser.har_data.get("entries", [])):
        try:
            entry = self._convert_har_entry(har_entry, idx, source_file)
        except Exception:
            # Best-effort: skip individual malformed entries rather than
            # failing the whole file.
            continue
        if entry:
            entries.append(entry)
    return entries
def _convert_har_entry(
    self, har_entry: Any, idx: int, source_file: str | None
) -> HTTPEntry | None:
    """Build an HTTPEntry from one raw HAR entry.

    Returns None when the entry lacks request or response data.
    """
    req = har_entry.get("request")
    resp = har_entry.get("response")
    if not req or not resp:
        return None
    parsed_request = Request(
        method=req.get("method", "GET"),
        url=self._build_url(req),
        http_version=req.get("httpVersion", "HTTP/1.1"),
        headers=self._parse_headers(req.get("headers", [])),
        body=self._get_request_body(req),
        query_params=self._parse_query_params(req.get("queryString", [])),
    )
    parsed_response = Response(
        status=resp.get("status", 0),
        status_text=resp.get("statusText", ""),
        http_version=resp.get("httpVersion", "HTTP/1.1"),
        headers=self._parse_headers(resp.get("headers", [])),
        body=self._get_response_body(resp),
        content_type=self._get_content_type(resp.get("content", {})),
        response_time_ms=har_entry.get("time"),
    )
    return HTTPEntry(
        id=f"har-{idx}",
        request=parsed_request,
        response=parsed_response,
        timestamp=self._parse_timestamp(har_entry),
        server_ip=har_entry.get("serverIPAddress"),
        connection=har_entry.get("connection"),
        source_file=source_file,
    )
def _build_url(self, request_data: dict[str, Any]) -> str:
"""Build full URL from request data."""
url = request_data.get("url", "")
if not url:
host = ""
for header in request_data.get("headers", []):
if header.get("name", "").lower() == "host":
host = header.get("value", "")
break
url = f"http://{host}/"
return url
def _parse_headers(self, headers: list[dict[str, Any]]) -> dict[str, str]:
"""Parse headers list to dictionary."""
return {h.get("name", ""): h.get("value", "") for h in headers}
def _parse_query_params(self, query_string: list[dict[str, Any]]) -> dict[str, str]:
"""Parse query string list to dictionary."""
return {p.get("name", ""): p.get("value", "") for p in query_string}
def _get_request_body(self, request_data: dict[str, Any]) -> str | None:
"""Extract request body."""
post_data = request_data.get("postData", {})
if post_data:
if isinstance(post_data, dict):
return post_data.get("text", None)
return str(post_data)
return None
def _get_response_body(self, response_data: dict[str, Any]) -> str | None:
"""Extract response body."""
content = response_data.get("content", {})
if isinstance(content, dict):
return content.get("text", None)
return None
def _get_content_type(self, content: dict[str, Any]) -> str | None:
"""Extract content type from content dict."""
if isinstance(content, dict):
return content.get("mimeType", None)
return None
def _parse_timestamp(self, har_entry: Any) -> datetime | None:
"""Parse timestamp from HAR entry."""
started_datetime = getattr(har_entry, "started_datetime", None)
if started_datetime:
return started_datetime
return None