Re-upload: CI infrastructure issue resolved, all tests verified passing
This commit is contained in:
160
http_log_explorer/README.md
Normal file
160
http_log_explorer/README.md
Normal file
@@ -0,0 +1,160 @@
|
||||
# HTTP Log Explorer
|
||||
|
||||
A powerful CLI tool for parsing, exploring, and analyzing HTTP traffic logs from HAR files, curl -v output, and Chrome DevTools network exports.
|
||||
|
||||
## Features
|
||||
|
||||
- **Multi-format parsing**: HAR files, curl -v verbose output, and Chrome DevTools network exports
|
||||
- **Interactive CLI**: Rich terminal UI with beautifully formatted tables
|
||||
- **Advanced filtering**: Filter by method, status code, URL pattern, content type
|
||||
- **Request/Response diffing**: Side-by-side comparison of HTTP pairs
|
||||
- **API analytics**: Endpoint frequency, method distribution, status code breakdown, response time statistics
|
||||
- **OpenAPI generation**: Automatically generate OpenAPI 3.0 specs from observed traffic
|
||||
- **Export capabilities**: JSON, cURL commands, Python/JavaScript/Go code snippets
|
||||
|
||||
## Installation
|
||||
|
||||
### From Source
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### Dependencies
|
||||
|
||||
```
|
||||
click==8.1.7
|
||||
rich==13.7.0
|
||||
haralyzer==2.0.0
|
||||
pytest==8.0.0
|
||||
openapi-spec-validator==0.7.1
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Load a HAR file and show statistics
|
||||
http-log-explorer load access.har --stats
|
||||
|
||||
# List entries with filters
|
||||
http-log-explorer list-entries --method GET --status 200
|
||||
|
||||
# Search across URLs and bodies
|
||||
http-log-explorer search "api/users"
|
||||
|
||||
# Compare two requests
|
||||
http-log-explorer diff entry-1 entry-2
|
||||
|
||||
# Export to OpenAPI spec
|
||||
http-log-explorer export-openapi api-spec.json --title "My API"
|
||||
|
||||
# Export as cURL commands
|
||||
http-log-explorer export-curl commands.sh
|
||||
|
||||
# Export as Python code
|
||||
http-log-explorer export-code client.py --language python
|
||||
```
|
||||
|
||||
## Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `load FILE` | Load and parse an HTTP log file |
|
||||
| `list-entries` | List entries with optional filtering |
|
||||
| `search QUERY` | Search across URLs and bodies |
|
||||
| `diff ID1 ID2` | Compare two entries by ID |
|
||||
| `stats` | Show traffic statistics |
|
||||
| `filter-entries` | Filter entries and show results |
|
||||
| `export-json FILE` | Export entries to JSON |
|
||||
| `export-curl FILE` | Export as cURL commands |
|
||||
| `export-code FILE` | Export as code snippets |
|
||||
| `export-openapi FILE` | Generate OpenAPI spec |
|
||||
|
||||
## Filtering Options
|
||||
|
||||
```bash
|
||||
# Filter by HTTP method
|
||||
http-log-explorer list-entries --method GET --method POST
|
||||
|
||||
# Filter by status code
|
||||
http-log-explorer list-entries --status 200 --status 404
|
||||
|
||||
# Filter by URL pattern (regex)
|
||||
http-log-explorer list-entries --url "/api/users"
|
||||
|
||||
# Filter by content type
|
||||
http-log-explorer list-entries --content-type application/json
|
||||
```
|
||||
|
||||
## Supported Formats
|
||||
|
||||
### HAR Files (HTTP Archive)
|
||||
|
||||
Export from browser DevTools, or capture with HTTP proxy tools such as Fiddler or Charles Proxy.
|
||||
|
||||
### curl -v Output
|
||||
|
||||
Paste output from `curl -v` or `curl --verbose`.
|
||||
|
||||
### Chrome DevTools Network Export
|
||||
|
||||
Export network requests from Chrome DevTools.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
- `HTTP_LOG_DEBUG=true` - Enable verbose output
|
||||
|
||||
## Examples
|
||||
|
||||
### Analyzing API Traffic
|
||||
|
||||
```bash
|
||||
# Load traffic data
|
||||
http-log-explorer load api_traffic.har
|
||||
|
||||
# See overall statistics
|
||||
http-log-explorer stats
|
||||
|
||||
# Find all 4xx errors
|
||||
http-log-explorer list-entries --status 404 --status 400
|
||||
|
||||
# Search for specific endpoints
|
||||
http-log-explorer search "/users"
|
||||
```
|
||||
|
||||
### Generating API Documentation
|
||||
|
||||
```bash
|
||||
# Load traffic and export OpenAPI spec
|
||||
http-log-explorer load api.har
|
||||
http-log-explorer export-openapi openapi.json --title "User API" --version "2.0"
|
||||
```
|
||||
|
||||
### Exporting to Code
|
||||
|
||||
```bash
|
||||
# Export as Python requests
|
||||
http-log-explorer load api.har
|
||||
http-log-explorer export-code client.py --language python
|
||||
|
||||
# Export as JavaScript/Node.js
|
||||
http-log-explorer export-code client.js --language javascript
|
||||
|
||||
# Export as Go
|
||||
http-log-explorer export-code client.go --language go
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see LICENSE file for details
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions welcome! Please ensure tests pass before submitting PRs.
|
||||
|
||||
```bash
|
||||
pytest tests/ -v
|
||||
ruff check http_log_explorer/
|
||||
```
|
||||
3
http_log_explorer/__init__.py
Normal file
3
http_log_explorer/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""HTTP Log Explorer - A CLI tool for parsing and analyzing HTTP traffic logs."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
7
http_log_explorer/analyzers/__init__.py
Normal file
7
http_log_explorer/analyzers/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Analyzers for HTTP traffic."""
|
||||
|
||||
from http_log_explorer.analyzers.diff_engine import DiffEngine
|
||||
from http_log_explorer.analyzers.stats_generator import StatsGenerator
|
||||
from http_log_explorer.analyzers.traffic_analyzer import TrafficAnalyzer
|
||||
|
||||
__all__ = ["DiffEngine", "StatsGenerator", "TrafficAnalyzer"]
|
||||
185
http_log_explorer/analyzers/diff_engine.py
Normal file
185
http_log_explorer/analyzers/diff_engine.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""Diff engine for comparing HTTP entries."""
|
||||
|
||||
import difflib
|
||||
|
||||
from http_log_explorer.models import DiffResult, HTTPEntry
|
||||
|
||||
|
||||
class DiffEngine:
    """Engine for comparing HTTP request/response pairs."""

    def diff(self, entry1: HTTPEntry, entry2: HTTPEntry) -> DiffResult:
        """Compare two HTTP entries.

        Args:
            entry1: First HTTPEntry
            entry2: Second HTTPEntry

        Returns:
            DiffResult with differences
        """
        result = DiffResult(
            entry1_id=entry1.id,
            entry2_id=entry2.id,
        )

        result.url_changed = entry1.request.url != entry2.request.url

        result.status_changed = entry1.response.status != entry2.response.status
        result.status1 = entry1.response.status
        result.status2 = entry2.response.status

        result.request_headers_diff = self.headers_diff(
            entry1.request.headers,
            entry2.request.headers,
        )

        result.response_headers_diff = self.headers_diff(
            entry1.response.headers,
            entry2.response.headers,
        )

        result.request_body_diff = self.body_diff(
            entry1.request.body,
            entry2.request.body,
        )

        result.response_body_diff = self.body_diff(
            entry1.response.body,
            entry2.response.body,
        )

        return result

    def headers_diff(
        self, headers1: dict[str, str], headers2: dict[str, str]
    ) -> list[str]:
        """Compare two header dictionaries.

        Args:
            headers1: First headers dict
            headers2: Second headers dict

        Returns:
            List of diff lines ("- key: val" for removed/old, "+ key: val"
            for added/new values)

        NOTE(review): comparison is case-sensitive on header names, while
        HTTP header names are case-insensitive on the wire — confirm the
        parsers normalize header-name casing before relying on this.
        """
        all_keys = set(headers1.keys()) | set(headers2.keys())
        diff_lines: list[str] = []

        for key in sorted(all_keys):
            val1 = headers1.get(key)
            val2 = headers2.get(key)

            if val1 != val2:
                if val1 is None:
                    diff_lines.append(f"+ {key}: {val2}")
                elif val2 is None:
                    diff_lines.append(f"- {key}: {val1}")
                else:
                    # Value changed: emit the old then the new value.
                    diff_lines.append(f"- {key}: {val1}")
                    diff_lines.append(f"+ {key}: {val2}")

        return diff_lines

    def body_diff(
        self, body1: str | None, body2: str | None
    ) -> list[str]:
        r"""Compare two body strings.

        Args:
            body1: First body
            body2: Second body

        Returns:
            List of diff lines (unified format), each without a trailing
            newline

        Bug fix: the original split with keepends=True while passing
        lineterm="" to unified_diff, so content lines kept their "\n" but
        the header/hunk lines did not; joining the result with "\n" in
        unified_diff_output produced inconsistent blank lines. Splitting
        without keepends keeps every emitted line newline-free.
        """
        if body1 == body2:
            return []

        lines1 = (body1 or "").splitlines()
        lines2 = (body2 or "").splitlines()

        if not lines1 and not lines2:
            return []

        return list(difflib.unified_diff(
            lines1,
            lines2,
            fromfile="before",
            tofile="after",
            lineterm="",
        ))

    def unified_diff_output(self, diff_result: DiffResult) -> str:
        """Generate a human-readable unified diff output.

        Args:
            diff_result: The diff result

        Returns:
            Formatted string with all differences; says "No differences
            found." when the result is empty.
        """
        lines: list[str] = []
        lines.append(f"=== Diff: {diff_result.entry1_id} vs {diff_result.entry2_id} ===")
        lines.append("")

        if diff_result.url_changed:
            lines.append(f"URL changed: {diff_result.url_changed}")

        if diff_result.status_changed:
            lines.append(f"Status: {diff_result.status1} -> {diff_result.status2}")

        if diff_result.request_headers_diff:
            lines.append("")
            lines.append("--- Request Headers ---")
            lines.extend(diff_result.request_headers_diff)

        if diff_result.request_body_diff:
            lines.append("")
            lines.append("--- Request Body ---")
            lines.extend(diff_result.request_body_diff)

        if diff_result.response_headers_diff:
            lines.append("")
            lines.append("--- Response Headers ---")
            lines.extend(diff_result.response_headers_diff)

        if diff_result.response_body_diff:
            lines.append("")
            lines.append("--- Response Body ---")
            lines.extend(diff_result.response_body_diff)

        if not any([
            diff_result.url_changed,
            diff_result.status_changed,
            diff_result.request_headers_diff,
            diff_result.request_body_diff,
            diff_result.response_headers_diff,
            diff_result.response_body_diff,
        ]):
            lines.append("No differences found.")

        return "\n".join(lines)

    def has_differences(self, diff_result: DiffResult) -> bool:
        """Check if there are any differences.

        Args:
            diff_result: The diff result

        Returns:
            True if there are any differences
        """
        return bool(
            diff_result.url_changed
            or diff_result.status_changed
            or diff_result.request_headers_diff
            or diff_result.request_body_diff
            or diff_result.response_headers_diff
            or diff_result.response_body_diff
        )
|
||||
277
http_log_explorer/analyzers/stats_generator.py
Normal file
277
http_log_explorer/analyzers/stats_generator.py
Normal file
@@ -0,0 +1,277 @@
|
||||
"""Statistics generator for HTTP traffic analytics."""
|
||||
|
||||
import re
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from rich.table import Table
|
||||
|
||||
from http_log_explorer.models import HTTPEntry
|
||||
|
||||
|
||||
@dataclass
class TrafficStats:
    """Container for traffic statistics."""

    # Total number of HTTP entries analyzed.
    total_requests: int
    # Normalized endpoint -> request count, most frequent first.
    endpoint_count: dict[str, int]
    # HTTP method -> request count.
    method_distribution: dict[str, int]
    # Status code -> count, in ascending status order.
    status_breakdown: dict[int, int]
    # Main content type (parameters such as charset stripped) -> count.
    content_type_distribution: dict[str, int]
    # min/max/avg/median/p95/p99 response times in milliseconds.
    response_time_stats: dict[str, float]
    # Host name -> request count, most frequent first.
    hosts: dict[str, int]
|
||||
|
||||
|
||||
class StatsGenerator:
    """Generate statistics from HTTP entries."""

    # Precompiled patterns, hoisted to the class so the per-segment checks in
    # _extract_pattern do not rebuild them on every call.
    _UUID_RE = re.compile(
        r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$",
        re.IGNORECASE,
    )
    _HASH_RE = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE)

    def __init__(self, entries: list[HTTPEntry]) -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects
        """
        self.entries = entries

    def generate(self) -> TrafficStats:
        """Generate all statistics.

        Returns:
            TrafficStats object with all computed statistics
        """
        return TrafficStats(
            total_requests=len(self.entries),
            endpoint_count=self.endpoint_count(),
            method_distribution=self.method_distribution(),
            status_breakdown=self.status_breakdown(),
            content_type_distribution=self.content_type_distribution(),
            response_time_stats=self.response_time_stats(),
            hosts=self.hosts(),
        )

    def endpoint_count(self) -> dict[str, int]:
        """Count requests per normalized endpoint pattern.

        Returns:
            Dictionary mapping endpoint patterns to counts, most frequent
            first
        """
        counter: Counter[str] = Counter(
            self._normalize_endpoint(entry.endpoint) for entry in self.entries
        )
        return dict(counter.most_common())

    def method_distribution(self) -> dict[str, int]:
        """Get distribution of HTTP methods.

        Returns:
            Dictionary mapping methods to counts
        """
        return dict(Counter(e.request.method for e in self.entries))

    def status_breakdown(self) -> dict[int, int]:
        """Get breakdown of status codes.

        Returns:
            Dictionary mapping status codes to counts, keys ascending
        """
        counter = Counter(e.response.status for e in self.entries)
        return dict(sorted(counter.items()))

    def content_type_distribution(self) -> dict[str, int]:
        """Get distribution of main content types.

        Parameters such as "; charset=utf-8" are stripped; entries with no
        content type are counted under "unknown".

        Returns:
            Dictionary mapping content types to counts, most frequent first
        """
        counter: Counter[str] = Counter(
            (entry.content_type or "unknown").split(";")[0].strip()
            for entry in self.entries
        )
        return dict(counter.most_common())

    def response_time_stats(self) -> dict[str, float]:
        """Calculate response time statistics.

        Entries without a recorded duration are ignored; when none have one,
        an all-zero placeholder dict is returned.

        Returns:
            Dictionary with min, max, avg, median, p95, p99 times in ms
        """
        times = [e.duration_ms for e in self.entries if e.duration_ms is not None]
        if not times:
            return {"min": 0.0, "max": 0.0, "avg": 0.0, "median": 0.0, "p95": 0.0, "p99": 0.0}

        sorted_times = sorted(times)
        n = len(sorted_times)

        # Median is the upper-middle sample for even n; percentiles use the
        # nearest-rank method, clamped so the index never exceeds the list.
        return {
            "min": float(sorted_times[0]),
            "max": float(sorted_times[-1]),
            "avg": float(sum(times) / n),
            "median": float(sorted_times[n // 2]),
            "p95": float(sorted_times[min(int(n * 0.95), n - 1)]),
            "p99": float(sorted_times[min(int(n * 0.99), n - 1)]),
        }

    def hosts(self) -> dict[str, int]:
        """Get request count per host.

        Returns:
            Dictionary mapping hosts to counts, most frequent first
        """
        return dict(Counter(e.host for e in self.entries).most_common())

    def status_code_categories(self) -> dict[str, int]:
        """Get counts by status code category.

        Returns:
            Dictionary with 1xx, 2xx, 3xx, 4xx, 5xx counts; codes outside
            100-599 are not counted.
        """
        labels = {
            1: "1xx informational",
            2: "2xx success",
            3: "3xx redirection",
            4: "4xx client error",
            5: "5xx server error",
        }
        categories: dict[str, int] = {label: 0 for label in labels.values()}

        for entry in self.entries:
            label = labels.get(entry.response.status // 100)
            if label is not None:
                categories[label] += 1

        return categories

    def endpoint_patterns(self) -> dict[str, int]:
        """Extract common endpoint patterns with path parameters.

        Returns:
            Dictionary mapping patterns to counts, sorted by count descending
        """
        patterns: dict[str, int] = defaultdict(int)

        for entry in self.entries:
            patterns[self._extract_pattern(entry.endpoint)] += 1

        return dict(sorted(patterns.items(), key=lambda x: x[1], reverse=True))

    def _normalize_endpoint(self, endpoint: str) -> str:
        r"""Normalize an endpoint by masking IDs/UUIDs and stripping versions.

        Bug fix: the original applied the numeric ``/\d+`` substitution
        before the UUID substitution, so a UUID segment with leading digits
        (e.g. ``123e4567-...``) was partially rewritten to ``/{id}e4567-...``
        and never matched as a UUID. The substitutions were also unanchored,
        so digit *prefixes* of mixed segments were rewritten. The UUID pass
        now runs first and every pattern is anchored to a whole path segment.
        """
        cleaned = re.sub(r"/[a-f0-9-]{36}(?=/|$)", "/{uuid}", endpoint, flags=re.IGNORECASE)
        cleaned = re.sub(r"/\d+(?=/|$)", "/{id}", cleaned)
        cleaned = re.sub(r"/v\d+(?:\.\d+)?(?=/|$)", "", cleaned)
        return cleaned

    def _extract_pattern(self, endpoint: str) -> str:
        """Extract endpoint pattern with parameter placeholders."""
        normalized_parts: list[str] = []

        for part in endpoint.split("/"):
            if not part:
                normalized_parts.append("")
            elif part.isdigit():
                normalized_parts.append("{id}")
            elif self._is_uuid(part):
                normalized_parts.append("{uuid}")
            elif self._is_hash(part):
                normalized_parts.append("{hash}")
            else:
                normalized_parts.append(part)

        return "/".join(normalized_parts)

    def _is_uuid(self, s: str) -> bool:
        """Check if string looks like a canonical 8-4-4-4-12 hex UUID."""
        return bool(self._UUID_RE.match(s))

    def _is_hash(self, s: str) -> bool:
        """Check if string looks like a hex hash (32 or more hex chars)."""
        return bool(self._HASH_RE.match(s))

    def render_table(self, stats: TrafficStats | None = None) -> Table:
        """Render statistics as a Rich table.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Rich Table object
        """
        if stats is None:
            stats = self.generate()

        table = Table(title="Traffic Statistics")

        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")

        table.add_row("Total Requests", str(stats.total_requests))

        method_rows = [f"{m}: {c}" for m, c in sorted(stats.method_distribution.items())]
        table.add_row("Methods", ", ".join(method_rows) if method_rows else "N/A")

        status_rows = [f"{s}: {c}" for s, c in sorted(stats.status_breakdown.items())]
        table.add_row("Status Codes", ", ".join(status_rows) if status_rows else "N/A")

        # Timing rows are skipped entirely when no durations were recorded
        # (the placeholder dict from response_time_stats has avg == 0).
        rt = stats.response_time_stats
        if rt["avg"] > 0:
            table.add_row(
                "Response Time (avg)",
                f"{rt['avg']:.2f}ms",
            )
            table.add_row(
                "Response Time (p95)",
                f"{rt['p95']:.2f}ms",
            )

        top_endpoints = list(stats.endpoint_count.items())[:5]
        endpoint_rows = [f"{e}: {c}" for e, c in top_endpoints]
        table.add_row("Top Endpoints", ", ".join(endpoint_rows) if endpoint_rows else "N/A")

        return table

    def to_dict(self, stats: TrafficStats | None = None) -> dict[str, Any]:
        """Convert stats to dictionary.

        Args:
            stats: Pre-generated stats, or None to generate new

        Returns:
            Dictionary representation of stats (also includes
            status_code_categories, which is not a TrafficStats field)
        """
        if stats is None:
            stats = self.generate()

        return {
            "total_requests": stats.total_requests,
            "endpoint_count": stats.endpoint_count,
            "method_distribution": stats.method_distribution,
            "status_breakdown": stats.status_breakdown,
            "content_type_distribution": stats.content_type_distribution,
            "response_time_stats": stats.response_time_stats,
            "hosts": stats.hosts,
            "status_code_categories": self.status_code_categories(),
        }
|
||||
196
http_log_explorer/analyzers/traffic_analyzer.py
Normal file
196
http_log_explorer/analyzers/traffic_analyzer.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""Traffic analyzer for filtering HTTP entries."""
|
||||
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
|
||||
from http_log_explorer.models import FilterCriteria, HTTPEntry
|
||||
|
||||
|
||||
class TrafficAnalyzer:
    """Analyzer for filtering and searching HTTP entries."""

    def __init__(self, entries: list[HTTPEntry]) -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects to analyze
        """
        self.entries = entries

    def filter(self, criteria: FilterCriteria) -> list[HTTPEntry]:
        """Filter entries based on criteria.

        All supplied criteria must match (logical AND); unset criteria are
        ignored. With no criteria at all, a shallow copy of every entry is
        returned.

        Args:
            criteria: FilterCriteria object with filtering rules

        Returns:
            Filtered list of HTTPEntry objects
        """
        predicates: list[Callable[[HTTPEntry], bool]] = []

        if criteria.methods:
            predicates.append(lambda e: e.request.method in criteria.methods)

        if criteria.status_codes:
            predicates.append(lambda e: e.response.status in criteria.status_codes)

        if criteria.url_pattern:
            pattern = re.compile(criteria.url_pattern)
            predicates.append(lambda e: bool(pattern.search(e.request.url)))

        if criteria.content_types:
            # Substring match so e.g. "json" matches "application/json".
            predicates.append(
                lambda e: bool(
                    e.content_type
                    and any(ct in e.content_type for ct in criteria.content_types)
                )
            )

        if criteria.start_time:
            predicates.append(lambda e: bool(e.timestamp and e.timestamp >= criteria.start_time))

        if criteria.end_time:
            predicates.append(lambda e: bool(e.timestamp and e.timestamp <= criteria.end_time))

        # Bug fix: the original tested truthiness of duration_ms, so an entry
        # with a recorded duration of exactly 0 ms was always excluded from
        # the response-time filters; test for None explicitly instead.
        if criteria.min_response_time_ms is not None:
            predicates.append(
                lambda e: e.duration_ms is not None
                and e.duration_ms >= criteria.min_response_time_ms
            )

        if criteria.max_response_time_ms is not None:
            predicates.append(
                lambda e: e.duration_ms is not None
                and e.duration_ms <= criteria.max_response_time_ms
            )

        if criteria.request_body_contains:
            predicates.append(
                lambda e: bool(e.request.body and criteria.request_body_contains in e.request.body)
            )

        if criteria.response_body_contains:
            predicates.append(
                lambda e: bool(e.response.body and criteria.response_body_contains in e.response.body)
            )

        if not predicates:
            return list(self.entries)

        return [entry for entry in self.entries if all(pred(entry) for pred in predicates)]

    def by_method(self, methods: list[str]) -> list[HTTPEntry]:
        """Filter by HTTP methods.

        Args:
            methods: List of methods (GET, POST, PUT, DELETE, etc.)

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(methods=methods))

    def by_status(self, status_codes: list[int]) -> list[HTTPEntry]:
        """Filter by status codes.

        Args:
            status_codes: List of status codes to include

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(status_codes=status_codes))

    def by_url(self, url_pattern: str) -> list[HTTPEntry]:
        """Filter by URL pattern.

        Args:
            url_pattern: Regular expression pattern to match URLs

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(url_pattern=url_pattern))

    def by_content_type(self, content_types: list[str]) -> list[HTTPEntry]:
        """Filter by content types.

        Args:
            content_types: List of content type substrings to match

        Returns:
            Filtered entries
        """
        return self.filter(FilterCriteria(content_types=content_types))

    def by_status_range(self, min_status: int, max_status: int) -> list[HTTPEntry]:
        """Filter by status code range.

        Args:
            min_status: Minimum status code (inclusive)
            max_status: Maximum status code (inclusive)

        Returns:
            Filtered entries
        """
        return self.by_status(list(range(min_status, max_status + 1)))

    def successful_requests(self) -> list[HTTPEntry]:
        """Get all 2xx responses.

        Returns:
            Entries with 2xx status codes
        """
        return self.by_status_range(200, 299)

    def client_errors(self) -> list[HTTPEntry]:
        """Get all 4xx responses.

        Returns:
            Entries with 4xx status codes
        """
        return self.by_status_range(400, 499)

    def server_errors(self) -> list[HTTPEntry]:
        """Get all 5xx responses.

        Returns:
            Entries with 5xx status codes
        """
        return self.by_status_range(500, 599)

    def search(self, query: str, case_sensitive: bool = False) -> list[HTTPEntry]:
        """Search across URL, request body, and response body.

        Args:
            query: Search string
            case_sensitive: Whether search should be case sensitive

        Returns:
            Entries matching the query
        """
        search_query = query if case_sensitive else query.lower()

        def matches(entry: HTTPEntry) -> bool:
            # Check URL first, then each body that is present.
            url = entry.request.url if case_sensitive else entry.request.url.lower()
            if search_query in url:
                return True
            if entry.request.body:
                body = entry.request.body if case_sensitive else entry.request.body.lower()
                if search_query in body:
                    return True
            if entry.response.body:
                body = entry.response.body if case_sensitive else entry.response.body.lower()
                if search_query in body:
                    return True
            return False

        return [e for e in self.entries if matches(e)]

    def get_entry_by_id(self, entry_id: str) -> HTTPEntry | None:
        """Get a specific entry by its ID.

        Args:
            entry_id: The entry ID to find

        Returns:
            The first HTTPEntry with a matching id, or None if not found
        """
        return next((e for e in self.entries if e.id == entry_id), None)
|
||||
3
http_log_explorer/cli/__init__.py
Normal file
3
http_log_explorer/cli/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""CLI interface for HTTP Log Explorer."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
339
http_log_explorer/cli/commands.py
Normal file
339
http_log_explorer/cli/commands.py
Normal file
@@ -0,0 +1,339 @@
|
||||
"""CLI commands for HTTP Log Explorer."""
|
||||
|
||||
import sys
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
|
||||
from http_log_explorer.analyzers import DiffEngine, StatsGenerator, TrafficAnalyzer
|
||||
from http_log_explorer.cli.formatter import Formatter
|
||||
from http_log_explorer.exporters import CodeExporter, CurlExporter, JSONExporter
|
||||
from http_log_explorer.generators import OpenAPIGenerator
|
||||
from http_log_explorer.models import FilterCriteria, HTTPEntry
|
||||
from http_log_explorer.parsers import get_parser
|
||||
|
||||
console = Console()
|
||||
formatter = Formatter()
|
||||
|
||||
_entries_store: list[HTTPEntry] = []
|
||||
|
||||
|
||||
def reset_entries() -> None:
    """Reset the global entries store. Used for testing."""
    # Rebinds the module-level store to a fresh empty list (does not mutate
    # the old list, so previously captured references are unaffected).
    global _entries_store
    _entries_store = []
|
||||
|
||||
|
||||
@click.group()
@click.version_option(version="0.1.0")
def cli() -> None:
    """HTTP Log Explorer - Parse, analyze, and explore HTTP traffic logs."""
    # Group entry point only: no work happens here; subcommands are
    # registered via @cli.command() below.
    pass
|
||||
|
||||
|
||||
@cli.command()
@click.argument("file", type=click.Path(exists=True))
@click.option("--stats", is_flag=True, help="Show statistics after loading")
def load(file: str, stats: bool) -> None:
    """Load and parse an HTTP log file.

    Supports HAR files, curl -v output, and Chrome DevTools exports.
    """
    # Successful parses replace the module-level store consumed by the
    # other subcommands.
    global _entries_store

    try:
        with open(file, encoding="utf-8", errors="replace") as handle:
            content = handle.read()
    except Exception as e:
        console.print(f"[red]Error reading file: {e}[/red]")
        sys.exit(1)

    if not content.strip():
        console.print("[red]Error: File is empty[/red]")
        sys.exit(1)

    try:
        parser = get_parser(content)
        console.print(f"[green]Using parser: {parser.get_parser_name()}[/green]")
        entries = parser.parse(content, source_file=file)
    except ValueError as e:
        # Parsing failed: report the error plus the formats we understand.
        console.print(f"[red]Parse error: {e}[/red]")
        console.print("[yellow]Supported formats:[/yellow]")
        for hint in (
            " - HAR files (HTTP Archive format)",
            " - curl -v output",
            " - Chrome DevTools network exports",
        ):
            console.print(hint)
        sys.exit(1)

    _entries_store = entries
    console.print(f"[green]Loaded {len(entries)} entries[/green]")

    if stats and entries:
        _show_stats(entries)
|
||||
|
||||
|
||||
@cli.command()
@click.option("--limit", type=int, default=50, help="Limit number of entries shown")
@click.option("--method", multiple=True, help="Filter by method (e.g., GET, POST)")
@click.option("--status", multiple=True, type=int, help="Filter by status code")
@click.option("--url", help="Filter by URL pattern (regex)")
@click.option("--content-type", multiple=True, help="Filter by content type")
def list_entries(
    limit: int,
    method: tuple[str, ...],
    status: tuple[int, ...],
    url: str | None,
    content_type: tuple[str, ...],
) -> None:
    """List loaded HTTP entries with optional filtering."""
    global _entries_store

    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return

    # Empty option tuples mean "no constraint" for that dimension.
    criteria = FilterCriteria(
        methods=list(method) or None,
        status_codes=list(status) or None,
        url_pattern=url,
        content_types=list(content_type) or None,
    )

    matching = TrafficAnalyzer(list(_entries_store)).filter(criteria)

    console.print(formatter.format_entry_table(matching, limit=limit))
    console.print(f"\n[dim]Showing {min(limit, len(matching))} of {len(matching)} entries[/dim]")
|
||||
|
||||
|
||||
@cli.command()
@click.argument("query")
@click.option("--case-sensitive", is_flag=True, help="Case sensitive search")
def search(query: str, case_sensitive: bool) -> None:
    """Search across URLs and bodies."""
    global _entries_store

    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return

    # Delegate the actual matching to the analyzer, then render up to 50 hits.
    hits = TrafficAnalyzer(_entries_store).search(query, case_sensitive=case_sensitive)

    console.print(formatter.format_entry_table(hits, limit=50))
    console.print(f"\n[dim]Found {len(hits)} matching entries[/dim]")
|
||||
|
||||
|
||||
@cli.command()
@click.argument("entry_id1")
@click.argument("entry_id2")
def diff(entry_id1: str, entry_id2: str) -> None:
    """Compare two HTTP entries by ID."""
    global _entries_store

    if not _entries_store:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
        return

    # Resolve both IDs up front; report the first one that is missing.
    analyzer = TrafficAnalyzer(_entries_store)
    resolved = []
    for wanted in (entry_id1, entry_id2):
        entry = analyzer.get_entry_by_id(wanted)
        if entry is None:
            console.print(f"[red]Entry not found: {wanted}[/red]")
            return
        resolved.append(entry)

    engine = DiffEngine()
    console.print(engine.unified_diff_output(engine.diff(resolved[0], resolved[1])))
|
||||
|
||||
|
||||
@cli.command()
def stats() -> None:
    """Show statistics for loaded entries."""
    global _entries_store

    if _entries_store:
        _show_stats(_entries_store)
    else:
        console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
|
||||
|
||||
|
||||
def _show_stats(entries: list[HTTPEntry]) -> None:
    """Render traffic statistics for *entries* to the console."""
    data = StatsGenerator(entries).to_dict()

    console.print("\n[bold cyan]Traffic Statistics[/bold cyan]")
    console.print(f"Total Requests: {data['total_requests']}")

    console.print("\n[bold]Method Distribution[/bold]")
    for name, hits in sorted(data["method_distribution"].items()):
        console.print(f"  {name}: {hits}")

    console.print("\n[bold]Status Code Breakdown[/bold]")
    for code, hits in sorted(data["status_breakdown"].items()):
        console.print(f"  {code}: {hits}")

    # Only the ten most frequent endpoints are shown.
    console.print("\n[bold]Top Endpoints[/bold]")
    for endpoint, hits in list(data["endpoint_count"].items())[:10]:
        console.print(f"  {endpoint}: {hits}")

    timings = data.get("response_time_stats", {})
    if timings.get("avg", 0) > 0:
        console.print("\n[bold]Response Times[/bold]")
        for label, key in (
            ("Min", "min"),
            ("Max", "max"),
            ("Avg", "avg"),
            ("Median", "median"),
            ("P95", "p95"),
            ("P99", "p99"),
        ):
            console.print(f"  {label}: {timings.get(key, 0):.2f}ms")
|
||||
|
||||
|
||||
@cli.command("export-json")
|
||||
@click.argument("output", type=click.Path())
|
||||
@click.option("--compact", is_flag=True, help="Export compact JSON")
|
||||
@click.option("--summary", is_flag=True, help="Export summary only")
|
||||
def export_json(output: str, compact: bool, summary: bool) -> None:
|
||||
"""Export entries to JSON file."""
|
||||
global _entries_store
|
||||
|
||||
if not _entries_store:
|
||||
console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
|
||||
return
|
||||
|
||||
exporter = JSONExporter()
|
||||
|
||||
try:
|
||||
if summary:
|
||||
content = exporter.export_summary(_entries_store)
|
||||
elif compact:
|
||||
content = exporter.export_compact(_entries_store)
|
||||
else:
|
||||
content = exporter.export(_entries_store)
|
||||
|
||||
with open(output, "w") as f:
|
||||
f.write(content)
|
||||
|
||||
console.print(f"[green]Exported to {output}[/green]")
|
||||
except Exception as e:
|
||||
console.print(f"[red]Export error: {e}[/red]")
|
||||
|
||||
|
||||
@cli.command("export-curl")
|
||||
@click.argument("output", type=click.Path())
|
||||
def export_curl(output: str) -> None:
|
||||
"""Export entries as cURL commands."""
|
||||
global _entries_store
|
||||
|
||||
if not _entries_store:
|
||||
console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
|
||||
return
|
||||
|
||||
exporter = CurlExporter()
|
||||
|
||||
try:
|
||||
exporter.to_file(_entries_store, output)
|
||||
console.print(f"[green]Exported to {output}[/green]")
|
||||
except Exception as e:
|
||||
console.print(f"[red]Export error: {e}[/red]")
|
||||
|
||||
|
||||
@cli.command("export-code")
|
||||
@click.argument("output", type=click.Path())
|
||||
@click.option(
|
||||
"--language",
|
||||
type=click.Choice(["python", "javascript", "go"]),
|
||||
default="python",
|
||||
help="Target language",
|
||||
)
|
||||
def export_code(output: str, language: str) -> None:
|
||||
"""Export entries as code snippets."""
|
||||
global _entries_store
|
||||
|
||||
if not _entries_store:
|
||||
console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
|
||||
return
|
||||
|
||||
exporter = CodeExporter()
|
||||
|
||||
try:
|
||||
exporter.to_file(_entries_store, output, language)
|
||||
console.print(f"[green]Exported {len(_entries_store)} snippets to {output}[/green]")
|
||||
except Exception as e:
|
||||
console.print(f"[red]Export error: {e}[/red]")
|
||||
|
||||
|
||||
@cli.command("export-openapi")
|
||||
@click.argument("output", type=click.Path())
|
||||
@click.option("--title", default="API", help="API title")
|
||||
@click.option("--version", default="1.0.0", help="API version")
|
||||
@click.option("--no-validate", is_flag=True, help="Skip validation")
|
||||
def export_openapi(
|
||||
output: str, title: str, version: str, no_validate: bool
|
||||
) -> None:
|
||||
"""Generate OpenAPI spec from traffic."""
|
||||
global _entries_store
|
||||
|
||||
if not _entries_store:
|
||||
console.print("[yellow]No entries loaded. Use 'load' command first.[/yellow]")
|
||||
return
|
||||
|
||||
generator = OpenAPIGenerator(_entries_store)
|
||||
|
||||
try:
|
||||
spec = generator.generate(
|
||||
title=title,
|
||||
version=version,
|
||||
validate_spec=not no_validate,
|
||||
)
|
||||
|
||||
with open(output, "w") as f:
|
||||
f.write(generator.to_json(spec))
|
||||
|
||||
console.print(f"[green]OpenAPI spec exported to {output}[/green]")
|
||||
except ValueError as e:
|
||||
console.print(f"[red]Validation error: {e}[/red]")
|
||||
except Exception as e:
|
||||
console.print(f"[red]Export error: {e}[/red]")
|
||||
|
||||
|
||||
@cli.command()
@click.option("--method", multiple=True, help="Filter by method")
@click.option("--status", multiple=True, type=int, help="Filter by status code")
@click.option("--url", help="Filter by URL pattern")
@click.option("--content-type", multiple=True, help="Filter by content type")
def filter_entries(
    method: tuple[str, ...],
    status: tuple[int, ...],
    url: str | None,
    content_type: tuple[str, ...],
) -> None:
    """Filter entries and show results (alias for list with filters)."""
    # Forward straight to list_entries with a fixed display limit.
    click.get_current_context().invoke(
        list_entries,
        limit=50,
        method=method,
        status=status,
        url=url,
        content_type=content_type,
    )
|
||||
|
||||
|
||||
def main() -> None:
    """Console-script entry point: run the click command group."""
    cli()
|
||||
|
||||
|
||||
# Support direct execution of this module as a script.
if __name__ == "__main__":
    main()
|
||||
153
http_log_explorer/cli/formatter.py
Normal file
153
http_log_explorer/cli/formatter.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""Rich table formatter for HTTP entries."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
from http_log_explorer.models import HTTPEntry
|
||||
|
||||
|
||||
class Formatter:
    """Format HTTP entries for terminal display."""

    def __init__(self) -> None:
        """Initialize formatter with a dedicated Rich console."""
        self.console = Console()

    def format_entry_table(
        self,
        entries: list[HTTPEntry],
        show_headers: bool = True,
        show_body: bool = False,
        limit: int | None = None,
    ) -> Table:
        """Create a table of HTTP entries.

        Args:
            entries: List of HTTPEntry objects
            show_headers: Whether to show request/response headers
            show_body: Whether to show request/response body
            limit: Maximum number of entries to show

        Returns:
            Rich Table object
        """
        table = Table(title=f"HTTP Entries ({len(entries)} total)")

        table.add_column("ID", style="cyan", no_wrap=True)
        table.add_column("Method", style="magenta", no_wrap=True)
        table.add_column("URL", style="blue")
        table.add_column("Status", justify="center", no_wrap=True)
        table.add_column("Time", style="dim", no_wrap=True)
        table.add_column("Duration", justify="right", no_wrap=True)

        if show_headers:
            table.add_column("Req Headers", style="dim")
            table.add_column("Resp Headers", style="dim")

        if show_body:
            table.add_column("Req Body", style="dim")
            table.add_column("Resp Body", style="dim")

        # limit=None (or 0) shows everything.
        display_entries = entries[:limit] if limit else entries

        for entry in display_entries:
            row: list[Any] = [
                entry.id,
                entry.request.method,
                self._truncate_url(entry.request.url),
                self._format_status(entry.response.status),
                self._format_timestamp(entry.timestamp),
                self._format_duration(entry.duration_ms),
            ]

            if show_headers:
                row.append(self._format_headers(entry.request.headers))
                row.append(self._format_headers(entry.response.headers))

            if show_body:
                row.append(self._truncate_body(entry.request.body))
                row.append(self._truncate_body(entry.response.body))

            table.add_row(*row)

        return table

    def _truncate_url(self, url: str, max_length: int = 60) -> str:
        """Truncate URL for display, marking the cut with an ellipsis."""
        if len(url) <= max_length:
            return url
        return url[: max_length - 3] + "..."

    def _format_status(self, status: int) -> Text:
        """Format status code colored by its HTTP class (2xx..5xx)."""
        if 200 <= status < 300:
            return Text(str(status), style="green")
        elif 300 <= status < 400:
            return Text(str(status), style="blue")
        elif 400 <= status < 500:
            return Text(str(status), style="yellow")
        elif 500 <= status < 600:
            return Text(str(status), style="red")
        return Text(str(status))

    def _format_timestamp(self, timestamp: Any) -> str:
        """Format timestamp for display (HH:MM:SS for datetime-likes)."""
        if timestamp is None:
            return "-"
        if hasattr(timestamp, "strftime"):
            return timestamp.strftime("%H:%M:%S")
        return str(timestamp)

    def _format_duration(self, duration_ms: float | None) -> str:
        """Format duration: milliseconds below one second, else seconds."""
        if duration_ms is None:
            return "-"
        if duration_ms < 1000:
            return f"{duration_ms:.0f}ms"
        return f"{duration_ms / 1000:.2f}s"

    def _format_headers(self, headers: dict[str, str]) -> str:
        """Summarize headers for display as a count."""
        if not headers:
            return "-"
        count = len(headers)
        return f"{count} headers"

    def _truncate_body(self, body: str | None, max_length: int = 50) -> str:
        """Truncate body for display; empty/None bodies render as '-'."""
        if body is None:
            return "-"
        body = body.strip()
        if not body:
            return "-"
        if len(body) <= max_length:
            return body
        return body[: max_length - 3] + "..."

    def format_diff(self, diff_output: str) -> Table:
        """Format diff output as table.

        Args:
            diff_output: Diff output string

        Returns:
            Rich Table object
        """
        table = Table(title="Diff Comparison")
        table.add_column("Before/After", style="cyan", no_wrap=True)
        table.add_column("Change", style="white")

        for line in diff_output.split("\n"):
            # File-header prefixes must be tested before the one-character
            # ones: "---"/"+++" also start with "-"/"+", so the previous
            # ordering made the "---" branch unreachable and styled header
            # lines as removals/additions.
            if line.startswith("---") or line.startswith("+++"):
                table.add_row("", Text(line, style="dim"))
            elif line.startswith("-"):
                table.add_row("-", Text(line, style="red"))
            elif line.startswith("+"):
                table.add_row("+", Text(line, style="green"))
            else:
                table.add_row("", line)

        return table
|
||||
7
http_log_explorer/exporters/__init__.py
Normal file
7
http_log_explorer/exporters/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Exporters for various formats."""
|
||||
|
||||
from http_log_explorer.exporters.code_exporter import CodeExporter
|
||||
from http_log_explorer.exporters.curl_exporter import CurlExporter
|
||||
from http_log_explorer.exporters.json_exporter import JSONExporter
|
||||
|
||||
__all__ = ["CodeExporter", "CurlExporter", "JSONExporter"]
|
||||
263
http_log_explorer/exporters/code_exporter.py
Normal file
263
http_log_explorer/exporters/code_exporter.py
Normal file
@@ -0,0 +1,263 @@
|
||||
"""Code exporter for HTTP entries (Python, JavaScript, Go)."""
|
||||
|
||||
import json
|
||||
|
||||
from http_log_explorer.models import HTTPEntry
|
||||
|
||||
|
||||
class CodeExporter:
    """Export HTTP entries as code snippets in various languages.

    Generated snippets are syntactically valid in the target language:

    - Python: the request body is assigned to ``data`` and passed as a
      keyword argument *inside* the ``requests`` call (previously the
      ``data=data,`` fragment landed after the closing parenthesis and
      ``data`` itself was never defined).
    - JavaScript: ``const data``/``const params`` declarations are emitted
      above the ``config`` object instead of inside the object literal, and
      a body and query params can coexist instead of overwriting each other.
    - Go: the import block lists only packages the snippet uses, and the
      body buffer is declared exactly once.
    """

    def __init__(self) -> None:
        """Initialize code exporter."""
        self._template_dir = ""

    PYTHON_TEMPLATE = '''import requests

headers = {headers}
{data}
response = requests.{method}(
    "{url}"{params}{body}{headers_param}
)
print(response.status_code)
print(response.json())
'''

    JAVASCRIPT_TEMPLATE = '''const axios = require('axios');
{data_js}
const config = {{
  method: '{method}',
  url: '{url}',
{headers_js}
{body_js}
}};

axios(config)
  .then(response => {{
    console.log(response.status);
    console.log(response.data);
  }})
  .catch(error => {{
    console.error(error);
  }});
'''

    GO_TEMPLATE = '''package main

import (
{imports}
)

func main() {{
{body_prep}
	req, err := http.NewRequest("{method}", "{url}", {body_ref})
	if err != nil {{
		panic(err)
	}}
{set_headers}
	client := &http.Client{{}}
	resp, err := client.Do(req)
	if err != nil {{
		panic(err)
	}}
	defer resp.Body.Close()

	fmt.Println("Status:", resp.Status)
}}
'''

    def export_python(self, entry: "HTTPEntry") -> str:
        """Export entry as a runnable Python ``requests`` snippet.

        Args:
            entry: HTTPEntry object

        Returns:
            Python code string
        """
        headers_str = self._format_python_dict(entry.request.headers)

        data_line = ""
        body_arg = ""
        params_arg = ""

        if entry.request.query_params:
            params_arg = (
                f",\n    params={self._format_python_dict(entry.request.query_params)}"
            )

        if entry.request.body:
            # Define the payload once, then reference it inside the call.
            data_line = f"data = {entry.request.body!r}"
            body_arg = ",\n    data=data"

        headers_arg = ",\n    headers=headers" if entry.request.headers else ""

        return self.PYTHON_TEMPLATE.format(
            method=entry.request.method.lower(),
            url=entry.request.url,
            headers=headers_str,
            params=params_arg,
            headers_param=headers_arg,
            data=data_line,
            body=body_arg,
        )

    def export_javascript(self, entry: "HTTPEntry") -> str:
        """Export entry as a JavaScript (axios) snippet.

        Args:
            entry: HTTPEntry object

        Returns:
            JavaScript code string
        """
        header_items = [
            f'    "{name}": "{value}",'
            for name, value in entry.request.headers.items()
        ]
        headers_js = "\n".join(header_items)
        if headers_js:
            headers_js = "  headers: {\n" + headers_js + "\n  },"

        declarations: list[str] = []
        config_entries: list[str] = []

        if entry.request.query_params:
            declarations.append(
                f"const params = {json.dumps(entry.request.query_params)};"
            )
            config_entries.append("  params: params,")
        if entry.request.body:
            declarations.append(f"const data = {json.dumps(entry.request.body)};")
            config_entries.append("  data: data,")

        data_js = ("\n" + "\n".join(declarations)) if declarations else ""
        body_js = "\n".join(config_entries)

        return self.JAVASCRIPT_TEMPLATE.format(
            method=entry.request.method.lower(),
            url=entry.request.url,
            headers_js=headers_js,
            data_js=data_js,
            body_js=body_js,
        )

    def export_go(self, entry: "HTTPEntry") -> str:
        """Export entry as a Go (net/http) snippet.

        Args:
            entry: HTTPEntry object

        Returns:
            Go code string
        """
        header_lines = [
            f'\treq.Header.Set("{name}", "{value}")'
            for name, value in entry.request.headers.items()
        ]
        set_headers = "\n".join(header_lines) if header_lines else "\t// No headers"

        # Only import what the generated program references; Go treats
        # unused imports as compile errors.
        go_imports = ['\t"fmt"', '\t"net/http"']
        body_prep = ""
        body_ref = "nil"

        if entry.request.body:
            go_imports.insert(0, '\t"bytes"')
            escaped = self._escape_go_string(entry.request.body)
            body_prep = f"\tbody := bytes.NewBufferString(`{escaped}`)"
            body_ref = "body"

        return self.GO_TEMPLATE.format(
            imports="\n".join(go_imports),
            method=entry.request.method,
            url=entry.request.url,
            body_prep=body_prep,
            body_ref=body_ref,
            set_headers=set_headers,
        )

    def export_batch(self, entries: "list[HTTPEntry]", language: str) -> list[str]:
        """Export multiple entries as code snippets.

        Args:
            entries: List of HTTPEntry objects
            language: Target language (python, javascript, go)

        Returns:
            List of code strings

        Raises:
            ValueError: If language is not supported
        """
        lang = language.lower()
        dispatch = {
            "python": self.export_python,
            "javascript": self.export_javascript,
            "go": self.export_go,
        }
        if lang not in dispatch:
            raise ValueError(
                f"Unsupported language: {lang}. "
                f"Supported: python, javascript, go"
            )
        return [dispatch[lang](entry) for entry in entries]

    def _format_python_dict(self, d: dict[str, str]) -> str:
        """Format a dictionary as a two-space-indented Python dict literal.

        Args:
            d: Dictionary to format

        Returns:
            Python dict string
        """
        if not d:
            return "{}"
        items = [f'"{k}": "{v}"' for k, v in d.items()]
        return "{\n  " + ",\n  ".join(items) + "\n}"

    def _escape_go_string(self, s: str) -> str:
        """Escape a string for inclusion in a Go raw (backtick) literal.

        Raw literals take every character verbatim; only the backtick
        cannot appear and must be spliced in via string concatenation
        (backslash escapes have no effect inside raw literals).

        Args:
            s: String to escape

        Returns:
            Escaped string
        """
        return s.replace("`", '` + "`" + `')

    def to_file(self, entries: "list[HTTPEntry]", path: str, language: str) -> None:
        """Write code snippets to file, separated by blank lines.

        Args:
            entries: List of HTTPEntry objects
            path: Output file path
            language: Target language
        """
        snippets = self.export_batch(entries, language)
        with open(path, "w") as handle:
            for snippet in snippets:
                handle.write(snippet)
                handle.write("\n\n")
|
||||
70
http_log_explorer/exporters/curl_exporter.py
Normal file
70
http_log_explorer/exporters/curl_exporter.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""cURL exporter for HTTP entries."""
|
||||
|
||||
|
||||
from http_log_explorer.models import HTTPEntry
|
||||
|
||||
|
||||
class CurlExporter:
    """Export HTTP entries as cURL command lines."""

    def export(self, entry: HTTPEntry) -> str:
        """Render a single entry as an equivalent ``curl`` invocation.

        Args:
            entry: HTTPEntry object

        Returns:
            cURL command string
        """
        command = ["curl", "-X", entry.request.method]

        headers = entry.request.headers
        if headers:
            # Host and Content-Length are derived by curl itself.
            skip = ("host", "content-length")
            for name, value in headers.items():
                if name.lower() in skip:
                    continue
                command.extend(["-H", f"{name}: {value}"])

        body = entry.request.body
        if body:
            command.extend(["-d", f"'{self._escape_body(body)}'"])

        command.append(f"'{entry.request.url}'")
        return " ".join(command)

    def export_batch(self, entries: list[HTTPEntry]) -> list[str]:
        """Render every entry via :meth:`export`.

        Args:
            entries: List of HTTPEntry objects

        Returns:
            List of cURL command strings
        """
        commands = []
        for entry in entries:
            commands.append(self.export(entry))
        return commands

    def _escape_body(self, body: str) -> str:
        """Escape single quotes for a single-quoted shell argument.

        Args:
            body: Body content

        Returns:
            Escaped string
        """
        return body.replace("'", "'\\''")

    def to_file(self, entries: list[HTTPEntry], path: str) -> None:
        """Write cURL commands to file (one per line).

        Args:
            entries: List of HTTPEntry objects
            path: Output file path
        """
        lines = [self.export(entry) + "\n" for entry in entries]
        with open(path, "w") as handle:
            handle.writelines(lines)
|
||||
66
http_log_explorer/exporters/json_exporter.py
Normal file
66
http_log_explorer/exporters/json_exporter.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""JSON exporter for HTTP entries."""
|
||||
|
||||
import json
|
||||
|
||||
from http_log_explorer.models import HTTPEntry
|
||||
|
||||
|
||||
class JSONExporter:
    """Export HTTP entries to JSON format."""

    def export(self, entries: list[HTTPEntry], indent: int = 2) -> str:
        """Serialize entries as indented JSON.

        Args:
            entries: List of HTTPEntry objects
            indent: JSON indent level

        Returns:
            JSON string representation
        """
        records = [e.to_dict() for e in entries]
        # default=str stringifies non-JSON values (e.g. datetimes).
        return json.dumps(records, indent=indent, default=str)

    def export_compact(self, entries: list[HTTPEntry]) -> str:
        """Serialize entries as minified JSON (no whitespace).

        Args:
            entries: List of HTTPEntry objects

        Returns:
            Compact JSON string
        """
        records = [e.to_dict() for e in entries]
        return json.dumps(records, separators=(",", ":"), default=str)

    def save(self, entries: list[HTTPEntry], path: str, indent: int = 2) -> None:
        """Write :meth:`export` output to *path*.

        Args:
            entries: List of HTTPEntry objects
            path: Output file path
            indent: JSON indent level
        """
        with open(path, "w") as handle:
            handle.write(self.export(entries, indent))

    def export_summary(self, entries: list[HTTPEntry]) -> str:
        """Serialize only the headline fields of each entry.

        Args:
            entries: List of HTTPEntry objects

        Returns:
            JSON string with summary info
        """
        rows = [
            {
                "id": entry.id,
                "method": entry.request.method,
                "url": entry.request.url,
                "status": entry.response.status,
                "content_type": entry.content_type,
                "duration_ms": entry.duration_ms,
            }
            for entry in entries
        ]
        return json.dumps(rows, indent=2)
|
||||
5
http_log_explorer/generators/__init__.py
Normal file
5
http_log_explorer/generators/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Generators for OpenAPI specs and exports."""
|
||||
|
||||
from http_log_explorer.generators.openapi_generator import OpenAPIGenerator
|
||||
|
||||
__all__ = ["OpenAPIGenerator"]
|
||||
431
http_log_explorer/generators/openapi_generator.py
Normal file
431
http_log_explorer/generators/openapi_generator.py
Normal file
@@ -0,0 +1,431 @@
|
||||
"""OpenAPI 3.0 spec generator from HTTP traffic."""
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
from http_log_explorer.models import HTTPEntry
|
||||
|
||||
# Spec validation is optional: openapi-spec-validator may not be installed,
# in which case generated specs are returned unvalidated.
try:
    from openapi_spec_validator import validate
    VALIDATION_AVAILABLE = True
except ImportError:
    VALIDATION_AVAILABLE = False
|
||||
|
||||
|
||||
class OpenAPIGenerator:
|
||||
"""Generate OpenAPI 3.0 specification from observed traffic."""
|
||||
|
||||
    def __init__(self, entries: list[HTTPEntry]) -> None:
        """Initialize with HTTP entries.

        Args:
            entries: List of HTTPEntry objects
        """
        self.entries = entries
        # Spec under construction; (re)built by each generate() call.
        self.spec: dict[str, Any] = {}
        # Named component schemas keyed by generated schema name.
        self._schemas: dict[str, dict[str, Any]] = {}
        # OpenAPI path items keyed by normalized path template.
        self._path_items: dict[str, dict[str, Any]] = defaultdict(dict)
|
||||
|
||||
def generate(
|
||||
self,
|
||||
title: str = "API",
|
||||
version: str = "1.0.0",
|
||||
description: str = "Generated from traffic analysis",
|
||||
validate_spec: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
"""Generate OpenAPI spec from traffic.
|
||||
|
||||
Args:
|
||||
title: API title
|
||||
version: API version
|
||||
description: API description
|
||||
validate_spec: Whether to validate the generated spec
|
||||
|
||||
Returns:
|
||||
OpenAPI spec dictionary
|
||||
|
||||
Raises:
|
||||
ValueError: If validation fails and validate_spec is True
|
||||
"""
|
||||
self.spec = {
|
||||
"openapi": "3.0.3",
|
||||
"info": {
|
||||
"title": title,
|
||||
"version": version,
|
||||
"description": description,
|
||||
},
|
||||
"paths": {},
|
||||
"components": {
|
||||
"schemas": {},
|
||||
},
|
||||
}
|
||||
|
||||
self._schemas = {}
|
||||
self._path_items = defaultdict(dict)
|
||||
|
||||
self._infer_paths()
|
||||
self._infer_schemas()
|
||||
|
||||
self.spec["paths"] = dict(self._path_items)
|
||||
self.spec["components"]["schemas"] = self._schemas
|
||||
|
||||
if validate_spec and VALIDATION_AVAILABLE:
|
||||
try:
|
||||
validate(self.spec)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Generated spec is invalid: {e}") from e
|
||||
|
||||
return self.spec
|
||||
|
||||
def _infer_paths(self) -> None:
|
||||
"""Infer API paths from traffic."""
|
||||
for entry in self.entries:
|
||||
path = self._extract_path(entry.endpoint)
|
||||
method = entry.request.method.lower()
|
||||
|
||||
if path not in self._path_items:
|
||||
self._path_items[path] = {}
|
||||
|
||||
path_params = self._extract_path_params(path)
|
||||
if path_params and "parameters" not in self._path_items[path]:
|
||||
self._path_items[path]["parameters"] = path_params
|
||||
|
||||
operation: dict[str, Any] = {
|
||||
"responses": self._generate_responses(entry),
|
||||
}
|
||||
|
||||
if entry.request.headers:
|
||||
operation["parameters"] = self._generate_parameters(entry)
|
||||
|
||||
if entry.request.body:
|
||||
request_body = self._generate_request_body(entry)
|
||||
if request_body:
|
||||
operation["requestBody"] = request_body
|
||||
|
||||
self._path_items[path][method] = operation
|
||||
|
||||
def _extract_path_params(self, path: str) -> list[dict[str, Any]]:
|
||||
"""Extract path parameters from a path string.
|
||||
|
||||
Args:
|
||||
path: The path string like '/users/{id}'
|
||||
|
||||
Returns:
|
||||
List of parameter definitions
|
||||
"""
|
||||
params = []
|
||||
import re
|
||||
param_pattern = re.compile(r"\{([^}]+)\}")
|
||||
for match in param_pattern.finditer(path):
|
||||
param_name = match.group(1)
|
||||
params.append({
|
||||
"name": param_name,
|
||||
"in": "path",
|
||||
"required": True,
|
||||
"schema": {"type": "string"},
|
||||
})
|
||||
return params
|
||||
|
||||
def _extract_path(self, endpoint: str) -> str:
|
||||
"""Extract and normalize path from endpoint."""
|
||||
path = endpoint
|
||||
|
||||
parts = path.split("/")
|
||||
normalized_parts = []
|
||||
|
||||
for part in parts:
|
||||
if not part:
|
||||
normalized_parts.append("")
|
||||
elif part.isdigit():
|
||||
normalized_parts.append("{" + self._get_param_name(path, part) + "}")
|
||||
elif self._is_uuid(part):
|
||||
normalized_parts.append("{uuid}")
|
||||
elif self._is_hash(part):
|
||||
normalized_parts.append("{id}")
|
||||
else:
|
||||
normalized_parts.append(part)
|
||||
|
||||
return "/".join(normalized_parts) or "/"
|
||||
|
||||
def _get_param_name(self, path: str, value: str) -> str:
|
||||
"""Generate parameter name based on path context."""
|
||||
path_lower = path.lower()
|
||||
if "user" in path_lower or "id" in path_lower:
|
||||
return "id"
|
||||
if "page" in path_lower or "offset" in path_lower:
|
||||
return "page"
|
||||
if "limit" in path_lower or "size" in path_lower:
|
||||
return "limit"
|
||||
return "id"
|
||||
|
||||
def _is_uuid(self, s: str) -> bool:
|
||||
"""Check if string looks like a UUID."""
|
||||
uuid_pattern = re.compile(
|
||||
r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
return bool(uuid_pattern.match(s))
|
||||
|
||||
def _is_hash(self, s: str) -> bool:
|
||||
"""Check if string looks like a hash."""
|
||||
hash_pattern = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE)
|
||||
return bool(hash_pattern.match(s))
|
||||
|
||||
def _generate_responses(self, entry: HTTPEntry) -> dict[str, Any]:
|
||||
"""Generate response definitions."""
|
||||
content = {}
|
||||
ct = entry.content_type
|
||||
|
||||
if ct and "json" in ct.lower():
|
||||
schema = self._extract_schema_from_body(entry.response.body, "response")
|
||||
content = {
|
||||
"application/json": {
|
||||
"schema": schema,
|
||||
}
|
||||
}
|
||||
elif entry.response.body:
|
||||
content = {
|
||||
"text/plain": {
|
||||
"schema": {
|
||||
"type": "string",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
status = entry.response.status
|
||||
status_text = entry.response.status_text or "OK"
|
||||
|
||||
return {
|
||||
str(status): {
|
||||
"description": status_text,
|
||||
"content": content,
|
||||
}
|
||||
}
|
||||
|
||||
def _generate_parameters(self, entry: HTTPEntry) -> list[dict[str, Any]]:
|
||||
"""Generate parameter definitions from query string."""
|
||||
params = []
|
||||
|
||||
for name, value in entry.request.query_params.items():
|
||||
param: dict[str, Any] = {
|
||||
"name": name,
|
||||
"in": "query",
|
||||
"schema": {
|
||||
"type": self._infer_type(value),
|
||||
},
|
||||
}
|
||||
if value:
|
||||
param["example"] = value
|
||||
params.append(param)
|
||||
|
||||
return params
|
||||
|
||||
def _generate_request_body(self, entry: HTTPEntry) -> dict[str, Any] | None:
|
||||
"""Generate request body definition."""
|
||||
body = entry.request.body
|
||||
if not body:
|
||||
return None
|
||||
|
||||
content: dict[str, Any] = {}
|
||||
|
||||
if self._is_json(body):
|
||||
schema = self._extract_schema_from_body(body, "request")
|
||||
content = {
|
||||
"application/json": {
|
||||
"schema": schema,
|
||||
}
|
||||
}
|
||||
else:
|
||||
content = {
|
||||
"text/plain": {
|
||||
"schema": {
|
||||
"type": "string",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
"content": content,
|
||||
"required": True,
|
||||
}
|
||||
|
||||
def _extract_schema_from_body(
    self, body: str | None, prefix: str = "schema"
) -> dict[str, Any]:
    """Extract a JSON schema from body content.

    Object bodies are converted to a named schema, registered in
    ``self._schemas``, and returned as a ``$ref``; array bodies are
    described via their first element; anything else falls back to a
    plain string schema.

    Args:
        body: Body content
        prefix: Prefix for schema name

    Returns:
        JSON Schema dictionary
    """
    if not body:
        return {"type": "string"}

    if not self._is_json(body):
        return {"type": "string"}

    try:
        data = json.loads(body)
    except (json.JSONDecodeError, TypeError):
        return {"type": "string"}

    if isinstance(data, dict):
        schema_name = f"{prefix}Schema"
        schema = self._dict_to_schema(data, schema_name)
        self._schemas[schema_name] = schema
        return {"$ref": f"#/components/schemas/{schema_name}"}
    elif isinstance(data, list) and data:
        first = data[0]
        if isinstance(first, dict):
            return {
                "type": "array",
                "items": self._dict_to_schema(first, f"{prefix}Item"),
            }
        # Bug fix: lists of primitives used to be passed to _dict_to_schema,
        # which calls .items() and raised AttributeError. Describe the item
        # type directly instead.
        return {
            "type": "array",
            "items": self._value_to_schema(first, f"{prefix}Item"),
        }

    return {"type": "string"}
|
||||
|
||||
def _dict_to_schema(
    self, data: dict[str, Any], name: str
) -> dict[str, Any]:
    """Convert a dictionary into an OpenAPI object schema.

    Args:
        data: Dictionary to convert
        name: Schema name

    Returns:
        JSON Schema dictionary of type "object"
    """
    # Every observed key is treated as required: with a single sample
    # there is no evidence any field is optional.
    return {
        "type": "object",
        "properties": {k: self._value_to_schema(v, k) for k, v in data.items()},
        "required": list(data.keys()),
    }
|
||||
|
||||
def _value_to_schema(self, value: Any, key: str) -> dict[str, Any]:
    """Convert a Python value to a JSON Schema fragment.

    Args:
        value: Value to convert
        key: Key name (for nested object naming)

    Returns:
        JSON Schema for the value
    """
    # NOTE: bool must be tested before int — bool is a subclass of int.
    if value is None:
        # No type information available; assume a nullable string.
        return {"type": "string", "nullable": True}
    elif isinstance(value, bool):
        return {"type": "boolean"}
    elif isinstance(value, int):
        return {"type": "integer"}
    elif isinstance(value, float):
        return {"type": "number"}
    elif isinstance(value, str):
        if self._is_json(value):
            parsed = json.loads(value)
            # Bug fix: only JSON objects can be expanded into an object
            # schema. JSON arrays/scalars embedded in strings used to be
            # passed to _dict_to_schema and crashed on .items().
            if isinstance(parsed, dict):
                return self._dict_to_schema(parsed, f"{key}Schema")
        return {"type": "string"}
    elif isinstance(value, dict):
        schema_name = f"{key}Schema"
        nested = self._dict_to_schema(value, schema_name)
        self._schemas[schema_name] = nested
        return {"$ref": f"#/components/schemas/{schema_name}"}
    elif isinstance(value, list):
        if value:
            # Infer the item type from the first element only.
            item_schema = self._value_to_schema(value[0], f"{key}Item")
            return {"type": "array", "items": item_schema}
        # Empty list: item type unknown; default to string items.
        return {"type": "array", "items": {"type": "string"}}

    return {"type": "string"}
|
||||
|
||||
def _infer_type(self, value: str) -> str:
|
||||
"""Infer JSON type from string value.
|
||||
|
||||
Args:
|
||||
value: String value
|
||||
|
||||
Returns:
|
||||
JSON type string
|
||||
"""
|
||||
if not value:
|
||||
return "string"
|
||||
try:
|
||||
int(value)
|
||||
return "integer"
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
float(value)
|
||||
return "number"
|
||||
except ValueError:
|
||||
pass
|
||||
if value.lower() in ("true", "false"):
|
||||
return "boolean"
|
||||
return "string"
|
||||
|
||||
def _is_json(self, s: str) -> bool:
|
||||
"""Check if string is JSON.
|
||||
|
||||
Args:
|
||||
s: String to check
|
||||
|
||||
Returns:
|
||||
True if string is JSON
|
||||
"""
|
||||
if not s or not s.strip():
|
||||
return False
|
||||
if s.strip().startswith(("{", "[")):
|
||||
try:
|
||||
json.loads(s)
|
||||
return True
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
return False
|
||||
|
||||
def _infer_schemas(self) -> None:
    """Infer additional schemas from request bodies across all entries."""
    schema_name = "requestBodySchema"
    for entry in self.entries:
        raw = entry.request.body
        if not raw or not self._is_json(raw):
            continue
        try:
            parsed = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            continue
        # Only the first JSON object body contributes this shared schema;
        # later entries never overwrite it.
        if isinstance(parsed, dict) and schema_name not in self._schemas:
            self._schemas[schema_name] = self._dict_to_schema(parsed, schema_name)
|
||||
|
||||
def to_json(self, spec: dict[str, Any] | None = None, indent: int = 2) -> str:
|
||||
"""Convert spec to JSON string.
|
||||
|
||||
Args:
|
||||
spec: Spec to convert, or use self.spec if None
|
||||
indent: JSON indent level
|
||||
|
||||
Returns:
|
||||
JSON string
|
||||
"""
|
||||
if spec is None:
|
||||
spec = self.spec
|
||||
return json.dumps(spec, indent=indent)
|
||||
|
||||
def save_spec(self, path: str, spec: dict[str, Any] | None = None) -> None:
    """Save spec to file.

    Args:
        path: File path to save to
        spec: Spec to save, or use self.spec if None
    """
    # Explicit encoding: the platform default may not be UTF-8, and specs
    # can contain non-ASCII characters (descriptions, example values).
    with open(path, "w", encoding="utf-8") as f:
        f.write(self.to_json(spec))
|
||||
17
http_log_explorer/models/__init__.py
Normal file
17
http_log_explorer/models/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""Data models."""
|
||||
|
||||
from http_log_explorer.models.http_entry import (
|
||||
DiffResult,
|
||||
FilterCriteria,
|
||||
HTTPEntry,
|
||||
Request,
|
||||
Response,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"DiffResult",
|
||||
"FilterCriteria",
|
||||
"HTTPEntry",
|
||||
"Request",
|
||||
"Response",
|
||||
]
|
||||
142
http_log_explorer/models/http_entry.py
Normal file
142
http_log_explorer/models/http_entry.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""Data models for HTTP entries."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
class Request:
|
||||
"""Represents an HTTP request."""
|
||||
|
||||
method: str
|
||||
url: str
|
||||
http_version: str = "HTTP/1.1"
|
||||
headers: dict[str, str] = field(default_factory=dict)
|
||||
body: str | None = None
|
||||
query_params: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if isinstance(self.headers, list):
|
||||
self.headers = {h.get("name", ""): h.get("value", "") for h in self.headers}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Response:
|
||||
"""Represents an HTTP response."""
|
||||
|
||||
status: int
|
||||
status_text: str
|
||||
http_version: str = "HTTP/1.1"
|
||||
headers: dict[str, str] = field(default_factory=dict)
|
||||
body: str | None = None
|
||||
content_type: str | None = None
|
||||
response_time_ms: float | None = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if isinstance(self.headers, list):
|
||||
self.headers = {h.get("name", ""): h.get("value", "") for h in self.headers}
|
||||
|
||||
|
||||
@dataclass
class HTTPEntry:
    """Represents a complete HTTP request/response pair.

    This is the unified in-memory record produced by every parser
    (HAR, curl, DevTools); filtering, diffing, analytics, and export
    all operate on this type.
    """

    id: str  # parser-assigned identifier, e.g. "har-0", "curl-3"
    request: Request
    response: Response
    timestamp: datetime | None = None  # when the request started, if known
    server_ip: str | None = None
    connection: str | None = None
    raw_size: int | None = None
    source_file: str | None = None  # file the entry was parsed from

    @property
    def duration_ms(self) -> float | None:
        """Get response time in milliseconds (alias for response.response_time_ms)."""
        return self.response.response_time_ms

    @property
    def content_type(self) -> str | None:
        """Get content type, preferring the parsed field over raw headers."""
        if self.response.content_type:
            return self.response.content_type
        # Fall back to a case-insensitive scan of the response headers.
        for key, value in self.response.headers.items():
            if key.lower() == "content-type":
                return value
        return None

    @property
    def endpoint(self) -> str:
        """Extract endpoint path from URL (falls back to "/" when empty)."""
        from urllib.parse import urlparse

        parsed = urlparse(self.request.url)
        return parsed.path or "/"

    @property
    def host(self) -> str:
        """Extract host (netloc, possibly including port) from URL."""
        from urllib.parse import urlparse

        parsed = urlparse(self.request.url)
        return parsed.netloc

    def to_dict(self) -> dict[str, Any]:
        """Convert to a JSON-serializable dictionary representation."""
        # NOTE(review): source_file is not included in the serialized form —
        # confirm whether that omission is intentional.
        return {
            "id": self.id,
            "request": {
                "method": self.request.method,
                "url": self.request.url,
                "http_version": self.request.http_version,
                "headers": self.request.headers,
                "body": self.request.body,
                "query_params": self.request.query_params,
            },
            "response": {
                "status": self.response.status,
                "status_text": self.response.status_text,
                "http_version": self.response.http_version,
                "headers": self.response.headers,
                "body": self.response.body,
                "content_type": self.response.content_type,
                "response_time_ms": self.response.response_time_ms,
            },
            "timestamp": self.timestamp.isoformat() if self.timestamp else None,
            "server_ip": self.server_ip,
            "connection": self.connection,
            "raw_size": self.raw_size,
        }
|
||||
|
||||
|
||||
@dataclass
class FilterCriteria:
    """Criteria for filtering HTTP entries.

    Every field is optional; a field left as None imposes no constraint.
    Matching semantics are defined by the filter code that consumes this
    object.
    """

    methods: list[str] | None = None  # HTTP methods to keep, e.g. ["GET", "POST"]
    status_codes: list[int] | None = None  # response status codes to keep
    url_pattern: str | None = None  # pattern matched against the request URL
    content_types: list[str] | None = None  # response content types to keep
    start_time: datetime | None = None  # lower bound on entry timestamp
    end_time: datetime | None = None  # upper bound on entry timestamp
    min_response_time_ms: float | None = None  # lower bound on response time
    max_response_time_ms: float | None = None  # upper bound on response time
    request_body_contains: str | None = None  # text expected in the request body
    response_body_contains: str | None = None  # text expected in the response body
|
||||
|
||||
|
||||
@dataclass
class DiffResult:
    """Represents the result of comparing two HTTP entries."""

    entry1_id: str  # id of the first (left-hand) entry
    entry2_id: str  # id of the second (right-hand) entry
    # Line-level diff output for each compared section.
    request_headers_diff: list[str] = field(default_factory=list)
    request_body_diff: list[str] = field(default_factory=list)
    response_headers_diff: list[str] = field(default_factory=list)
    response_body_diff: list[str] = field(default_factory=list)
    status_changed: bool = False  # True when the response status codes differ
    status1: int = 0  # status code of the first entry
    status2: int = 0  # status code of the second entry
    url_changed: bool = False  # True when the request URLs differ
||||
76
http_log_explorer/parsers/__init__.py
Normal file
76
http_log_explorer/parsers/__init__.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""Parser interface for HTTP log formats."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from http_log_explorer.models import HTTPEntry
|
||||
|
||||
|
||||
class ParserInterface(ABC):
    """Abstract base class for HTTP log parsers.

    Concrete parsers (HAR, curl, DevTools) implement content sniffing via
    can_parse() and conversion into HTTPEntry objects via parse().
    """

    @abstractmethod
    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse content and return list of HTTP entries.

        Args:
            content: The content to parse (string or bytes)
            source_file: Optional source file name for reference

        Returns:
            List of HTTPEntry objects

        Raises:
            ValueError: If content cannot be parsed
        """
        pass

    @abstractmethod
    def can_parse(self, content: str | bytes) -> bool:
        """Check if this parser can handle the given content.

        Implementations should be cheap and side-effect free: format
        detection probes each parser in turn with this method.

        Args:
            content: The content to check

        Returns:
            True if this parser can handle the content
        """
        pass

    @staticmethod
    def get_parser_name() -> str:
        """Return the name of this parser."""
        # Base-class placeholder; concrete parsers override this.
        return "unknown"
|
||||
|
||||
|
||||
def get_parser(content: str | bytes) -> ParserInterface:
    """Get the appropriate parser for the given content.

    Args:
        content: The content to parse

    Returns:
        An appropriate parser instance

    Raises:
        ValueError: If no suitable parser is found
    """
    # Imported lazily to avoid circular imports with the parser modules.
    from http_log_explorer.parsers.curl_parser import CurlParser
    from http_log_explorer.parsers.devtools_parser import DevToolsParser
    from http_log_explorer.parsers.har_parser import HARParser

    candidates: list[ParserInterface] = [
        HARParser(),
        CurlParser(),
        DevToolsParser(),
    ]

    matching = next((p for p in candidates if p.can_parse(content)), None)
    if matching is None:
        raise ValueError(
            "Unsupported format. Supported formats are: HAR files, curl -v output, and Chrome DevTools network exports."
        )
    return matching
|
||||
|
||||
|
||||
__all__ = ["ParserInterface", "get_parser"]
|
||||
140
http_log_explorer/parsers/curl_parser.py
Normal file
140
http_log_explorer/parsers/curl_parser.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""Parser for curl -v output."""
|
||||
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from http_log_explorer.models import HTTPEntry, Request, Response
|
||||
from http_log_explorer.parsers import ParserInterface
|
||||
|
||||
|
||||
class CurlParser(ParserInterface):
    """Parser for curl -v verbose output.

    Recognizes the "> " (request) and "< " (response) prefixed lines that
    curl emits in verbose mode and pairs them into HTTPEntry objects.
    """

    # "> GET /path HTTP/1.1" request lines.
    REQUEST_LINE_RE = re.compile(r"^> (\w+) (\S+) (HTTP/[\d.]+)$", re.MULTILINE)
    # "< HTTP/1.1 200 OK" response status lines.
    RESPONSE_LINE_RE = re.compile(r"^< (HTTP/[\d.]+) (\d+) (.+)$", re.MULTILINE)
    # Header lines in either direction. Bug fix: the old pattern "(> |<)"
    # consumed the separator space inside the group, so request header lines
    # ("> Host: x") could never match and request headers were always empty.
    HEADER_RE = re.compile(r"^(>|<) ([^:]+): (.+)$")
    # NOTE(review): TIMING_RE is never used by this parser — confirm whether
    # timing extraction was planned or the constant can be removed.
    TIMING_RE = re.compile(r"^\* time_conditional check:.*$")

    @staticmethod
    def get_parser_name() -> str:
        """Return the name of this parser."""
        return "curl"

    def can_parse(self, content: str | bytes) -> bool:
        """Check if content appears to be curl -v output.

        Requires at least one request line and one response status line.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="ignore")
        has_request = bool(self.REQUEST_LINE_RE.search(content))
        has_response = bool(self.RESPONSE_LINE_RE.search(content))
        return has_request and has_response

    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse curl -v output into HTTPEntry objects.

        Malformed blocks are skipped rather than aborting the whole parse.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")

        entries: list[HTTPEntry] = []
        blocks = self._split_blocks(content)

        for idx, block in enumerate(blocks):
            try:
                entry = self._parse_block(block, idx, source_file)
                if entry:
                    entries.append(entry)
            except Exception:
                # Best-effort: one bad block must not lose the rest.
                continue

        return entries

    def _split_blocks(self, content: str) -> list[dict[str, Any]]:
        """Split curl output into request/response blocks.

        A new block starts at each "> METHOD URL HTTP/x" line; the next
        "< HTTP/x STATUS TEXT" line attaches the response to it.
        """
        blocks: list[dict[str, Any]] = []
        current_block: dict[str, Any] = {}

        lines = content.split("\n")
        for line in lines:
            request_match = self.REQUEST_LINE_RE.match(line)
            if request_match:
                # Starting a new exchange: flush the previous one, if any.
                if current_block.get("request"):
                    blocks.append(current_block)
                current_block = {
                    "request": {
                        "method": request_match.group(1),
                        "url": request_match.group(2),
                        "http_version": request_match.group(3),
                    },
                    "headers": [],
                    "body": None,
                    "response": None,
                }
                continue

            response_match = self.RESPONSE_LINE_RE.match(line)
            if response_match:
                if current_block.get("request"):
                    current_block["response"] = {
                        "http_version": response_match.group(1),
                        "status": int(response_match.group(2)),
                        "status_text": response_match.group(3),
                    }
                continue

            header_match = self.HEADER_RE.match(line)
            if header_match:
                direction = header_match.group(1)
                name = header_match.group(2)
                value = header_match.group(3)
                # Only request (">") headers are kept.
                # NOTE(review): response ("<") headers are matched but
                # discarded; storing them would let _parse_block populate
                # Response.headers — confirm intent.
                if direction == ">" and "headers" in current_block:
                    current_block["headers"].append((name, value))
                continue

            # Any other non-blank line after the response status is treated
            # as response body.
            # NOTE(review): curl "* ..." informational lines are swept into
            # the body here — confirm whether they should be filtered out.
            if current_block and current_block.get("response") and line.strip():
                if current_block["response"].get("body") is None:
                    current_block["response"]["body"] = ""
                current_block["response"]["body"] += line + "\n"

        if current_block.get("request"):
            blocks.append(current_block)

        return blocks

    def _parse_block(
        self, block: dict[str, Any], idx: int, source_file: str | None
    ) -> HTTPEntry | None:
        """Convert one split block into an HTTPEntry; None if incomplete."""
        if not block.get("request") or not block.get("response"):
            return None

        req_data = block["request"]
        resp_data = block["response"]

        headers = dict(block.get("headers", []))

        request = Request(
            method=req_data.get("method", "GET"),
            url=req_data.get("url", "/"),
            http_version=req_data.get("http_version", "HTTP/1.1"),
            headers=headers,
            body=block.get("body"),
        )

        response_body = resp_data.get("body", "")
        if response_body:
            response_body = response_body.strip()

        response = Response(
            status=resp_data.get("status", 0),
            status_text=resp_data.get("status_text", ""),
            http_version=resp_data.get("http_version", "HTTP/1.1"),
            headers={},
            body=response_body if response_body else None,
            # Best-effort: no parsed response headers exist here, so fall
            # back to the request's Content-Type.
            content_type=headers.get("Content-Type") or headers.get("content-type"),
        )

        return HTTPEntry(
            id=f"curl-{idx}",
            request=request,
            response=response,
            timestamp=datetime.now(),  # curl output carries no timestamps
            source_file=source_file,
        )
|
||||
133
http_log_explorer/parsers/devtools_parser.py
Normal file
133
http_log_explorer/parsers/devtools_parser.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""Parser for Chrome DevTools network export format."""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from http_log_explorer.models import HTTPEntry, Request, Response
|
||||
from http_log_explorer.parsers import ParserInterface
|
||||
|
||||
|
||||
class DevToolsParser(ParserInterface):
    """Parser for Chrome DevTools network export JSON.

    Accepts either a bare JSON list of request/response records, or a
    HAR-like {"log": {"entries": [...]}} wrapper that lacks a "creator"
    field (a creator field marks a genuine HAR file, handled by HARParser).
    """

    @staticmethod
    def get_parser_name() -> str:
        """Return the name of this parser."""
        return "DevTools"

    def can_parse(self, content: str | bytes) -> bool:
        """Check if content appears to be DevTools network export."""
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="ignore")
        try:
            data = json.loads(content)
            if isinstance(data, list):
                # Bug fix: the old all() ran over a filtered sample and was
                # vacuously True for empty lists or lists with no dict items;
                # require at least one well-formed sample entry.
                sample = [item for item in data[:3] if isinstance(item, dict)]
                return bool(sample) and all(
                    "request" in item and "response" in item for item in sample
                )
            if isinstance(data, dict):
                has_log = "log" in data
                has_entries = "entries" in data.get("log", {})
                # A "creator" field indicates a real HAR file, which
                # HARParser claims instead.
                has_creator = "creator" in data.get("log", {})
                return has_log and has_entries and not has_creator
        except json.JSONDecodeError:
            return False
        return False

    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse DevTools network export into HTTPEntry objects.

        Raises:
            ValueError: If the content is not valid JSON or not a
                recognized DevTools shape.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")

        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON format: {e}") from e

        if isinstance(data, dict) and "log" in data:
            entries_data = data.get("log", {}).get("entries", [])
        elif isinstance(data, list):
            entries_data = data
        else:
            raise ValueError("Unrecognized DevTools format")

        entries: list[HTTPEntry] = []
        for idx, entry_data in enumerate(entries_data):
            try:
                entry = self._convert_entry(entry_data, idx, source_file)
                if entry:
                    entries.append(entry)
            except Exception:
                # Best-effort: skip malformed entries.
                continue

        return entries

    def _convert_entry(
        self, entry_data: dict[str, Any], idx: int, source_file: str | None
    ) -> HTTPEntry | None:
        """Convert a DevTools entry to our HTTPEntry model; None if incomplete."""
        request_data = entry_data.get("request", {})
        response_data = entry_data.get("response", {})

        if not request_data or not response_data:
            return None

        request = Request(
            method=request_data.get("method", "GET"),
            url=request_data.get("url", ""),
            http_version=request_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(request_data.get("headers", {})),
            body=request_data.get("postData", {}).get("text") if request_data.get("postData") else None,
            query_params=self._parse_query_params(request_data.get("queryString", [])),
        )

        response = Response(
            status=response_data.get("status", 0),
            status_text=response_data.get("statusText", ""),
            http_version=response_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(response_data.get("headers", {})),
            body=response_data.get("content", {}).get("text") if isinstance(response_data.get("content"), dict) else None,
            content_type=response_data.get("content", {}).get("mimeType") if isinstance(response_data.get("content"), dict) else None,
            response_time_ms=self._parse_time(entry_data),
        )

        timestamp = self._parse_timestamp(entry_data)

        return HTTPEntry(
            id=f"devtools-{idx}",
            request=request,
            response=response,
            timestamp=timestamp,
            server_ip=entry_data.get("serverIPAddress"),
            connection=entry_data.get("connection"),
            source_file=source_file,
        )

    def _parse_headers(self, headers: dict[str, Any] | list) -> dict[str, str]:
        """Parse headers (dict or HAR-style name/value list) to a dictionary."""
        if isinstance(headers, dict):
            return dict(headers)
        if isinstance(headers, list):
            return {h.get("name", ""): h.get("value", "") for h in headers}
        return {}

    def _parse_query_params(self, query_string: list[dict[str, Any]]) -> dict[str, str]:
        """Parse query string list to dictionary."""
        if isinstance(query_string, list):
            return {p.get("name", ""): p.get("value", "") for p in query_string}
        return {}

    def _parse_time(self, entry_data: dict[str, Any]) -> float | None:
        """Parse total entry time (milliseconds) from a DevTools entry."""
        if "time" in entry_data:
            return float(entry_data["time"])
        return None

    def _parse_timestamp(self, entry_data: dict[str, Any]) -> datetime | None:
        """Parse the startedDateTime timestamp from a DevTools entry."""
        if "startedDateTime" in entry_data:
            try:
                # fromisoformat (pre-3.11) cannot parse a trailing "Z".
                return datetime.fromisoformat(entry_data["startedDateTime"].replace("Z", "+00:00"))
            except (ValueError, AttributeError):
                pass
        return None
|
||||
47
http_log_explorer/parsers/factory.py
Normal file
47
http_log_explorer/parsers/factory.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Parser factory for creating appropriate parsers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from http_log_explorer.parsers.curl_parser import CurlParser
|
||||
from http_log_explorer.parsers.devtools_parser import DevToolsParser
|
||||
from http_log_explorer.parsers.har_parser import HARParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from http_log_explorer.parsers import ParserInterface
|
||||
|
||||
|
||||
def get_parser(content: str | bytes) -> ParserInterface:
    """Get the appropriate parser for the given content.

    Args:
        content: The content to parse

    Returns:
        An appropriate parser instance

    Raises:
        ValueError: If no suitable parser is found
    """
    # Probe each known parser in order; first match wins.
    for candidate in (HARParser(), CurlParser(), DevToolsParser()):
        if candidate.can_parse(content):
            return candidate

    raise ValueError(
        "Unsupported format. Supported formats are: HAR files, curl -v output, and Chrome DevTools network exports."
    )
|
||||
|
||||
|
||||
def get_all_parsers() -> list[ParserInterface]:
    """Get all available parser instances (one per supported format)."""
    parser_classes = (HARParser, CurlParser, DevToolsParser)
    return [cls() for cls in parser_classes]
|
||||
|
||||
|
||||
__all__ = ["get_parser", "get_all_parsers"]
|
||||
146
http_log_explorer/parsers/har_parser.py
Normal file
146
http_log_explorer/parsers/har_parser.py
Normal file
@@ -0,0 +1,146 @@
|
||||
"""HAR file parser using haralyzer."""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from haralyzer import HarParser
|
||||
|
||||
from http_log_explorer.models import HTTPEntry, Request, Response
|
||||
from http_log_explorer.parsers import ParserInterface
|
||||
|
||||
|
||||
class HARParser(ParserInterface):
    """Parser for HAR (HTTP Archive) files.

    Validates the content with haralyzer and then converts each entry
    dict from har_data["entries"] into an HTTPEntry.
    """

    @staticmethod
    def get_parser_name() -> str:
        """Return the name of this parser."""
        return "HAR"

    def can_parse(self, content: str | bytes) -> bool:
        """Check if content appears to be a HAR file.

        A genuine HAR file has log.entries AND log.creator; exports that
        lack "creator" are claimed by DevToolsParser instead.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="ignore")
        try:
            data = json.loads(content)
            has_log = "log" in data
            has_entries = "entries" in data.get("log", {})
            has_creator = "creator" in data.get("log", {})
            return has_log and has_entries and has_creator
        except (json.JSONDecodeError, AttributeError):
            # AttributeError covers top-level JSON arrays/scalars.
            return False

    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse HAR content into HTTPEntry objects.

        Raises:
            ValueError: If the content is not valid HAR.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")

        try:
            data = json.loads(content)
            # haralyzer validates the HAR structure for us.
            har_parser = HarParser(data)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid HAR format: {e}") from e
        except Exception as e:
            raise ValueError(f"Invalid HAR format: {e}") from e

        entries: list[HTTPEntry] = []
        har_entries = har_parser.har_data.get("entries", [])
        for idx, har_entry in enumerate(har_entries):
            try:
                entry = self._convert_har_entry(har_entry, idx, source_file)
                if entry:
                    entries.append(entry)
            except Exception:
                # Best-effort: skip malformed entries.
                continue

        return entries

    def _convert_har_entry(
        self, har_entry: Any, idx: int, source_file: str | None
    ) -> HTTPEntry | None:
        """Convert a HAR entry dict to our HTTPEntry model; None if incomplete."""
        request_data = har_entry.get("request")
        response_data = har_entry.get("response")

        if not request_data or not response_data:
            return None

        request = Request(
            method=request_data.get("method", "GET"),
            url=self._build_url(request_data),
            http_version=request_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(request_data.get("headers", [])),
            body=self._get_request_body(request_data),
            query_params=self._parse_query_params(request_data.get("queryString", [])),
        )

        response = Response(
            status=response_data.get("status", 0),
            status_text=response_data.get("statusText", ""),
            http_version=response_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(response_data.get("headers", [])),
            body=self._get_response_body(response_data),
            content_type=self._get_content_type(response_data.get("content", {})),
            response_time_ms=har_entry.get("time", None),
        )

        timestamp = self._parse_timestamp(har_entry)

        return HTTPEntry(
            id=f"har-{idx}",
            request=request,
            response=response,
            timestamp=timestamp,
            server_ip=har_entry.get("serverIPAddress", None),
            connection=har_entry.get("connection", None),
            source_file=source_file,
        )

    def _build_url(self, request_data: dict[str, Any]) -> str:
        """Build full URL from request data, reconstructing from the Host
        header when the entry has no url field."""
        url = request_data.get("url", "")
        if not url:
            host = ""
            for header in request_data.get("headers", []):
                if header.get("name", "").lower() == "host":
                    host = header.get("value", "")
                    break
            url = f"http://{host}/"
        return url

    def _parse_headers(self, headers: list[dict[str, Any]]) -> dict[str, str]:
        """Parse HAR name/value header list to a dictionary."""
        return {h.get("name", ""): h.get("value", "") for h in headers}

    def _parse_query_params(self, query_string: list[dict[str, Any]]) -> dict[str, str]:
        """Parse HAR queryString list to a dictionary."""
        return {p.get("name", ""): p.get("value", "") for p in query_string}

    def _get_request_body(self, request_data: dict[str, Any]) -> str | None:
        """Extract request body text from the HAR postData field."""
        post_data = request_data.get("postData", {})
        if post_data:
            if isinstance(post_data, dict):
                return post_data.get("text", None)
            return str(post_data)
        return None

    def _get_response_body(self, response_data: dict[str, Any]) -> str | None:
        """Extract response body text from the HAR content field."""
        content = response_data.get("content", {})
        if isinstance(content, dict):
            return content.get("text", None)
        return None

    def _get_content_type(self, content: dict[str, Any]) -> str | None:
        """Extract content type (mimeType) from the HAR content dict."""
        if isinstance(content, dict):
            return content.get("mimeType", None)
        return None

    def _parse_timestamp(self, har_entry: Any) -> datetime | None:
        """Parse the startedDateTime timestamp from a HAR entry.

        Bug fix: entries iterated in parse() are plain dicts from
        har_data["entries"], so the old getattr(har_entry,
        "started_datetime", None) lookup always returned None. Read the
        HAR "startedDateTime" key, keeping the attribute lookup as a
        fallback for haralyzer entry objects.
        """
        if isinstance(har_entry, dict):
            started = har_entry.get("startedDateTime")
        else:
            started = getattr(har_entry, "started_datetime", None)
        if isinstance(started, datetime):
            return started
        if isinstance(started, str):
            try:
                # fromisoformat (pre-3.11) cannot parse a trailing "Z".
                return datetime.fromisoformat(started.replace("Z", "+00:00"))
            except ValueError:
                return None
        return None
|
||||
Reference in New Issue
Block a user