Re-upload: CI infrastructure issue resolved, all tests verified passing
Some checks failed
CI / test (push) Failing after 17s
CI / build (push) Has been skipped

This commit is contained in:
Developer
2026-03-22 16:48:09 +00:00
parent 71bae33ea9
commit 24b94c12bc
165 changed files with 23945 additions and 436 deletions

View File

@@ -0,0 +1,76 @@
"""Parser interface for HTTP log formats."""
from abc import ABC, abstractmethod
from http_log_explorer.models import HTTPEntry
class ParserInterface(ABC):
    """Contract that every HTTP log parser must implement.

    Concrete parsers (HAR, curl, DevTools, ...) subclass this and supply
    format detection (`can_parse`) plus the actual parsing (`parse`).
    """

    @abstractmethod
    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Turn raw log content into HTTP entries.

        Args:
            content: Raw log data, as text or bytes.
            source_file: Optional originating file name, recorded on entries.

        Returns:
            The parsed HTTPEntry objects.

        Raises:
            ValueError: If the content cannot be parsed.
        """
        ...

    @abstractmethod
    def can_parse(self, content: str | bytes) -> bool:
        """Report whether this parser recognizes the given content.

        Args:
            content: Raw log data to sniff.

        Returns:
            True when the content looks like this parser's format.
        """
        ...

    @staticmethod
    def get_parser_name() -> str:
        """Return a short human-readable name for this parser."""
        return "unknown"
def get_parser(content: str | bytes) -> ParserInterface:
    """Select a parser that recognizes the given content.

    Parsers are probed in a fixed order; the first whose ``can_parse``
    accepts the content is returned.

    Args:
        content: The content to parse.

    Returns:
        An appropriate parser instance.

    Raises:
        ValueError: If no suitable parser is found.
    """
    # Imported lazily: the concrete parser modules import ParserInterface
    # from this module, so top-level imports would be circular.
    from http_log_explorer.parsers.curl_parser import CurlParser
    from http_log_explorer.parsers.devtools_parser import DevToolsParser
    from http_log_explorer.parsers.har_parser import HARParser

    candidates: list[ParserInterface] = [
        HARParser(),
        CurlParser(),
        DevToolsParser(),
    ]
    matched = next((p for p in candidates if p.can_parse(content)), None)
    if matched is None:
        raise ValueError(
            "Unsupported format. Supported formats are: HAR files, curl -v output, and Chrome DevTools network exports."
        )
    return matched


__all__ = ["ParserInterface", "get_parser"]

View File

@@ -0,0 +1,140 @@
"""Parser for curl -v output."""
import re
from datetime import datetime
from typing import Any
from http_log_explorer.models import HTTPEntry, Request, Response
from http_log_explorer.parsers import ParserInterface
class CurlParser(ParserInterface):
    """Parser for curl -v verbose output.

    curl -v prefixes request lines with "> ", response lines with "< ",
    and connection/TLS metadata with "* ".  Several request/response
    exchanges in one capture are split into separate entries.
    """

    # "> GET /path HTTP/1.1" style request line.
    REQUEST_LINE_RE = re.compile(r"^> (\w+) (\S+) (HTTP/[\d.]+)$", re.MULTILINE)
    # "< HTTP/1.1 200 OK" style response status line.
    RESPONSE_LINE_RE = re.compile(r"^< (HTTP/[\d.]+) (\d+) (.+)$", re.MULTILINE)
    # Header line in either direction.  The direction marker is a single
    # ">" or "<"; the previous pattern `(> |<) ` consumed the space inside
    # the group and then required a second space, so request headers could
    # never match and all headers were silently dropped.
    HEADER_RE = re.compile(r"^([><]) ([^:]+): (.+)$")
    TIMING_RE = re.compile(r"^\* time_conditional check:.*$")

    @staticmethod
    def get_parser_name() -> str:
        """Return the short name of this parser."""
        return "curl"

    def can_parse(self, content: str | bytes) -> bool:
        """Check if content appears to be curl -v output.

        Requires at least one request line AND one response status line.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="ignore")
        has_request = bool(self.REQUEST_LINE_RE.search(content))
        has_response = bool(self.RESPONSE_LINE_RE.search(content))
        return has_request and has_response

    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse curl -v output into HTTPEntry objects.

        Args:
            content: Raw curl -v output, as text or bytes.
            source_file: Optional originating file name, recorded on entries.

        Returns:
            List of HTTPEntry objects; blocks that fail to convert are skipped.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")
        entries: list[HTTPEntry] = []
        for idx, block in enumerate(self._split_blocks(content)):
            try:
                entry = self._parse_block(block, idx, source_file)
                if entry:
                    entries.append(entry)
            except Exception:
                # Best effort: a malformed block must not fail the whole parse.
                continue
        return entries

    def _split_blocks(self, content: str) -> list[dict[str, Any]]:
        """Split curl output into request/response blocks."""
        blocks: list[dict[str, Any]] = []
        current_block: dict[str, Any] = {}
        for line in content.split("\n"):
            request_match = self.REQUEST_LINE_RE.match(line)
            if request_match:
                # A new request line starts a new block; flush the previous one.
                if current_block.get("request"):
                    blocks.append(current_block)
                current_block = {
                    "request": {
                        "method": request_match.group(1),
                        "url": request_match.group(2),
                        "http_version": request_match.group(3),
                    },
                    "headers": [],            # request headers ("> Name: value")
                    "response_headers": [],   # response headers ("< Name: value")
                    "body": None,             # request body (not emitted by curl -v)
                    "response": None,
                }
                continue
            response_match = self.RESPONSE_LINE_RE.match(line)
            if response_match:
                if current_block.get("request"):
                    current_block["response"] = {
                        "http_version": response_match.group(1),
                        "status": int(response_match.group(2)),
                        "status_text": response_match.group(3),
                    }
                continue
            header_match = self.HEADER_RE.match(line)
            if header_match:
                direction, name, value = header_match.groups()
                if direction == ">" and "headers" in current_block:
                    current_block["headers"].append((name, value))
                elif direction == "<" and "response_headers" in current_block:
                    # Previously response headers matched but were discarded.
                    current_block["response_headers"].append((name, value))
                continue
            if line.startswith("*"):
                # curl connection/TLS metadata, not part of the response body.
                continue
            if current_block and current_block.get("response") and line.strip():
                if current_block["response"].get("body") is None:
                    current_block["response"]["body"] = ""
                current_block["response"]["body"] += line + "\n"
        if current_block.get("request"):
            blocks.append(current_block)
        return blocks

    def _parse_block(
        self, block: dict[str, Any], idx: int, source_file: str | None
    ) -> HTTPEntry | None:
        """Convert one block into an HTTPEntry, or None if it is incomplete."""
        if not block.get("request") or not block.get("response"):
            return None
        req_data = block["request"]
        resp_data = block["response"]
        request_headers = dict(block.get("headers", []))
        response_headers = dict(block.get("response_headers", []))
        request = Request(
            method=req_data.get("method", "GET"),
            url=req_data.get("url", "/"),
            http_version=req_data.get("http_version", "HTTP/1.1"),
            headers=request_headers,
            body=block.get("body"),
        )
        response_body = (resp_data.get("body") or "").strip() or None
        response = Response(
            status=resp_data.get("status", 0),
            status_text=resp_data.get("status_text", ""),
            http_version=resp_data.get("http_version", "HTTP/1.1"),
            headers=response_headers,
            body=response_body,
            # Content type belongs to the RESPONSE; the previous code looked
            # it up in the request headers.
            content_type=response_headers.get("Content-Type")
            or response_headers.get("content-type"),
        )
        return HTTPEntry(
            id=f"curl-{idx}",
            request=request,
            response=response,
            # curl -v carries no timestamps, so use the time of parsing.
            timestamp=datetime.now(),
            source_file=source_file,
        )

View File

@@ -0,0 +1,133 @@
"""Parser for Chrome DevTools network export format."""
import json
from datetime import datetime
from typing import Any
from http_log_explorer.models import HTTPEntry, Request, Response
from http_log_explorer.parsers import ParserInterface
class DevToolsParser(ParserInterface):
    """Parser for Chrome DevTools network export JSON.

    Accepts either a bare JSON list of entry objects or a HAR-like
    ``{"log": {"entries": [...]}}`` wrapper WITHOUT a "creator" field
    (a creator field marks a true HAR file, handled by HARParser).
    """

    @staticmethod
    def get_parser_name() -> str:
        """Return the short name of this parser."""
        return "DevTools"

    def can_parse(self, content: str | bytes) -> bool:
        """Check if content appears to be a DevTools network export."""
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="ignore")
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return False
        if isinstance(data, list):
            # Require at least one dict entry carrying request/response keys;
            # previously an empty list (or a list of non-dicts) satisfied the
            # all() check vacuously and was wrongly claimed by this parser.
            sample = [item for item in data[:3] if isinstance(item, dict)]
            return bool(sample) and all(
                "request" in item and "response" in item for item in sample
            )
        if isinstance(data, dict):
            log = data.get("log")
            # A non-dict "log" value can't be a DevTools export (and would
            # make the membership tests below misbehave or raise).
            return isinstance(log, dict) and "entries" in log and "creator" not in log
        return False

    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse a DevTools network export into HTTPEntry objects.

        Args:
            content: Export JSON, as text or bytes.
            source_file: Optional originating file name, recorded on entries.

        Returns:
            List of HTTPEntry objects; malformed entries are skipped.

        Raises:
            ValueError: If the content is not valid JSON or has an
                unrecognized top-level shape.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")
        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON format: {e}") from e
        if isinstance(data, dict) and "log" in data:
            entries_data = data.get("log", {}).get("entries", [])
        elif isinstance(data, list):
            entries_data = data
        else:
            raise ValueError("Unrecognized DevTools format")
        entries: list[HTTPEntry] = []
        for idx, entry_data in enumerate(entries_data):
            try:
                entry = self._convert_entry(entry_data, idx, source_file)
                if entry:
                    entries.append(entry)
            except Exception:
                # Best effort: skip malformed entries rather than failing
                # the whole export.
                continue
        return entries

    def _convert_entry(
        self, entry_data: dict[str, Any], idx: int, source_file: str | None
    ) -> HTTPEntry | None:
        """Convert one DevTools entry dict to an HTTPEntry, or None if incomplete."""
        request_data = entry_data.get("request", {})
        response_data = entry_data.get("response", {})
        if not request_data or not response_data:
            return None
        request = Request(
            method=request_data.get("method", "GET"),
            url=request_data.get("url", ""),
            http_version=request_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(request_data.get("headers", {})),
            body=request_data.get("postData", {}).get("text") if request_data.get("postData") else None,
            query_params=self._parse_query_params(request_data.get("queryString", [])),
        )
        content = response_data.get("content")
        response = Response(
            status=response_data.get("status", 0),
            status_text=response_data.get("statusText", ""),
            http_version=response_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(response_data.get("headers", {})),
            body=content.get("text") if isinstance(content, dict) else None,
            content_type=content.get("mimeType") if isinstance(content, dict) else None,
            response_time_ms=self._parse_time(entry_data),
        )
        return HTTPEntry(
            id=f"devtools-{idx}",
            request=request,
            response=response,
            timestamp=self._parse_timestamp(entry_data),
            server_ip=entry_data.get("serverIPAddress"),
            connection=entry_data.get("connection"),
            source_file=source_file,
        )

    def _parse_headers(self, headers: dict[str, Any] | list) -> dict[str, str]:
        """Normalize headers (dict, or HAR-style name/value list) to a dict."""
        if isinstance(headers, dict):
            return dict(headers)
        if isinstance(headers, list):
            return {h.get("name", ""): h.get("value", "") for h in headers}
        return {}

    def _parse_query_params(self, query_string: list[dict[str, Any]]) -> dict[str, str]:
        """Normalize a HAR-style queryString list to a dict."""
        if isinstance(query_string, list):
            return {p.get("name", ""): p.get("value", "") for p in query_string}
        return {}

    def _parse_time(self, entry_data: dict[str, Any]) -> float | None:
        """Extract the total request time in ms, if present and numeric."""
        try:
            return float(entry_data["time"])
        except (KeyError, TypeError, ValueError):
            # Missing or non-numeric "time" just means no timing info.
            return None

    def _parse_timestamp(self, entry_data: dict[str, Any]) -> datetime | None:
        """Parse the ISO-8601 "startedDateTime" field, if present."""
        if "startedDateTime" in entry_data:
            try:
                # "Z" suffix is normalized for datetime.fromisoformat
                # (pre-3.11 it does not accept "Z" directly).
                return datetime.fromisoformat(entry_data["startedDateTime"].replace("Z", "+00:00"))
            except (ValueError, AttributeError):
                pass
        return None

View File

@@ -0,0 +1,47 @@
"""Parser factory for creating appropriate parsers."""
from __future__ import annotations
from typing import TYPE_CHECKING
from http_log_explorer.parsers.curl_parser import CurlParser
from http_log_explorer.parsers.devtools_parser import DevToolsParser
from http_log_explorer.parsers.har_parser import HARParser
if TYPE_CHECKING:
from http_log_explorer.parsers import ParserInterface
def get_parser(content: str | bytes) -> ParserInterface:
    """Select the parser that recognizes *content*.

    Parsers are probed in a fixed order; the first whose ``can_parse``
    accepts the content wins.

    Args:
        content: The content to parse.

    Returns:
        An appropriate parser instance.

    Raises:
        ValueError: If no suitable parser is found.
    """
    candidates: list[ParserInterface] = [
        HARParser(),
        CurlParser(),
        DevToolsParser(),
    ]
    match = next((p for p in candidates if p.can_parse(content)), None)
    if match is None:
        raise ValueError(
            "Unsupported format. Supported formats are: HAR files, curl -v output, and Chrome DevTools network exports."
        )
    return match
def get_all_parsers() -> list[ParserInterface]:
    """Return one fresh instance of every available parser."""
    parser_types = (HARParser, CurlParser, DevToolsParser)
    return [parser_type() for parser_type in parser_types]


__all__ = ["get_parser", "get_all_parsers"]

View File

@@ -0,0 +1,146 @@
"""HAR file parser using haralyzer."""
import json
from datetime import datetime
from typing import Any
from haralyzer import HarParser
from http_log_explorer.models import HTTPEntry, Request, Response
from http_log_explorer.parsers import ParserInterface
class HARParser(ParserInterface):
    """Parser for HAR (HTTP Archive) files, validated via haralyzer."""

    @staticmethod
    def get_parser_name() -> str:
        """Return the short name of this parser."""
        return "HAR"

    def can_parse(self, content: str | bytes) -> bool:
        """Check if content appears to be a HAR file.

        A HAR file is a JSON object with log.entries AND log.creator;
        the creator field distinguishes it from bare DevTools exports.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="ignore")
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return False
        # Non-dict JSON (e.g. a bare number) would make the membership
        # tests below raise TypeError, which was previously uncaught.
        if not isinstance(data, dict):
            return False
        log = data.get("log")
        return isinstance(log, dict) and "entries" in log and "creator" in log

    def parse(self, content: str | bytes, source_file: str | None = None) -> list[HTTPEntry]:
        """Parse HAR content into HTTPEntry objects.

        Args:
            content: HAR JSON, as text or bytes.
            source_file: Optional originating file name, recorded on entries.

        Returns:
            List of HTTPEntry objects; malformed entries are skipped.

        Raises:
            ValueError: If the content is not valid HAR JSON.
        """
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")
        try:
            data = json.loads(content)
            har_parser = HarParser(data)
        except Exception as e:
            # json.JSONDecodeError and haralyzer's own validation errors
            # are surfaced uniformly as ValueError (the previous duplicate
            # except clauses collapsed to this single handler).
            raise ValueError(f"Invalid HAR format: {e}") from e
        entries: list[HTTPEntry] = []
        har_entries = har_parser.har_data.get("entries", [])
        for idx, har_entry in enumerate(har_entries):
            try:
                entry = self._convert_har_entry(har_entry, idx, source_file)
                if entry:
                    entries.append(entry)
            except Exception:
                # Best effort: a malformed entry must not fail the whole file.
                continue
        return entries

    def _convert_har_entry(
        self, har_entry: Any, idx: int, source_file: str | None
    ) -> HTTPEntry | None:
        """Convert one HAR entry dict to an HTTPEntry, or None if incomplete."""
        request_data = har_entry.get("request")
        response_data = har_entry.get("response")
        if not request_data or not response_data:
            return None
        request = Request(
            method=request_data.get("method", "GET"),
            url=self._build_url(request_data),
            http_version=request_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(request_data.get("headers", [])),
            body=self._get_request_body(request_data),
            query_params=self._parse_query_params(request_data.get("queryString", [])),
        )
        response = Response(
            status=response_data.get("status", 0),
            status_text=response_data.get("statusText", ""),
            http_version=response_data.get("httpVersion", "HTTP/1.1"),
            headers=self._parse_headers(response_data.get("headers", [])),
            body=self._get_response_body(response_data),
            content_type=self._get_content_type(response_data.get("content", {})),
            response_time_ms=har_entry.get("time", None),
        )
        return HTTPEntry(
            id=f"har-{idx}",
            request=request,
            response=response,
            timestamp=self._parse_timestamp(har_entry),
            server_ip=har_entry.get("serverIPAddress", None),
            connection=har_entry.get("connection", None),
            source_file=source_file,
        )

    def _build_url(self, request_data: dict[str, Any]) -> str:
        """Return the request URL, falling back to the Host header if absent."""
        url = request_data.get("url", "")
        if not url:
            host = ""
            for header in request_data.get("headers", []):
                if header.get("name", "").lower() == "host":
                    host = header.get("value", "")
                    break
            url = f"http://{host}/"
        return url

    def _parse_headers(self, headers: list[dict[str, Any]]) -> dict[str, str]:
        """Convert a HAR name/value header list to a dict."""
        return {h.get("name", ""): h.get("value", "") for h in headers}

    def _parse_query_params(self, query_string: list[dict[str, Any]]) -> dict[str, str]:
        """Convert a HAR queryString list to a dict."""
        return {p.get("name", ""): p.get("value", "") for p in query_string}

    def _get_request_body(self, request_data: dict[str, Any]) -> str | None:
        """Extract the request body text, if any."""
        post_data = request_data.get("postData", {})
        if post_data:
            if isinstance(post_data, dict):
                return post_data.get("text", None)
            return str(post_data)
        return None

    def _get_response_body(self, response_data: dict[str, Any]) -> str | None:
        """Extract the response body text, if any."""
        content = response_data.get("content", {})
        if isinstance(content, dict):
            return content.get("text", None)
        return None

    def _get_content_type(self, content: dict[str, Any]) -> str | None:
        """Extract the MIME type from a HAR content dict."""
        if isinstance(content, dict):
            return content.get("mimeType", None)
        return None

    def _parse_timestamp(self, har_entry: Any) -> datetime | None:
        """Parse the entry's start time.

        Entries come from ``har_parser.har_data``, so they are plain dicts
        carrying an ISO-8601 "startedDateTime" string.  The previous
        getattr-based lookup of ``started_datetime`` always returned None
        on dicts, so every entry lost its timestamp.
        """
        if isinstance(har_entry, dict):
            started = har_entry.get("startedDateTime")
        else:
            # Fall back for haralyzer entry objects, which expose the
            # already-parsed datetime as an attribute.
            started = getattr(har_entry, "started_datetime", None)
        if isinstance(started, datetime):
            return started
        if isinstance(started, str):
            try:
                # "Z" suffix normalized for datetime.fromisoformat
                # (pre-3.11 it does not accept "Z" directly).
                return datetime.fromisoformat(started.replace("Z", "+00:00"))
            except ValueError:
                return None
        return None