Re-upload: CI infrastructure issue resolved, all tests verified passing
This commit is contained in:
431
http_log_explorer/generators/openapi_generator.py
Normal file
431
http_log_explorer/generators/openapi_generator.py
Normal file
@@ -0,0 +1,431 @@
|
||||
"""OpenAPI 3.0 spec generator from HTTP traffic."""
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
from http_log_explorer.models import HTTPEntry
|
||||
|
||||
try:
|
||||
from openapi_spec_validator import validate
|
||||
VALIDATION_AVAILABLE = True
|
||||
except ImportError:
|
||||
VALIDATION_AVAILABLE = False
|
||||
|
||||
|
||||
class OpenAPIGenerator:
|
||||
"""Generate OpenAPI 3.0 specification from observed traffic."""
|
||||
|
||||
def __init__(self, entries: list[HTTPEntry]) -> None:
|
||||
"""Initialize with HTTP entries.
|
||||
|
||||
Args:
|
||||
entries: List of HTTPEntry objects
|
||||
"""
|
||||
self.entries = entries
|
||||
self.spec: dict[str, Any] = {}
|
||||
self._schemas: dict[str, dict[str, Any]] = {}
|
||||
self._path_items: dict[str, dict[str, Any]] = defaultdict(dict)
|
||||
|
||||
def generate(
|
||||
self,
|
||||
title: str = "API",
|
||||
version: str = "1.0.0",
|
||||
description: str = "Generated from traffic analysis",
|
||||
validate_spec: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
"""Generate OpenAPI spec from traffic.
|
||||
|
||||
Args:
|
||||
title: API title
|
||||
version: API version
|
||||
description: API description
|
||||
validate_spec: Whether to validate the generated spec
|
||||
|
||||
Returns:
|
||||
OpenAPI spec dictionary
|
||||
|
||||
Raises:
|
||||
ValueError: If validation fails and validate_spec is True
|
||||
"""
|
||||
self.spec = {
|
||||
"openapi": "3.0.3",
|
||||
"info": {
|
||||
"title": title,
|
||||
"version": version,
|
||||
"description": description,
|
||||
},
|
||||
"paths": {},
|
||||
"components": {
|
||||
"schemas": {},
|
||||
},
|
||||
}
|
||||
|
||||
self._schemas = {}
|
||||
self._path_items = defaultdict(dict)
|
||||
|
||||
self._infer_paths()
|
||||
self._infer_schemas()
|
||||
|
||||
self.spec["paths"] = dict(self._path_items)
|
||||
self.spec["components"]["schemas"] = self._schemas
|
||||
|
||||
if validate_spec and VALIDATION_AVAILABLE:
|
||||
try:
|
||||
validate(self.spec)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Generated spec is invalid: {e}") from e
|
||||
|
||||
return self.spec
|
||||
|
||||
def _infer_paths(self) -> None:
|
||||
"""Infer API paths from traffic."""
|
||||
for entry in self.entries:
|
||||
path = self._extract_path(entry.endpoint)
|
||||
method = entry.request.method.lower()
|
||||
|
||||
if path not in self._path_items:
|
||||
self._path_items[path] = {}
|
||||
|
||||
path_params = self._extract_path_params(path)
|
||||
if path_params and "parameters" not in self._path_items[path]:
|
||||
self._path_items[path]["parameters"] = path_params
|
||||
|
||||
operation: dict[str, Any] = {
|
||||
"responses": self._generate_responses(entry),
|
||||
}
|
||||
|
||||
if entry.request.headers:
|
||||
operation["parameters"] = self._generate_parameters(entry)
|
||||
|
||||
if entry.request.body:
|
||||
request_body = self._generate_request_body(entry)
|
||||
if request_body:
|
||||
operation["requestBody"] = request_body
|
||||
|
||||
self._path_items[path][method] = operation
|
||||
|
||||
def _extract_path_params(self, path: str) -> list[dict[str, Any]]:
|
||||
"""Extract path parameters from a path string.
|
||||
|
||||
Args:
|
||||
path: The path string like '/users/{id}'
|
||||
|
||||
Returns:
|
||||
List of parameter definitions
|
||||
"""
|
||||
params = []
|
||||
import re
|
||||
param_pattern = re.compile(r"\{([^}]+)\}")
|
||||
for match in param_pattern.finditer(path):
|
||||
param_name = match.group(1)
|
||||
params.append({
|
||||
"name": param_name,
|
||||
"in": "path",
|
||||
"required": True,
|
||||
"schema": {"type": "string"},
|
||||
})
|
||||
return params
|
||||
|
||||
def _extract_path(self, endpoint: str) -> str:
|
||||
"""Extract and normalize path from endpoint."""
|
||||
path = endpoint
|
||||
|
||||
parts = path.split("/")
|
||||
normalized_parts = []
|
||||
|
||||
for part in parts:
|
||||
if not part:
|
||||
normalized_parts.append("")
|
||||
elif part.isdigit():
|
||||
normalized_parts.append("{" + self._get_param_name(path, part) + "}")
|
||||
elif self._is_uuid(part):
|
||||
normalized_parts.append("{uuid}")
|
||||
elif self._is_hash(part):
|
||||
normalized_parts.append("{id}")
|
||||
else:
|
||||
normalized_parts.append(part)
|
||||
|
||||
return "/".join(normalized_parts) or "/"
|
||||
|
||||
def _get_param_name(self, path: str, value: str) -> str:
|
||||
"""Generate parameter name based on path context."""
|
||||
path_lower = path.lower()
|
||||
if "user" in path_lower or "id" in path_lower:
|
||||
return "id"
|
||||
if "page" in path_lower or "offset" in path_lower:
|
||||
return "page"
|
||||
if "limit" in path_lower or "size" in path_lower:
|
||||
return "limit"
|
||||
return "id"
|
||||
|
||||
def _is_uuid(self, s: str) -> bool:
|
||||
"""Check if string looks like a UUID."""
|
||||
uuid_pattern = re.compile(
|
||||
r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
return bool(uuid_pattern.match(s))
|
||||
|
||||
def _is_hash(self, s: str) -> bool:
|
||||
"""Check if string looks like a hash."""
|
||||
hash_pattern = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE)
|
||||
return bool(hash_pattern.match(s))
|
||||
|
||||
def _generate_responses(self, entry: HTTPEntry) -> dict[str, Any]:
|
||||
"""Generate response definitions."""
|
||||
content = {}
|
||||
ct = entry.content_type
|
||||
|
||||
if ct and "json" in ct.lower():
|
||||
schema = self._extract_schema_from_body(entry.response.body, "response")
|
||||
content = {
|
||||
"application/json": {
|
||||
"schema": schema,
|
||||
}
|
||||
}
|
||||
elif entry.response.body:
|
||||
content = {
|
||||
"text/plain": {
|
||||
"schema": {
|
||||
"type": "string",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
status = entry.response.status
|
||||
status_text = entry.response.status_text or "OK"
|
||||
|
||||
return {
|
||||
str(status): {
|
||||
"description": status_text,
|
||||
"content": content,
|
||||
}
|
||||
}
|
||||
|
||||
def _generate_parameters(self, entry: HTTPEntry) -> list[dict[str, Any]]:
|
||||
"""Generate parameter definitions from query string."""
|
||||
params = []
|
||||
|
||||
for name, value in entry.request.query_params.items():
|
||||
param: dict[str, Any] = {
|
||||
"name": name,
|
||||
"in": "query",
|
||||
"schema": {
|
||||
"type": self._infer_type(value),
|
||||
},
|
||||
}
|
||||
if value:
|
||||
param["example"] = value
|
||||
params.append(param)
|
||||
|
||||
return params
|
||||
|
||||
def _generate_request_body(self, entry: HTTPEntry) -> dict[str, Any] | None:
|
||||
"""Generate request body definition."""
|
||||
body = entry.request.body
|
||||
if not body:
|
||||
return None
|
||||
|
||||
content: dict[str, Any] = {}
|
||||
|
||||
if self._is_json(body):
|
||||
schema = self._extract_schema_from_body(body, "request")
|
||||
content = {
|
||||
"application/json": {
|
||||
"schema": schema,
|
||||
}
|
||||
}
|
||||
else:
|
||||
content = {
|
||||
"text/plain": {
|
||||
"schema": {
|
||||
"type": "string",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
"content": content,
|
||||
"required": True,
|
||||
}
|
||||
|
||||
def _extract_schema_from_body(
|
||||
self, body: str | None, prefix: str = "schema"
|
||||
) -> dict[str, Any]:
|
||||
"""Extract JSON schema from body content.
|
||||
|
||||
Args:
|
||||
body: Body content
|
||||
prefix: Prefix for schema name
|
||||
|
||||
Returns:
|
||||
JSON Schema dictionary
|
||||
"""
|
||||
if not body:
|
||||
return {"type": "string"}
|
||||
|
||||
if not self._is_json(body):
|
||||
return {"type": "string"}
|
||||
|
||||
try:
|
||||
data = json.loads(body)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return {"type": "string"}
|
||||
|
||||
if isinstance(data, dict):
|
||||
schema_name = f"{prefix}Schema"
|
||||
schema = self._dict_to_schema(data, schema_name)
|
||||
self._schemas[schema_name] = schema
|
||||
return {"$ref": f"#/components/schemas/{schema_name}"}
|
||||
elif isinstance(data, list) and data:
|
||||
return {
|
||||
"type": "array",
|
||||
"items": self._dict_to_schema(data[0], f"{prefix}Item"),
|
||||
}
|
||||
|
||||
return {"type": "string"}
|
||||
|
||||
def _dict_to_schema(
|
||||
self, data: dict[str, Any], name: str
|
||||
) -> dict[str, Any]:
|
||||
"""Convert dictionary to JSON schema.
|
||||
|
||||
Args:
|
||||
data: Dictionary to convert
|
||||
name: Schema name
|
||||
|
||||
Returns:
|
||||
JSON Schema dictionary
|
||||
"""
|
||||
properties: dict[str, Any] = {}
|
||||
required: list[str] = []
|
||||
|
||||
for key, value in data.items():
|
||||
prop_schema = self._value_to_schema(value, key)
|
||||
properties[key] = prop_schema
|
||||
required.append(key)
|
||||
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": properties,
|
||||
"required": required,
|
||||
}
|
||||
|
||||
def _value_to_schema(self, value: Any, key: str) -> dict[str, Any]:
|
||||
"""Convert a value to JSON schema.
|
||||
|
||||
Args:
|
||||
value: Value to convert
|
||||
key: Key name (for nested object naming)
|
||||
|
||||
Returns:
|
||||
JSON Schema for the value
|
||||
"""
|
||||
if value is None:
|
||||
return {"type": "string", "nullable": True}
|
||||
elif isinstance(value, bool):
|
||||
return {"type": "boolean"}
|
||||
elif isinstance(value, int):
|
||||
return {"type": "integer"}
|
||||
elif isinstance(value, float):
|
||||
return {"type": "number"}
|
||||
elif isinstance(value, str):
|
||||
if self._is_json(value):
|
||||
nested = self._dict_to_schema(json.loads(value), f"{key}Schema")
|
||||
return nested
|
||||
return {"type": "string"}
|
||||
elif isinstance(value, dict):
|
||||
schema_name = f"{key}Schema"
|
||||
nested = self._dict_to_schema(value, schema_name)
|
||||
self._schemas[schema_name] = nested
|
||||
return {"$ref": f"#/components/schemas/{schema_name}"}
|
||||
elif isinstance(value, list):
|
||||
if value:
|
||||
item_schema = self._value_to_schema(value[0], f"{key}Item")
|
||||
return {"type": "array", "items": item_schema}
|
||||
return {"type": "array", "items": {"type": "string"}}
|
||||
|
||||
return {"type": "string"}
|
||||
|
||||
def _infer_type(self, value: str) -> str:
|
||||
"""Infer JSON type from string value.
|
||||
|
||||
Args:
|
||||
value: String value
|
||||
|
||||
Returns:
|
||||
JSON type string
|
||||
"""
|
||||
if not value:
|
||||
return "string"
|
||||
try:
|
||||
int(value)
|
||||
return "integer"
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
float(value)
|
||||
return "number"
|
||||
except ValueError:
|
||||
pass
|
||||
if value.lower() in ("true", "false"):
|
||||
return "boolean"
|
||||
return "string"
|
||||
|
||||
def _is_json(self, s: str) -> bool:
|
||||
"""Check if string is JSON.
|
||||
|
||||
Args:
|
||||
s: String to check
|
||||
|
||||
Returns:
|
||||
True if string is JSON
|
||||
"""
|
||||
if not s or not s.strip():
|
||||
return False
|
||||
if s.strip().startswith(("{", "[")):
|
||||
try:
|
||||
json.loads(s)
|
||||
return True
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
return False
|
||||
|
||||
def _infer_schemas(self) -> None:
|
||||
"""Infer additional schemas from request/response bodies."""
|
||||
for entry in self.entries:
|
||||
if entry.request.body and self._is_json(entry.request.body):
|
||||
try:
|
||||
data = json.loads(entry.request.body)
|
||||
if isinstance(data, dict):
|
||||
schema_name = "requestBodySchema"
|
||||
if schema_name not in self._schemas:
|
||||
self._schemas[schema_name] = self._dict_to_schema(data, schema_name)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
def to_json(self, spec: dict[str, Any] | None = None, indent: int = 2) -> str:
|
||||
"""Convert spec to JSON string.
|
||||
|
||||
Args:
|
||||
spec: Spec to convert, or use self.spec if None
|
||||
indent: JSON indent level
|
||||
|
||||
Returns:
|
||||
JSON string
|
||||
"""
|
||||
if spec is None:
|
||||
spec = self.spec
|
||||
return json.dumps(spec, indent=indent)
|
||||
|
||||
def save_spec(self, path: str, spec: dict[str, Any] | None = None) -> None:
|
||||
"""Save spec to file.
|
||||
|
||||
Args:
|
||||
path: File path to save to
|
||||
spec: Spec to save, or use self.spec if None
|
||||
"""
|
||||
with open(path, "w") as f:
|
||||
f.write(self.to_json(spec))
|
||||
Reference in New Issue
Block a user