# Source file: testdata-cli/http_log_explorer/generators/openapi_generator.py
# (file-viewer metadata: 432 lines, 13 KiB, Python)

"""OpenAPI 3.0 spec generator from HTTP traffic."""
import json
import re
from collections import defaultdict
from http import HTTPStatus
from typing import Any

from http_log_explorer.models import HTTPEntry
# Spec validation is optional: it is only available when the
# ``openapi_spec_validator`` package can be imported.  When the import
# fails, ``validate`` stays undefined and the flag below is False.
try:
    from openapi_spec_validator import validate
except ImportError:
    VALIDATION_AVAILABLE = False
else:
    VALIDATION_AVAILABLE = True
class OpenAPIGenerator:
"""Generate OpenAPI 3.0 specification from observed traffic."""
def __init__(self, entries: list[HTTPEntry]) -> None:
"""Initialize with HTTP entries.
Args:
entries: List of HTTPEntry objects
"""
self.entries = entries
self.spec: dict[str, Any] = {}
self._schemas: dict[str, dict[str, Any]] = {}
self._path_items: dict[str, dict[str, Any]] = defaultdict(dict)
def generate(
self,
title: str = "API",
version: str = "1.0.0",
description: str = "Generated from traffic analysis",
validate_spec: bool = True,
) -> dict[str, Any]:
"""Generate OpenAPI spec from traffic.
Args:
title: API title
version: API version
description: API description
validate_spec: Whether to validate the generated spec
Returns:
OpenAPI spec dictionary
Raises:
ValueError: If validation fails and validate_spec is True
"""
self.spec = {
"openapi": "3.0.3",
"info": {
"title": title,
"version": version,
"description": description,
},
"paths": {},
"components": {
"schemas": {},
},
}
self._schemas = {}
self._path_items = defaultdict(dict)
self._infer_paths()
self._infer_schemas()
self.spec["paths"] = dict(self._path_items)
self.spec["components"]["schemas"] = self._schemas
if validate_spec and VALIDATION_AVAILABLE:
try:
validate(self.spec)
except Exception as e:
raise ValueError(f"Generated spec is invalid: {e}") from e
return self.spec
def _infer_paths(self) -> None:
"""Infer API paths from traffic."""
for entry in self.entries:
path = self._extract_path(entry.endpoint)
method = entry.request.method.lower()
if path not in self._path_items:
self._path_items[path] = {}
path_params = self._extract_path_params(path)
if path_params and "parameters" not in self._path_items[path]:
self._path_items[path]["parameters"] = path_params
operation: dict[str, Any] = {
"responses": self._generate_responses(entry),
}
if entry.request.headers:
operation["parameters"] = self._generate_parameters(entry)
if entry.request.body:
request_body = self._generate_request_body(entry)
if request_body:
operation["requestBody"] = request_body
self._path_items[path][method] = operation
def _extract_path_params(self, path: str) -> list[dict[str, Any]]:
"""Extract path parameters from a path string.
Args:
path: The path string like '/users/{id}'
Returns:
List of parameter definitions
"""
params = []
import re
param_pattern = re.compile(r"\{([^}]+)\}")
for match in param_pattern.finditer(path):
param_name = match.group(1)
params.append({
"name": param_name,
"in": "path",
"required": True,
"schema": {"type": "string"},
})
return params
def _extract_path(self, endpoint: str) -> str:
"""Extract and normalize path from endpoint."""
path = endpoint
parts = path.split("/")
normalized_parts = []
for part in parts:
if not part:
normalized_parts.append("")
elif part.isdigit():
normalized_parts.append("{" + self._get_param_name(path, part) + "}")
elif self._is_uuid(part):
normalized_parts.append("{uuid}")
elif self._is_hash(part):
normalized_parts.append("{id}")
else:
normalized_parts.append(part)
return "/".join(normalized_parts) or "/"
def _get_param_name(self, path: str, value: str) -> str:
"""Generate parameter name based on path context."""
path_lower = path.lower()
if "user" in path_lower or "id" in path_lower:
return "id"
if "page" in path_lower or "offset" in path_lower:
return "page"
if "limit" in path_lower or "size" in path_lower:
return "limit"
return "id"
def _is_uuid(self, s: str) -> bool:
"""Check if string looks like a UUID."""
uuid_pattern = re.compile(
r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$",
re.IGNORECASE,
)
return bool(uuid_pattern.match(s))
def _is_hash(self, s: str) -> bool:
"""Check if string looks like a hash."""
hash_pattern = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE)
return bool(hash_pattern.match(s))
def _generate_responses(self, entry: HTTPEntry) -> dict[str, Any]:
"""Generate response definitions."""
content = {}
ct = entry.content_type
if ct and "json" in ct.lower():
schema = self._extract_schema_from_body(entry.response.body, "response")
content = {
"application/json": {
"schema": schema,
}
}
elif entry.response.body:
content = {
"text/plain": {
"schema": {
"type": "string",
}
}
}
status = entry.response.status
status_text = entry.response.status_text or "OK"
return {
str(status): {
"description": status_text,
"content": content,
}
}
def _generate_parameters(self, entry: HTTPEntry) -> list[dict[str, Any]]:
"""Generate parameter definitions from query string."""
params = []
for name, value in entry.request.query_params.items():
param: dict[str, Any] = {
"name": name,
"in": "query",
"schema": {
"type": self._infer_type(value),
},
}
if value:
param["example"] = value
params.append(param)
return params
def _generate_request_body(self, entry: HTTPEntry) -> dict[str, Any] | None:
"""Generate request body definition."""
body = entry.request.body
if not body:
return None
content: dict[str, Any] = {}
if self._is_json(body):
schema = self._extract_schema_from_body(body, "request")
content = {
"application/json": {
"schema": schema,
}
}
else:
content = {
"text/plain": {
"schema": {
"type": "string",
}
}
}
return {
"content": content,
"required": True,
}
def _extract_schema_from_body(
self, body: str | None, prefix: str = "schema"
) -> dict[str, Any]:
"""Extract JSON schema from body content.
Args:
body: Body content
prefix: Prefix for schema name
Returns:
JSON Schema dictionary
"""
if not body:
return {"type": "string"}
if not self._is_json(body):
return {"type": "string"}
try:
data = json.loads(body)
except (json.JSONDecodeError, TypeError):
return {"type": "string"}
if isinstance(data, dict):
schema_name = f"{prefix}Schema"
schema = self._dict_to_schema(data, schema_name)
self._schemas[schema_name] = schema
return {"$ref": f"#/components/schemas/{schema_name}"}
elif isinstance(data, list) and data:
return {
"type": "array",
"items": self._dict_to_schema(data[0], f"{prefix}Item"),
}
return {"type": "string"}
def _dict_to_schema(
self, data: dict[str, Any], name: str
) -> dict[str, Any]:
"""Convert dictionary to JSON schema.
Args:
data: Dictionary to convert
name: Schema name
Returns:
JSON Schema dictionary
"""
properties: dict[str, Any] = {}
required: list[str] = []
for key, value in data.items():
prop_schema = self._value_to_schema(value, key)
properties[key] = prop_schema
required.append(key)
return {
"type": "object",
"properties": properties,
"required": required,
}
def _value_to_schema(self, value: Any, key: str) -> dict[str, Any]:
"""Convert a value to JSON schema.
Args:
value: Value to convert
key: Key name (for nested object naming)
Returns:
JSON Schema for the value
"""
if value is None:
return {"type": "string", "nullable": True}
elif isinstance(value, bool):
return {"type": "boolean"}
elif isinstance(value, int):
return {"type": "integer"}
elif isinstance(value, float):
return {"type": "number"}
elif isinstance(value, str):
if self._is_json(value):
nested = self._dict_to_schema(json.loads(value), f"{key}Schema")
return nested
return {"type": "string"}
elif isinstance(value, dict):
schema_name = f"{key}Schema"
nested = self._dict_to_schema(value, schema_name)
self._schemas[schema_name] = nested
return {"$ref": f"#/components/schemas/{schema_name}"}
elif isinstance(value, list):
if value:
item_schema = self._value_to_schema(value[0], f"{key}Item")
return {"type": "array", "items": item_schema}
return {"type": "array", "items": {"type": "string"}}
return {"type": "string"}
def _infer_type(self, value: str) -> str:
"""Infer JSON type from string value.
Args:
value: String value
Returns:
JSON type string
"""
if not value:
return "string"
try:
int(value)
return "integer"
except ValueError:
pass
try:
float(value)
return "number"
except ValueError:
pass
if value.lower() in ("true", "false"):
return "boolean"
return "string"
def _is_json(self, s: str) -> bool:
"""Check if string is JSON.
Args:
s: String to check
Returns:
True if string is JSON
"""
if not s or not s.strip():
return False
if s.strip().startswith(("{", "[")):
try:
json.loads(s)
return True
except (json.JSONDecodeError, TypeError):
pass
return False
def _infer_schemas(self) -> None:
"""Infer additional schemas from request/response bodies."""
for entry in self.entries:
if entry.request.body and self._is_json(entry.request.body):
try:
data = json.loads(entry.request.body)
if isinstance(data, dict):
schema_name = "requestBodySchema"
if schema_name not in self._schemas:
self._schemas[schema_name] = self._dict_to_schema(data, schema_name)
except (json.JSONDecodeError, TypeError):
pass
def to_json(self, spec: dict[str, Any] | None = None, indent: int = 2) -> str:
"""Convert spec to JSON string.
Args:
spec: Spec to convert, or use self.spec if None
indent: JSON indent level
Returns:
JSON string
"""
if spec is None:
spec = self.spec
return json.dumps(spec, indent=indent)
def save_spec(self, path: str, spec: dict[str, Any] | None = None) -> None:
"""Save spec to file.
Args:
path: File path to save to
spec: Spec to save, or use self.spec if None
"""
with open(path, "w") as f:
f.write(self.to_json(spec))