"""OpenAPI 3.0 spec generator from HTTP traffic.""" import json import re from collections import defaultdict from typing import Any from http_log_explorer.models import HTTPEntry try: from openapi_spec_validator import validate VALIDATION_AVAILABLE = True except ImportError: VALIDATION_AVAILABLE = False class OpenAPIGenerator: """Generate OpenAPI 3.0 specification from observed traffic.""" def __init__(self, entries: list[HTTPEntry]) -> None: """Initialize with HTTP entries. Args: entries: List of HTTPEntry objects """ self.entries = entries self.spec: dict[str, Any] = {} self._schemas: dict[str, dict[str, Any]] = {} self._path_items: dict[str, dict[str, Any]] = defaultdict(dict) def generate( self, title: str = "API", version: str = "1.0.0", description: str = "Generated from traffic analysis", validate_spec: bool = True, ) -> dict[str, Any]: """Generate OpenAPI spec from traffic. Args: title: API title version: API version description: API description validate_spec: Whether to validate the generated spec Returns: OpenAPI spec dictionary Raises: ValueError: If validation fails and validate_spec is True """ self.spec = { "openapi": "3.0.3", "info": { "title": title, "version": version, "description": description, }, "paths": {}, "components": { "schemas": {}, }, } self._schemas = {} self._path_items = defaultdict(dict) self._infer_paths() self._infer_schemas() self.spec["paths"] = dict(self._path_items) self.spec["components"]["schemas"] = self._schemas if validate_spec and VALIDATION_AVAILABLE: try: validate(self.spec) except Exception as e: raise ValueError(f"Generated spec is invalid: {e}") from e return self.spec def _infer_paths(self) -> None: """Infer API paths from traffic.""" for entry in self.entries: path = self._extract_path(entry.endpoint) method = entry.request.method.lower() if path not in self._path_items: self._path_items[path] = {} path_params = self._extract_path_params(path) if path_params and "parameters" not in self._path_items[path]: self._path_items[path]["parameters"] = path_params operation: dict[str, Any] = { "responses": self._generate_responses(entry), } if entry.request.headers: operation["parameters"] = self._generate_parameters(entry) if entry.request.body: request_body = self._generate_request_body(entry) if request_body: operation["requestBody"] = request_body self._path_items[path][method] = operation def _extract_path_params(self, path: str) -> list[dict[str, Any]]: """Extract path parameters from a path string. Args: path: The path string like '/users/{id}' Returns: List of parameter definitions """ params = [] import re param_pattern = re.compile(r"\{([^}]+)\}") for match in param_pattern.finditer(path): param_name = match.group(1) params.append({ "name": param_name, "in": "path", "required": True, "schema": {"type": "string"}, }) return params def _extract_path(self, endpoint: str) -> str: """Extract and normalize path from endpoint.""" path = endpoint parts = path.split("/") normalized_parts = [] for part in parts: if not part: normalized_parts.append("") elif part.isdigit(): normalized_parts.append("{" + self._get_param_name(path, part) + "}") elif self._is_uuid(part): normalized_parts.append("{uuid}") elif self._is_hash(part): normalized_parts.append("{id}") else: normalized_parts.append(part) return "/".join(normalized_parts) or "/" def _get_param_name(self, path: str, value: str) -> str: """Generate parameter name based on path context.""" path_lower = path.lower() if "user" in path_lower or "id" in path_lower: return "id" if "page" in path_lower or "offset" in path_lower: return "page" if "limit" in path_lower or "size" in path_lower: return "limit" return "id" def _is_uuid(self, s: str) -> bool: """Check if string looks like a UUID.""" uuid_pattern = re.compile( r"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$", re.IGNORECASE, ) return bool(uuid_pattern.match(s)) def _is_hash(self, s: str) -> bool: """Check if string looks like a hash.""" hash_pattern = re.compile(r"^[a-f0-9]{32,}$", re.IGNORECASE) return bool(hash_pattern.match(s)) def _generate_responses(self, entry: HTTPEntry) -> dict[str, Any]: """Generate response definitions.""" content = {} ct = entry.content_type if ct and "json" in ct.lower(): schema = self._extract_schema_from_body(entry.response.body, "response") content = { "application/json": { "schema": schema, } } elif entry.response.body: content = { "text/plain": { "schema": { "type": "string", } } } status = entry.response.status status_text = entry.response.status_text or "OK" return { str(status): { "description": status_text, "content": content, } } def _generate_parameters(self, entry: HTTPEntry) -> list[dict[str, Any]]: """Generate parameter definitions from query string.""" params = [] for name, value in entry.request.query_params.items(): param: dict[str, Any] = { "name": name, "in": "query", "schema": { "type": self._infer_type(value), }, } if value: param["example"] = value params.append(param) return params def _generate_request_body(self, entry: HTTPEntry) -> dict[str, Any] | None: """Generate request body definition.""" body = entry.request.body if not body: return None content: dict[str, Any] = {} if self._is_json(body): schema = self._extract_schema_from_body(body, "request") content = { "application/json": { "schema": schema, } } else: content = { "text/plain": { "schema": { "type": "string", } } } return { "content": content, "required": True, } def _extract_schema_from_body( self, body: str | None, prefix: str = "schema" ) -> dict[str, Any]: """Extract JSON schema from body content. Args: body: Body content prefix: Prefix for schema name Returns: JSON Schema dictionary """ if not body: return {"type": "string"} if not self._is_json(body): return {"type": "string"} try: data = json.loads(body) except (json.JSONDecodeError, TypeError): return {"type": "string"} if isinstance(data, dict): schema_name = f"{prefix}Schema" schema = self._dict_to_schema(data, schema_name) self._schemas[schema_name] = schema return {"$ref": f"#/components/schemas/{schema_name}"} elif isinstance(data, list) and data: return { "type": "array", "items": self._dict_to_schema(data[0], f"{prefix}Item"), } return {"type": "string"} def _dict_to_schema( self, data: dict[str, Any], name: str ) -> dict[str, Any]: """Convert dictionary to JSON schema. Args: data: Dictionary to convert name: Schema name Returns: JSON Schema dictionary """ properties: dict[str, Any] = {} required: list[str] = [] for key, value in data.items(): prop_schema = self._value_to_schema(value, key) properties[key] = prop_schema required.append(key) return { "type": "object", "properties": properties, "required": required, } def _value_to_schema(self, value: Any, key: str) -> dict[str, Any]: """Convert a value to JSON schema. Args: value: Value to convert key: Key name (for nested object naming) Returns: JSON Schema for the value """ if value is None: return {"type": "string", "nullable": True} elif isinstance(value, bool): return {"type": "boolean"} elif isinstance(value, int): return {"type": "integer"} elif isinstance(value, float): return {"type": "number"} elif isinstance(value, str): if self._is_json(value): nested = self._dict_to_schema(json.loads(value), f"{key}Schema") return nested return {"type": "string"} elif isinstance(value, dict): schema_name = f"{key}Schema" nested = self._dict_to_schema(value, schema_name) self._schemas[schema_name] = nested return {"$ref": f"#/components/schemas/{schema_name}"} elif isinstance(value, list): if value: item_schema = self._value_to_schema(value[0], f"{key}Item") return {"type": "array", "items": item_schema} return {"type": "array", "items": {"type": "string"}} return {"type": "string"} def _infer_type(self, value: str) -> str: """Infer JSON type from string value. Args: value: String value Returns: JSON type string """ if not value: return "string" try: int(value) return "integer" except ValueError: pass try: float(value) return "number" except ValueError: pass if value.lower() in ("true", "false"): return "boolean" return "string" def _is_json(self, s: str) -> bool: """Check if string is JSON. Args: s: String to check Returns: True if string is JSON """ if not s or not s.strip(): return False if s.strip().startswith(("{", "[")): try: json.loads(s) return True except (json.JSONDecodeError, TypeError): pass return False def _infer_schemas(self) -> None: """Infer additional schemas from request/response bodies.""" for entry in self.entries: if entry.request.body and self._is_json(entry.request.body): try: data = json.loads(entry.request.body) if isinstance(data, dict): schema_name = "requestBodySchema" if schema_name not in self._schemas: self._schemas[schema_name] = self._dict_to_schema(data, schema_name) except (json.JSONDecodeError, TypeError): pass def to_json(self, spec: dict[str, Any] | None = None, indent: int = 2) -> str: """Convert spec to JSON string. Args: spec: Spec to convert, or use self.spec if None indent: JSON indent level Returns: JSON string """ if spec is None: spec = self.spec return json.dumps(spec, indent=indent) def save_spec(self, path: str, spec: dict[str, Any] | None = None) -> None: """Save spec to file. Args: path: File path to save to spec: Spec to save, or use self.spec if None """ with open(path, "w") as f: f.write(self.to_json(spec))