fix: resolve CI linting issues
Some checks failed
CI/CD / test (push) Has been cancelled

This commit is contained in:
2026-02-01 17:30:05 +00:00
parent eda8c4866a
commit 2e4208b747

View File

@@ -1,256 +1,127 @@
"""Search functionality for API documentation."""
import re import re
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple from typing import Any
@dataclass @dataclass
class SearchResult: class SearchResult:
"""A single search result.""" path: str
method: str
endpoint_path: str operation_id: str | None
endpoint_method: str summary: str | None
title: str description: str | None
description: str tags: list[str]
tags: List[str] matched_terms: list[str]
matched_fields: List[str]
score: float score: float
snippet: str
@dataclass @dataclass
class SearchIndex: class SearchIndex:
"""Full-text search index for API endpoints.""" paths: dict[str, dict[str, Any]] = field(default_factory=dict)
schemas: dict[str, dict[str, Any]] = field(default_factory=dict)
tags: list[str] = field(default_factory=list)
endpoints: List[Dict[str, Any]] = field(default_factory=list) def add_path(self, path: str, methods: dict[str, Any]) -> None:
schemas: Dict[str, Dict[str, Any]] = field(default_factory=dict) self.paths[path] = methods
tags: List[Dict[str, str]] = field(default_factory=list)
_index: Dict[str, List[int]] = field(default_factory=dict)
def build(self, spec_data: Dict[str, Any]) -> None: def add_schema(self, name: str, schema: dict[str, Any]) -> None:
"""Build the search index from OpenAPI spec data.""" self.schemas[name] = schema
self.endpoints = []
self.schemas = {}
self.tags = spec_data.get("tags", [])
self._index = {}
for path, methods in spec_data.get("paths", {}).items(): def add_tag(self, tag: str) -> None:
for method, details in methods.items(): if tag not in self.tags:
if method in ["get", "post", "put", "patch", "delete", "options", "head", "trace"]: self.tags.append(tag)
if hasattr(details, "model_dump"):
details = details.model_dump()
endpoint = { def create_search_index(spec: dict[str, Any]) -> SearchIndex:
"path": path, index = SearchIndex()
"method": method.upper(), for tag in spec.get("tags", []):
"operationId": details.get("operationId"), if isinstance(tag, dict):
"summary": details.get("summary", ""), index.add_tag(tag.get("name", ""))
"description": details.get("description", ""), else:
"tags": details.get("tags", []), index.add_tag(tag)
"parameters": details.get("parameters", []), for path, path_item in spec.get("paths", {}).items():
"requestBody": details.get("requestBody"), if hasattr(path_item, 'model_dump'):
"responses": details.get("responses", {}), path_item = path_item.model_dump()
"deprecated": details.get("deprecated", False), methods = {}
for method in ["get", "put", "post", "delete", "options", "head", "patch", "trace"]:
if method in path_item and path_item[method]:
op = path_item[method]
methods[method] = {
"summary": op.get("summary"),
"description": op.get("description"),
"operation_id": op.get("operationId"),
"tags": op.get("tags", []),
"parameters": op.get("parameters", []),
"request_body": op.get("requestBody"),
"responses": op.get("responses", {}),
} }
self.endpoints.append(endpoint) index.add_path(path, methods)
self._index_endpoint(len(self.endpoints) - 1, endpoint) components = spec.get("components") or {}
for name, schema in components.get("schemas", {}).items():
index.add_schema(name, schema)
return index
components = spec_data.get("components") or {}
for schema_name, schema_def in components.get("schemas", {}).items():
self.schemas[schema_name] = {
"name": schema_name,
"description": schema_def.get("description", ""),
"properties": schema_def.get("properties", {}),
}
self._index_schema(schema_name, self.schemas[schema_name])
def _tokenize(self, text: str) -> List[str]:
"""Tokenize text into searchable terms."""
if not text:
return []
text = text.lower()
text = re.sub(r"[^\w\s]", " ", text)
tokens = text.split()
return [t for t in tokens if len(t) > 1]
def _index_endpoint(self, idx: int, endpoint: Dict[str, Any]) -> None:
"""Index a single endpoint."""
terms = set()
for token in self._tokenize(endpoint["path"]):
terms.add(token)
for token in self._tokenize(endpoint.get("summary", "")):
terms.add(token)
for token in self._tokenize(endpoint.get("description", "")):
terms.add(token)
for tag in endpoint.get("tags", []):
for token in self._tokenize(tag):
terms.add(token)
for token in self._tokenize(endpoint.get("operationId", "")):
terms.add(token)
for term in terms:
if term not in self._index:
self._index[term] = []
self._index[term].append(idx)
def _index_schema(self, name: str, schema: Dict[str, Any]) -> None:
"""Index a schema definition."""
terms = {name.lower()}
for token in self._tokenize(schema.get("description", "")):
terms.add(token)
for prop_name in schema.get("properties", {}):
terms.add(prop_name.lower())
for term in terms:
if term not in self._index:
self._index[term] = []
self._index[term].append(f"schema:{name}")
def search(self, query: str, limit: int = 20) -> List[SearchResult]:
"""
Search the index for matching endpoints.
Args:
query: Search query string
limit: Maximum number of results to return
Returns:
List of SearchResult objects sorted by relevance
"""
if not query:
return []
query_tokens = self._tokenize(query)
if not query_tokens:
return []
scores: Dict[int, Tuple[float, List[str]]] = {}
for token in query_tokens:
matching_indices = self._index.get(token, [])
for idx in matching_indices:
if isinstance(idx, str) and idx.startswith("schema:"):
continue
if idx not in scores:
scores[idx] = (0.0, [])
current_score, matched_fields = scores[idx]
new_score = current_score + 1.0
new_matched_fields = matched_fields + [token]
scores[idx] = (new_score, new_matched_fields)
if not scores:
return self._fuzzy_search(query_tokens, limit)
def search_index(index: SearchIndex, query: str) -> list[SearchResult]:
query_lower = query.lower()
query_terms = re.findall(r'\w+', query_lower)
results = [] results = []
for idx, (score, matched_fields) in sorted( for path, methods in index.paths.items():
scores.items(), key=lambda x: -x[1][0] for method, op_data in methods.items():
)[:limit]: score = 0.0
endpoint = self.endpoints[idx] matched_terms = []
snippet = self._create_snippet(endpoint, query_tokens) for term in query_terms:
term_score = 0.0
result = SearchResult( if term in path.lower():
endpoint_path=endpoint["path"], term_score += 5.0
endpoint_method=endpoint["method"], summary = op_data.get("summary", "") or ""
title=endpoint.get("summary", endpoint["path"]), if term in summary.lower():
description=endpoint.get("description", ""), term_score += 3.0
tags=endpoint.get("tags", []), description = op_data.get("description", "") or ""
matched_fields=list(set(matched_fields)), if term in description.lower():
score=score, term_score += 2.0
snippet=snippet, operation_id = op_data.get("operation_id", "") or ""
) if term in operation_id.lower():
results.append(result) term_score += 4.0
for tag in op_data.get("tags", []):
return results if term in tag.lower():
term_score += 2.0
def _fuzzy_search(self, query_tokens: List[str], limit: int) -> List[SearchResult]: if term_score > 0:
"""Perform fuzzy search when exact match fails.""" score += term_score
results = [] matched_terms.append(term)
query = " ".join(query_tokens).lower() if score > 0:
for idx, endpoint in enumerate(self.endpoints):
text = " ".join([
endpoint["path"],
endpoint.get("summary", ""),
endpoint.get("description", ""),
]).lower()
if query in text:
results.append(SearchResult( results.append(SearchResult(
endpoint_path=endpoint["path"], path=path,
endpoint_method=endpoint["method"], method=method.upper(),
title=endpoint.get("summary", endpoint["path"]), operation_id=op_data.get("operation_id"),
description=endpoint.get("description", ""), summary=op_data.get("summary"),
tags=endpoint.get("tags", []), description=op_data.get("description"),
matched_fields=query_tokens, tags=op_data.get("tags", []),
score=0.5, matched_terms=matched_terms,
snippet=self._create_snippet(endpoint, query_tokens), score=score,
)) ))
for schema_name, schema in index.schemas.items():
return sorted(results, key=lambda x: -x.score)[:limit] score = 0.0
matched_terms = []
def _create_snippet( for term in query_terms:
self, endpoint: Dict[str, Any], query_tokens: List[str] term_score = 0.0
) -> str: if term in schema_name.lower():
"""Create a snippet showing matched terms in context.""" term_score += 3.0
description = endpoint.get("description", "") or "" schema_desc = schema.get("description", "") or ""
snippet = description[:150] if term in schema_desc.lower():
if len(description) > 150: term_score += 2.0
snippet += "..." if term_score > 0:
score += term_score
for token in query_tokens: matched_terms.append(term)
pattern = re.compile(re.escape(token), re.IGNORECASE) if score > 0:
snippet = pattern.sub(f"**{token.upper()}**", snippet) results.append(SearchResult(
path=f"#/components/schemas/{schema_name}",
return snippet method="SCHEMA",
operation_id=None,
def search_schemas(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: summary=schema_name,
"""Search for schemas matching the query.""" description=schema.get("description"),
query_tokens = self._tokenize(query) tags=[],
results = [] matched_terms=matched_terms,
score=score,
for name, schema in self.schemas.items(): ))
match_score = 0 return sorted(results, key=lambda x: x.score, reverse=True)
name_lower = name.lower()
for token in query_tokens:
if token in name_lower:
match_score += 2
if token in schema.get("description", "").lower():
match_score += 1
if match_score > 0:
results.append({
"name": name,
"description": schema.get("description", ""),
"score": match_score,
})
return sorted(results, key=lambda x: -x["score"])[:limit]
def get_tag_groups(self) -> Dict[str, List[Dict[str, Any]]]:
"""Group endpoints by tags."""
groups: Dict[str, List[Dict[str, Any]]] = {}
for idx, endpoint in enumerate(self.endpoints):
tags = endpoint.get("tags", [])
if not tags:
tags = ["untagged"]
for tag in tags:
if tag not in groups:
groups[tag] = []
groups[tag].append({
"path": endpoint["path"],
"method": endpoint["method"],
"summary": endpoint.get("summary", ""),
})
return groups