58 lines
2.3 KiB
Python
58 lines
2.3 KiB
Python
import re
|
|
from typing import Any
|
|
|
|
from ..db import get_database
|
|
|
|
|
|
class SearchEngine:
|
|
def __init__(self, db_path: str | None = None):
|
|
self.db = get_database(db_path)
|
|
|
|
def search(self, query: str, language: str | None = None, tag: str | None = None,
|
|
limit: int = 50, offset: int = 0) -> list[dict[str, Any]]:
|
|
if not query or not query.strip() or query == "*":
|
|
return self.db.list_snippets(language=language, tag=tag, limit=limit, offset=offset)
|
|
results = self.db.search_snippets(query, limit=limit + offset)
|
|
if language or tag:
|
|
filtered = []
|
|
for snippet in results:
|
|
if language and snippet.get("language", "").lower() != language.lower():
|
|
continue
|
|
if tag and tag.lower() not in [t.lower() for t in snippet.get("tags", [])]:
|
|
continue
|
|
filtered.append(snippet)
|
|
results = filtered
|
|
return results[offset : offset + limit]
|
|
|
|
def highlight_matches(self, text: str, query: str) -> str:
|
|
if not query:
|
|
return text
|
|
terms = query.split()
|
|
pattern = "|".join(re.escape(term) for term in terms if term not in ("AND", "OR", "NOT"))
|
|
if not pattern:
|
|
return text
|
|
regex = re.compile(f"({pattern})", re.IGNORECASE)
|
|
return regex.sub(r"**\1**", text)
|
|
|
|
def suggest_completions(self, prefix: str, limit: int = 10) -> list[str]:
|
|
all_tags = self.db.list_tags()
|
|
prefix_lower = prefix.lower()
|
|
return [tag for tag in all_tags if tag.startswith(prefix_lower)][:limit]
|
|
|
|
def parse_query(self, query: str) -> dict[str, Any]:
|
|
tokens = query.split()
|
|
terms = []
|
|
operators = []
|
|
current_operator = "AND"
|
|
for token in tokens:
|
|
if token.upper() in ("AND", "OR"):
|
|
operators.append(token.upper())
|
|
elif token.upper() == "NOT":
|
|
operators.append("NOT")
|
|
else:
|
|
terms.append({"term": token.strip('"'), "operator": current_operator})
|
|
if operators and operators[-1] == "NOT":
|
|
operators.pop()
|
|
current_operator = "AND" if not operators else operators[-1]
|
|
return {"terms": terms, "raw": query}
|