68 lines
2.3 KiB
Python
68 lines
2.3 KiB
Python
"""FTS5 search engine for snippets."""
|
|
|
|
from typing import Any
|
|
|
|
from snip.db.database import Database, get_database
|
|
|
|
|
|
class SearchEngine:
|
|
def __init__(self, db: Database | str | None = None):
|
|
if isinstance(db, str) or db is None:
|
|
self.db = get_database(db)
|
|
else:
|
|
self.db = db
|
|
|
|
def search(
|
|
self,
|
|
query: str,
|
|
limit: int = 50,
|
|
language: str | None = None,
|
|
tag: str | None = None,
|
|
) -> list[dict[str, Any]]:
|
|
"""Search snippets using FTS5."""
|
|
return self.db.search_snippets(query, limit=limit, language=language, tag=tag)
|
|
|
|
def highlight(self, text: str, query: str) -> str:
|
|
"""Add highlighting markers around matched terms."""
|
|
import re
|
|
terms = re.split(r'\s+', query)
|
|
result = text
|
|
for term in terms:
|
|
if term:
|
|
result = re.sub(f'({re.escape(term)})', r'**\1**', result, flags=re.IGNORECASE)
|
|
return result
|
|
|
|
def suggest(self, prefix: str, limit: int = 10) -> list[str]:
|
|
"""Suggest completions for a prefix."""
|
|
snippets = self.db.list_snippets(limit=100)
|
|
suggestions = set()
|
|
for s in snippets:
|
|
title = s.get("title", "")
|
|
if title.lower().startswith(prefix.lower()):
|
|
suggestions.add(title)
|
|
tags = s.get("tags", [])
|
|
if isinstance(tags, str):
|
|
tags = json.loads(tags)
|
|
for tag in tags:
|
|
if tag.lower().startswith(prefix.lower()):
|
|
suggestions.add(tag)
|
|
return sorted(list(suggestions))[:limit]
|
|
|
|
def parse_query(self, query: str) -> dict[str, Any]:
|
|
"""Parse a search query into components."""
|
|
import re
|
|
result = {
|
|
"terms": [],
|
|
"language": None,
|
|
"tag": None,
|
|
}
|
|
language_match = re.search(r'language:(\w+)', query)
|
|
if language_match:
|
|
result["language"] = language_match.group(1)
|
|
query = re.sub(r'language:\w+', '', query)
|
|
tag_match = re.search(r'tag:(\w+)', query)
|
|
if tag_match:
|
|
result["tag"] = tag_match.group(1)
|
|
query = re.sub(r'tag:\w+', '', query)
|
|
result["terms"] = query.split()
|
|
return result |