91 lines
2.6 KiB
Python
91 lines
2.6 KiB
Python
"""Search result shaping for sFetch."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import html
|
|
import re
|
|
|
|
from database import search_images, search_pages, search_videos
|
|
|
|
SNIPPET_LENGTH = 200
|
|
|
|
|
|
def _extract_terms(query: str) -> list[str]:
|
|
terms = {term.lower() for term in re.findall(r"\w+", query, flags=re.UNICODE)}
|
|
return sorted(terms, key=len, reverse=True)
|
|
|
|
|
|
def _build_snippet(body_text: str) -> str:
|
|
snippet = body_text[:SNIPPET_LENGTH].strip()
|
|
if not snippet:
|
|
return "No preview available."
|
|
if len(body_text) > SNIPPET_LENGTH:
|
|
return f"{snippet}..."
|
|
return snippet
|
|
|
|
|
|
def _highlight_terms(snippet: str, query: str) -> str:
    """Return ``snippet`` as HTML-escaped text with query terms in ``<mark>``.

    Matching is case-insensitive and done in a single pass over the *raw*
    snippet with one alternation pattern. Each matched and unmatched piece is
    HTML-escaped as it is emitted. Doing it this way, rather than running one
    substitution per term over already-escaped text, guarantees that:

    * terms such as "amp", "lt" or "quot" never match inside the entities
      produced by escaping (``&amp;``, ``&lt;``, ``&quot;``);
    * terms such as "mark" never match inside ``<mark>`` tags inserted for a
      previous term;
    * overlapping terms produce one ``<mark>`` element, not nested ones.

    Args:
        snippet: Plain-text preview to decorate.
        query: Raw user query; its terms come from ``_extract_terms``.

    Returns:
        An HTML fragment in which the only markup is the inserted
        ``<mark>``/``</mark>`` pairs.
    """
    terms = _extract_terms(query)
    if not terms:
        # An empty alternation would match the empty string everywhere.
        return html.escape(snippet)

    # _extract_terms yields longer terms first, so the alternation prefers
    # the longest term that can start at a given position.
    pattern = re.compile(
        "|".join(re.escape(term) for term in terms),
        flags=re.IGNORECASE,
    )

    pieces: list[str] = []
    cursor = 0
    for match in pattern.finditer(snippet):
        pieces.append(html.escape(snippet[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group(0))}</mark>")
        cursor = match.end()
    pieces.append(html.escape(snippet[cursor:]))
    return "".join(pieces)
|
|
|
|
|
|
async def search(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Search indexed pages and shape each row into a result dict.

    Rows come from ``database.search_pages``; each row is read with
    ``.get(...)`` and key indexing, so it is expected to be mapping-like.

    The title falls back from the row's title to its URL to the literal
    ``"Untitled"``. Each candidate is stripped *before* the fallback is
    evaluated, so a whitespace-only title no longer produces an empty title.

    Args:
        query: Raw user query; forwarded to the database search and used to
            highlight terms in the snippet.
        limit: Forwarded to ``search_pages`` unchanged.
        offset: Forwarded to ``search_pages`` unchanged.

    Returns:
        One dict per row with the keys ``id``, ``url``, ``title``,
        ``snippet`` (an HTML fragment from ``_highlight_terms``) and
        ``indexed_at``.
    """
    rows = await search_pages(query=query, limit=limit, offset=offset)
    return [
        {
            "id": row["id"],
            "url": row["url"],
            "title": (
                (row.get("title") or "").strip()
                or (row.get("url") or "").strip()
                or "Untitled"
            ),
            "snippet": _highlight_terms(
                _build_snippet(row.get("body_text") or ""),
                query,
            ),
            "indexed_at": row["indexed_at"],
        }
        for row in rows
    ]
|
|
|
|
|
|
async def search_images_api(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Search indexed images and shape each row into a result dict.

    Args:
        query: Raw user query, forwarded to ``database.search_images``.
        limit: Forwarded to ``search_images`` unchanged.
        offset: Forwarded to ``search_images`` unchanged.

    Returns:
        One dict per row with the keys ``id``, ``url``, ``page_url``,
        ``alt_text`` and ``indexed_at``. A falsy ``alt_text`` (for example
        ``None``) is reported as an empty string.
    """
    image_rows = await search_images(query=query, limit=limit, offset=offset)
    return [
        {
            "id": record["id"],
            "url": record["url"],
            "page_url": record["page_url"],
            "alt_text": record["alt_text"] or "",
            "indexed_at": record["indexed_at"],
        }
        for record in image_rows
    ]
|
|
|
|
|
|
async def search_videos_api(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Search indexed videos and shape each row into a result dict.

    Rows come from ``database.search_videos``; each row is read with
    ``.get(...)`` and key indexing, so it is expected to be mapping-like.

    The title is stripped *before* the ``"Video result"`` fallback is
    evaluated, so a missing, empty or whitespace-only title all yield the
    placeholder instead of an empty string.

    Args:
        query: Raw user query, forwarded to ``search_videos``.
        limit: Forwarded to ``search_videos`` unchanged.
        offset: Forwarded to ``search_videos`` unchanged.

    Returns:
        One dict per row with the keys ``id``, ``url``, ``page_url``,
        ``title`` and ``indexed_at``.
    """
    rows = await search_videos(query=query, limit=limit, offset=offset)
    return [
        {
            "id": row["id"],
            "url": row["url"],
            "page_url": row["page_url"],
            "title": (row.get("title") or "").strip() or "Video result",
            "indexed_at": row["indexed_at"],
        }
        for row in rows
    ]
|