Files
Ned Halksworth e0f2eedcd9 initial commit
2026-05-04 19:31:46 +01:00

91 lines
2.6 KiB
Python

"""Search result shaping for sFetch."""
from __future__ import annotations
import html
import re
from database import search_images, search_pages, search_videos
# Maximum number of body-text characters kept in a result snippet
# (not counting the "..." suffix appended when the text is cut off).
SNIPPET_LENGTH = 200
def _extract_terms(query: str) -> list[str]:
terms = {term.lower() for term in re.findall(r"\w+", query, flags=re.UNICODE)}
return sorted(terms, key=len, reverse=True)
def _build_snippet(body_text: str) -> str:
    """Return a short plain-text preview of *body_text*.

    Surrounding whitespace is removed *before* truncating, so leading
    whitespace does not consume the length budget and text that only
    exceeds the limit because of surrounding whitespace is not marked as
    truncated.

    Args:
        body_text: Full page text; may be empty.

    Returns:
        ``"No preview available."`` when there is no visible text, the
        stripped text itself when it fits within ``SNIPPET_LENGTH``
        characters, otherwise its first ``SNIPPET_LENGTH`` characters
        (trailing whitespace removed) followed by ``"..."``.
    """
    text = body_text.strip()
    if not text:
        return "No preview available."
    if len(text) <= SNIPPET_LENGTH:
        return text
    # The cut may land just after a space; drop it so "..." hugs the text.
    return f"{text[:SNIPPET_LENGTH].rstrip()}..."
def _highlight_terms(snippet: str, query: str) -> str:
    """Return *snippet* as HTML-escaped text with query terms in ``<mark>``.

    All terms are matched in a single pass over the raw snippet, and each
    piece (matched or not) is escaped afterwards.  Substituting one term at
    a time into already-escaped text would let a term such as ``amp`` or
    ``lt`` match inside an entity like ``&amp;``, and let a term such as
    ``a`` or ``mark`` match inside a ``<mark>`` tag inserted for an earlier
    term, producing broken or nested markup.

    Args:
        snippet: Plain (unescaped) preview text.
        query: Raw user query whose word terms should be highlighted.

    Returns:
        An HTML fragment in which every case-insensitive occurrence of a
        query term is wrapped in ``<mark>...</mark>`` and all other text is
        HTML-escaped.
    """
    terms = _extract_terms(query)
    if not terms:
        return html.escape(snippet)

    # _extract_terms returns longer terms first, so at any position the
    # alternation prefers the longest term over one of its prefixes.
    pattern = re.compile("|".join(map(re.escape, terms)), flags=re.IGNORECASE)

    pieces: list[str] = []
    cursor = 0
    for match in pattern.finditer(snippet):
        pieces.append(html.escape(snippet[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group(0))}</mark>")
        cursor = match.end()
    pieces.append(html.escape(snippet[cursor:]))
    return "".join(pieces)
async def search(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Run a page search and shape each row into a result dict.

    Args:
        query: Raw user query; forwarded to ``search_pages`` and also used
            to highlight matching terms in the snippet.
        limit: Forwarded to ``search_pages``.
        offset: Forwarded to ``search_pages``.

    Returns:
        One dict per row with the keys ``id``, ``url``, ``title``,
        ``snippet`` and ``indexed_at``.  ``snippet`` is an HTML fragment
        (escaped text plus ``<mark>`` tags); ``title`` is plain text and is
        NOT escaped here.
    """
    rows = await search_pages(query=query, limit=limit, offset=offset)
    results: list[dict] = []
    for row in rows:
        # Strip each candidate before falling back: a whitespace-only title
        # is truthy and would otherwise end up as an empty string.
        title = (
            (row.get("title") or "").strip()
            or (row.get("url") or "").strip()
            or "Untitled"
        )
        body_text = row.get("body_text") or ""
        results.append(
            {
                "id": row["id"],
                "url": row["url"],
                "title": title,
                "snippet": _highlight_terms(_build_snippet(body_text), query),
                "indexed_at": row["indexed_at"],
            }
        )
    return results
async def search_images_api(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Run an image search and shape each row into a result dict.

    ``query``, ``limit`` and ``offset`` are forwarded unchanged to
    ``search_images``.  Each returned dict carries ``id``, ``url``,
    ``page_url``, ``alt_text`` and ``indexed_at``; a falsy ``alt_text``
    (for example ``None``) is replaced with an empty string.
    """
    image_rows = await search_images(query=query, limit=limit, offset=offset)
    return [
        {
            "id": image["id"],
            "url": image["url"],
            "page_url": image["page_url"],
            "alt_text": image["alt_text"] or "",
            "indexed_at": image["indexed_at"],
        }
        for image in image_rows
    ]
async def search_videos_api(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Run a video search and shape each row into a result dict.

    Args:
        query: Raw user query; forwarded to ``search_videos``.
        limit: Forwarded to ``search_videos``.
        offset: Forwarded to ``search_videos``.

    Returns:
        One dict per row with the keys ``id``, ``url``, ``page_url``,
        ``title`` and ``indexed_at``.  A missing, empty or whitespace-only
        title is replaced with ``"Video result"``.
    """
    rows = await search_videos(query=query, limit=limit, offset=offset)
    results: list[dict] = []
    for row in rows:
        # Strip before applying the fallback so a whitespace-only title does
        # not come out as an empty string.
        title = (row.get("title") or "").strip() or "Video result"
        results.append(
            {
                "id": row["id"],
                "url": row["url"],
                "page_url": row["page_url"],
                "title": title,
                "indexed_at": row["indexed_at"],
            }
        )
    return results