"""Search result shaping for sFetch.""" from __future__ import annotations import html import re from database import search_images, search_pages, search_videos SNIPPET_LENGTH = 200 def _extract_terms(query: str) -> list[str]: terms = {term.lower() for term in re.findall(r"\w+", query, flags=re.UNICODE)} return sorted(terms, key=len, reverse=True) def _build_snippet(body_text: str) -> str: snippet = body_text[:SNIPPET_LENGTH].strip() if not snippet: return "No preview available." if len(body_text) > SNIPPET_LENGTH: return f"{snippet}..." return snippet def _highlight_terms(snippet: str, query: str) -> str: safe_snippet = html.escape(snippet) for term in _extract_terms(query): pattern = re.compile(re.escape(html.escape(term)), flags=re.IGNORECASE) safe_snippet = pattern.sub(lambda match: f"{match.group(0)}", safe_snippet) return safe_snippet async def search(query: str, limit: int = 10, offset: int = 0) -> list[dict]: rows = await search_pages(query=query, limit=limit, offset=offset) results: list[dict] = [] for row in rows: title = (row.get("title") or row.get("url") or "Untitled").strip() body_text = row.get("body_text") or "" snippet = _highlight_terms(_build_snippet(body_text), query) results.append( { "id": row["id"], "url": row["url"], "title": title, "snippet": snippet, "indexed_at": row["indexed_at"], } ) return results async def search_images_api(query: str, limit: int = 10, offset: int = 0) -> list[dict]: rows = await search_images(query=query, limit=limit, offset=offset) results: list[dict] = [] for row in rows: results.append( { "id": row["id"], "url": row["url"], "page_url": row["page_url"], "alt_text": row["alt_text"] or "", "indexed_at": row["indexed_at"], } ) return results async def search_videos_api(query: str, limit: int = 10, offset: int = 0) -> list[dict]: rows = await search_videos(query=query, limit=limit, offset=offset) results: list[dict] = [] for row in rows: title = (row.get("title") or "Video result").strip() results.append( { "id": row["id"], "url": row["url"], "page_url": row["page_url"], "title": title, "indexed_at": row["indexed_at"], } ) return results