Initial commit
"""Search result shaping for sFetch."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import re
|
||||
|
||||
from database import search_images, search_pages, search_videos
|
||||
|
||||
SNIPPET_LENGTH = 200  # Max characters of page body text shown in a result preview.
|
||||
|
||||
|
||||
def _extract_terms(query: str) -> list[str]:
|
||||
terms = {term.lower() for term in re.findall(r"\w+", query, flags=re.UNICODE)}
|
||||
return sorted(terms, key=len, reverse=True)
|
||||
|
||||
|
||||
def _build_snippet(body_text: str) -> str:
    """Return the leading portion of *body_text* as a preview snippet.

    An empty (or all-whitespace) leading portion yields a placeholder
    message; bodies longer than ``SNIPPET_LENGTH`` get a trailing ellipsis.
    """
    preview = body_text[:SNIPPET_LENGTH].strip()
    if not preview:
        return "No preview available."
    truncated = len(body_text) > SNIPPET_LENGTH
    return preview + "..." if truncated else preview
|
||||
|
||||
|
||||
def _highlight_terms(snippet: str, query: str) -> str:
|
||||
safe_snippet = html.escape(snippet)
|
||||
for term in _extract_terms(query):
|
||||
pattern = re.compile(re.escape(html.escape(term)), flags=re.IGNORECASE)
|
||||
safe_snippet = pattern.sub(lambda match: f"<mark>{match.group(0)}</mark>", safe_snippet)
|
||||
return safe_snippet
|
||||
|
||||
|
||||
async def search(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Search indexed pages and shape each row into a result dict.

    Each result carries the page id/url, a display title (falling back to
    the url, then "Untitled"), a term-highlighted snippet of the body
    text, and the indexing timestamp.
    """
    rows = await search_pages(query=query, limit=limit, offset=offset)
    return [
        {
            "id": row["id"],
            "url": row["url"],
            "title": (row.get("title") or row.get("url") or "Untitled").strip(),
            "snippet": _highlight_terms(
                _build_snippet(row.get("body_text") or ""), query
            ),
            "indexed_at": row["indexed_at"],
        }
        for row in rows
    ]
|
||||
|
||||
|
||||
async def search_images_api(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Search indexed images and shape each row into a result dict.

    Results expose the image url, the page it was found on, its alt text
    (empty string when missing), and the indexing timestamp.
    """
    rows = await search_images(query=query, limit=limit, offset=offset)
    return [
        {
            "id": row["id"],
            "url": row["url"],
            "page_url": row["page_url"],
            "alt_text": row["alt_text"] or "",
            "indexed_at": row["indexed_at"],
        }
        for row in rows
    ]
|
||||
|
||||
|
||||
async def search_videos_api(query: str, limit: int = 10, offset: int = 0) -> list[dict]:
    """Search indexed videos and shape each row into a result dict.

    Results expose the video url, the page it was found on, a display
    title (falling back to "Video result"), and the indexing timestamp.
    """
    rows = await search_videos(query=query, limit=limit, offset=offset)
    return [
        {
            "id": row["id"],
            "url": row["url"],
            "page_url": row["page_url"],
            "title": (row.get("title") or "Video result").strip(),
            "indexed_at": row["indexed_at"],
        }
        for row in rows
    ]
|
||||