initial commit

This commit is contained in:
Ned Halksworth
2026-05-04 19:31:46 +01:00
commit e0f2eedcd9
14 changed files with 3718 additions and 0 deletions
+402
View File
@@ -0,0 +1,402 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>sFetch</title>
<script src="https://cdn.tailwindcss.com"></script>
<script>
// Runtime configuration for the Tailwind build loaded from the CDN <script> above.
// Everything lives under `theme.extend`, Tailwind's key for adding to the default theme.
tailwind.config = {
theme: {
extend: {
// Project palette. The markup references these as `<utility>-sfetch-<name>`,
// e.g. `text-sfetch-muted`, `border-sfetch-border`, `bg-sfetch-orange`.
colors: {
sfetch: {
// Same hex value as the `body` background in the <style> block.
bg: "#f8fafc",
surface: "#ffffff",
surfaceSoft: "#f1f5f9",
// Same hex value as the `body` text colour in the <style> block.
ink: "#202124",
muted: "#5f6368",
border: "#dadce0",
blue: "#1a73e8",
orange: "#de5833",
green: "#0b8043",
},
},
// Custom shadows: `shadow-search` is applied to the focused search box,
// `shadow-panel` to the index panel and the crawl modal.
boxShadow: {
search: "0 2px 8px rgba(60, 64, 67, 0.14), 0 1px 3px rgba(60, 64, 67, 0.12)",
panel: "0 16px 40px rgba(15, 23, 42, 0.08)",
},
},
},
};
</script>
<style>
/* Hand-written rules that complement the Tailwind utility classes used in the markup. */

/* The page declares a light colour scheme only. */
:root {
color-scheme: light;
}
/* Base page colours; the hex values equal `sfetch.bg` and `sfetch.ink` in the Tailwind config. */
body {
background: #f8fafc;
color: #202124;
font-family: Arial, Helvetica, sans-serif;
}
/* Wordmark styling shared by the header link and the main heading. */
.brand {
font-family: Arial, Helvetica, sans-serif;
font-weight: 700;
letter-spacing: 0;
}
/* One colour per letter: each wordmark is six <span>s spelling s-F-e-t-c-h. */
/* NOTE(review): #188038 and #fbbc04 are not part of the `sfetch` palette in the Tailwind config. */
.brand span:nth-child(1) { color: #de5833; }
.brand span:nth-child(2) { color: #1a73e8; }
.brand span:nth-child(3) { color: #188038; }
.brand span:nth-child(4) { color: #fbbc04; }
.brand span:nth-child(5) { color: #1a73e8; }
.brand span:nth-child(6) { color: #de5833; }
/* Toggled on <body> by setModalOpen() in the page script; hides overflow while the modal is open. */
.modal-open {
overflow: hidden;
}
</style>
</head>
<body class="min-h-screen">
<main class="flex min-h-screen flex-col">
<header class="flex items-center justify-between px-5 py-4 text-sm text-sfetch-muted sm:px-8">
<a href="./index.html" class="brand text-2xl" aria-label="sFetch home">
<span>s</span><span>F</span><span>e</span><span>t</span><span>c</span><span>h</span>
</a>
<button
id="openCrawlerModal"
class="rounded-full border border-sfetch-border bg-white px-4 py-2 font-medium text-sfetch-ink transition hover:border-sfetch-orange hover:text-sfetch-orange"
>
Index tools
</button>
</header>
<section class="mx-auto flex w-full max-w-5xl flex-1 flex-col items-center justify-center px-5 pb-24 pt-10">
<h1 class="brand text-center text-6xl leading-none sm:text-7xl">
<span>s</span><span>F</span><span>e</span><span>t</span><span>c</span><span>h</span>
</h1>
<form id="searchForm" class="mt-9 w-full max-w-2xl">
<label
for="searchInput"
class="flex min-h-14 items-center gap-3 rounded-full border border-sfetch-border bg-white px-5 transition focus-within:border-transparent focus-within:shadow-search"
>
<svg class="h-5 w-5 shrink-0 text-sfetch-muted" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" aria-hidden="true">
<circle cx="11" cy="11" r="6"></circle>
<path d="M20 20L16.65 16.65"></path>
</svg>
<input
id="searchInput"
type="text"
autocomplete="off"
placeholder="Search sFetch"
class="w-full bg-transparent text-base text-sfetch-ink outline-none placeholder:text-sfetch-muted sm:text-lg"
/>
</label>
<div class="mt-6 flex flex-wrap items-center justify-center gap-3">
<button
type="submit"
class="rounded-md bg-sfetch-blue px-5 py-2.5 text-sm font-medium text-white transition hover:bg-[#1558b0]"
>
sFetch Search
</button>
<button
type="button"
data-search-type="image"
class="rounded-md border border-sfetch-border bg-white px-5 py-2.5 text-sm font-medium text-sfetch-ink transition hover:border-sfetch-blue hover:text-sfetch-blue"
>
Images
</button>
<button
type="button"
data-search-type="video"
class="rounded-md border border-sfetch-border bg-white px-5 py-2.5 text-sm font-medium text-sfetch-ink transition hover:border-sfetch-blue hover:text-sfetch-blue"
>
Videos
</button>
</div>
</form>
<section class="mt-12 w-full max-w-3xl rounded-lg border border-sfetch-border bg-white p-4 shadow-panel" aria-label="Index controls">
<div class="flex flex-col gap-4 sm:flex-row sm:items-center sm:justify-between">
<div>
<p class="text-xs font-semibold uppercase text-sfetch-orange">Index</p>
<p id="statsSummary" class="mt-1 text-sm text-sfetch-muted">Checking index...</p>
</div>
<div class="flex flex-wrap gap-2">
<button
id="seedTopSites"
class="rounded-md bg-sfetch-orange px-4 py-2 text-sm font-medium text-white transition hover:bg-[#c44724]"
>
Seed top 1000
</button>
<button
id="openCrawlerModalSecondary"
class="rounded-md border border-sfetch-border bg-white px-4 py-2 text-sm font-medium text-sfetch-ink transition hover:border-sfetch-orange hover:text-sfetch-orange"
>
Custom crawl
</button>
</div>
</div>
<div class="mt-4 h-2 overflow-hidden rounded-full bg-sfetch-surfaceSoft">
<div id="seedProgress" class="h-full w-0 bg-sfetch-orange transition-all duration-300"></div>
</div>
<p id="seedStatus" class="mt-3 min-h-5 text-sm text-sfetch-muted">Top-site seed status unavailable.</p>
</section>
</section>
<footer class="border-t border-sfetch-border bg-white px-5 py-4 text-center text-xs text-sfetch-muted">
&copy; 2026 sFetch
</footer>
</main>
<div
id="crawlerModal"
class="pointer-events-none fixed inset-0 z-30 flex items-center justify-center bg-slate-900/35 px-4 opacity-0 transition"
aria-hidden="true"
>
<div class="w-full max-w-xl rounded-lg border border-sfetch-border bg-white p-5 shadow-panel">
<div class="flex items-center justify-between gap-4 border-b border-sfetch-border pb-4">
<h2 class="text-lg font-semibold text-sfetch-ink">Custom crawl</h2>
<button
id="closeCrawlerModal"
class="flex h-9 w-9 items-center justify-center rounded-full text-sfetch-muted transition hover:bg-sfetch-surfaceSoft hover:text-sfetch-ink"
aria-label="Close crawler modal"
>
X
</button>
</div>
<form id="crawlerForm" class="mt-5 space-y-4">
<div>
<label for="seedUrls" class="mb-2 block text-sm font-medium text-sfetch-ink">Seed URLs</label>
<textarea
id="seedUrls"
rows="6"
placeholder="https://example.com&#10;https://docs.python.org/"
class="w-full rounded-md border border-sfetch-border bg-white px-3 py-2 text-sm text-sfetch-ink outline-none transition focus:border-sfetch-blue focus:ring-2 focus:ring-blue-100"
></textarea>
</div>
<div class="grid gap-4 sm:grid-cols-2">
<div>
<label for="crawlDepth" class="mb-2 block text-sm font-medium text-sfetch-ink">Max depth</label>
<input
id="crawlDepth"
type="number"
min="0"
max="5"
value="2"
class="w-full rounded-md border border-sfetch-border bg-white px-3 py-2 text-sm text-sfetch-ink outline-none transition focus:border-sfetch-blue focus:ring-2 focus:ring-blue-100"
/>
</div>
<div>
<label for="maxPagesPerDomain" class="mb-2 block text-sm font-medium text-sfetch-ink">Pages per domain</label>
<input
id="maxPagesPerDomain"
type="number"
min="1"
max="500"
value="50"
class="w-full rounded-md border border-sfetch-border bg-white px-3 py-2 text-sm text-sfetch-ink outline-none transition focus:border-sfetch-blue focus:ring-2 focus:ring-blue-100"
/>
</div>
</div>
<label class="flex items-center gap-3 text-sm text-sfetch-ink">
<input id="sameDomainOnly" type="checkbox" checked class="h-4 w-4 rounded border-sfetch-border text-sfetch-blue" />
Same domain only
</label>
<p id="crawlerStatus" class="min-h-5 text-sm text-sfetch-muted"></p>
<div class="flex flex-col-reverse gap-3 sm:flex-row sm:justify-end">
<button
type="button"
id="cancelCrawler"
class="rounded-md border border-sfetch-border bg-white px-4 py-2 text-sm font-medium text-sfetch-ink transition hover:bg-sfetch-surfaceSoft"
>
Cancel
</button>
<button
type="submit"
class="rounded-md bg-sfetch-blue px-4 py-2 text-sm font-medium text-white transition hover:bg-[#1558b0]"
>
Launch crawl
</button>
</div>
</form>
</div>
</div>
<script>
// Base URL prefixed to every backend request made by this page.
const API_BASE = "http://localhost:8000";

// Shorthand for the ID lookups below; each ID matches an element in the markup above.
const elementById = (id) => document.getElementById(id);

// Search box.
const searchForm = elementById("searchForm");
const searchInput = elementById("searchInput");

// Index panel: page-count summary, top-site seed button, progress bar and status line.
const statsSummary = elementById("statsSummary");
const seedTopSites = elementById("seedTopSites");
const seedProgress = elementById("seedProgress");
const seedStatus = elementById("seedStatus");

// Custom-crawl modal: the two openers, the two closers and the overlay itself.
const openCrawlerModal = elementById("openCrawlerModal");
const openCrawlerModalSecondary = elementById("openCrawlerModalSecondary");
const closeCrawlerModal = elementById("closeCrawlerModal");
const cancelCrawler = elementById("cancelCrawler");
const crawlerModal = elementById("crawlerModal");

// Custom-crawl form and its fields.
const crawlerForm = elementById("crawlerForm");
const crawlerStatus = elementById("crawlerStatus");
const seedUrlsField = elementById("seedUrls");
const crawlDepthField = elementById("crawlDepth");
const maxPagesPerDomainField = elementById("maxPagesPerDomain");
const sameDomainOnlyField = elementById("sameDomainOnly");
/**
 * Navigate to the results page for the text currently in the search box.
 *
 * A blank (whitespace-only) query does not navigate; the search box is
 * focused instead.
 *
 * @param {string} [type="all"] Result type. Any value other than "all" is
 *   forwarded as the `type` query parameter.
 * @returns {void}
 */
function runSearch(type = "all") {
  const trimmedQuery = searchInput.value.trim();
  if (trimmedQuery === "") {
    searchInput.focus();
    return;
  }

  const queryString = new URLSearchParams();
  queryString.append("q", trimmedQuery);
  if (type !== "all") {
    queryString.set("type", type);
  }

  window.location.href = `results.html?${queryString}`;
}
// Element that held focus when the modal was opened; focus is handed back to it on close.
let crawlerModalOpener = null;

/**
 * Show or hide the custom-crawl modal.
 *
 * Besides toggling the visual classes, this keeps the dialog usable from the
 * keyboard and by assistive technology:
 *   - while hidden, the overlay is marked `inert` so its controls cannot be
 *     reached with Tab;
 *   - on close, focus leaves the overlay before `aria-hidden="true"` is set
 *     and goes back to the element that opened the modal.
 *
 * @param {boolean} isOpen `true` to open the modal, `false` to close it.
 * @returns {void}
 */
function setModalOpen(isOpen) {
  if (isOpen) {
    // Remember the opener, but never record something inside the modal itself
    // (which would happen if the modal were "opened" while already open).
    const active = document.activeElement;
    if (active && !crawlerModal.contains(active)) {
      crawlerModalOpener = active;
    }
  } else {
    if (crawlerModalOpener && typeof crawlerModalOpener.focus === "function") {
      crawlerModalOpener.focus();
    }
    crawlerModalOpener = null;

    // If focus could not be moved (e.g. the opener was not focusable), make
    // sure it does not stay on a control that is about to be hidden.
    const lingering = document.activeElement;
    if (lingering && crawlerModal.contains(lingering) && typeof lingering.blur === "function") {
      lingering.blur();
    }
  }

  crawlerModal.classList.toggle("opacity-0", !isOpen);
  crawlerModal.classList.toggle("pointer-events-none", !isOpen);
  crawlerModal.inert = !isOpen;
  crawlerModal.setAttribute("aria-hidden", String(!isOpen));
  // `modal-open` hides page overflow (see the <style> block) while the modal is up.
  document.body.classList.toggle("modal-open", isOpen);

  if (isOpen) {
    seedUrlsField.focus();
  } else {
    crawlerStatus.textContent = "";
  }
}
/**
 * Fetch index statistics from `${API_BASE}/stats` and show them in the
 * summary line of the index panel.
 *
 * The HTTP status is checked before the body is parsed, so an error page
 * that is not JSON is never parsed. A successful response whose
 * `total_pages` is missing, null or non-numeric is shown as 0 pages instead
 * of being misreported as an unreachable backend.
 *
 * Never rejects: any request or parsing failure is shown as
 * "Backend unavailable".
 *
 * @returns {Promise<void>}
 */
async function refreshStats() {
  try {
    const response = await fetch(`${API_BASE}/stats`);
    if (!response.ok) {
      throw new Error(`Stats request failed with HTTP ${response.status}`);
    }
    const stats = await response.json();

    // Coerce defensively, matching how refreshSeedStatus() treats its counters.
    const reportedTotal = Number(stats.total_pages);
    const totalPages = Number.isFinite(reportedTotal) ? reportedTotal : 0;

    const lastIndexed = stats.last_indexed_at ? `, last indexed ${stats.last_indexed_at}` : "";
    statsSummary.textContent = `${totalPages.toLocaleString()} pages${lastIndexed}`;
  } catch {
    // This function is polled on a timer, so failures surface in the UI only.
    statsSummary.textContent = "Backend unavailable";
  }
}
/**
 * Poll `${API_BASE}/crawl/top-sites/status` and mirror the result in the
 * seed progress bar and the status line beneath it.
 *
 * Progress rules:
 *   - no positive `total`           -> 0%
 *   - `state` is "complete"         -> 100%
 *   - otherwise                     -> indexed / total, capped at 96%
 *
 * Never rejects: any failure resets the bar to 0% and shows
 * "Top-site seed status unavailable."
 *
 * @returns {Promise<void>}
 */
async function refreshSeedStatus() {
  try {
    const reply = await fetch(`${API_BASE}/crawl/top-sites/status`);
    const status = await reply.json();
    if (!reply.ok) {
      throw new Error();
    }

    const total = Number(status.total || 0);
    const indexed = Number(status.indexed || 0);

    let percent = 0;
    if (total > 0) {
      if (status.state === "complete") {
        percent = 100;
      } else {
        // Capped below 100 so the bar only fills once the state says "complete".
        percent = Math.min(96, (indexed / total) * 100);
      }
    }
    seedProgress.style.width = `${percent}%`;

    const headline = status.message || "Idle";
    const sourceSuffix = status.source ? ` Source: ${status.source}` : "";
    seedStatus.textContent = `${headline}${sourceSuffix}`;
  } catch {
    seedProgress.style.width = "0%";
    seedStatus.textContent = "Top-site seed status unavailable.";
  }
}
/**
 * Ask the backend to queue the top-site seed crawl (POST
 * `${API_BASE}/crawl/top-sites`) and report the outcome in the seed status
 * line.
 *
 * The seed button is disabled and relabelled for the duration of the
 * request, and restored 1.2 s after it settles whether it succeeded or not.
 * On success the seed status is refreshed immediately. Never rejects.
 *
 * @returns {Promise<void>}
 */
async function seedTopSitesNow() {
  const fallbackMessage = "Unable to queue top-site seed.";
  const restoreButton = () => {
    seedTopSites.disabled = false;
    seedTopSites.textContent = "Seed top 1000";
  };

  seedTopSites.disabled = true;
  seedTopSites.textContent = "Queued";

  try {
    const reply = await fetch(`${API_BASE}/crawl/top-sites`, { method: "POST" });
    // A body that is not JSON is treated as an empty object.
    const body = await reply.json().catch(() => ({}));
    if (reply.ok) {
      seedStatus.textContent = "Top-site seed queued.";
      await refreshSeedStatus();
    } else {
      throw new Error(body.detail || fallbackMessage);
    }
  } catch (failure) {
    seedStatus.textContent = failure.message || fallbackMessage;
  } finally {
    setTimeout(restoreButton, 1200);
  }
}
/**
 * Submit handler for the custom-crawl form.
 *
 * Reads one seed URL per non-blank line of the textarea, combines it with
 * the depth / pages-per-domain / same-domain settings and POSTs the result
 * as JSON to `${API_BASE}/crawl`. Progress and errors are written to the
 * status line inside the modal. On success the modal closes and the stats
 * summary refreshes after 900 ms. Never rejects.
 *
 * @param {Event} event The form's submit event; its default action is cancelled.
 * @returns {Promise<void>}
 */
async function handleCrawlerSubmit(event) {
  event.preventDefault();

  const seedUrls = [];
  for (const line of seedUrlsField.value.split("\n")) {
    const candidate = line.trim();
    if (candidate) {
      seedUrls.push(candidate);
    }
  }
  if (seedUrls.length === 0) {
    crawlerStatus.textContent = "Add at least one seed URL.";
    return;
  }

  // Blank or non-numeric fields fall back to depth 0 and 1 page per domain.
  const parsedDepth = Number.parseInt(crawlDepthField.value, 10);
  const parsedPageLimit = Number.parseInt(maxPagesPerDomainField.value, 10);
  const crawlRequest = {
    seed_urls: seedUrls,
    max_depth: parsedDepth || 0,
    max_pages_per_domain: parsedPageLimit || 1,
    same_domain_only: sameDomainOnlyField.checked,
  };

  const fallbackMessage = "Unable to start the crawler.";
  crawlerStatus.textContent = "Starting crawl...";

  try {
    const reply = await fetch(`${API_BASE}/crawl`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(crawlRequest),
    });
    // A body that is not JSON is treated as an empty object.
    const body = await reply.json().catch(() => ({}));
    if (!reply.ok) {
      throw new Error(body.detail || fallbackMessage);
    }

    const pluralSuffix = seedUrls.length === 1 ? "" : "s";
    crawlerStatus.textContent = `Crawl started for ${seedUrls.length} seed URL${pluralSuffix}.`;

    // Leave the confirmation visible briefly before closing the modal.
    setTimeout(() => {
      setModalOpen(false);
      refreshStats();
    }, 900);
  } catch (failure) {
    crawlerStatus.textContent = failure.message || fallbackMessage;
  }
}
// --- Search -----------------------------------------------------------------
// Submitting the form (Enter or the "sFetch Search" button) runs an all-types
// search without the browser's default form navigation.
searchForm.addEventListener("submit", (event) => {
  event.preventDefault();
  runSearch("all");
});
// The Images / Videos buttons carry their result type in `data-search-type`.
document.querySelectorAll("[data-search-type]").forEach((button) => {
  button.addEventListener("click", () => runSearch(button.dataset.searchType || "all"));
});

// --- Custom-crawl modal -----------------------------------------------------
openCrawlerModal.addEventListener("click", () => setModalOpen(true));
openCrawlerModalSecondary.addEventListener("click", () => setModalOpen(true));
closeCrawlerModal.addEventListener("click", () => setModalOpen(false));
cancelCrawler.addEventListener("click", () => setModalOpen(false));
// A click on the dimmed backdrop itself (not on the panel inside it) closes the modal.
crawlerModal.addEventListener("click", (event) => {
  if (event.target === crawlerModal) {
    setModalOpen(false);
  }
});
// Escape dismisses the modal, as keyboard users expect from a dialog.
// `aria-hidden` is the open/closed flag that setModalOpen() maintains.
document.addEventListener("keydown", (event) => {
  const modalIsOpen = crawlerModal.getAttribute("aria-hidden") === "false";
  if (event.key === "Escape" && modalIsOpen) {
    setModalOpen(false);
  }
});

// --- Index panel ------------------------------------------------------------
seedTopSites.addEventListener("click", seedTopSitesNow);
crawlerForm.addEventListener("submit", handleCrawlerSubmit);

// Populate the panel immediately, then poll: stats every 10 s, seed status every 5 s.
refreshStats();
refreshSeedStatus();
setInterval(refreshStats, 10000);
setInterval(refreshSeedStatus, 5000);
</script>
</body>
</html>