From affab8ac8329bafd38ed3beb83d06c85424802d5 Mon Sep 17 00:00:00 2001 From: profit Date: Wed, 22 Apr 2026 03:17:17 -0500 Subject: [PATCH] Phase 45 slice 2: context7 HTTP bridge for doc drift detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bun bridge on :3900 that wraps context7's public API and exposes the surface the gateway consumes for Phase 45 drift checks. Own port so a failure here never tips over mcp-server on :3700. Endpoints: GET /health status + cache stats GET /docs/:tool resolve tool → library_id → fetch docs → return descriptor {snippet_hash, last_updated, source_url, docs_preview, ...} GET /docs/:tool/diff?since=X compare current snippet_hash to X; returns {drifted: bool, current, previous, preview if drifted} GET /cache debug dump of cached entries Implementation notes: - 5 minute in-memory cache (context7 rate-limits by IP; gateway drift-checks are the hot caller) - 1500-token slices from context7 (enough for drift-meaningful hash, not so much we hammer their API) - snippet_hash = SHA-256 prefix (16 hex chars) of fetched content - Library resolution prefers "finalized" state; falls back to top result if none finalized Verified live against context7.com: - /health → ok, 0 cache, 300s TTL - /docs/docker → library_id /docker/docs, title "Docker", hash 475a0396ca436bba, last updated 2026-04-20 - /docs/docker (again) → cache hit, 0.37ms (5400× speedup) - /docs/docker/diff?since=stale-hash-0000 → drifted=true, preview included - /docs/docker/diff?since=<current-hash> → drifted=false, preview omitted (honest: no drift to show) Not yet wired: - Gateway consumer (Phase 45 slice 3): /vectors/playbook_memory/doc_drift/check/{id} calls this bridge and updates DocRef.snippet_hash + doc_drift_flagged_at - Systemd unit (bridge is manual-start for now, same as bot/) Co-Authored-By: Claude Opus 4.7 (1M context) --- mcp-server/context7_bridge.ts | 178 ++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode
// File: mcp-server/context7_bridge.ts
//
// Phase 45 slice 2 — context7 HTTP bridge.
//
// Exposes an HTTP surface the Rust gateway consumes to check external
// doc drift on playbooks. Wraps context7's public API:
//
//   https://context7.com/api/v1/search?query=<tool>              → resolve
//   https://context7.com/api/v1/<library_id>?type=txt&tokens=N   → docs
//
// Own port so a failure here never tips over the mcp-server on :3700.
// Cache is in-memory (5 min TTL) — context7 rate-limits by IP, and
// gateway drift-checks are the hot caller.
//
// Endpoints:
//   GET /health                     health + cache stats
//   GET /docs/:tool                 resolve + fetch + return descriptor
//   GET /docs/:tool/diff?since=X    return drift vs content hash X
//   GET /cache                      dump current cache (debugging)
//
// Error statuses on the /docs routes:
//   400  malformed or empty :tool, or missing 'since'
//   404  context7 returned no library for the tool
//   502  context7 itself failed (non-2xx, timeout, network, bad JSON)

import { createHash } from "node:crypto";

const PORT = Number(process.env.CONTEXT7_BRIDGE_PORT ?? 3900);
// Trailing slashes are trimmed so URL building can always insert exactly one.
const CONTEXT7_BASE = (process.env.CONTEXT7_BASE_URL ?? "https://context7.com/api/v1").replace(/\/+$/, "");
const TOKENS_PER_FETCH = 1500; // enough for a drift-meaningful slice, not so much we hammer
const CACHE_TTL_MS = 5 * 60 * 1000;
const SEARCH_TIMEOUT_MS = 10000;
const DOCS_TIMEOUT_MS = 20000;
const PREVIEW_CHARS = 400;

const DOCS_ROUTE = /^\/docs\/([^/]+)$/;
const DIFF_ROUTE = /^\/docs\/([^/]+)\/diff$/;

interface CachedDoc {
  tool: string;
  library_id: string; // context7 canonical, e.g. "/docker/docs"
  title: string;
  last_updated: string | null;
  snippet_hash: string; // first 16 hex chars of SHA-256 over the fetched docs text
  docs_preview: string; // first 400 chars, returned in responses for display
  retrieved_at: string; // ISO when WE last fetched
  source_url: string; // human-facing context7 URL
}

// A cached descriptor plus the Date.now() timestamp at which it was stored.
interface CacheSlot {
  entry: CachedDoc;
  at: number;
}

const cache = new Map<string, CacheSlot>();

// Thrown when the context7 search yields no usable library for a tool.
// Kept distinct from other errors so the HTTP layer can answer 404 for
// "no such library" and 502 for everything that went wrong upstream.
class LibraryNotFoundError extends Error {
  constructor(message: string) {
    super(message);
    this.name = "LibraryNotFoundError";
  }
}

// Narrowing helper for untrusted JSON: true for any non-null object.
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

// Canonical cache key for a tool name: trimmed and lower-cased.
function normalizeTool(s: string): string {
  return s.trim().toLowerCase();
}

// Resolves a tool name to a context7 library via the search endpoint.
// Returns null when the search has no result carrying an id.
// Throws when the request fails, times out, or returns a non-2xx status.
async function resolveLibraryId(tool: string): Promise<{ id: string; title: string; last_updated: string | null } | null> {
  const r = await fetch(`${CONTEXT7_BASE}/search?query=${encodeURIComponent(tool)}`, {
    signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS),
  });
  if (!r.ok) throw new Error(`context7 search ${r.status}: ${await r.text().catch(() => "?")}`);
  const payload: unknown = await r.json();
  const results: unknown[] = isRecord(payload) && Array.isArray(payload.results) ? payload.results : [];
  // Prefer the first result whose state is "finalized"; if none is,
  // fall back to the first result in the list.
  const pick = results.find((x) => isRecord(x) && x.state === "finalized") ?? results[0];
  if (!isRecord(pick) || !pick.id) return null;
  return {
    id: String(pick.id),
    title: String(pick.title ?? tool),
    last_updated: typeof pick.lastUpdateDate === "string" ? pick.lastUpdateDate : null,
  };
}

// Fetches the plain-text docs slice (TOKENS_PER_FETCH tokens) for a library.
// Throws when the request fails, times out, or returns a non-2xx status.
async function fetchDocsText(libraryId: string): Promise<string> {
  // Strip the leading slash on library_id so the URL doesn't have a
  // double slash. context7 returns ids like "/docker/docs" but the
  // fetch path wants "docker/docs".
  const cleanId = libraryId.replace(/^\/+/, "");
  const url = `${CONTEXT7_BASE}/${cleanId}?type=txt&tokens=${TOKENS_PER_FETCH}`;
  const r = await fetch(url, { signal: AbortSignal.timeout(DOCS_TIMEOUT_MS) });
  if (!r.ok) throw new Error(`context7 fetch ${r.status} on ${cleanId}: ${await r.text().catch(() => "?")}`);
  return await r.text();
}

// Short content fingerprint: the first 16 hex chars of the SHA-256 digest.
function hashContent(s: string): string {
  return createHash("sha256").update(s).digest("hex").slice(0, 16);
}

// Drops cache entries older than the TTL so the map cannot grow without
// bound in a long-running process. Deleting during Map iteration is safe.
function pruneExpired(now: number): void {
  for (const [key, slot] of cache) {
    if (now - slot.at >= CACHE_TTL_MS) cache.delete(key);
  }
}

// Returns the current descriptor for a tool, served from cache while the
// entry is younger than CACHE_TTL_MS and otherwise re-resolved and re-fetched.
// Throws LibraryNotFoundError when the search finds no library; any other
// error comes from the upstream calls. Failures are never cached.
async function getCurrent(tool: string): Promise<CachedDoc> {
  const key = normalizeTool(tool);
  const cached = cache.get(key);
  if (cached && Date.now() - cached.at < CACHE_TTL_MS) {
    return cached.entry;
  }
  const resolved = await resolveLibraryId(key);
  if (!resolved) {
    throw new LibraryNotFoundError(`no context7 library found for '${tool}'`);
  }
  const text = await fetchDocsText(resolved.id);
  const entry: CachedDoc = {
    tool: key,
    library_id: resolved.id,
    title: resolved.title,
    last_updated: resolved.last_updated,
    snippet_hash: hashContent(text),
    docs_preview: text.slice(0, PREVIEW_CHARS),
    retrieved_at: new Date().toISOString(),
    source_url: `https://context7.com${resolved.id.startsWith("/") ? resolved.id : "/" + resolved.id}`,
  };
  const now = Date.now();
  pruneExpired(now);
  cache.set(key, { entry, at: now });
  return entry;
}

// Serializes body as JSON with the given status (default 200).
function jsonResponse(body: unknown, status: number = 200): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { "content-type": "application/json" },
  });
}

// Maps a getCurrent failure to an HTTP response: 404 only when the library
// genuinely does not exist, 502 for every upstream problem, so the caller
// can tell "nothing to check" apart from "try again later".
function errorResponse(e: unknown): Response {
  const message = e instanceof Error ? e.message : String(e);
  const status = e instanceof LibraryNotFoundError ? 404 : 502;
  return jsonResponse({ error: message }, status);
}

// Decodes the :tool path segment. Returns null when the percent-encoding
// is malformed (decodeURIComponent throws URIError) or when the decoded
// name is empty after normalization.
function decodeToolParam(raw: string | undefined): string | null {
  if (raw === undefined) return null;
  try {
    const decoded = decodeURIComponent(raw);
    return normalizeTool(decoded) === "" ? null : decoded;
  } catch {
    return null;
  }
}

Bun.serve({
  port: PORT,
  hostname: "0.0.0.0",
  async fetch(req: Request): Promise<Response> {
    const url = new URL(req.url);

    if (url.pathname === "/health") {
      return jsonResponse({
        status: "ok",
        cache_size: cache.size,
        context7_base: CONTEXT7_BASE,
        ttl_ms: CACHE_TTL_MS,
      });
    }

    if (url.pathname === "/cache") {
      const dump = Array.from(cache.entries()).map(([k, v]) => ({
        tool: k,
        library_id: v.entry.library_id,
        snippet_hash: v.entry.snippet_hash,
        retrieved_at: v.entry.retrieved_at,
        age_ms: Date.now() - v.at,
      }));
      return jsonResponse({ entries: dump });
    }

    // GET /docs/:tool — return current descriptor (fetches + caches)
    const docsMatch = DOCS_ROUTE.exec(url.pathname);
    if (docsMatch && req.method === "GET") {
      const tool = decodeToolParam(docsMatch[1]);
      if (tool === null) {
        return jsonResponse({ error: "invalid or empty tool name in path" }, 400);
      }
      try {
        return jsonResponse(await getCurrent(tool));
      } catch (e) {
        return errorResponse(e);
      }
    }

    // GET /docs/:tool/diff?since=hash — compare current vs recorded hash
    const diffMatch = DIFF_ROUTE.exec(url.pathname);
    if (diffMatch && req.method === "GET") {
      const tool = decodeToolParam(diffMatch[1]);
      if (tool === null) {
        return jsonResponse({ error: "invalid or empty tool name in path" }, 400);
      }
      const since = url.searchParams.get("since");
      if (!since) {
        return jsonResponse({ error: "query param 'since' (snippet_hash) required" }, 400);
      }
      try {
        const entry = await getCurrent(tool);
        const drifted = entry.snippet_hash !== since;
        return jsonResponse({
          tool: entry.tool,
          drifted,
          previous_snippet_hash: since,
          current_snippet_hash: entry.snippet_hash,
          library_id: entry.library_id,
          title: entry.title,
          last_updated_upstream: entry.last_updated,
          retrieved_at: entry.retrieved_at,
          source_url: entry.source_url,
          // The preview is only useful when there is drift to show.
          docs_preview: drifted ? entry.docs_preview : null,
        });
      } catch (e) {
        return errorResponse(e);
      }
    }

    return jsonResponse({ error: "not found", paths: ["/health", "/docs/:tool", "/docs/:tool/diff?since=HASH", "/cache"] }, 404);
  },
});

console.log(`[context7-bridge] listening on :${PORT} (context7 base: ${CONTEXT7_BASE})`);