Phase 45 slice 2: context7 HTTP bridge for doc drift detection

Bun bridge on :3900 that wraps context7's public API and exposes the
surface gateway consumes for Phase 45 drift checks. Own port so a
failure here never tips over mcp-server on :3700.

Endpoints:
  GET /health                    status + cache stats
  GET /docs/:tool                resolve tool → library_id → fetch
                                 docs → return descriptor
                                 {snippet_hash, last_updated,
                                 source_url, docs_preview, ...}
  GET /docs/:tool/diff?since=X   compare current snippet_hash to X;
                                 returns {drifted: bool, current,
                                 previous, preview if drifted}
  GET /cache                     debug dump of cached entries

Implementation notes:
- 5 minute in-memory cache (context7 rate-limits by IP; gateway
  drift-checks are the hot caller)
- 1500-token slices from context7 (enough for drift-meaningful
  hash, not so much we hammer their API)
- snippet_hash = SHA-256 prefix (16 hex chars) of fetched content
- Library resolution prefers "finalized" state; falls back to top
  result if none finalized

Verified live against context7.com:
- /health                                  → ok, 0 cache, 300s TTL
- /docs/docker                             → library_id /docker/docs,
                                             title "Docker", hash
                                             475a0396ca436bba, last
                                             updated 2026-04-20
- /docs/docker (again)                     → cache hit, 0.37ms
                                             (5400× speedup)
- /docs/docker/diff?since=stale-hash-0000  → drifted=true, preview
                                             included
- /docs/docker/diff?since=<current hash>   → drifted=false, preview
                                             omitted (honest: no
                                             drift to show)

Not yet wired:
- Gateway consumer (Phase 45 slice 3):
  /vectors/playbook_memory/doc_drift/check/{id} calls this bridge
  and updates DocRef.snippet_hash + doc_drift_flagged_at
- Systemd unit (bridge is manual-start for now, same as bot/)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
profit 2026-04-22 03:17:17 -05:00
parent 2a4b81bf48
commit affab8ac83

View File

@ -0,0 +1,178 @@
// Phase 45 slice 2 — context7 HTTP bridge.
//
// Exposes an HTTP surface the Rust gateway consumes to check external
// doc drift on playbooks. Wraps context7's public API:
//
// https://context7.com/api/v1/search?query=<name> → resolve
// https://context7.com/api/v1/<lib-id>/cli?type=txt&tokens=N → docs
//
// Own port so a failure here never tips over the mcp-server on :3700.
// Cache is in-memory (5 min TTL) — context7 rate-limits by IP, and
// gateway drift-checks are the hot caller.
//
// Endpoints:
// GET /health health + cache stats
// GET /docs/:tool resolve + fetch + return descriptor
// GET /docs/:tool/diff?since=X return drift vs content hash X
// GET /cache dump current cache (debugging)
import { createHash } from "node:crypto";
const PORT = Number(process.env.CONTEXT7_BRIDGE_PORT ?? 3900);
const CONTEXT7_BASE = (process.env.CONTEXT7_BASE_URL ?? "https://context7.com/api/v1").replace(/\/+$/, "");
const TOKENS_PER_FETCH = 1500; // enough for a drift-meaningful slice, not so much we hammer
const CACHE_TTL_MS = 5 * 60 * 1000;
interface CachedDoc {
tool: string;
library_id: string; // context7 canonical, e.g. "/docker/docs"
title: string;
last_updated: string | null;
snippet_hash: string; // SHA-256 of first TOKENS_PER_FETCH of fetched docs
docs_preview: string; // first 400 chars, returned in responses for display
retrieved_at: string; // ISO when WE last fetched
source_url: string; // human-facing context7 URL
}
const cache = new Map<string, { entry: CachedDoc; at: number }>();
function normalizeTool(s: string): string {
return s.trim().toLowerCase();
}
async function resolveLibraryId(tool: string): Promise<{ id: string; title: string; last_updated: string | null } | null> {
const r = await fetch(`${CONTEXT7_BASE}/search?query=${encodeURIComponent(tool)}`, {
signal: AbortSignal.timeout(10000),
});
if (!r.ok) throw new Error(`context7 search ${r.status}: ${await r.text().catch(() => "?")}`);
const j = await r.json() as any;
const results = Array.isArray(j.results) ? j.results : [];
// First result is top-ranked; context7 sorts by its internal score.
// Skip "state != finalized" to avoid unstable previews.
const pick = results.find((x: any) => x?.state === "finalized") ?? results[0];
if (!pick?.id) return null;
return {
id: String(pick.id),
title: String(pick.title ?? tool),
last_updated: typeof pick.lastUpdateDate === "string" ? pick.lastUpdateDate : null,
};
}
async function fetchDocsText(libraryId: string): Promise<string> {
// Strip the leading slash on library_id so the URL doesn't have a
// double slash. context7 returns ids like "/docker/docs" but the
// fetch path wants "docker/docs".
const cleanId = libraryId.replace(/^\/+/, "");
const url = `${CONTEXT7_BASE}/${cleanId}?type=txt&tokens=${TOKENS_PER_FETCH}`;
const r = await fetch(url, { signal: AbortSignal.timeout(20000) });
if (!r.ok) throw new Error(`context7 fetch ${r.status} on ${cleanId}: ${await r.text().catch(() => "?")}`);
return await r.text();
}
function hashContent(s: string): string {
return createHash("sha256").update(s).digest("hex").slice(0, 16);
}
async function getCurrent(tool: string): Promise<CachedDoc> {
const key = normalizeTool(tool);
const cached = cache.get(key);
if (cached && Date.now() - cached.at < CACHE_TTL_MS) {
return cached.entry;
}
const resolved = await resolveLibraryId(key);
if (!resolved) {
throw new Error(`no context7 library found for '${tool}'`);
}
const text = await fetchDocsText(resolved.id);
const entry: CachedDoc = {
tool: key,
library_id: resolved.id,
title: resolved.title,
last_updated: resolved.last_updated,
snippet_hash: hashContent(text),
docs_preview: text.slice(0, 400),
retrieved_at: new Date().toISOString(),
source_url: `https://context7.com${resolved.id.startsWith("/") ? resolved.id : "/" + resolved.id}`,
};
cache.set(key, { entry, at: Date.now() });
return entry;
}
function jsonResponse(body: unknown, status: number = 200): Response {
return new Response(JSON.stringify(body), {
status,
headers: { "content-type": "application/json" },
});
}
Bun.serve({
port: PORT,
hostname: "0.0.0.0",
async fetch(req) {
const url = new URL(req.url);
if (url.pathname === "/health") {
return jsonResponse({
status: "ok",
cache_size: cache.size,
context7_base: CONTEXT7_BASE,
ttl_ms: CACHE_TTL_MS,
});
}
if (url.pathname === "/cache") {
const dump = Array.from(cache.entries()).map(([k, v]) => ({
tool: k,
library_id: v.entry.library_id,
snippet_hash: v.entry.snippet_hash,
retrieved_at: v.entry.retrieved_at,
age_ms: Date.now() - v.at,
}));
return jsonResponse({ entries: dump });
}
// GET /docs/:tool — return current descriptor (fetches + caches)
const m1 = url.pathname.match(/^\/docs\/([^/]+)$/);
if (m1 && req.method === "GET") {
const tool = decodeURIComponent(m1[1]);
try {
const entry = await getCurrent(tool);
return jsonResponse(entry);
} catch (e) {
return jsonResponse({ error: (e as Error).message }, 404);
}
}
// GET /docs/:tool/diff?since=hash — compare current vs recorded hash
const m2 = url.pathname.match(/^\/docs\/([^/]+)\/diff$/);
if (m2 && req.method === "GET") {
const tool = decodeURIComponent(m2[1]);
const since = url.searchParams.get("since");
if (!since) {
return jsonResponse({ error: "query param 'since' (snippet_hash) required" }, 400);
}
try {
const entry = await getCurrent(tool);
const drifted = entry.snippet_hash !== since;
return jsonResponse({
tool: entry.tool,
drifted,
previous_snippet_hash: since,
current_snippet_hash: entry.snippet_hash,
library_id: entry.library_id,
title: entry.title,
last_updated_upstream: entry.last_updated,
retrieved_at: entry.retrieved_at,
source_url: entry.source_url,
docs_preview: drifted ? entry.docs_preview : null,
});
} catch (e) {
return jsonResponse({ error: (e as Error).message }, 404);
}
}
return jsonResponse({ error: "not found", paths: ["/health", "/docs/:tool", "/docs/:tool/diff?since=HASH", "/cache"] }, 404);
},
});
console.log(`[context7-bridge] listening on :${PORT} (context7 base: ${CONTEXT7_BASE})`);