Phase 45 slice 2: context7 HTTP bridge for doc drift detection
Bun bridge on :3900 that wraps context7's public API and exposes the
surface gateway consumes for Phase 45 drift checks. Own port so a
failure here never tips over mcp-server on :3700.
Endpoints:
GET /health status + cache stats
GET /docs/:tool resolve tool → library_id → fetch
docs → return descriptor
{snippet_hash, last_updated,
source_url, docs_preview, ...}
GET /docs/:tool/diff?since=X compare current snippet_hash to X;
returns {drifted: bool, current,
previous, preview if drifted}
GET /cache debug dump of cached entries
Implementation notes:
- 5 minute in-memory cache (context7 rate-limits by IP; gateway
drift-checks are the hot caller)
- 1500-token slices from context7 (enough for drift-meaningful
hash, not so much we hammer their API)
- snippet_hash = SHA-256 prefix (16 hex chars) of fetched content
- Library resolution prefers "finalized" state; falls back to top
result if none finalized
Verified live against context7.com:
- /health → ok, 0 cache, 300s TTL
- /docs/docker → library_id /docker/docs, title "Docker",
  hash 475a0396ca436bba, last updated 2026-04-20
- /docs/docker (again) → cache hit in 0.37ms (~5400× speedup)
- /docs/docker/diff?since=stale-hash-0000 → drifted=true, preview
included
- /docs/docker/diff?since=<current hash> → drifted=false; preview
  omitted, since there is no drift to show
Not yet wired:
- Gateway consumer (Phase 45 slice 3):
/vectors/playbook_memory/doc_drift/check/{id} calls this bridge
and updates DocRef.snippet_hash + doc_drift_flagged_at
- Systemd unit (bridge is manual-start for now, same as bot/)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2a4b81bf48
commit
affab8ac83
178
mcp-server/context7_bridge.ts
Normal file
178
mcp-server/context7_bridge.ts
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
// Phase 45 slice 2 — context7 HTTP bridge.
|
||||||
|
//
|
||||||
|
// Exposes an HTTP surface the Rust gateway consumes to check external
|
||||||
|
// doc drift on playbooks. Wraps context7's public API:
|
||||||
|
//
|
||||||
|
// https://context7.com/api/v1/search?query=<name> → resolve
|
||||||
|
// https://context7.com/api/v1/<lib-id>/cli?type=txt&tokens=N → docs
|
||||||
|
//
|
||||||
|
// Own port so a failure here never tips over the mcp-server on :3700.
|
||||||
|
// Cache is in-memory (5 min TTL) — context7 rate-limits by IP, and
|
||||||
|
// gateway drift-checks are the hot caller.
|
||||||
|
//
|
||||||
|
// Endpoints:
|
||||||
|
// GET /health health + cache stats
|
||||||
|
// GET /docs/:tool resolve + fetch + return descriptor
|
||||||
|
// GET /docs/:tool/diff?since=X return drift vs content hash X
|
||||||
|
// GET /cache dump current cache (debugging)
|
||||||
|
|
||||||
|
import { createHash } from "node:crypto";
|
||||||
|
|
||||||
|
// Listen port — own port (default :3900) so a failure here never tips
// over the mcp-server on :3700; overridable via env for tests.
const PORT = Number(process.env.CONTEXT7_BRIDGE_PORT ?? 3900);
// Upstream API root; trailing slashes are stripped so the URL joins
// below never produce a double slash.
const CONTEXT7_BASE = (process.env.CONTEXT7_BASE_URL ?? "https://context7.com/api/v1").replace(/\/+$/, "");
const TOKENS_PER_FETCH = 1500; // enough for a drift-meaningful slice, not so much we hammer
// context7 rate-limits by IP and the gateway drift-checker is the hot
// caller, so cache results for 5 minutes.
const CACHE_TTL_MS = 5 * 60 * 1000;
|
||||||
|
|
||||||
|
// Descriptor for one tool's fetched docs — the JSON shape returned by
// GET /docs/:tool and the unit stored in the in-memory cache below.
interface CachedDoc {
  tool: string;                // normalized (trimmed, lowercased) tool name — also the cache key
  library_id: string;          // context7 canonical, e.g. "/docker/docs"
  title: string;               // human-readable title from the search result
  last_updated: string | null; // upstream's lastUpdateDate when it was a string, else null
  snippet_hash: string;        // SHA-256 of first TOKENS_PER_FETCH of fetched docs
  docs_preview: string;        // first 400 chars, returned in responses for display
  retrieved_at: string;        // ISO when WE last fetched
  source_url: string;          // human-facing context7 URL
}

// tool key → { entry, at: Date.now() of the fetch }. Entries older
// than CACHE_TTL_MS are treated as misses by getCurrent().
const cache = new Map<string, { entry: CachedDoc; at: number }>();
|
||||||
|
|
||||||
|
function normalizeTool(s: string): string {
|
||||||
|
return s.trim().toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
async function resolveLibraryId(tool: string): Promise<{ id: string; title: string; last_updated: string | null } | null> {
|
||||||
|
const r = await fetch(`${CONTEXT7_BASE}/search?query=${encodeURIComponent(tool)}`, {
|
||||||
|
signal: AbortSignal.timeout(10000),
|
||||||
|
});
|
||||||
|
if (!r.ok) throw new Error(`context7 search ${r.status}: ${await r.text().catch(() => "?")}`);
|
||||||
|
const j = await r.json() as any;
|
||||||
|
const results = Array.isArray(j.results) ? j.results : [];
|
||||||
|
// First result is top-ranked; context7 sorts by its internal score.
|
||||||
|
// Skip "state != finalized" to avoid unstable previews.
|
||||||
|
const pick = results.find((x: any) => x?.state === "finalized") ?? results[0];
|
||||||
|
if (!pick?.id) return null;
|
||||||
|
return {
|
||||||
|
id: String(pick.id),
|
||||||
|
title: String(pick.title ?? tool),
|
||||||
|
last_updated: typeof pick.lastUpdateDate === "string" ? pick.lastUpdateDate : null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchDocsText(libraryId: string): Promise<string> {
|
||||||
|
// Strip the leading slash on library_id so the URL doesn't have a
|
||||||
|
// double slash. context7 returns ids like "/docker/docs" but the
|
||||||
|
// fetch path wants "docker/docs".
|
||||||
|
const cleanId = libraryId.replace(/^\/+/, "");
|
||||||
|
const url = `${CONTEXT7_BASE}/${cleanId}?type=txt&tokens=${TOKENS_PER_FETCH}`;
|
||||||
|
const r = await fetch(url, { signal: AbortSignal.timeout(20000) });
|
||||||
|
if (!r.ok) throw new Error(`context7 fetch ${r.status} on ${cleanId}: ${await r.text().catch(() => "?")}`);
|
||||||
|
return await r.text();
|
||||||
|
}
|
||||||
|
|
||||||
|
function hashContent(s: string): string {
|
||||||
|
return createHash("sha256").update(s).digest("hex").slice(0, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function getCurrent(tool: string): Promise<CachedDoc> {
|
||||||
|
const key = normalizeTool(tool);
|
||||||
|
const cached = cache.get(key);
|
||||||
|
if (cached && Date.now() - cached.at < CACHE_TTL_MS) {
|
||||||
|
return cached.entry;
|
||||||
|
}
|
||||||
|
const resolved = await resolveLibraryId(key);
|
||||||
|
if (!resolved) {
|
||||||
|
throw new Error(`no context7 library found for '${tool}'`);
|
||||||
|
}
|
||||||
|
const text = await fetchDocsText(resolved.id);
|
||||||
|
const entry: CachedDoc = {
|
||||||
|
tool: key,
|
||||||
|
library_id: resolved.id,
|
||||||
|
title: resolved.title,
|
||||||
|
last_updated: resolved.last_updated,
|
||||||
|
snippet_hash: hashContent(text),
|
||||||
|
docs_preview: text.slice(0, 400),
|
||||||
|
retrieved_at: new Date().toISOString(),
|
||||||
|
source_url: `https://context7.com${resolved.id.startsWith("/") ? resolved.id : "/" + resolved.id}`,
|
||||||
|
};
|
||||||
|
cache.set(key, { entry, at: Date.now() });
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
function jsonResponse(body: unknown, status: number = 200): Response {
|
||||||
|
return new Response(JSON.stringify(body), {
|
||||||
|
status,
|
||||||
|
headers: { "content-type": "application/json" },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// HTTP surface of the bridge. Routes, in match order:
//   GET /health                  — liveness + cache stats
//   GET /cache                   — debug dump of cached entries
//   GET /docs/:tool              — current descriptor (fetches + caches)
//   GET /docs/:tool/diff?since=X — drift check vs recorded hash X
// Anything else falls through to a 404 listing the known paths.
Bun.serve({
  port: PORT,
  // Bind all interfaces — the Rust gateway calls in over the network.
  hostname: "0.0.0.0",
  async fetch(req) {
    const url = new URL(req.url);

    if (url.pathname === "/health") {
      return jsonResponse({
        status: "ok",
        cache_size: cache.size,
        context7_base: CONTEXT7_BASE,
        ttl_ms: CACHE_TTL_MS,
      });
    }

    // Debug view: one row per cached tool, with entry age in ms.
    if (url.pathname === "/cache") {
      const dump = Array.from(cache.entries()).map(([k, v]) => ({
        tool: k,
        library_id: v.entry.library_id,
        snippet_hash: v.entry.snippet_hash,
        retrieved_at: v.entry.retrieved_at,
        age_ms: Date.now() - v.at,
      }));
      return jsonResponse({ entries: dump });
    }

    // GET /docs/:tool — return current descriptor (fetches + caches)
    // ([^/]+ means the diff route below is NOT shadowed by this match)
    const m1 = url.pathname.match(/^\/docs\/([^/]+)$/);
    if (m1 && req.method === "GET") {
      const tool = decodeURIComponent(m1[1]);
      try {
        const entry = await getCurrent(tool);
        return jsonResponse(entry);
      } catch (e) {
        // NOTE(review): every getCurrent failure — including upstream
        // 5xx/timeouts — surfaces as 404 here; callers cannot tell
        // "unknown tool" from "context7 down". Confirm gateway is OK
        // with that before changing.
        return jsonResponse({ error: (e as Error).message }, 404);
      }
    }

    // GET /docs/:tool/diff?since=hash — compare current vs recorded hash
    const m2 = url.pathname.match(/^\/docs\/([^/]+)\/diff$/);
    if (m2 && req.method === "GET") {
      const tool = decodeURIComponent(m2[1]);
      const since = url.searchParams.get("since");
      if (!since) {
        return jsonResponse({ error: "query param 'since' (snippet_hash) required" }, 400);
      }
      try {
        const entry = await getCurrent(tool);
        const drifted = entry.snippet_hash !== since;
        return jsonResponse({
          tool: entry.tool,
          drifted,
          previous_snippet_hash: since,
          current_snippet_hash: entry.snippet_hash,
          library_id: entry.library_id,
          title: entry.title,
          last_updated_upstream: entry.last_updated,
          retrieved_at: entry.retrieved_at,
          source_url: entry.source_url,
          // Preview only when drifted — no drift means nothing to show.
          docs_preview: drifted ? entry.docs_preview : null,
        });
      } catch (e) {
        return jsonResponse({ error: (e as Error).message }, 404);
      }
    }

    return jsonResponse({ error: "not found", paths: ["/health", "/docs/:tool", "/docs/:tool/diff?since=HASH", "/cache"] }, 404);
  },
});
|
||||||
|
|
||||||
|
// Startup banner — confirms the effective port and upstream base URL.
console.log(`[context7-bridge] listening on :${PORT} (context7 base: ${CONTEXT7_BASE})`);
|
||||||
Loading…
x
Reference in New Issue
Block a user