// Architecture smoke test — exercises the lakehouse substrate against
// the 500k-worker reference dataset. Proves the architecture's core
// claims from docs/PRD.md §Shared requirements end-to-end in <60s,
// without cloud calls.
//
// Each section tests ONE invariant. PASS or FAIL is printed per check
// with the actual measurement. A FAIL tells us exactly where the
// architecture broke; the failure is the driver for the next commit.
//
// Run: bun run tests/architecture_smoke.ts
//
// Expected preconditions (health endpoints are probed in section 1):
//   - gateway on :3100
//   - sidecar on :3200 (for embed endpoint)
//   - workers_500k dataset registered in catalog (500k rows)
//   - workers_500k_v1 vector index built (50k chunks, 768d nomic)
//   - primary bucket accessible

const GATEWAY = process.env.GATEWAY_URL ?? "http://localhost:3100";
const SIDECAR = process.env.SIDECAR_URL ?? "http://localhost:3200";

/** Outcome of one named check; `measure` is the observation, `err` the failure reason. */
type Check = {
  name: string;
  ok: boolean;
  measure?: string;
  err?: string;
};

/** Subset of a `/catalog/datasets` entry that this script reads. */
type CatalogDataset = {
  name: string;
  row_count?: number;
};

/** Every check recorded so far, in execution order; summarized at the end of `main`. */
const results: Check[] = [];

/**
 * Stores a check outcome and prints a one-line report for it.
 *
 * @param name    Human-readable check name (padded to 50 columns in the output).
 * @param ok      Whether the check passed.
 * @param measure What was observed (status code, row count, timing, ...).
 * @param err     Why the check failed, when a specific reason is known.
 */
function record(name: string, ok: boolean, measure?: string, err?: string) {
  results.push({ name, ok, measure, err });
  const marker = ok ? "✓" : "✗";
  // On failure prefer the explicit reason, but fall back to the measurement so
  // checks that only supply a measurement never fail with a blank line.
  const detail = ok ? (measure ?? "") : (err ?? measure ?? "");
  console.log(` ${marker} ${name.padEnd(50)} ${detail}`);
}

/**
 * Performs a JSON request against the gateway and returns the parsed body.
 *
 * @param path Path appended to `GATEWAY` (must start with `/`).
 * @param init Optional fetch options; caller headers override the JSON content-type.
 * @returns The parsed JSON response body, typed as `T` without runtime validation.
 * @throws Error containing the path, status and response text on a non-2xx status.
 */
// The default is `any` because most callers probe loosely-shaped responses with `?.`.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function http<T = any>(path: string, init?: RequestInit): Promise<T> {
  const r = await fetch(`${GATEWAY}${path}`, {
    ...init,
    headers: { "content-type": "application/json", ...(init?.headers ?? {}) },
  });
  if (!r.ok) {
    throw new Error(`${path} ${r.status}: ${await r.text().catch(() => "?")}`);
  }
  return r.json() as Promise<T>;
}

/**
 * Runs every smoke-test section in order, prints a summary, and exits the
 * process with status 1 when at least one check failed.
 */
async function main() {
  const t0 = Date.now();
  console.log(`\n━━━ Architecture smoke test ━━━`);
  console.log(`Gateway: ${GATEWAY}`);
  console.log(`Sidecar: ${SIDECAR}`);
  console.log();

  // ─── 1. Preconditions ──────────────────────────────────────────
  console.log("【 1 · preconditions 】");
  try {
    const h = await fetch(`${GATEWAY}/health`);
    record("gateway /health", h.ok, `HTTP ${h.status}`);
  } catch (e) {
    record("gateway /health", false, undefined, String(e));
  }
  try {
    const h = await fetch(`${SIDECAR}/health`);
    record("sidecar /health", h.ok, `HTTP ${h.status}`);
  } catch (e) {
    record("sidecar /health", false, undefined, String(e));
  }

  // ─── 2. Catalog — invariant: any dataset is queryable by name ──
  console.log("\n【 2 · catalog lookup 】");
  try {
    const ds = await http<CatalogDataset[]>("/catalog/datasets");
    const w500 = ds.find((d) => d.name === "workers_500k");
    if (w500) {
      record("workers_500k in catalog", true, `${w500.row_count ?? "?"} rows`);
    } else {
      record("workers_500k in catalog", false, undefined, "not found");
    }
  } catch (e) {
    record("workers_500k in catalog", false, undefined, String(e));
  }

  // ─── 3. SQL at scale — PRD §shared: millions of rows, sub-second ──
  console.log("\n【 3 · SQL at scale 】");
  await timed("count(*) on workers_500k", async () => {
    const j = await http("/query/sql", {
      method: "POST",
      body: JSON.stringify({ sql: "SELECT COUNT(*) as n FROM workers_500k" }),
    });
    const n = j?.rows?.[0]?.n ?? 0;
    if (n < 400_000) throw new Error(`only ${n} rows — expected ~500k`);
    return `${n.toLocaleString()} rows`;
  });
  await timed("geo filter on workers_500k", async () => {
    const j = await http("/query/sql", {
      method: "POST",
      body: JSON.stringify({
        sql: "SELECT COUNT(*) as n FROM workers_500k WHERE state = 'OH'",
      }),
    });
    return `${(j?.rows?.[0]?.n ?? 0).toLocaleString()} Ohio workers`;
  });

  // ─── 4. Vector search — PRD §shared: AI embeddings over a profile index ──
  // Gateway endpoint takes TEXT + embeds internally (uses the sidecar's
  // /embed under the hood). We don't pre-embed on the client side.
  console.log("\n【 4 · vector search 】");
  await timed("vector /vectors/search on workers_500k_v1", async () => {
    const j = await http("/vectors/search", {
      method: "POST",
      body: JSON.stringify({
        index_name: "workers_500k_v1",
        query: "experienced welder for industrial work",
        top_k: 10,
      }),
    });
    // Named `hits` so it does not shadow the module-level `results` array.
    const hits = j?.results ?? [];
    if (!Array.isArray(hits) || hits.length === 0) {
      throw new Error(`no results: ${JSON.stringify(j).slice(0, 200)}`);
    }
    return `${hits.length} results`;
  });

  // ─── 5. Hybrid SQL+vector — PRD §shared: scoped view via filter ──
  console.log("\n【 5 · hybrid search 】");
  await timed("hybrid: 'welder in Toledo, OH', SQL-filtered", async () => {
    const j = await http("/vectors/hybrid", {
      method: "POST",
      body: JSON.stringify({
        index_name: "workers_500k_v1",
        question: "experienced welder",
        sql_filter: "state = 'OH' AND city = 'Toledo'",
        k: 10,
      }),
    });
    // Either `results` or `hits` is accepted as the result list. An empty
    // list still passes here; only a non-array response is an error.
    const hits = j?.results ?? j?.hits ?? [];
    if (!Array.isArray(hits)) {
      throw new Error(`malformed response: ${JSON.stringify(j).slice(0, 200)}`);
    }
    return `${hits.length} Toledo-OH hits`;
  });

  // ─── 6. Playbook memory — PRD §shared: trials as first-class data ──
  console.log("\n【 6 · playbook memory 】");
  try {
    const s = await http("/vectors/playbook_memory/stats");
    const entries = s?.entries_count ?? s?.count ?? 0;
    // "populated" means at least one entry — same bar as pathway memory below.
    const populated = entries > 0;
    record(
      "playbook_memory populated",
      populated,
      `${entries} entries`,
      populated ? undefined : `expected > 0 entries, got ${entries}`,
    );
  } catch (e) {
    record("playbook_memory populated", false, undefined, String(e));
  }

  // ─── 7. Pathway memory (ADR-021) — the compounding layer ──
  console.log("\n【 7 · pathway memory (ADR-021) 】");
  // The two probes are independent: each has its own try block so that a
  // failure is reported under the name of the check that actually failed.
  try {
    const s = await http("/vectors/pathway/stats");
    record(
      "pathway_memory populated",
      s.total_pathways > 0,
      `${s.total_pathways} traces, ${s.retired} retired, ${s.with_audit_pass} audit-pass`,
    );
  } catch (e) {
    record("pathway_memory populated", false, undefined, String(e));
  }
  try {
    // Probe the bug_fingerprints endpoint we rely on for preamble
    const bf = await http("/vectors/pathway/bug_fingerprints", {
      method: "POST",
      body: JSON.stringify({
        task_class: "scrum_review",
        file_path: "crates/queryd/src/delta.rs",
        signal_class: null,
        limit: 5,
      }),
    });
    record(
      "bug_fingerprints endpoint",
      Array.isArray(bf.fingerprints),
      `${bf.fingerprints?.length ?? 0} patterns`,
    );
  } catch (e) {
    record("bug_fingerprints endpoint", false, undefined, String(e));
  }

  // ─── 8. Truth gate — SQL safety on query path ──
  // Uses fetch directly (not `http`) because a non-2xx status is the expected outcome.
  // NOTE(review): if the gate is missing, the gateway may actually execute this
  // statement — confirm this is only run against a disposable dataset.
  console.log("\n【 8 · truth gate (SQL safety) 】");
  try {
    const r = await fetch(`${GATEWAY}/query/sql`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ sql: "DROP TABLE workers_500k" }),
    });
    record(
      "destructive SQL blocked on /query/sql",
      r.status === 403,
      `HTTP ${r.status}`,
      r.status === 403 ? undefined : `expected 403, got ${r.status}`,
    );
  } catch (e) {
    record("destructive SQL blocked on /query/sql", false, undefined, String(e));
  }

  // ─── Summary ───────────────────────────────────────────────────
  const elapsed = Date.now() - t0;
  const passed = results.filter((r) => r.ok).length;
  const failed = results.length - passed;
  console.log();
  console.log(`━━━ Summary ━━━`);
  console.log(` ${passed}/${passed + failed} passed · ${elapsed}ms elapsed`);
  if (failed > 0) {
    console.log();
    console.log(`FAILED checks — these are the next things to fix:`);
    for (const r of results) {
      if (!r.ok) console.log(` ✗ ${r.name}: ${r.err ?? r.measure ?? "?"}`);
    }
    process.exit(1);
  }
  console.log(`✓ architecture smoke passed`);
}

/**
 * Runs `fn`, measures its wall-clock duration, and records the outcome.
 *
 * @param name Check name passed through to `record`.
 * @param fn   The check body; it resolves to an optional detail string on
 *             success and throws to signal failure. Errors are recorded,
 *             never rethrown.
 */
async function timed(name: string, fn: () => Promise<string | undefined>) {
  const t0 = Date.now();
  try {
    const detail = await fn();
    const ms = Date.now() - t0;
    record(name, true, `${ms}ms ${detail ? `· ${detail}` : ""}`);
  } catch (e) {
    const ms = Date.now() - t0;
    record(name, false, `${ms}ms`, String(e));
  }
}

await main();