From 5c93338f40856cf946f6d69376912bbb2e2869b4 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 17 Apr 2026 13:16:11 -0500 Subject: [PATCH] Fix: gateway defaulted to wrong vector index (10K instead of 50K) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All gateway endpoints pointed to ethereal_workers_v1 (10K, W- prefix) instead of workers_500k_v1 (50K, W500K- prefix). Filters appeared broken because the vector results came from the wrong dataset — IDs matched numerically but belonged to different workers. Now: every search, match, and hybrid call uses workers_500k_v1. Verified: 'experienced welder' + state=OH + role=Welder returns 5 Welders in OH (Carmen Perry, Janet White, Rachel Miller, etc). Co-Authored-By: Claude Opus 4.6 (1M context) --- mcp-server/index.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mcp-server/index.ts b/mcp-server/index.ts index e25eb7f..b454249 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -74,7 +74,7 @@ server.tool( top_k: z.number().default(5), }, async ({ question, sql_filter, dataset, id_column, top_k }) => { - const body: any = { question, index_name: "ethereal_workers_v1", filter_dataset: dataset, id_column, top_k, generate: true }; + const body: any = { question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true }; if (sql_filter) body.sql_filter = sql_filter; const r = await api("POST", "/vectors/hybrid", body); return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] }; @@ -106,7 +106,7 @@ server.tool( if (city) filter += ` AND city = '${city}'`; const r = await api("POST", "/vectors/hybrid", { question: `Find the best ${role} workers with relevant skills and certifications`, - index_name: "ethereal_workers_v1", sql_filter: filter, + index_name: "workers_500k_v1", sql_filter: filter, filter_dataset: "ethereal_workers", id_column: "worker_id", top_k: headcount * 2, generate: false, }); @@ -139,7 +139,7 @@ server.tool( server.tool( "rag_question", "Natural language question answered via RAG (embed → search → retrieve → generate). For open-ended questions where SQL alone isn't enough.", - { question: z.string(), index: z.string().default("ethereal_workers_v1"), top_k: z.number().default(5) }, + { question: z.string(), index: z.string().default("workers_500k_v1"), top_k: z.number().default(5) }, async ({ question, index, top_k }) => { const r = await api("POST", "/vectors/rag", { index_name: index, question, top_k }); return { content: [{ type: "text" as const, text: r.error ? `RAG Error: ${r.error}` : `Answer: ${r.answer}\n\nSources: ${r.sources?.length || 0}` }] }; @@ -381,7 +381,7 @@ async function main() { if (url.pathname === "/search") { const b = await json(); return ok(await api("POST", "/vectors/hybrid", { - question: b.question, index_name: b.index || "ethereal_workers_v1", + question: b.question, index_name: b.index || "workers_500k_v1", sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers", id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false, })); @@ -400,7 +400,7 @@ async function main() { if (b.city) filter += ` AND city = '${b.city}'`; return ok(await api("POST", "/vectors/hybrid", { question: `Best ${b.role} workers with relevant skills`, - index_name: b.index || "ethereal_workers_v1", sql_filter: filter, + index_name: b.index || "workers_500k_v1", sql_filter: filter, filter_dataset: b.dataset || "ethereal_workers", id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false, })); @@ -415,7 +415,7 @@ async function main() { // Tool: RAG if (url.pathname === "/ask") { const b = await json(); - return ok(await api("POST", "/vectors/rag", { index_name: b.index || "ethereal_workers_v1", question: b.question, top_k: b.top_k || 5 })); + return ok(await api("POST", "/vectors/rag", { index_name: b.index || "workers_500k_v1", question: b.question, top_k: b.top_k || 5 })); } // Tool: log success @@ -954,7 +954,7 @@ async function runWeekSimulation() { const filt = `role = '${role}' AND state = '${state}' AND reliability >= ${minRel}`; const r = await api("POST", "/vectors/hybrid", { question: `Find ${role} workers for ${pick(NOTES)}`, - index_name: "ethereal_workers_v1", + index_name: "workers_500k_v1", sql_filter: filt, filter_dataset: "ethereal_workers", id_column: "worker_id",