Fix: gateway defaulted to wrong vector index (10K instead of 50K)

All gateway endpoints pointed to ethereal_workers_v1 (10K, W- prefix)
instead of workers_500k_v1 (50K, W500K- prefix). Filters appeared
broken because the vector results came from the wrong dataset: the
numeric part of each ID matched, but the IDs belonged to different
workers.

Now: every search, match, hybrid, and RAG call uses workers_500k_v1,
either hard-coded or as the default when the caller supplies no index.
Verified: 'experienced welder' + state=OH + role=Welder returns
5 Welders in OH (Carmen Perry, Janet White, Rachel Miller, etc.).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-17 13:16:11 -05:00
parent f9e2a0bbbe
commit 5c93338f40

View File

@ -74,7 +74,7 @@ server.tool(
top_k: z.number().default(5), top_k: z.number().default(5),
}, },
async ({ question, sql_filter, dataset, id_column, top_k }) => { async ({ question, sql_filter, dataset, id_column, top_k }) => {
const body: any = { question, index_name: "ethereal_workers_v1", filter_dataset: dataset, id_column, top_k, generate: true }; const body: any = { question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true };
if (sql_filter) body.sql_filter = sql_filter; if (sql_filter) body.sql_filter = sql_filter;
const r = await api("POST", "/vectors/hybrid", body); const r = await api("POST", "/vectors/hybrid", body);
return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] }; return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] };
@ -106,7 +106,7 @@ server.tool(
if (city) filter += ` AND city = '${city}'`; if (city) filter += ` AND city = '${city}'`;
const r = await api("POST", "/vectors/hybrid", { const r = await api("POST", "/vectors/hybrid", {
question: `Find the best ${role} workers with relevant skills and certifications`, question: `Find the best ${role} workers with relevant skills and certifications`,
index_name: "ethereal_workers_v1", sql_filter: filter, index_name: "workers_500k_v1", sql_filter: filter,
filter_dataset: "ethereal_workers", id_column: "worker_id", filter_dataset: "ethereal_workers", id_column: "worker_id",
top_k: headcount * 2, generate: false, top_k: headcount * 2, generate: false,
}); });
@ -139,7 +139,7 @@ server.tool(
server.tool( server.tool(
"rag_question", "rag_question",
"Natural language question answered via RAG (embed → search → retrieve → generate). For open-ended questions where SQL alone isn't enough.", "Natural language question answered via RAG (embed → search → retrieve → generate). For open-ended questions where SQL alone isn't enough.",
{ question: z.string(), index: z.string().default("ethereal_workers_v1"), top_k: z.number().default(5) }, { question: z.string(), index: z.string().default("workers_500k_v1"), top_k: z.number().default(5) },
async ({ question, index, top_k }) => { async ({ question, index, top_k }) => {
const r = await api("POST", "/vectors/rag", { index_name: index, question, top_k }); const r = await api("POST", "/vectors/rag", { index_name: index, question, top_k });
return { content: [{ type: "text" as const, text: r.error ? `RAG Error: ${r.error}` : `Answer: ${r.answer}\n\nSources: ${r.sources?.length || 0}` }] }; return { content: [{ type: "text" as const, text: r.error ? `RAG Error: ${r.error}` : `Answer: ${r.answer}\n\nSources: ${r.sources?.length || 0}` }] };
@ -381,7 +381,7 @@ async function main() {
if (url.pathname === "/search") { if (url.pathname === "/search") {
const b = await json(); const b = await json();
return ok(await api("POST", "/vectors/hybrid", { return ok(await api("POST", "/vectors/hybrid", {
question: b.question, index_name: b.index || "ethereal_workers_v1", question: b.question, index_name: b.index || "workers_500k_v1",
sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers", sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers",
id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false, id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
})); }));
@ -400,7 +400,7 @@ async function main() {
if (b.city) filter += ` AND city = '${b.city}'`; if (b.city) filter += ` AND city = '${b.city}'`;
return ok(await api("POST", "/vectors/hybrid", { return ok(await api("POST", "/vectors/hybrid", {
question: `Best ${b.role} workers with relevant skills`, question: `Best ${b.role} workers with relevant skills`,
index_name: b.index || "ethereal_workers_v1", sql_filter: filter, index_name: b.index || "workers_500k_v1", sql_filter: filter,
filter_dataset: b.dataset || "ethereal_workers", filter_dataset: b.dataset || "ethereal_workers",
id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false, id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false,
})); }));
@ -415,7 +415,7 @@ async function main() {
// Tool: RAG // Tool: RAG
if (url.pathname === "/ask") { if (url.pathname === "/ask") {
const b = await json(); const b = await json();
return ok(await api("POST", "/vectors/rag", { index_name: b.index || "ethereal_workers_v1", question: b.question, top_k: b.top_k || 5 })); return ok(await api("POST", "/vectors/rag", { index_name: b.index || "workers_500k_v1", question: b.question, top_k: b.top_k || 5 }));
} }
// Tool: log success // Tool: log success
@ -954,7 +954,7 @@ async function runWeekSimulation() {
const filt = `role = '${role}' AND state = '${state}' AND reliability >= ${minRel}`; const filt = `role = '${role}' AND state = '${state}' AND reliability >= ${minRel}`;
const r = await api("POST", "/vectors/hybrid", { const r = await api("POST", "/vectors/hybrid", {
question: `Find ${role} workers for ${pick(NOTES)}`, question: `Find ${role} workers for ${pick(NOTES)}`,
index_name: "ethereal_workers_v1", index_name: "workers_500k_v1",
sql_filter: filt, sql_filter: filt,
filter_dataset: "ethereal_workers", filter_dataset: "ethereal_workers",
id_column: "worker_id", id_column: "worker_id",