Fix: gateway defaulted to wrong vector index (10K instead of 50K)

All gateway endpoints pointed to ethereal_workers_v1 (10K, W- prefix)
instead of workers_500k_v1 (50K, W500K- prefix). Filters appeared
broken because the vector results came from the wrong dataset —
IDs matched numerically but belonged to different workers.

Now: every search, match, hybrid, and RAG call uses workers_500k_v1 unless the caller passes an explicit index.
Verified: the query 'experienced welder' with state=OH and role=Welder
returns 5 Welders in OH (Carmen Perry, Janet White, Rachel Miller, etc.).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-17 13:16:11 -05:00
parent f9e2a0bbbe
commit 5c93338f40

View File

@ -74,7 +74,7 @@ server.tool(
top_k: z.number().default(5),
},
async ({ question, sql_filter, dataset, id_column, top_k }) => {
const body: any = { question, index_name: "ethereal_workers_v1", filter_dataset: dataset, id_column, top_k, generate: true };
const body: any = { question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true };
if (sql_filter) body.sql_filter = sql_filter;
const r = await api("POST", "/vectors/hybrid", body);
return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] };
@ -106,7 +106,7 @@ server.tool(
if (city) filter += ` AND city = '${city}'`;
const r = await api("POST", "/vectors/hybrid", {
question: `Find the best ${role} workers with relevant skills and certifications`,
index_name: "ethereal_workers_v1", sql_filter: filter,
index_name: "workers_500k_v1", sql_filter: filter,
filter_dataset: "ethereal_workers", id_column: "worker_id",
top_k: headcount * 2, generate: false,
});
@ -139,7 +139,7 @@ server.tool(
server.tool(
"rag_question",
"Natural language question answered via RAG (embed → search → retrieve → generate). For open-ended questions where SQL alone isn't enough.",
{ question: z.string(), index: z.string().default("ethereal_workers_v1"), top_k: z.number().default(5) },
{ question: z.string(), index: z.string().default("workers_500k_v1"), top_k: z.number().default(5) },
async ({ question, index, top_k }) => {
const r = await api("POST", "/vectors/rag", { index_name: index, question, top_k });
return { content: [{ type: "text" as const, text: r.error ? `RAG Error: ${r.error}` : `Answer: ${r.answer}\n\nSources: ${r.sources?.length || 0}` }] };
@ -381,7 +381,7 @@ async function main() {
if (url.pathname === "/search") {
const b = await json();
return ok(await api("POST", "/vectors/hybrid", {
question: b.question, index_name: b.index || "ethereal_workers_v1",
question: b.question, index_name: b.index || "workers_500k_v1",
sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers",
id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
}));
@ -400,7 +400,7 @@ async function main() {
if (b.city) filter += ` AND city = '${b.city}'`;
return ok(await api("POST", "/vectors/hybrid", {
question: `Best ${b.role} workers with relevant skills`,
index_name: b.index || "ethereal_workers_v1", sql_filter: filter,
index_name: b.index || "workers_500k_v1", sql_filter: filter,
filter_dataset: b.dataset || "ethereal_workers",
id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false,
}));
@ -415,7 +415,7 @@ async function main() {
// Tool: RAG
if (url.pathname === "/ask") {
const b = await json();
return ok(await api("POST", "/vectors/rag", { index_name: b.index || "ethereal_workers_v1", question: b.question, top_k: b.top_k || 5 }));
return ok(await api("POST", "/vectors/rag", { index_name: b.index || "workers_500k_v1", question: b.question, top_k: b.top_k || 5 }));
}
// Tool: log success
@ -954,7 +954,7 @@ async function runWeekSimulation() {
const filt = `role = '${role}' AND state = '${state}' AND reliability >= ${minRel}`;
const r = await api("POST", "/vectors/hybrid", {
question: `Find ${role} workers for ${pick(NOTES)}`,
index_name: "ethereal_workers_v1",
index_name: "workers_500k_v1",
sql_filter: filt,
filter_dataset: "ethereal_workers",
id_column: "worker_id",