diff --git a/mcp-server/index.ts b/mcp-server/index.ts index 4f3a839..a33fc7d 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -384,14 +384,21 @@ async function main() { // Tool: hybrid search if (url.pathname === "/search") { const b = await json(); + // Availability soft-filter: if the caller didn't constrain + // availability and isn't explicitly opting out, auto-append + // `availability > 0.5`. Recruiters calling this route expect + // "available workers" by default; surfacing someone who's on + // an active placement breaks trust on the first call. + let filter = b.sql_filter as (string | undefined); + const optOut = b.include_unavailable === true; + if (!optOut && filter && !/availability/i.test(filter)) { + filter = `(${filter}) AND CAST(availability AS DOUBLE) > 0.5`; + } return ok(await api("POST", "/vectors/hybrid", { question: b.question, index_name: b.index || "workers_500k_v1", - sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers", + sql_filter: filter, filter_dataset: b.dataset || "ethereal_workers", id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false, use_playbook_memory: b.use_playbook_memory !== false, - // Forward explicitly so Bun /search isn't capped by the - // server's default — boost silently misses good matches when - // memory has >25 entries and only top-5 playbooks are scanned. playbook_memory_k: b.playbook_memory_k ?? 25, })); } @@ -450,7 +457,34 @@ async function main() { const result = String(b.result || ""); const arrowIdx = result.indexOf("→"); const namesPart = arrowIdx >= 0 ? result.slice(arrowIdx + 1) : ""; - const endorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean); + const rawEndorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean); + + // Parse the contract's (city, state) from operation. Seed is + // keyed by (city, state, name) so validation must match those + // coordinates, not just the name. + const opMatch = String(b.operation || "").match(/ in ([^,]+),\s*([A-Za-z]+)/); + const city = opMatch ? opMatch[1].trim() : ""; + const state = opMatch ? opMatch[2].trim() : ""; + + // Ghost-name guard — /log previously accepted any endorsed + // names without verification. Those ghosts landed in + // playbook_memory, grew the entry count, but boost silently + // never fired because no real worker chunk ever matched the + // stored (city, state, name) tuple. Real-test on 2026-04-20 + // surfaced this. Validate against workers_500k before seeding. + let endorsed: string[] = rawEndorsed; + let rejected: string[] = []; + if (rawEndorsed.length && city && state) { + const quoted = rawEndorsed.map(n => `'${n.replace(/'/g, "''")}'`).join(","); + const sql = `SELECT DISTINCT name FROM workers_500k ` + + `WHERE name IN (${quoted}) AND city = '${city.replace(/'/g,"''")}' ` + + `AND state = '${state.replace(/'/g,"''")}'`; + const vr = await api("POST", "/query/sql", { sql }).catch(() => ({ rows: [] as any[] })) as any; + const found = new Set((vr.rows ?? []).map((r: any) => r.name)); + endorsed = rawEndorsed.filter(n => found.has(n)); + rejected = rawEndorsed.filter(n => !found.has(n)); + } + let seeded = 0; let persisted_rows = 0; if (endorsed.length && /fill:.+ in .+,.+/i.test(String(b.operation || ""))) { @@ -465,11 +499,6 @@ async function main() { }).catch(() => null) as any; if (seedRes && seedRes.playbook_id) { seeded = endorsed.length; - // After every successful seed, persist memory → SQL so the - // successful_playbooks_live table reflects current operator - // activity. /persist_sql writes the FULL state, which is safe - // because in-memory playbook_memory IS the source of truth - // (no concurrent writer outside this process modifies it). const pr = await api("POST", "/vectors/playbook_memory/persist_sql", {}).catch(() => null) as any; if (pr && typeof pr.rows_persisted === "number") persisted_rows = pr.rows_persisted; } @@ -478,7 +507,10 @@ async function main() { logged: true, seeded, persisted_to_sql: persisted_rows, - note: "successful_playbooks_live (NOT successful_playbooks) is the SQL surface for live operator activity. /log is non-destructive.", + rejected_ghost_names: rejected, + note: rejected.length + ? `${rejected.length} endorsed name(s) not found in workers_500k for ${city}, ${state} — skipped seeding to prevent silent boost failure.` + : "successful_playbooks_live is the SQL surface for live operator activity. /log is non-destructive and name-validated.", }); }