/log validates endorsed names + /search auto-appends availability>0.5

Two gap-fills surfaced by the real test on 2026-04-20:

1. /log no longer seeds endorsed_names that don't exist in workers_500k
   for the contract's (city, state). Previously it accepted ghost names
   silently: the entry count grew and the SQL row landed, but the boost
   never fired because no real worker chunk matched the stored
   (city, state, name) tuple. The response now reports
   rejected_ghost_names and explains why seeding was skipped.

2. Bun /search auto-appends `CAST(availability AS DOUBLE) > 0.5` to a
   caller-supplied sql_filter when that filter doesn't already mention
   availability. Recruiters expect "available workers" by default —
   surfacing someone on an active placement would break trust on first
   contact. Opt out with `include_unavailable: true`.

Verified: ghost names are rejected end-to-end, real names are accepted,
and mixed input is handled correctly. The availability filter trims a
305-row Cleveland, OH Welder pool to 295 actually-available workers
(~10 dropped).
This commit is contained in:
root 2026-04-20 14:44:12 -05:00
parent 25b7e6c3a7
commit 20b0289aa9

View File

@ -384,14 +384,21 @@ async function main() {
// Tool: hybrid search
if (url.pathname === "/search") {
const b = await json();
// Availability soft-filter: if the caller supplied a sql_filter that
// doesn't mention availability and isn't explicitly opting out,
// auto-append `CAST(availability AS DOUBLE) > 0.5`. Recruiters calling
// this route expect "available workers" by default; surfacing someone
// who's on an active placement breaks trust on the first call.
// NOTE: requests with no sql_filter at all are passed through unfiltered.
let filter = b.sql_filter as (string | undefined);
const optOut = b.include_unavailable === true;
if (!optOut && filter && !/availability/i.test(filter)) {
filter = `(${filter}) AND CAST(availability AS DOUBLE) > 0.5`;
}
return ok(await api("POST", "/vectors/hybrid", {
question: b.question, index_name: b.index || "workers_500k_v1",
sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers",
sql_filter: filter, filter_dataset: b.dataset || "ethereal_workers",
id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
use_playbook_memory: b.use_playbook_memory !== false,
// Forward explicitly so Bun /search isn't capped by the
// server's default — boost silently misses good matches when
// memory has >25 entries and only top-5 playbooks are scanned.
playbook_memory_k: b.playbook_memory_k ?? 25,
}));
}
@ -450,7 +457,34 @@ async function main() {
const result = String(b.result || "");
const arrowIdx = result.indexOf("→");
const namesPart = arrowIdx >= 0 ? result.slice(arrowIdx + 1) : "";
const endorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean);
const rawEndorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean);
// Parse the contract's (city, state) from operation. Seed is
// keyed by (city, state, name) so validation must match those
// coordinates, not just the name.
const opMatch = String(b.operation || "").match(/ in ([^,]+),\s*([A-Za-z]+)/);
const city = opMatch ? opMatch[1].trim() : "";
const state = opMatch ? opMatch[2].trim() : "";
// Ghost-name guard — /log previously accepted any endorsed
// names without verification. Those ghosts landed in
// playbook_memory, grew the entry count, but boost silently
// never fired because no real worker chunk ever matched the
// stored (city, state, name) tuple. Real-test on 2026-04-20
// surfaced this. Validate against workers_500k before seeding.
let endorsed: string[] = rawEndorsed;
let rejected: string[] = [];
if (rawEndorsed.length && city && state) {
const quoted = rawEndorsed.map(n => `'${n.replace(/'/g, "''")}'`).join(",");
const sql = `SELECT DISTINCT name FROM workers_500k `
+ `WHERE name IN (${quoted}) AND city = '${city.replace(/'/g,"''")}' `
+ `AND state = '${state.replace(/'/g,"''")}'`;
const vr = await api("POST", "/query/sql", { sql }).catch(() => ({ rows: [] as any[] })) as any;
const found = new Set((vr.rows ?? []).map((r: any) => r.name));
endorsed = rawEndorsed.filter(n => found.has(n));
rejected = rawEndorsed.filter(n => !found.has(n));
}
let seeded = 0;
let persisted_rows = 0;
if (endorsed.length && /fill:.+ in .+,.+/i.test(String(b.operation || ""))) {
@ -465,11 +499,6 @@ async function main() {
}).catch(() => null) as any;
if (seedRes && seedRes.playbook_id) {
seeded = endorsed.length;
// After every successful seed, persist memory → SQL so the
// successful_playbooks_live table reflects current operator
// activity. /persist_sql writes the FULL state, which is safe
// because in-memory playbook_memory IS the source of truth
// (no concurrent writer outside this process modifies it).
const pr = await api("POST", "/vectors/playbook_memory/persist_sql", {}).catch(() => null) as any;
if (pr && typeof pr.rows_persisted === "number") persisted_rows = pr.rows_persisted;
}
@ -478,7 +507,10 @@ async function main() {
logged: true,
seeded,
persisted_to_sql: persisted_rows,
note: "successful_playbooks_live (NOT successful_playbooks) is the SQL surface for live operator activity. /log is non-destructive.",
rejected_ghost_names: rejected,
note: rejected.length
? `${rejected.length} endorsed name(s) not found in workers_500k for ${city}, ${state} — skipped seeding to prevent silent boost failure.`
: "successful_playbooks_live is the SQL surface for live operator activity. /log is non-destructive and name-validated.",
});
}