/log validates endorsed names + /search auto-appends availability>0.5
Two gap-fills surfaced by the real test on 2026-04-20:

1. /log no longer seeds endorsed_names that don't exist in workers_500k for the contract's (city, state). Previously it accepted ghost names silently: the entry count grew and the SQL row landed, but the boost never fired because no real worker chunk matched the stored (city, state, name) tuple. The response now reports rejected_ghost_names and explains why seeding was skipped.

2. Bun /search auto-appends `CAST(availability AS DOUBLE) > 0.5` to a caller-supplied sql_filter when that filter doesn't already reference availability (a request with no sql_filter at all is forwarded unchanged). Recruiters expect "available workers" by default — surfacing someone on an active placement would break trust on first contact. Opt out with `include_unavailable: true`.

Verified: ghost names are rejected end-to-end, real names are accepted, and mixed input is handled correctly. The availability filter drops ~10 workers from a 305-row Cleveland, OH Welder pool, leaving 295 that are actually available.
This commit is contained in:
parent
25b7e6c3a7
commit
20b0289aa9
@ -384,14 +384,21 @@ async function main() {
|
||||
// Tool: hybrid search
|
||||
if (url.pathname === "/search") {
|
||||
const b = await json();
|
||||
// Availability soft-filter: if the caller didn't constrain
|
||||
// availability and isn't explicitly opting out, auto-append
|
||||
// `availability > 0.5`. Recruiters calling this route expect
|
||||
// "available workers" by default; surfacing someone who's on
|
||||
// an active placement breaks trust on the first call.
|
||||
let filter = b.sql_filter as (string | undefined);
|
||||
const optOut = b.include_unavailable === true;
|
||||
if (!optOut && filter && !/availability/i.test(filter)) {
|
||||
filter = `(${filter}) AND CAST(availability AS DOUBLE) > 0.5`;
|
||||
}
|
||||
return ok(await api("POST", "/vectors/hybrid", {
|
||||
question: b.question, index_name: b.index || "workers_500k_v1",
|
||||
sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers",
|
||||
sql_filter: filter, filter_dataset: b.dataset || "ethereal_workers",
|
||||
id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
|
||||
use_playbook_memory: b.use_playbook_memory !== false,
|
||||
// Forward explicitly so Bun /search isn't capped by the
|
||||
// server's default — boost silently misses good matches when
|
||||
// memory has >25 entries and only top-5 playbooks are scanned.
|
||||
playbook_memory_k: b.playbook_memory_k ?? 25,
|
||||
}));
|
||||
}
|
||||
@ -450,7 +457,34 @@ async function main() {
|
||||
const result = String(b.result || "");
|
||||
const arrowIdx = result.indexOf("→");
|
||||
const namesPart = arrowIdx >= 0 ? result.slice(arrowIdx + 1) : "";
|
||||
const endorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean);
|
||||
const rawEndorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean);
|
||||
|
||||
// Parse the contract's (city, state) from operation. Seed is
|
||||
// keyed by (city, state, name) so validation must match those
|
||||
// coordinates, not just the name.
|
||||
const opMatch = String(b.operation || "").match(/ in ([^,]+),\s*([A-Za-z]+)/);
|
||||
const city = opMatch ? opMatch[1].trim() : "";
|
||||
const state = opMatch ? opMatch[2].trim() : "";
|
||||
|
||||
// Ghost-name guard — /log previously accepted any endorsed
|
||||
// names without verification. Those ghosts landed in
|
||||
// playbook_memory, grew the entry count, but boost silently
|
||||
// never fired because no real worker chunk ever matched the
|
||||
// stored (city, state, name) tuple. Real-test on 2026-04-20
|
||||
// surfaced this. Validate against workers_500k before seeding.
|
||||
let endorsed: string[] = rawEndorsed;
|
||||
let rejected: string[] = [];
|
||||
if (rawEndorsed.length && city && state) {
|
||||
const quoted = rawEndorsed.map(n => `'${n.replace(/'/g, "''")}'`).join(",");
|
||||
const sql = `SELECT DISTINCT name FROM workers_500k `
|
||||
+ `WHERE name IN (${quoted}) AND city = '${city.replace(/'/g,"''")}' `
|
||||
+ `AND state = '${state.replace(/'/g,"''")}'`;
|
||||
const vr = await api("POST", "/query/sql", { sql }).catch(() => ({ rows: [] as any[] })) as any;
|
||||
const found = new Set((vr.rows ?? []).map((r: any) => r.name));
|
||||
endorsed = rawEndorsed.filter(n => found.has(n));
|
||||
rejected = rawEndorsed.filter(n => !found.has(n));
|
||||
}
|
||||
|
||||
let seeded = 0;
|
||||
let persisted_rows = 0;
|
||||
if (endorsed.length && /fill:.+ in .+,.+/i.test(String(b.operation || ""))) {
|
||||
@ -465,11 +499,6 @@ async function main() {
|
||||
}).catch(() => null) as any;
|
||||
if (seedRes && seedRes.playbook_id) {
|
||||
seeded = endorsed.length;
|
||||
// After every successful seed, persist memory → SQL so the
|
||||
// successful_playbooks_live table reflects current operator
|
||||
// activity. /persist_sql writes the FULL state, which is safe
|
||||
// because in-memory playbook_memory IS the source of truth
|
||||
// (no concurrent writer outside this process modifies it).
|
||||
const pr = await api("POST", "/vectors/playbook_memory/persist_sql", {}).catch(() => null) as any;
|
||||
if (pr && typeof pr.rows_persisted === "number") persisted_rows = pr.rows_persisted;
|
||||
}
|
||||
@ -478,7 +507,10 @@ async function main() {
|
||||
logged: true,
|
||||
seeded,
|
||||
persisted_to_sql: persisted_rows,
|
||||
note: "successful_playbooks_live (NOT successful_playbooks) is the SQL surface for live operator activity. /log is non-destructive.",
|
||||
rejected_ghost_names: rejected,
|
||||
note: rejected.length
|
||||
? `${rejected.length} endorsed name(s) not found in workers_500k for ${city}, ${state} — skipped seeding to prevent silent boost failure.`
|
||||
: "successful_playbooks_live is the SQL surface for live operator activity. /log is non-destructive and name-validated.",
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user