A+B: patterns in main search + compounding bump

A — Patterns surface in main Worker Search:
  /intelligence/chat smart_search fallback now calls /patterns in
  parallel with hybrid, returns discovered_pattern + matched count.
  search.html doSearch renders a green "MEMORY (N playbooks): ..."
  chip above results so every recruiter query shows the meta-index
  dimension, not just live-contract cards.

B — Compounding proven and default-k bumped:
  Direct compounding test on Chicago Electrician:
  - Run 0 (no seeds): Carmen Green not in top-5, boost 0
  - After 3 seeds of identical operation: boost +0.250 (capped),
    3 citations, lifted to #1. Each seed adds 1 citation. Cap
    prevents one worker from dominating future searches.
  - Required k=200 (k=25 and k=50 were not enough) — the embedding
    band is narrow (cosines 0.55-0.67 across all playbooks
    regardless of geo).
  - Bumped defaults on /search, permit_contracts, and smart_search
    to playbook_memory_k=200. Brute-force scan is sub-ms at this scale.
This commit is contained in:
root 2026-04-20 15:41:12 -05:00
parent 5c39c74fe4
commit 99ab0fe623
2 changed files with 41 additions and 4 deletions

View File

@ -1278,7 +1278,9 @@ tr:hover{background:#111827}
// Phase 19 ranked candidates. Soft availability filter
// auto-applied by /search — this mirrors the real recruiter
// query path exactly.
// query path exactly. k=200 so the boost can fire across the
// full memory surface: the embedding similarity band is narrow,
// so a smaller k silently misses endorsements.
const searchRes = await api("POST", "/vectors/hybrid", {
index_name: "workers_500k_v1",
filter_dataset: "workers_500k",
@ -1286,7 +1288,7 @@ tr:hover{background:#111827}
sql_filter: `role = '${role}' AND state = '${state}' AND city = '${city}' AND CAST(availability AS DOUBLE) > 0.5`,
question: `${role} for ${p.work_type || "construction"} in ${city}`,
top_k: 5, generate: false,
use_playbook_memory: true, playbook_memory_k: 25,
use_playbook_memory: true, playbook_memory_k: 200,
}).catch(() => ({ sources: [] as any[] }));
// Path 2 — discovered patterns for this role in this city.
@ -1562,17 +1564,37 @@ tr:hover{background:#111827}
const sqlFields = "name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, skills, certifications, archetype";
const directSql = `SELECT ${sqlFields} FROM workers_500k WHERE ${filterStr} ORDER BY CAST(availability AS DOUBLE) DESC, CAST(reliability AS DOUBLE) DESC LIMIT 10`;
const [searchR, directR] = await Promise.all([
// Derive role+geo for the pattern query so the meta-index
// surface lines up with what the user actually asked for.
const roleForPatterns = understood.find(u => u.startsWith('role:'))?.split(': ')[1] || q;
const cityForPatterns = understood.find(u => u.startsWith('city:'))?.split(': ')[1] || 'Chicago';
const stateForPatterns = understood.find(u => u.startsWith('state:'))?.split(': ')[1] || 'IL';
const [searchR, directR, patternR] = await Promise.all([
api("POST", "/vectors/hybrid", {
question: q, index_name: "workers_500k_v1", sql_filter: filterStr,
filter_dataset: "ethereal_workers", id_column: "worker_id", top_k: 8, generate: false,
// k=200 to catch compounding — direct measurement shows
// boost reliably fires only when ~all memory is scanned
// due to the narrow 0.55-0.67 cosine band in the 768d
// nomic-embed-text space. Brute force at 200 entries
// is sub-ms; no reason to underscan.
use_playbook_memory: true, playbook_memory_k: 200,
}),
api("POST", "/query/sql", { sql: directSql }),
api("POST", "/vectors/playbook_memory/patterns", {
query: `${roleForPatterns} in ${cityForPatterns}, ${stateForPatterns}`,
top_k_playbooks: 25, min_trait_frequency: 0.3,
}).catch(() => ({})),
]);
// Merge: use SQL results for structured data (zip, avail), vector for ranking
const sqlWorkers = directR.rows || [];
const vectorWorkers = (searchR.sources || []).map((s: any) => ({ doc_id: s.doc_id, score: s.score, text: s.chunk_text }));
const vectorWorkers = (searchR.sources || []).map((s: any) => ({
doc_id: s.doc_id, score: s.score, text: s.chunk_text,
playbook_boost: s.playbook_boost || 0,
playbook_citations: s.playbook_citations || [],
}));
return ok({
type: "smart_search",
@ -1583,6 +1605,10 @@ tr:hover{background:#111827}
sql_matches: searchR.sql_matches,
queries_run: queries,
duration_ms: Date.now() - start,
// Meta-index signal — what similar past fills had in common.
// Non-empty when memory has ≥1 relevant playbook.
discovered_pattern: (patternR as any)?.discovered_pattern,
pattern_playbooks_matched: (patternR as any)?.matched_playbooks ?? 0,
});
}
}

View File

@ -724,6 +724,17 @@ function doSearch(){
var h=document.createElement('div');h.style.cssText='color:#8b949e;font-size:12px;margin-bottom:10px';
h.textContent=(d.sql_matches?d.sql_matches.toLocaleString()+' workers matched — ':'')+'showing best results ('+(d.duration_ms||0)+'ms)';
out.appendChild(h);
// Meta-index signal — what past similar fills had in common. Only
// renders when memory had at least one relevant playbook.
if(d.discovered_pattern && d.pattern_playbooks_matched > 0){
var mem=document.createElement('div');
mem.style.cssText='background:#0d2818;border:1px solid #2ea04360;border-radius:6px;padding:8px 12px;margin-bottom:10px;font-size:11px;color:#86efac;line-height:1.5';
var label=document.createElement('span');label.style.cssText='color:#3fb950;font-weight:600;margin-right:6px';
label.textContent='MEMORY ('+d.pattern_playbooks_matched+' playbooks):';
mem.appendChild(label);
mem.appendChild(document.createTextNode(' '+d.discovered_pattern));
out.appendChild(mem);
}
// Render results based on type
var workers=d.sql_results||[];
if(workers.length){