Backend:
- crates/vectord/src/playbook_memory.rs (new): Phase 19 in-memory boost
store with seed/rebuild/snapshot, plus temporal decay (e^-age/30 per
playbook), persist_to_sql endpoint backing successful_playbooks_live,
and discover_patterns endpoint for meta-index pattern aggregation
(recurring certs/skills/archetype/reliability across similar past fills).
- DEFAULT_TOP_K_PLAYBOOKS bumped 5 → 25; old default silently missed
most boosts when memory had > 25 entries.
- service.rs: new routes /vectors/playbook_memory/{seed,rebuild,stats,
persist_sql,patterns}.
Bun staffing co-pilot (mcp-server/):
- /search, /match, /verify, /proof, /simulation/run, MCP tools all
forward use_playbook_memory:true and playbook_memory_k:25 to the
hybrid endpoint. Boost was previously dark across the entire app.
- /log no longer POSTs to /ingest/file — that endpoint REPLACES the
dataset's object list, so single-row CSV writes were wiping all prior
rows in successful_playbooks (sp_rows went 33→1 in one /log call).
/log now seeds playbook_memory with canonical short text and calls
/persist_sql to keep successful_playbooks_live in sync.
- /simulation/run cumulative end-of-week CSV write removed for the same
reason. Per-day per-contract /seed (added in this session) is the
accumulating feedback path now.
- search.html addWorkerInsight renders a green "Endorsed · N playbooks"
chip with playbook citations when boost > 0.
Internal Dioxus UI (crates/ui/):
- Dashboard phase list rewritten through Phase 19 (was stuck at "Phase
16: File Watcher" / "Phase 17: DB Connector" — both wrong).
- Removed fabricated "27ms" stat label.
- Ask tab examples + SQL default replaced with real staffing prompts
against candidates/clients/job_orders (was referencing nonexistent
employees/products/events).
- New Playbook tab exposes /vectors/playbook_memory/{stats,rebuild} and
side-by-side hybrid search (boost OFF vs ON) with citations.
Tests (tests/multi-agent/):
- run_e2e_rated.ts: parallel two-agent (mistral + qwen2.5) build phase
+ verifier rating (geo, auth, persist, boost, speed → /10).
- network_proving.ts: continuous build → verify → repeat with
staffing-recruiter profile hot-swap; geo-discrimination check.
- chain_of_custody.ts: single recruiter operation traced through every
layer (Bun /search, direct /vectors/hybrid parity, /log, SQL,
playbook_memory growth, profile activation, post-op boost lift).
336 lines
16 KiB
TypeScript
336 lines
16 KiB
TypeScript
// Chain-of-custody trace test.
|
|
//
|
|
// J's framing: "we have enough synthetic data, we've run enough AI responses
|
|
// saved to the database. Test true quality. Don't ignore chain of custody.
|
|
// Use real applications. Understand each aspect of the flow — not just
|
|
// 'write a file or directory and open it'."
|
|
//
|
|
// One real recruiter operation, traced end-to-end through EVERY layer of the
|
|
// live substrate. Every layer must record the operation correctly. Any layer
|
|
// that drops it = chain-of-custody break = surfaced as a real bug.
|
|
//
|
|
// Layers verified:
|
|
// L0 Bun /search — recruiter app surface (NOT bare /vectors/hybrid)
|
|
// L1 /vectors/hybrid — direct gateway (parity check vs L0)
|
|
// L2 /vectors/playbook_memory/stats — feedback loop count
|
|
// L3 Bun /log — recruiter records the pick
|
|
// L4 successful_playbooks — SQL-queryable table of past fills
|
|
// L5 /vectors/playbook_memory/stats — count grew
|
|
// L6 tools/audit — Phase 12 governance trail
|
|
// L7 /access/audit — Phase 13 access trail
|
|
// L8 /journal/recent — Phase 9 mutation events
|
|
// L9 /storage/errors — Federation error journal (no new errors)
|
|
// L10 /vectors/profile/{id}/activate — Phase 17 hot-swap
|
|
// L11 Bun /search again — boost lifts the just-logged worker
|
|
// L12 verifier qwen2.5 — reads cross-layer state, judges integrity
|
|
//
|
|
// Run: bun run tests/multi-agent/chain_of_custody.ts
|
|
//
|
|
// Prints per-layer BEFORE/AFTER/DELTA. Exit non-zero on any chain break.
|
|
|
|
import { generate, GATEWAY } from "./agent.ts";
|
|
|
|
const BUN = "http://localhost:3700";
|
|
const PROFILE_ID = "staffing-recruiter";
|
|
|
|
// The trace operation — small, deterministic, real city/role with supply.
|
|
// Helen Sanchez (worker_id 4661) is a known Toledo Welder; we record her
|
|
// as the manual pick the recruiter would make from the /search results.
|
|
const OPERATION = "fill: Welder x1 in Toledo, OH";
|
|
const OP_ROLE = "Welder";
|
|
const OP_CITY = "Toledo";
|
|
const OP_STATE = "OH";
|
|
const PICKED_WORKER = "Helen Sanchez"; // verified earlier to be a Toledo OH Welder
|
|
|
|
// ─────────────────────── helpers ───────────────────────
|
|
|
|
async function getJSON<T = any>(url: string): Promise<T | null> {
|
|
try {
|
|
const r = await fetch(url);
|
|
if (!r.ok) return null;
|
|
return r.json() as Promise<T>;
|
|
} catch { return null; }
|
|
}
|
|
|
|
async function postJSON<T = any>(url: string, body: any): Promise<T | null> {
|
|
try {
|
|
const r = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body) });
|
|
if (!r.ok) return { _error: `${r.status}: ${await r.text()}` } as any;
|
|
return r.json() as Promise<T>;
|
|
} catch (e) { return { _error: (e as Error).message } as any; }
|
|
}
|
|
|
|
async function sql(query: string): Promise<{ rows?: any[]; error?: string } | null> {
|
|
return postJSON(`${GATEWAY}/query/sql`, { sql: query });
|
|
}
|
|
|
|
interface Snapshot {
|
|
pm_entries: number;
|
|
pm_names: number;
|
|
sp_rows: number; // successful_playbooks SQL row count
|
|
audit_count: number; // tools/audit count
|
|
access_count: number; // access/audit count
|
|
journal_count: number; // journal/stats events
|
|
storage_errors: number; // bucket error journal
|
|
}
|
|
|
|
async function snapshot(): Promise<Snapshot> {
|
|
const pm = await getJSON<any>(`${GATEWAY}/vectors/playbook_memory/stats`);
|
|
// successful_playbooks_live is the live SQL surface populated by /log
|
|
// via /vectors/playbook_memory/persist_sql. The original
|
|
// successful_playbooks table is now legacy/historical (no writes).
|
|
const sp = await sql(`SELECT COUNT(*) AS c FROM successful_playbooks_live`);
|
|
const audit = await getJSON<any[]>(`${GATEWAY}/tools/audit`);
|
|
const access = await getJSON<any>(`${GATEWAY}/access/audit`);
|
|
const journalStats = await getJSON<any>(`${GATEWAY}/journal/stats`);
|
|
const storageErrors = await getJSON<any[]>(`${GATEWAY}/storage/errors`);
|
|
|
|
return {
|
|
pm_entries: pm?.entries ?? -1,
|
|
pm_names: pm?.total_names_endorsed ?? -1,
|
|
sp_rows: Number(sp?.rows?.[0]?.c ?? -1),
|
|
audit_count: Array.isArray(audit) ? audit.length : (audit as any)?.events?.length ?? -1,
|
|
access_count: Array.isArray(access) ? access.length : (access as any)?.events?.length ?? (access as any)?.audit?.length ?? -1,
|
|
journal_count: journalStats?.event_count ?? journalStats?.total_events ?? journalStats?.events ?? -1,
|
|
storage_errors: Array.isArray(storageErrors) ? storageErrors.length : (storageErrors as any)?.events?.length ?? 0,
|
|
};
|
|
}
|
|
|
|
function delta(b: Snapshot, a: Snapshot): Record<string, number> {
|
|
return {
|
|
pm_entries: a.pm_entries - b.pm_entries,
|
|
pm_names: a.pm_names - b.pm_names,
|
|
sp_rows: a.sp_rows - b.sp_rows,
|
|
audit_count: a.audit_count - b.audit_count,
|
|
access_count: a.access_count - b.access_count,
|
|
journal_count: a.journal_count - b.journal_count,
|
|
storage_errors: a.storage_errors - b.storage_errors,
|
|
};
|
|
}
|
|
|
|
function fmtRow(label: string, b: number, a: number): string {
|
|
const d = a - b;
|
|
const dStr = d === 0 ? " · " : d > 0 ? ` +${d}` : ` ${d}`;
|
|
return ` ${label.padEnd(28)} ${String(b).padStart(6)} → ${String(a).padStart(6)} ${dStr}`;
|
|
}
|
|
|
|
// ─────────────────────── trace ───────────────────────
|
|
|
|
interface TraceResult {
|
|
layer: string;
|
|
ok: boolean;
|
|
detail: string;
|
|
}
|
|
|
|
async function runTrace(): Promise<TraceResult[]> {
|
|
const out: TraceResult[] = [];
|
|
const note = (layer: string, ok: boolean, detail: string) => {
|
|
out.push({ layer, ok, detail });
|
|
console.log(` ${ok ? "✓" : "✗"} ${layer.padEnd(32)} ${detail}`);
|
|
};
|
|
|
|
console.log(`\n▶ Trace operation: ${OPERATION} → pick=${PICKED_WORKER}\n`);
|
|
|
|
// ── BEFORE snapshot ──
|
|
console.log(`▶ Before-snapshot:`);
|
|
const before = await snapshot();
|
|
console.log(` pm_entries=${before.pm_entries} pm_names=${before.pm_names} sp_rows=${before.sp_rows} `
|
|
+ `audit=${before.audit_count} access=${before.access_count} journal=${before.journal_count} `
|
|
+ `storage_errors=${before.storage_errors}\n`);
|
|
|
|
// ── L0: Bun /search ──
|
|
console.log(`▶ L0 — Bun /search (recruiter app surface)`);
|
|
const sql_filter = `role = '${OP_ROLE}' AND state = '${OP_STATE}' AND city = '${OP_CITY}'`;
|
|
const bunSearch = await postJSON<any>(`${BUN}/search`, {
|
|
question: `Welder in ${OP_CITY}, ${OP_STATE}`,
|
|
sql_filter, top_k: 5, generate: false,
|
|
id_column: "worker_id", dataset: "workers_500k", use_playbook_memory: true,
|
|
});
|
|
if (bunSearch?._error) {
|
|
note("L0 Bun /search", false, `error: ${bunSearch._error}`);
|
|
} else {
|
|
const sources = bunSearch?.sources ?? [];
|
|
const boostedHits = sources.filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
|
|
note("L0 Bun /search", true, `sources=${sources.length} boosted=${boostedHits} sql_matches=${bunSearch?.sql_matches}`);
|
|
}
|
|
|
|
// ── L1: direct /vectors/hybrid (parity check) ──
|
|
console.log(`\n▶ L1 — Direct /vectors/hybrid (parity check vs Bun)`);
|
|
const directSearch = await postJSON<any>(`${GATEWAY}/vectors/hybrid`, {
|
|
index_name: "workers_500k_v1", filter_dataset: "workers_500k", id_column: "worker_id",
|
|
sql_filter, question: `Welder in ${OP_CITY}, ${OP_STATE}`,
|
|
top_k: 5, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
|
|
});
|
|
const directBoosted = (directSearch?.sources ?? []).filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
|
|
note("L1 Direct /vectors/hybrid", true, `boosted=${directBoosted} sql=${directSearch?.sql_matches}`);
|
|
|
|
const bunBoosted = (bunSearch?.sources ?? []).filter((s: any) => (s.playbook_boost ?? 0) > 0).length;
|
|
if (bunBoosted < directBoosted) {
|
|
note("CHAIN BREAK: Bun↔Direct parity", false,
|
|
`Bun=${bunBoosted} boosted vs Direct=${directBoosted}. Bun /search likely missing playbook_memory_k forward.`);
|
|
}
|
|
|
|
// ── L3: Bun /log (recruiter records the pick) ──
|
|
console.log(`\n▶ L3 — Bun /log (recruiter records the pick)`);
|
|
const logged = await postJSON<any>(`${BUN}/log`, {
|
|
operation: OPERATION,
|
|
approach: "chain-of-custody trace",
|
|
result: `1/1 filled → ${PICKED_WORKER}`,
|
|
context: `client=COC-${Date.now()} start=08:00 scenario=trace`,
|
|
});
|
|
if (logged?._error) note("L3 Bun /log", false, `error: ${logged._error}`);
|
|
else note("L3 Bun /log", true, `logged=${logged?.logged} seeded=${logged?.seeded}`);
|
|
|
|
// The /log response carries the result of the underlying /ingest/file too.
|
|
// If "response" mentions "different schema" or "error", the SQL-queryable
|
|
// path is broken even though seed succeeded. That's a chain break.
|
|
const logResp = String((logged as any)?.response ?? "");
|
|
if (logResp.includes("error") || logResp.includes("different schema") || logResp.includes("Error")) {
|
|
note("CHAIN BREAK: Bun /log → SQL ingest", false,
|
|
`successful_playbooks ingest failed. Bun returned logged=true but /log's underlying ingest reported: ${logResp.slice(0, 150)}`);
|
|
} else {
|
|
note("L3a /log → /ingest/file", true, "ingest accepted");
|
|
}
|
|
|
|
// Give the system a beat for any async fan-out (audit/journal/etc).
|
|
await new Promise(r => setTimeout(r, 500));
|
|
|
|
// ── AFTER snapshot ──
|
|
console.log(`\n▶ After-snapshot:`);
|
|
const after = await snapshot();
|
|
const d = delta(before, after);
|
|
console.log(fmtRow("playbook_memory.entries", before.pm_entries, after.pm_entries));
|
|
console.log(fmtRow("playbook_memory.names", before.pm_names, after.pm_names));
|
|
console.log(fmtRow("successful_playbooks.rows", before.sp_rows, after.sp_rows));
|
|
console.log(fmtRow("tools/audit.count", before.audit_count, after.audit_count));
|
|
console.log(fmtRow("access/audit.count", before.access_count, after.access_count));
|
|
console.log(fmtRow("journal.events", before.journal_count, after.journal_count));
|
|
console.log(fmtRow("storage/errors.count", before.storage_errors,after.storage_errors));
|
|
|
|
// ── L5: playbook_memory grew? ──
|
|
if (d.pm_entries === 1) note("L5 playbook_memory growth", true, "+1 entry as expected");
|
|
else note("L5 playbook_memory growth", d.pm_entries > 0,
|
|
`delta=${d.pm_entries} (expected 1 — seed-after-log path)`);
|
|
|
|
// ── L4: successful_playbooks SQL row appeared? ──
|
|
if (d.sp_rows >= 1) note("L4 successful_playbooks SQL", true, `+${d.sp_rows} row(s)`);
|
|
else note("L4 successful_playbooks SQL", false,
|
|
`delta=${d.sp_rows} — Bun /log claims success but SQL table didn't grow. Recruiter querying via SQL would miss this fill.`);
|
|
|
|
// ── L9: storage errors stayed quiet ──
|
|
if (d.storage_errors === 0) note("L9 storage error journal", true, "no new bucket op errors");
|
|
else note("L9 storage error journal", false, `+${d.storage_errors} new errors`);
|
|
|
|
// ── L10: Phase 17 profile activation ──
|
|
console.log(`\n▶ L10 — Activate profile ${PROFILE_ID}`);
|
|
const act = await postJSON<any>(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, {});
|
|
if (act?._error) note("L10 profile activation", false, `error: ${act._error}`);
|
|
else note("L10 profile activation", true,
|
|
`warmed=${(act?.warmed_indexes ?? []).length} duration_ms=${act?.duration_ms ?? "?"}`);
|
|
|
|
// ── L11: Bun /search again — boost should now lift PICKED_WORKER ──
|
|
console.log(`\n▶ L11 — Bun /search second time (boost lift verification)`);
|
|
const search2 = await postJSON<any>(`${BUN}/search`, {
|
|
question: `Welder in ${OP_CITY}, ${OP_STATE}`,
|
|
sql_filter, top_k: 10, generate: false,
|
|
id_column: "worker_id", dataset: "workers_500k", use_playbook_memory: true,
|
|
});
|
|
const sources2 = search2?.sources ?? [];
|
|
const pickedHit = sources2.find((s: any) => String(s.chunk_text ?? "").includes(PICKED_WORKER));
|
|
if (!pickedHit) {
|
|
note("L11 boost lifts logged pick (Bun)", false,
|
|
`${PICKED_WORKER} not in top-10 via Bun /search. Could be Bun-not-forwarding-playbook_memory_k bug from L1.`);
|
|
} else if ((pickedHit.playbook_boost ?? 0) > 0) {
|
|
note("L11 boost lifts logged pick (Bun)", true,
|
|
`${PICKED_WORKER} boost=+${(pickedHit.playbook_boost as number).toFixed(3)} cites=${(pickedHit.playbook_citations ?? []).length}`);
|
|
} else {
|
|
note("L11 boost lifts logged pick (Bun)", false,
|
|
`${PICKED_WORKER} present but boost=0 — playbook_memory_k forward bug likely`);
|
|
}
|
|
|
|
// Same probe via direct gateway to isolate Bun vs gateway
|
|
const direct2 = await postJSON<any>(`${GATEWAY}/vectors/hybrid`, {
|
|
index_name: "workers_500k_v1", filter_dataset: "workers_500k", id_column: "worker_id",
|
|
sql_filter, question: `Welder in ${OP_CITY}, ${OP_STATE}`,
|
|
top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
|
|
});
|
|
const sources2d = direct2?.sources ?? [];
|
|
const pickedHitD = sources2d.find((s: any) => String(s.chunk_text ?? "").includes(PICKED_WORKER));
|
|
if (pickedHitD && (pickedHitD.playbook_boost ?? 0) > 0) {
|
|
note("L11b boost via direct gateway", true,
|
|
`${PICKED_WORKER} boost=+${(pickedHitD.playbook_boost as number).toFixed(3)} cites=${(pickedHitD.playbook_citations ?? []).length}`);
|
|
} else {
|
|
note("L11b boost via direct gateway", false, `direct call also did not boost ${PICKED_WORKER}`);
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
// ─────────────────────── verifier (fresh agent) ───────────────────────
|
|
|
|
async function verifierJudgment(trace: TraceResult[]): Promise<{ verdict: string; confidence: number }> {
|
|
const summary = trace.map(t => ` ${t.ok ? "ok" : "FAIL"} ${t.layer}: ${t.detail}`).join("\n");
|
|
const prompt = `You are the CHAIN-OF-CUSTODY VERIFIER agent. A real recruiter operation was just
|
|
traced through every layer of the staffing substrate. Read the per-layer results and judge
|
|
whether the system kept chain of custody intact (every layer recorded the operation as
|
|
expected) or where it broke.
|
|
|
|
Per-layer trace:
|
|
${summary}
|
|
|
|
Reply with ONE JSON object only:
|
|
{"verdict": "<one tight sentence — what's the integrity status>", "confidence": 0-100}
|
|
|
|
Be specific about which layer broke if any. confidence is how sure you are about the verdict.`;
|
|
|
|
try {
|
|
const raw = await generate("qwen2.5:latest", prompt, { temperature: 0.1, max_tokens: 200 });
|
|
const start = raw.indexOf("{"), end = raw.lastIndexOf("}");
|
|
if (start < 0 || end <= start) return { verdict: "verifier could not produce JSON", confidence: 0 };
|
|
const j = JSON.parse(raw.slice(start, end + 1));
|
|
return { verdict: j.verdict ?? "no verdict", confidence: Number(j.confidence) || 0 };
|
|
} catch (e) {
|
|
return { verdict: `verifier error: ${(e as Error).message}`, confidence: 0 };
|
|
}
|
|
}
|
|
|
|
// ─────────────────────── main ───────────────────────
|
|
|
|
async function main() {
|
|
console.log(`▶ Chain-of-custody trace — single real recruiter operation through every layer`);
|
|
|
|
const trace = await runTrace();
|
|
|
|
console.log(`\n▶ L12 — Verifier (fresh qwen2.5 agent reads the cross-layer trace)`);
|
|
const v = await verifierJudgment(trace);
|
|
console.log(` verdict (${v.confidence}%): ${v.verdict}`);
|
|
|
|
// Hard gate: any explicit CHAIN BREAK note = fail
|
|
const breaks = trace.filter(t => !t.ok && t.layer.startsWith("CHAIN BREAK"));
|
|
const fails = trace.filter(t => !t.ok);
|
|
|
|
console.log(`\n▶ Summary:`);
|
|
console.log(` passing layers: ${trace.filter(t => t.ok).length}/${trace.length}`);
|
|
console.log(` chain breaks: ${breaks.length}`);
|
|
console.log(` total failures: ${fails.length}`);
|
|
console.log(` verifier confidence: ${v.confidence}%`);
|
|
|
|
if (breaks.length > 0) {
|
|
console.log(`\n✗ Chain of custody BROKEN at ${breaks.length} layer(s):`);
|
|
for (const b of breaks) console.log(` - ${b.layer}: ${b.detail}`);
|
|
process.exit(1);
|
|
}
|
|
if (fails.length > 0) {
|
|
console.log(`\n◑ Trace completed with ${fails.length} non-blocking failures (no formal chain break)`);
|
|
process.exit(0);
|
|
}
|
|
console.log(`\n✓ Chain of custody intact across all layers`);
|
|
process.exit(0);
|
|
}
|
|
|
|
main().catch(e => {
|
|
console.error(`\n✗ ${(e as Error).message}`);
|
|
if ((e as any).stack) console.error((e as any).stack);
|
|
process.exit(1);
|
|
});
|