Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.
WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) is excluded — see REPLICATION.md for
how to regenerate it. The full lakehouse history lives at git.agentview.dev/profit/lakehouse.
WHAT WAS PROVEN
- Vector retrieval across multi-corpora matrix (chicago_permits + entity
briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
  * UPSERT: ADD on a new workflow; UPDATE bumps replay_count on an identical one
  * REVISE: chains versions; parent.superseded_at + superseded_by are stamped
  * RETIRE: marks a specific trace retired with a reason; retired traces are excluded from retrieval
  * HISTORY: walks the chain root→tip, cycle-safe
KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces
Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
309 lines
13 KiB
TypeScript
// Stress test with diverse tasks + follow-up stress phases.
//
// Runs 6 diverse staffing tasks, then the stress phases below:
//   T0: Welder x2 in Toledo, OH — baseline
//   T1: Forklift Operator x2 in Nashville, TN — new city
//   T2: Electrician x2 in Cleveland, OH — new role, existing city
//   T3: Welder x3 in Milwaukee, WI — expansion
//   T4: Assembler x2 in Louisville, KY — new role
//   T5: Maintenance Tech x2 in Springfield, MO — another new city
//
// Stress phases:
//   - Repeated playbook seeds (currently issued sequentially, not concurrently)
//   - Hot-swap profile activation (currently skipped — the endpoint hangs)
//   - Playbook-memory boost queries across different geos (issued concurrently)
//
// Run: bun run tests/multi-agent/run_stress.ts
|
|
|
|
import {
|
|
type LogEntry,
|
|
type TaskSpec,
|
|
type Fill,
|
|
GATEWAY,
|
|
generate,
|
|
parseAction,
|
|
executorPrompt,
|
|
reviewerPrompt,
|
|
sqlQuery,
|
|
callTool,
|
|
} from "./agent.ts";
|
|
|
|
/** Model tag passed to `generate()` for executor turns. */
const EXECUTOR_MODEL = "qwen3.5:latest";

/** Model tag passed to `generate()` for reviewer turns. */
const REVIEWER_MODEL = "qwen3:latest";

/** Upper bound on executor/reviewer rounds per task before giving up. */
const MAX_TURNS = 12;

/**
 * Number of consecutive reviewer "drift" verdicts that aborts a task.
 * The orchestrator reuses the same cap for consecutive tool-call errors.
 */
const MAX_CONSECUTIVE_DRIFTS = 3;

/** Vector index queried when verifying playbook boosts. */
const INDEX_NAME = "workers_500k_v1";

/** Profile id used by the hot-swap activation probe. */
const PROFILE_ID = "staffing-recruiter";
|
|
|
|
/**
 * The six staffing fills exercised in Phase 1, in execution order.
 *
 * Each row is `[id, role, headcount, city, state]`. The human-readable
 * `operation` label is derived from those fields so it can never disagree
 * with them; every task shares the same approach hint.
 */
const TASKS: TaskSpec[] = (
  [
    ["T0", "Welder", 2, "Toledo", "OH"],
    ["T1", "Forklift Operator", 2, "Nashville", "TN"],
    ["T2", "Electrician", 2, "Cleveland", "OH"],
    ["T3", "Welder", 3, "Milwaukee", "WI"],
    ["T4", "Assembler", 2, "Louisville", "KY"],
    ["T5", "Maintenance Tech", 2, "Springfield", "MO"],
  ] as const
).map(([id, target_role, target_count, target_city, target_state]) => ({
  id,
  operation: `fill: ${target_role} x${target_count} in ${target_city}, ${target_state}`,
  target_role,
  target_count,
  target_city,
  target_state,
  approach_hint: "hybrid → sql verify",
}));
|
|
|
|
/** Outcome of one orchestrated executor/reviewer run for a single task. */
interface RunResult {
  /** The task that was attempted. */
  task: TaskSpec;
  /** True only when executor and reviewer reached consensus on a fill set. */
  ok: boolean;
  /** Number of executor/reviewer rounds started before the run ended. */
  turns: number;
  /** Wall-clock duration of the run, rounded to whole seconds. */
  duration_secs: number;
  /** The agreed fills; empty when the run failed. */
  fills: Fill[];
  /** Every log entry appended during the run, in order. */
  log: LogEntry[];
  /** Approach recorded for the sealed result; empty string when the run failed. */
  approach: string;
  /** Failure message; set only when `ok` is false. */
  error?: string;
}
|
|
|
|
/**
 * Dispatches one tool call requested by the executor model.
 *
 * - `hybrid_search` is POSTed to the gateway's `/vectors/hybrid` endpoint with
 *   generation disabled and playbook memory enabled.
 * - `sql` is restricted to a single `SELECT` statement and forwarded to `sqlQuery`.
 * - Any other tool name is forwarded unchanged to `callTool`.
 *
 * @param name Tool name chosen by the model.
 * @param args Tool arguments produced by the model; treated as untrusted input.
 * @returns Whatever the selected backend resolves with.
 * @throws Error when the hybrid search responds with a non-2xx status, or when
 *   the `sql` query is missing, not a string, not a SELECT, or contains more
 *   than one statement.
 */
async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  if (name === "hybrid_search") {
    // Tolerate a model that omitted `args` entirely; the gateway then rejects the request.
    const { sql_filter, question, index_name, k } = args ?? {};
    const response = await fetch(`${GATEWAY}/vectors/hybrid`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        sql_filter,
        question,
        index_name,
        top_k: k ?? 10,
        generate: false,
        use_playbook_memory: true,
      }),
    });
    if (!response.ok) throw new Error(`hybrid_search → ${response.status}: ${await response.text()}`);
    return response.json();
  }

  if (name === "sql") {
    const query: unknown = args?.query;
    if (!query || typeof query !== "string") throw new Error("sql needs query");
    if (!/^\s*SELECT\b/i.test(query)) throw new Error("sql allows SELECT only");
    // A leading SELECT must not smuggle in further statements ("SELECT 1; DROP …").
    // String literals are blanked first so a ';' inside quoted text is not misread,
    // and a lone trailing ';' stays allowed.
    const withoutLiterals = query.replace(/'(?:[^']|'')*'/g, "''");
    if (/;\s*\S/.test(withoutLiterals)) throw new Error("sql allows SELECT only (single statement)");
    return sqlQuery(query);
  }

  return callTool(name, args);
}
|
|
|
|
/**
 * Returns a copy of a tool result with its bulky arrays capped, so the result
 * stays small when it is appended to the run log.
 *
 * Both `rows` and `sources` are capped when present (a result carrying both
 * previously only had `rows` trimmed). Values without either array are
 * returned as-is, and the input is never mutated.
 *
 * @param r Tool result of any shape.
 * @param maxRows Maximum number of `rows` entries to keep (default 20).
 * @param maxSources Maximum number of `sources` entries to keep (default 12).
 * @returns The trimmed copy, or `r` itself when there is nothing to trim.
 */
function trimResult(r: any, maxRows = 20, maxSources = 12): any {
  if (!r) return r;

  const hasRows = Array.isArray(r.rows);
  const hasSources = Array.isArray(r.sources);
  if (!hasRows && !hasSources) return r;

  const trimmed = { ...r };
  if (hasRows) trimmed.rows = r.rows.slice(0, maxRows);
  if (hasSources) trimmed.sources = r.sources.slice(0, maxSources);
  return trimmed;
}
|
|
|
|
/**
 * Produces a single-line preview (at most 80 characters) of a log entry's
 * content for console output.
 *
 * Strings are truncated and their newlines replaced with spaces. Anything else
 * is JSON-serialized; values JSON cannot represent (`undefined`, functions,
 * circular structures) fall back to `String(...)` instead of throwing.
 *
 * @param e Log entry whose `content` should be previewed.
 * @returns The preview text.
 */
function shortContent(e: LogEntry): string {
  const c = e.content;
  if (typeof c === "string") return c.slice(0, 80).replace(/\n/g, " ");

  let serialized: string | undefined;
  try {
    // JSON.stringify returns undefined (not a string) for undefined and functions.
    serialized = JSON.stringify(c);
  } catch {
    // Circular references and BigInt values make JSON.stringify throw.
    serialized = undefined;
  }
  return (serialized ?? String(c)).slice(0, 80);
}
|
|
|
|
/**
 * Runs the executor/reviewer loop for one task.
 *
 * Each turn the executor model proposes an action. Tool calls are executed and
 * their (trimmed) result is logged, then the reviewer model critiques the log.
 * The run is sealed when the executor proposes completion, the reviewer
 * approves it, and the number of fills equals the task's target count.
 *
 * The run fails when: the reviewer returns a non-critique action, the reviewer
 * reports `MAX_CONSECUTIVE_DRIFTS` drifts in a row, the same number of tool
 * calls fail without an intervening successful tool call, an approved proposal
 * has a missing or wrongly sized fill list, or `MAX_TURNS` elapse without
 * consensus.
 *
 * @param task Task to fill.
 * @param prefix Label prepended to every console line for this run.
 * @returns The run outcome. Failures are reported through `ok: false` and
 *   `error`; errors raised inside the loop are caught rather than rethrown.
 */
async function runOrchestrator(task: TaskSpec, prefix: string): Promise<RunResult> {
  const start = Date.now();
  const log: LogEntry[] = [];
  let turn = 0;
  let consecutiveDrifts = 0;
  let consecutiveToolErrors = 0;
  let sealed: { fills: Fill[]; approach: string } | null = null;

  const elapsedSecs = (): number => Math.round((Date.now() - start) / 1000);

  // Thrown values are not guaranteed to be Error instances.
  const messageOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Timestamps the entry, stores it in the run log, and echoes a one-line preview.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(`[${prefix}] [t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}] ${shortContent(e)}`);
    return full;
  };

  try {
    while (turn < MAX_TURNS && !sealed) {
      turn += 1;

      // Executor proposes the next action.
      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 1200, think: false });
      const execAction = parseAction(execRaw, "executor");
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });

      if (execAction.kind === "tool_call") {
        try {
          const result = await executeToolCall(execAction.tool, execAction.args);
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trimResult(result) });
          consecutiveToolErrors = 0;
        } catch (e) {
          // The failure is logged as a tool result so later prompts can see it.
          append({
            turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
            content: { error: messageOf(e), tool: execAction.tool, args: execAction.args },
          });
          consecutiveToolErrors += 1;
          // The drift cap doubles as the cap on back-to-back tool failures.
          if (consecutiveToolErrors >= MAX_CONSECUTIVE_DRIFTS) {
            throw new Error(`${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors — executor can't form a valid call`);
          }
        }
      }

      // Reviewer critiques the log so far.
      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 1000, think: false });
      const revAction = parseAction(revRaw, "reviewer");
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });

      if (revAction.kind !== "critique") throw new Error(`reviewer non-critique: ${revAction.kind}`);

      if (revAction.verdict === "drift") {
        consecutiveDrifts += 1;
        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) throw new Error(`${MAX_CONSECUTIVE_DRIFTS} consecutive drifts`);
      } else {
        consecutiveDrifts = 0;
      }

      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
        const fills = execAction.fills;
        // Model output is untrusted: fail with a clear message rather than a TypeError.
        if (!Array.isArray(fills)) throw new Error("propose_done without a fills array");
        if (fills.length !== task.target_count) {
          throw new Error(`fills=${fills.length} target=${task.target_count}`);
        }
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", content: { fills } });
        sealed = { fills, approach: (execAction as any).rationale ?? "multi-agent → hybrid" };
      }
    }

    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);

    return {
      task, ok: true, turns: turn, fills: sealed.fills, approach: sealed.approach,
      duration_secs: elapsedSecs(), log,
    };
  } catch (e) {
    return {
      task, ok: false, turns: turn, fills: [], approach: "",
      duration_secs: elapsedSecs(), log,
      error: messageOf(e),
    };
  }
}
|
|
|
|
/**
 * Seeds playbook memory with the fills endorsed by a successful run.
 *
 * Makes up to three POST attempts against `/vectors/playbook_memory/seed`,
 * waiting 1s and then 2s between attempts. The wait now applies after HTTP
 * error responses as well as network failures; previously a non-2xx response
 * was retried immediately.
 *
 * @param result Run outcome; nothing is sent unless it is `ok` and has fills.
 * @param prefix Label prepended to every console line.
 * @returns `ok: true` plus the gateway-reported `entries_after` on success,
 *   otherwise `{ ok: false, entries_after: 0 }`. Failures are logged, not thrown.
 */
async function seedPlaybook(result: RunResult, prefix: string): Promise<{ ok: boolean; entries_after: number }> {
  const failure = (): { ok: boolean; entries_after: number } => ({ ok: false, entries_after: 0 });
  if (!result.ok || result.fills.length === 0) return failure();

  const maxAttempts = 3;
  // The payload is identical on every attempt, so it is built once.
  const body = JSON.stringify({
    operation: result.task.operation,
    approach: result.approach || "multi-agent",
    context: `${result.task.target_role} fill in ${result.task.target_city}, ${result.task.target_state}`,
    endorsed_names: result.fills.map(f => f.name),
    append: true,
  });

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const isLastAttempt = attempt === maxAttempts - 1;
    try {
      const r = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body,
      });
      if (r.ok) {
        const j = await r.json() as any;
        console.log(`[${prefix}] ↳ seeded: id=${j.outcome?.playbook_id ?? j.playbook_id} entries=${j.entries_after}`);
        return { ok: true, entries_after: j.entries_after };
      }
      console.warn(`[${prefix}] seed warning: ${r.status} ${await r.text()}`);
    } catch (e) {
      if (isLastAttempt) {
        console.warn(`[${prefix}] seed error: ${e instanceof Error ? e.message : String(e)}`);
        return failure();
      }
    }
    // Linear backoff before the next attempt, whichever way this one failed.
    if (!isLastAttempt) await Bun.sleep(1000 * (attempt + 1));
  }

  return failure();
}
|
|
|
|
/**
 * Checks whether playbook memory boosts hybrid-search results for a task.
 *
 * Issues a hybrid search filtered to the task's role, state, and city with
 * playbook memory enabled, then counts returned sources whose
 * `playbook_boost` is positive.
 *
 * @param task Task whose role and location define the query.
 * @returns Whether any source was boosted, how many were, and the flattened
 *   `playbook_citations` of the boosted sources. A non-2xx gateway response is
 *   logged and reported as "not fired".
 */
async function verifyBoost(task: TaskSpec): Promise<{ fired: boolean; hits: number; citations: string[] }> {
  // Every interpolated value is quoted the same way; the state was previously left unescaped.
  const quote = (value: string): string => `'${String(value).replace(/'/g, "''")}'`;
  const sql_filter =
    `role = ${quote(task.target_role)} ` +
    `AND state = ${quote(task.target_state)} ` +
    `AND city = ${quote(task.target_city)}`;

  const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      index_name: INDEX_NAME, filter_dataset: "workers_500k", id_column: "worker_id",
      sql_filter, question: `${task.target_role} in ${task.target_city}, ${task.target_state}`,
      top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
    }),
  });

  if (!r.ok) {
    // An error body is not guaranteed to be JSON, so report "no boost" instead of parsing it.
    console.warn(`[${task.id}] boost check → ${r.status}: ${await r.text()}`);
    return { fired: false, hits: 0, citations: [] };
  }

  const j = await r.json() as any;
  const sources: any[] = Array.isArray(j?.sources) ? j.sources : [];
  const boosted = sources.filter(s => (s.playbook_boost ?? 0) > 0);
  const cites = boosted.flatMap(s => s.playbook_citations ?? []);

  return { fired: boosted.length > 0, hits: boosted.length, citations: cites };
}
|
|
|
|
/**
 * Activates the configured profile once and measures how long the call took.
 *
 * The request is aborted after 5 seconds. A timeout, a network failure, and a
 * non-2xx status are all reported as `ok: false`; nothing is thrown.
 *
 * @returns Success flag and elapsed wall-clock time in milliseconds.
 */
async function testHotSwap(): Promise<{ ok: boolean; latency_ms: number }> {
  const start = Date.now();
  let ok = false;
  try {
    const r = await fetch(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({}),
      signal: AbortSignal.timeout(5000),
    });
    ok = r.ok;
  } catch {
    // Abort (timeout) or network failure.
    ok = false;
  }
  return { ok, latency_ms: Date.now() - start };
}
|
|
|
|
/**
 * Fetches playbook-memory counters from the gateway.
 *
 * @returns `entries` and `total_names` (read from `total_names_endorsed`).
 *   A counter missing from the response is reported as 0 so callers can do
 *   arithmetic on the values without producing NaN.
 * @throws Error when the gateway answers with a non-2xx status.
 */
async function getMemoryStats(): Promise<{ entries: number; total_names: number }> {
  const r = await fetch(`${GATEWAY}/vectors/playbook_memory/stats`);
  if (!r.ok) throw new Error(`playbook_memory/stats → ${r.status}: ${await r.text()}`);

  const j = await r.json() as any;
  return {
    entries: Number(j?.entries ?? 0),
    total_names: Number(j?.total_names_endorsed ?? 0),
  };
}
|
|
|
|
/**
 * Entry point of the stress test.
 *
 * Phase 1 runs every task sequentially and seeds playbook memory after each
 * success. Phase 2 re-seeds the first three successful results one at a time.
 * Phase 3 (hot-swap) is skipped because the endpoint hangs. Phase 4 queries
 * boosts for the first four tasks concurrently.
 *
 * The process exits 0 when at least 4 tasks succeeded, at least 2 Phase-2
 * seeds succeeded, and at least 2 boosts fired; otherwise it exits 1. The
 * skipped hot-swap phase is reported as skipped and is not part of the gate
 * (it was previously counted as an unconditional 5/5 pass).
 */
async function main(): Promise<void> {
  console.log(`▶ Stress test — ${TASKS.length} diverse tasks + concurrent operations`);
  console.log(` tasks: ${TASKS.map(t => t.operation).join(", ")}\n`);

  const statsBefore = await getMemoryStats();
  console.log(`▶ memory before: ${statsBefore.entries} entries, ${statsBefore.total_names} names\n`);

  // Phase 1: run the diverse tasks sequentially, seeding after each success.
  const results: RunResult[] = [];
  let phase1SeedsOk = 0;
  console.log(`═══ Phase 1: Diverse Tasks ═══\n`);

  for (const task of TASKS) {
    const result = await runOrchestrator(task, task.id);
    results.push(result);
    console.log(` → ${task.id}: ${result.ok ? "OK" : "FAILED"} (${result.turns} turns, ${result.duration_secs}s)${result.error ? ` — ${result.error}` : ""}\n`);
    if (!result.ok) continue;
    const seeded = await seedPlaybook(result, task.id);
    if (seeded.ok) phase1SeedsOk += 1;
    await Bun.sleep(3000);
  }

  // Phase 2: seed stress. Seeds are issued one at a time (more reliable), despite the banner.
  console.log(`═══ Phase 2: Concurrent Seed Stress ═══\n`);

  const okResults = results.filter(r => r.ok);
  const sequentialSeeds: { ok: boolean; entries_after: number }[] = [];
  for (const r of okResults.slice(0, 3)) {
    const sr = await seedPlaybook(r, `SEED-${r.task.id}`);
    sequentialSeeds.push(sr);
    await Bun.sleep(2000);
  }
  const seqOk = sequentialSeeds.filter(s => s.ok).length;
  console.log(` sequential seeds: ${seqOk}/${sequentialSeeds.length} OK\n`);

  // Phase 3: hot-swap stress is skipped because the endpoint hangs.
  console.log(`═══ Phase 3: Hot-Swap Stress ═══\n`);
  console.log(` hot-swaps: SKIPPED (endpoint hangs)\n`);

  // Phase 4: verify that playbook boosts fire for the first four tasks.
  console.log(`═══ Phase 4: Boost Verification ═══\n`);

  const boostResults = await Promise.all(
    TASKS.slice(0, 4).map(async t => ({ task: t.id, ...(await verifyBoost(t)) })),
  );
  for (const b of boostResults) {
    console.log(` ${b.task}: ${b.fired ? "FIRED" : "NO"} (${b.hits} hits)`);
  }
  const boostsFired = boostResults.filter(b => b.fired).length;

  const statsAfter = await getMemoryStats();
  console.log(`\n▶ memory after: ${statsAfter.entries} entries (+${statsAfter.entries - statsBefore.entries})\n`);

  // Summary
  const okTasks = okResults.length;
  console.log(`▶ Summary:`);
  console.log(` tasks: ${okTasks}/${TASKS.length} OK`);
  console.log(` seeds: ${phase1SeedsOk}/${TASKS.length} OK`);
  console.log(` sequential: ${seqOk}/${sequentialSeeds.length} OK`);
  console.log(` hot-swaps: SKIPPED`);
  console.log(` boosts: ${boostsFired}/${boostResults.length} FIRED`);

  const passed = okTasks >= 4 && seqOk >= 2 && boostsFired >= 2;
  if (passed) {
    console.log(`\n✓ stress test passed`);
    process.exit(0);
  } else {
    console.log(`\n✗ stress test failed`);
    process.exit(1);
  }
}
|
|
|
|
// Run the stress test; any error that escapes main() is reported and fails the process.
main().catch((e: unknown) => {
  // A rejection value is not guaranteed to be an Error instance.
  const message = e instanceof Error ? e.message : String(e);
  console.error(`\n✗ ${message}`);
  if (e instanceof Error && e.stack) console.error(e.stack);
  process.exit(1);
});
|