lakehouse/tests/multi-agent/gen_scenarios.ts
root 330cb90f99 Lift k cap, drop ornamental reason field, scenario generator
ITEM 1 — k CAP + REASON FIELD
The hybrid_search default k was hard-coded to 10. For multi-fill events
(5× expansion, 4× emergency) that's pool=10 → propose 5-of-10, half
the candidates become the answer with no room for rejection. Executor
prompt now instructs k to scale with target_count: k = max(count*5, 20),
cap 80. Default helper bumped 10 → 20.

Fill.reason dropped from required to optional. Nothing downstream ever
consumed it — resolveWorkerIds, sealSale, retrospective all use
candidate_id and name. Models loved to write 100-150 char justifications
per fill; on 4+ fills that blew the JSON budget before the structure
closed. Test 1 run result after this change: FIRST EVER 5/5 on the
Riverfront Steel scenario, 13 total turns across 5 events. The event
that failed last run (emergency 4×Loader with truncated reason-field
continuation) now clears in 2 turns.

Progression:
  mistral baseline:                  0/5
  qwen3.5 + continuation + think:false: 4/5
  qwen3.5 + k=20 + no-reason:        5/5 ✓

ITEM 2 — SCENARIO GENERATOR (NOT YET TESTED E2E)
tests/multi-agent/gen_scenarios.ts emits N deterministic ScenarioSpecs
with varied clients (15 companies), cities (20 Midwest cities known
to exist in workers_500k), role mixes (14 industrial staffing roles,
weighted realistic), and event sequences. Each gets a unique sig_hash
so the KB populates with distinct neighbor signatures.

scripts/run_kb_batch.sh runs all generated specs sequentially against
scenario.ts, logs per-scenario outcomes, and reports KB state at the
end. Each run takes ~2-4min; 20-30 scenarios = 1-2hr unattended.

Next: test the generator+batch on a small N (3-5) to verify KB
populates correctly and pathway recommendations start getting neighbor
signal instead of cold-starts. Then item 3 (Rust re-weighting of
hybrid_search by playbook_memory success).
2026-04-20 20:31:34 -05:00

188 lines
6.9 KiB
TypeScript

// Scenario generator for Phase 22 KB corpus-building.
//
// Emits N unique ScenarioSpec JSON files under
// tests/multi-agent/scenarios/ covering:
// - different clients (so sig varies even when events match)
// - different city/state combos actually present in workers_500k
// - varied event sequences (baseline/recurring/expansion/emergency/misplacement)
// - varied role mixes from the industrial staffing taxonomy
//
// Each scenario spec is written as scen_NNN_CLIENT_CITY.json (NNN is the
// zero-padded scenario index) and can be fed to scenario.ts as argv[2].
// The companion batch script (scripts/run_kb_batch.sh) runs them all
// sequentially so the KB populates overnight.
//
// Usage: argv[2] is the number of scenarios to emit (default 20) and
// argv[3] is the RNG seed (default 42). The seeded PRNG is meant to make
// repeat invocations with the same arguments reproducible.
import { mkdir, writeFile } from "node:fs/promises";
import { join } from "node:path";
// Deterministic PRNG — mulberry32, same as many test harnesses. Stable
// across bun versions; not cryptographic.
function mulberry32(seed: number) {
let s = seed >>> 0;
return () => {
s = (s + 0x6D2B79F5) >>> 0;
let t = s;
t = Math.imul(t ^ (t >>> 15), t | 1);
t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
};
}
// Cities known to exist in workers_500k, chosen to avoid false-empty
// searches. All Midwest because that's the target persona's geography.
const CITIES: Array<{ city: string; state: string }> = [
{ city: "Toledo", state: "OH" },
{ city: "Cleveland", state: "OH" },
{ city: "Columbus", state: "OH" },
{ city: "Cincinnati", state: "OH" },
{ city: "Akron", state: "OH" },
{ city: "Detroit", state: "MI" },
{ city: "Grand Rapids", state: "MI" },
{ city: "Flint", state: "MI" },
{ city: "Indianapolis", state: "IN" },
{ city: "Fort Wayne", state: "IN" },
{ city: "Gary", state: "IN" },
{ city: "Chicago", state: "IL" },
{ city: "Joliet", state: "IL" },
{ city: "Rockford", state: "IL" },
{ city: "Milwaukee", state: "WI" },
{ city: "Madison", state: "WI" },
{ city: "Louisville", state: "KY" },
{ city: "Lexington", state: "KY" },
{ city: "Kansas City", state: "MO" },
{ city: "St. Louis", state: "MO" },
];
// Industrial staffing role taxonomy. Weighted so common roles appear
// more often (realistic distribution).
const ROLES: Array<{ role: string; weight: number }> = [
{ role: "Warehouse Associate", weight: 5 },
{ role: "Machine Operator", weight: 4 },
{ role: "Forklift Operator", weight: 4 },
{ role: "Loader", weight: 3 },
{ role: "Material Handler", weight: 3 },
{ role: "Assembler", weight: 3 },
{ role: "Quality Tech", weight: 2 },
{ role: "Picker", weight: 3 },
{ role: "Packer", weight: 3 },
{ role: "Shipping Clerk", weight: 2 },
{ role: "Receiving Clerk", weight: 2 },
{ role: "Welder", weight: 2 },
{ role: "CNC Operator", weight: 2 },
{ role: "Maintenance Tech", weight: 1 },
];
const CLIENTS = [
"Riverfront Steel", "Northland Logistics", "Great Lakes Mfg",
"Midway Distribution", "Pioneer Assembly", "Cornerstone Fabrication",
"Horizon Supply", "Keystone Plastics", "Apex Warehouse",
"Heritage Foods", "Summit Industrial", "Vanguard Components",
"Centennial Packaging", "Parallel Machining", "Beacon Freight",
];
function pickWeighted<T extends { weight: number }>(rng: () => number, items: T[]): T {
const total = items.reduce((s, x) => s + x.weight, 0);
let r = rng() * total;
for (const x of items) { r -= x.weight; if (r <= 0) return x; }
return items[items.length - 1];
}
function pick<T>(rng: () => number, items: T[]): T {
return items[Math.floor(rng() * items.length)];
}
// Event shape templates. Each scenario contains 1-5 events: one baseline
// fill, plus at most one event of each other kind, included at random.
// Multi-fill counts skew low to make the harness quicker; 5+ fill
// events are the hardest and should be rarer in a corpus run.
type EventKind = "baseline_fill" | "recurring" | "expansion" | "emergency" | "misplacement";
function makeEvent(
rng: () => number,
kind: EventKind,
at: string,
city: string,
state: string,
): any {
const { role } = pickWeighted(rng, ROLES);
const count = kind === "misplacement" ? 1
: kind === "expansion" ? 2 + Math.floor(rng() * 4) // 2-5
: kind === "baseline_fill" ? 1 + Math.floor(rng() * 3) // 1-3
: kind === "recurring" ? 1 + Math.floor(rng() * 2) // 1-2
: /* emergency */ 2 + Math.floor(rng() * 3); // 2-4
const hour = 8 + Math.floor(rng() * 10);
const min = Math.random() > 0.5 ? 0 : 30;
const at_real = `${String(hour).padStart(2, "0")}:${String(min).padStart(2, "0")}`;
return {
kind,
at: at_real,
role,
count,
city,
state,
shift_start: `${at_real.replace(":", ":")} AM`,
};
}
function genSpec(rng: () => number, id: number): any {
const client = pick(rng, CLIENTS);
const { city, state } = pick(rng, CITIES);
const today = new Date();
const date = new Date(today.getTime() + id * 86400000)
.toISOString().split("T")[0];
// Scenario shape mix — 60% pure fill (baseline+recurring+expansion),
// 40% mixed (add emergency and/or misplacement).
const includeEmergency = rng() > 0.6;
const includeMisplacement = rng() > 0.6;
const events: any[] = [];
// always at least one baseline
events.push(makeEvent(rng, "baseline_fill", "08:00", city, state));
if (rng() > 0.3) events.push(makeEvent(rng, "recurring", "10:30", city, state));
if (rng() > 0.5) events.push(makeEvent(rng, "expansion", "12:15", city, state));
if (includeEmergency) events.push(makeEvent(rng, "emergency", "14:00", city, state));
if (includeMisplacement) {
const e = makeEvent(rng, "misplacement", "15:45", city, state);
if (events.length > 0) e.replaces_event = events[0].at;
events.push(e);
}
return { client, date, events };
}
async function main() {
const n = Number(process.argv[2] ?? 20);
const seed = Number(process.argv[3] ?? 42);
const rng = mulberry32(seed);
const outDir = "tests/multi-agent/scenarios";
await mkdir(outDir, { recursive: true });
const manifest: Array<{ file: string; client: string; city: string; events: number }> = [];
for (let i = 0; i < n; i++) {
const spec = genSpec(rng, i);
const cityLabel = spec.events[0].city.replace(/\s+/g, "_");
const fname = `scen_${String(i).padStart(3, "0")}_${spec.client.replace(/\s+/g, "_")}_${cityLabel}.json`;
await writeFile(join(outDir, fname), JSON.stringify(spec, null, 2));
manifest.push({
file: fname,
client: spec.client,
city: spec.events[0].city,
events: spec.events.length,
});
}
await writeFile(
join(outDir, "manifest.json"),
JSON.stringify({ count: n, seed, scenarios: manifest }, null, 2),
);
console.log(`✓ generated ${n} scenarios → ${outDir}/ (seed=${seed})`);
for (const m of manifest.slice(0, 5)) {
console.log(` ${m.file}${m.client} (${m.city}), ${m.events} events`);
}
if (manifest.length > 5) console.log(` ... +${manifest.length - 5} more`);
}
main().catch(e => {
console.error("gen_scenarios failed:", (e as Error).message);
process.exit(1);
});