lakehouse/tests/multi-agent/gen_scenarios.ts
root 6b71c8e9b2 Phase 23 — contract terms + staffer identity + competence-weighted retrieval
Matrix-index the "who handled this" dimension so top staffers become
the training signal and juniors inherit their playbooks automatically
via the boost pipeline. Auto-discovered indicators emerge from
comparing trajectories across staffers on similar contracts — that was
always the architectural point; this wires the last piece.

ContractTerms:
- deadline, budget_total_usd, budget_per_hour_max, local_bonus_per_hour,
  local_bonus_radius_mi, fill_requirement ("paramount" | "preferred")
- Attached to ScenarioSpec, propagated into T3 checkpoint + cloud
  rescue prompts so cloud reasons about trade-offs (pivot within bonus
  radius first; respect per-hour cap; split across cities when
  fill_requirement=paramount).

Staffer:
- {id, name, tenure_months, role: senior|mid|junior|trainee}
- On ScenarioSpec; logged at scenario start; attached to KB outcome
- Recomputed StafferStats written to data/_kb/staffers.jsonl after
  every run: total_runs, fill_rate, avg_turns, avg_citations,
  rescue_rate, competence_score.
- Competence formula: 0.45*fill_rate + 0.20*turn_efficiency +
  0.20*citation_density + 0.15*rescue_rate. Normalized to 0..1.

findNeighbors now returns weighted_score = cosine × best_staffer_competence
(floored at 0.3 so high-similarity low-competence neighbors still
surface). pathway_recommender prompt shows the top staffer's identity
so cloud knows WHOSE playbook it's synthesizing from.

Demo infrastructure:
- tests/multi-agent/gen_staffer_demo.ts: 4 personas (Maria senior,
  James mid, Sam junior, Alex trainee) × 3 contracts (Nashville Welder,
  Joliet Warehouse, Indianapolis Assembly). 12 scenarios total.
- scripts/run_staffer_demo.sh: runs the 12 sequentially with
  LH_OVERVIEW_CLOUD=1. Post-run calls kb_staffer_report.py.
- scripts/kb_staffer_report.py: leaderboard + cross-staffer worker
  overlap (names endorsed by ≥2 staffers → auto-discovered high-value
  workers). Top vs bottom differential.

gen_scenarios.ts (Phase 22 generator) also now emits contract terms
on 70% of generated specs — future KB batches populate with realistic
constraint patterns instead of bare role+city+count.

Stress scenario from item A intentionally NOT the production test.
Real staffing has constraints; Nashville contract + staffer demo is
the honest test of whether the architecture produces measurable
differential between coordinator skill levels.

Demo batch launched — 12 runs × ~3min each ≈ 40min unattended. Report
emitted after batch.
2026-04-20 22:16:09 -05:00

211 lines
7.9 KiB
TypeScript

// Scenario generator for Phase 22 KB corpus-building.
//
// Emits N unique ScenarioSpec JSON files under
// tests/multi-agent/scenarios/ covering:
// - different clients (so sig varies even when events match)
// - different city/state combos actually present in workers_500k
// - varied event sequences (baseline/recurring/expansion/emergency/misplacement)
// - varied role mixes from the industrial staffing taxonomy
//
// Each scenario spec is written as scen_NNN_CLIENT_CITY.json (NNN is the
// zero-padded scenario index) and can be
// fed to scenario.ts as argv[2]. A sibling run_batch.sh runs them all
// sequentially so the KB populates overnight.
//
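// Determinism: argv[2] is the number of scenarios (default 20) and
// argv[3] is the RNG seed (default 42). Values drawn from the seeded RNG
// repeat across invocations; scenario dates and deadlines are offsets
// from the current day, so they shift with the run date.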
import { mkdir, writeFile } from "node:fs/promises";
import { join } from "node:path";
// Seeded pseudo-random source using the mulberry32 mixing steps. It is
// not cryptographic; its only job is to replay the same stream of
// values whenever it is given the same seed.
//
// `seed` is coerced to an unsigned 32-bit integer. The returned function
// yields a float in [0, 1) on every call and advances the internal state.
function mulberry32(seed: number) {
  let state = seed >>> 0;
  return function next() {
    // Advance the 32-bit counter, wrapping on overflow.
    state = (state + 0x6D2B79F5) >>> 0;
    // Two rounds of xor-shift + 32-bit multiply scramble the counter.
    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
    const folded = mixed ^ (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61));
    // Final xor-shift, reinterpret as unsigned, scale by 2^32 into [0, 1).
    return ((folded ^ (folded >>> 14)) >>> 0) / 4294967296;
  };
}
// Job sites a scenario may be placed in, keyed by state and flattened
// into { city, state } records. Per the original note these are cities
// known to exist in workers_500k (so searches do not come back falsely
// empty) and were chosen to match the target persona's Midwest-area
// geography. Order matters: pick() indexes into this list, so changing
// the order changes which city a given seed produces.
const CITIES: Array<{ city: string; state: string }> = Object.entries({
  OH: ["Toledo", "Cleveland", "Columbus", "Cincinnati", "Akron"],
  MI: ["Detroit", "Grand Rapids", "Flint"],
  IN: ["Indianapolis", "Fort Wayne", "Gary"],
  IL: ["Chicago", "Joliet", "Rockford"],
  WI: ["Milwaukee", "Madison"],
  KY: ["Louisville", "Lexington"],
  MO: ["Kansas City", "St. Louis"],
}).flatMap(([state, names]) => names.map((city) => ({ city, state })));
// Industrial staffing roles with their relative sampling weights: a
// higher weight means pickWeighted() selects the role more often, so
// common roles dominate the generated corpus. Order is significant
// because weighted selection walks the list front to back.
const ROLES: Array<{ role: string; weight: number }> = Object.entries({
  "Warehouse Associate": 5,
  "Machine Operator": 4,
  "Forklift Operator": 4,
  "Loader": 3,
  "Material Handler": 3,
  "Assembler": 3,
  "Quality Tech": 2,
  "Picker": 3,
  "Packer": 3,
  "Shipping Clerk": 2,
  "Receiving Clerk": 2,
  "Welder": 2,
  "CNC Operator": 2,
  "Maintenance Tech": 1,
}).map(([role, weight]) => ({ role, weight }));
// Client names a generated scenario can be attributed to. pick() selects
// uniformly by index, so the order here affects which client a given
// seed produces.
const CLIENTS: string[] = [
  "Riverfront Steel",
  "Northland Logistics",
  "Great Lakes Mfg",
  "Midway Distribution",
  "Pioneer Assembly",
  "Cornerstone Fabrication",
  "Horizon Supply",
  "Keystone Plastics",
  "Apex Warehouse",
  "Heritage Foods",
  "Summit Industrial",
  "Vanguard Components",
  "Centennial Packaging",
  "Parallel Machining",
  "Beacon Freight",
];
// Weighted random choice: each item is selected with probability
// proportional to its `weight`. Draws exactly one value from `rng`.
// Falls back to the last item if the running remainder never reaches
// zero (for example when `rng` returns a value of 1 or more); for an
// empty list that fallback is `undefined`.
function pickWeighted<T extends { weight: number }>(rng: () => number, items: T[]): T {
  let weightSum = 0;
  for (const entry of items) weightSum += entry.weight;

  // Walk the list, consuming each item's weight from the drawn offset
  // until the offset is used up.
  let remaining = rng() * weightSum;
  for (let i = 0; i < items.length; i++) {
    remaining -= items[i].weight;
    if (remaining <= 0) return items[i];
  }
  return items[items.length - 1];
}
// Uniform random choice: maps one draw from `rng` (expected in [0, 1))
// onto an index of `items` and returns that element. An empty list
// yields `undefined`.
function pick<T>(rng: () => number, items: T[]): T {
  const index = Math.floor(rng() * items.length);
  return items[index];
}
// Event shape templates. genSpec() always emits one baseline_fill and
// then optionally adds one event of each other kind, so a scenario holds
// 1-5 events. Head counts are kept small (never more than 5) so harness
// runs stay quick.
type EventKind = "baseline_fill" | "recurring" | "expansion" | "emergency" | "misplacement";

// Builds one event of the given kind for a city/state.
//
// Every random value (role, head count, hour, half-hour) is drawn from
// `rng`, so a fixed seed reproduces the same event. The draw order is
// role -> count (no draw for "misplacement") -> hour -> minute.
//
// Parameters:
//   rng   - seeded generator returning floats in [0, 1)
//   kind  - event kind; determines the head-count range
//   at    - NOTE(review): accepted but not used. The emitted `at` is a
//           randomized time, not this value; kept so callers stay valid.
//   city, state - copied onto the event unchanged
//
// Returns { kind, at, role, count, city, state, shift_start } where `at`
// is 24-hour "HH:MM" between 08:00 and 17:30 and `shift_start` is the
// same time on a 12-hour clock with the matching AM/PM suffix.
function makeEvent(
  rng: () => number,
  kind: EventKind,
  at: string,
  city: string,
  state: string,
): any {
  const { role } = pickWeighted(rng, ROLES);

  let count: number;
  switch (kind) {
    case "misplacement":
      count = 1; // a misplacement always concerns a single worker
      break;
    case "expansion":
      count = 2 + Math.floor(rng() * 4); // 2-5
      break;
    case "baseline_fill":
      count = 1 + Math.floor(rng() * 3); // 1-3
      break;
    case "recurring":
      count = 1 + Math.floor(rng() * 2); // 1-2
      break;
    default: // emergency
      count = 2 + Math.floor(rng() * 3); // 2-4
      break;
  }

  const hour = 8 + Math.floor(rng() * 10); // 8..17
  // Drawn from the seeded rng (not Math.random) so repeat runs with the
  // same seed emit the same minute.
  const min = rng() > 0.5 ? 0 : 30;

  const pad2 = (value: number) => String(value).padStart(2, "0");
  const at_real = `${pad2(hour)}:${pad2(min)}`;

  // 12-hour rendering: morning values are unchanged ("08:30 AM"), while
  // noon and later get a correct PM label instead of e.g. "14:30 AM".
  const hour12 = hour % 12 === 0 ? 12 : hour % 12;
  const meridiem = hour < 12 ? "AM" : "PM";

  return {
    kind,
    at: at_real,
    role,
    count,
    city,
    state,
    shift_start: `${pad2(hour12)}:${pad2(min)} ${meridiem}`,
  };
}
// Builds one scenario spec: a client, a scenario date, a list of events
// that all share one city/state, and (about 70% of the time) a
// `contract` object with contract terms.
//
// `id` is the scenario index; the scenario date is the current day plus
// `id` days, so dates depend on when the generator is run. All other
// random choices come from `rng`, and the order of draws below is part of
// the output for a given seed.
function genSpec(rng: () => number, id: number): any {
  const MS_PER_DAY = 86400000;

  const client = pick(rng, CLIENTS);
  const site = pick(rng, CITIES);

  const now = new Date();
  // ISO calendar date (UTC, YYYY-MM-DD) `offsetDays` days after `now`.
  const isoDay = (offsetDays: number) =>
    new Date(now.getTime() + offsetDays * MS_PER_DAY).toISOString().split("T")[0];
  const date = isoDay(id);

  // Emergency and misplacement are decided up front, each independently
  // with a ~40% chance.
  const wantEmergency = rng() > 0.6;
  const wantMisplacement = rng() > 0.6;

  // Every scenario starts with a baseline fill; recurring (~70%) and
  // expansion (~50%) are added on their own coin flips.
  const events: any[] = [makeEvent(rng, "baseline_fill", "08:00", site.city, site.state)];
  if (rng() > 0.3) {
    events.push(makeEvent(rng, "recurring", "10:30", site.city, site.state));
  }
  if (rng() > 0.5) {
    events.push(makeEvent(rng, "expansion", "12:15", site.city, site.state));
  }
  if (wantEmergency) {
    events.push(makeEvent(rng, "emergency", "14:00", site.city, site.state));
  }
  if (wantMisplacement) {
    const misplaced = makeEvent(rng, "misplacement", "15:45", site.city, site.state);
    // The baseline event is always present, so it is the one referenced.
    misplaced.replaces_event = events[0].at;
    events.push(misplaced);
  }

  const spec: any = { client, date, events };

  // Contract terms, attached to ~70% of specs:
  //   deadline              10-44 days after the scenario date
  //   budget_per_hour_max   $22-$38
  //   local_bonus_radius_mi 25-149
  //   local_bonus_per_hour  $2-$6
  //   fill_requirement      "paramount" (~60%) or "preferred"
  if (rng() > 0.3) {
    const deadlineOffset = 10 + Math.floor(rng() * 35);
    const hourlyCap = 22 + Math.floor(rng() * 17);
    const radiusMiles = 25 + Math.floor(rng() * 125);
    const hourlyBonus = 2 + Math.floor(rng() * 5);
    const requirement: "paramount" | "preferred" = rng() > 0.4 ? "paramount" : "preferred";
    spec.contract = {
      deadline: isoDay(id + deadlineOffset),
      budget_per_hour_max: hourlyCap,
      local_bonus_per_hour: hourlyBonus,
      local_bonus_radius_mi: radiusMiles,
      fill_requirement: requirement,
    };
  }

  return spec;
}
// CLI entry point. Generates scenario spec files plus a manifest.
//
// Arguments:
//   argv[2] - number of scenarios to generate (default 20); must be a
//             non-negative integer
//   argv[3] - RNG seed (default 42); must be a finite number
//
// Writes one scen_NNN_CLIENT_CITY.json per scenario and a manifest.json
// into tests/multi-agent/scenarios (relative to the current working
// directory), then prints a short summary.
//
// Throws an Error when an argument is invalid, and propagates any
// filesystem error from mkdir/writeFile.
async function main(): Promise<void> {
  const n = Number(process.argv[2] ?? 20);
  const seed = Number(process.argv[3] ?? 42);

  // Reject bad input up front: a NaN count would silently generate
  // nothing, and a NaN seed would silently be treated as seed 0.
  if (!Number.isInteger(n) || n < 0) {
    throw new Error(`scenario count must be a non-negative integer, got "${process.argv[2]}"`);
  }
  if (!Number.isFinite(seed)) {
    throw new Error(`seed must be a finite number, got "${process.argv[3]}"`);
  }

  const rng = mulberry32(seed);
  const outDir = "tests/multi-agent/scenarios";
  await mkdir(outDir, { recursive: true });

  const manifest: Array<{ file: string; client: string; city: string; events: number }> = [];
  for (let i = 0; i < n; i++) {
    const spec = genSpec(rng, i);
    // All events in a spec share one city, so the first event names it.
    const city: string = spec.events[0].city;
    const cityLabel = city.replace(/\s+/g, "_");
    const fname = `scen_${String(i).padStart(3, "0")}_${spec.client.replace(/\s+/g, "_")}_${cityLabel}.json`;
    await writeFile(join(outDir, fname), JSON.stringify(spec, null, 2));
    manifest.push({
      file: fname,
      client: spec.client,
      city,
      events: spec.events.length,
    });
  }

  await writeFile(
    join(outDir, "manifest.json"),
    JSON.stringify({ count: n, seed, scenarios: manifest }, null, 2),
  );

  console.log(`✓ generated ${n} scenarios → ${outDir}/ (seed=${seed})`);
  // Preview the first few entries; a separator keeps the file name and
  // client from running together.
  for (const m of manifest.slice(0, 5)) {
    console.log(` ${m.file} — ${m.client} (${m.city}), ${m.events} events`);
  }
  if (manifest.length > 5) console.log(` ... +${manifest.length - 5} more`);
}
// Run the generator; on any failure print the reason and exit non-zero.
main().catch((e: unknown) => {
  // A rejection is not guaranteed to be an Error instance; fall back to
  // its string form rather than printing `undefined`.
  const detail = e instanceof Error ? e.message : String(e);
  console.error("gen_scenarios failed:", detail);
  process.exit(1);
});