lakehouse/tests/multi-agent/gen_staffer_demo.ts
root 5e89407939 Phase 23 refinement — per-staffer tool_level variance
Staffer.tool_level now controls which subsystems a specific run gets:

  full     — qwen3.5 + qwen3 + cloud T3 + cloud rescue
  local    — qwen3.5 + qwen3 + local gpt-oss:20b T3 + rescue
  basic    — qwen2.5 + qwen2.5 + local T3, no rescue
  minimal  — qwen2.5 + qwen2.5, NO T3, NO rescue. Playbook
             inheritance only.

applyToolLevel() re-derives the module-scoped ACTIVE_* slots from the env
defaults on every run, so a prior staffer's overrides never leak into the
next run. Hot-path code reads ACTIVE_EXECUTOR / ACTIVE_REVIEWER /
ACTIVE_T3_DISABLED / ACTIVE_OVERVIEW_CLOUD / ACTIVE_RETRY_ON_FAIL instead
of the baked constants.

The architectural question this answers: does playbook_memory
inheritance carry enough knowledge to let a weakly-tooled coordinator
still produce usable outcomes? "Minimal" Alex runs qwen2.5 exec + no
reviewer overseer + no cloud rescue. If Alex still fills events at a
reasonable rate, the playbook system is the real knowledge carrier —
the senior stack is nice-to-have, not the sine qua non.

Demo personas mapped:
  Maria (senior, 48mo, full)
  James (mid, 14mo, local)
  Sam (junior, 4mo, basic)
  Alex (trainee, 1mo, minimal)

Same 3 contracts (Nashville downtown, Joliet warehouse, Indianapolis
assembly) across all four → 12 runs. KB + kb_staffer_report.py
leaderboard already wired; competence_score will now reflect real tool
asymmetry instead of LLM sampling variance.
2026-04-20 22:50:05 -05:00

125 lines
5.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Phase 23 demo — 4 staffer personas × 3 contract scenarios each.
// Same contracts run against different staffers to measure competence
// differential. After the batch, findNeighbors should rank top-staffer
// playbooks above junior-staffer playbooks for similar scenarios.
//
// Output: 12 spec files under tests/multi-agent/scenarios/staffer_demo/
import { mkdir, writeFile } from "node:fs/promises";
import { join } from "node:path";
// The four demo personas, ordered from most to least tenured. Each one
// carries a `tool_level` that shrinks with seniority ("full" for the
// senior, "minimal" for the trainee) so the same contracts can be run
// under different tool stacks and the outcomes compared.
const STAFFERS = [
  {
    id: "S-001",
    name: "Maria Chen",
    tenure_months: 48,
    role: "senior" as const,
    tool_level: "full" as const,
  },
  {
    id: "S-002",
    name: "James Park",
    tenure_months: 14,
    role: "mid" as const,
    tool_level: "local" as const,
  },
  {
    id: "S-003",
    name: "Sam Torres",
    tenure_months: 4,
    role: "junior" as const,
    tool_level: "basic" as const,
  },
  {
    id: "S-004",
    name: "Alex Rivera",
    tenure_months: 1,
    role: "trainee" as const,
    tool_level: "minimal" as const,
  },
];
// Three contract shapes: a downtown build-out, a distribution-center
// ramp, and a plant expansion. Each is in a different city to vary the
// sig_hash. Event times (`at`, `deadline`, `replaces_event`) are 24-hour
// "HH:MM" strings; a misplacement event points back at the event it
// replaces via that event's `at` time.
const CONTRACTS = [
  // Nashville, TN: fill is paramount; includes an afternoon emergency.
  {
    tag: "nashville_downtown",
    client: "Riverline Logistics — Nashville Downtown Build-Out",
    city: "Nashville",
    state: "TN",
    contract: {
      deadline: "2026-05-19",
      budget_total_usd: 180000,
      budget_per_hour_max: 32,
      local_bonus_per_hour: 4,
      local_bonus_radius_mi: 75,
      fill_requirement: "paramount" as const,
    },
    events: [
      { kind: "baseline_fill", at: "07:00", role: "Welder", count: 4 },
      { kind: "expansion", at: "08:30", role: "Packaging Operator", count: 6 },
      { kind: "baseline_fill", at: "09:00", role: "Shipping Clerk", count: 2 },
      { kind: "emergency", at: "13:00", role: "Welder", count: 2, deadline: "15:00" },
      { kind: "misplacement", at: "15:30", role: "Packaging Operator", count: 1, replaces_event: "08:30" },
    ],
  },

  // Joliet, IL: fill is only preferred; no emergency event.
  {
    tag: "joliet_warehouse",
    client: "Midway Distribution — Joliet DC Ramp",
    city: "Joliet",
    state: "IL",
    contract: {
      deadline: "2026-05-12",
      budget_total_usd: 120000,
      budget_per_hour_max: 28,
      local_bonus_per_hour: 3,
      local_bonus_radius_mi: 50,
      fill_requirement: "preferred" as const,
    },
    events: [
      { kind: "baseline_fill", at: "07:00", role: "Warehouse Associate", count: 5 },
      { kind: "recurring", at: "10:00", role: "Forklift Operator", count: 3 },
      { kind: "expansion", at: "12:30", role: "Picker", count: 4 },
      { kind: "misplacement", at: "15:00", role: "Forklift Operator", count: 1, replaces_event: "10:00" },
    ],
  },

  // Indianapolis, IN: fill is paramount; largest budget of the three.
  {
    tag: "indianapolis_assembly",
    client: "Pioneer Assembly — Indianapolis Plant Expansion",
    city: "Indianapolis",
    state: "IN",
    contract: {
      deadline: "2026-05-26",
      budget_total_usd: 220000,
      budget_per_hour_max: 30,
      local_bonus_per_hour: 5,
      local_bonus_radius_mi: 60,
      fill_requirement: "paramount" as const,
    },
    events: [
      { kind: "baseline_fill", at: "07:30", role: "Assembler", count: 6 },
      { kind: "recurring", at: "09:30", role: "Quality Tech", count: 2 },
      { kind: "expansion", at: "11:00", role: "Machine Operator", count: 5 },
      { kind: "emergency", at: "14:00", role: "Machine Operator", count: 3, deadline: "16:00" },
      { kind: "misplacement", at: "16:00", role: "Assembler", count: 1, replaces_event: "07:30" },
    ],
  },
];
/**
 * Pick the meridiem suffix for a 24-hour "HH:MM" clock string.
 *
 * Hours 0–11 yield "AM" and hours 12–23 yield "PM". A string whose hour
 * cannot be parsed falls through to "PM".
 */
function meridiemFor(at: string): "AM" | "PM" {
  // parseInt stops at the ":" separator, so "10:00" parses as 10. Testing
  // only for a leading "0" would mislabel 10:00 and 11:00 as PM.
  return Number.parseInt(at, 10) < 12 ? "AM" : "PM";
}

/**
 * Write one scenario spec per (staffer, contract) pair, plus a manifest.
 *
 * For every combination of STAFFERS × CONTRACTS this writes
 * `<staffer.id>_<contract.tag>.json` into
 * `tests/multi-agent/scenarios/staffer_demo` (relative to the current
 * working directory), then writes `manifest.json` listing every spec and
 * prints a summary to stdout.
 *
 * Each spec gets its own date: consecutive UTC calendar days starting
 * tomorrow, in generation order.
 *
 * @returns A promise that resolves once all files are written; it rejects
 *          if creating the directory or writing any file fails.
 */
async function main(): Promise<void> {
  const outDir = "tests/multi-agent/scenarios/staffer_demo";
  await mkdir(outDir, { recursive: true });

  const manifest: Array<{ file: string; staffer: string; contract: string; client: string }> = [];
  let day = 0;

  for (const staffer of STAFFERS) {
    for (const ct of CONTRACTS) {
      // Advance one day per scenario so no two specs share a date.
      day += 1;
      const date = new Date(Date.now() + day * 86400000).toISOString().split("T")[0];

      const spec = {
        client: ct.client,
        date,
        contract: ct.contract,
        staffer,
        events: ct.events.map(e => ({
          ...e,
          city: ct.city,
          state: ct.state,
          // The 24-hour clock text is kept verbatim; only the suffix is derived.
          shift_start: `${e.at} ${meridiemFor(e.at)}`,
          scenario_note: `Staffed by ${staffer.name} (${staffer.role}, ${staffer.tenure_months}mo). Contract deadline ${ct.contract.deadline}, fill=${ct.contract.fill_requirement}.`,
        })),
      };

      const fname = `${staffer.id}_${ct.tag}.json`;
      await writeFile(join(outDir, fname), JSON.stringify(spec, null, 2));
      manifest.push({ file: fname, staffer: staffer.name, contract: ct.tag, client: ct.client });
    }
  }

  await writeFile(
    join(outDir, "manifest.json"),
    JSON.stringify({ count: manifest.length, scenarios: manifest }, null, 2),
  );

  console.log(`${manifest.length} staffer-demo specs → ${outDir}/`);
  // Pad the filename column so the staffer name never runs into the filename.
  const fileColumn = manifest.reduce((width, m) => Math.max(width, m.file.length), 0);
  for (const m of manifest) {
    console.log(`  ${m.file.padEnd(fileColumn)}  ${m.staffer} × ${m.contract}`);
  }
}
// Script entry point: run the generator and exit non-zero on any failure.
main().catch((e: unknown) => {
  // A rejection is not guaranteed to be an Error instance; narrow before
  // reading `.message` so a thrown string or object is still reported
  // instead of printing `undefined`.
  const reason = e instanceof Error ? e.message : String(e);
  console.error("gen_staffer_demo failed:", reason);
  process.exit(1);
});