From 5e89407939f07aebbe788d951715c1e24ceaf380 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 20 Apr 2026 22:50:05 -0500 Subject: [PATCH] =?UTF-8?q?Phase=2023=20refinement=20=E2=80=94=20per-staffer?= =?UTF-8?q?=20tool=5Flevel=20variance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Staffer.tool_level now controls which subsystems a specific run gets: full — qwen3.5 + qwen3 + cloud T3 + cloud rescue local — qwen3.5 + qwen3 + local gpt-oss:20b T3 + rescue basic — qwen2.5 + qwen2.5 + local T3, no rescue minimal — qwen2.5 + qwen2.5, NO T3, NO rescue. Playbook inheritance only. applyToolLevel() resets the module-scoped ACTIVE_* slots to the env defaults at the start of each run and then applies the staffer's overrides, so a prior staffer's overrides never leak. Hot-path code reads ACTIVE_EXECUTOR / ACTIVE_REVIEWER / ACTIVE_T3_DISABLED / ACTIVE_OVERVIEW_CLOUD / ACTIVE_RETRY_ON_FAIL instead of the baked constants. The architectural question this answers: does playbook_memory inheritance carry enough knowledge to let a weakly-tooled coordinator still produce usable outcomes? "Minimal" Alex runs qwen2.5 as both executor and reviewer, with no T3 overseer and no cloud rescue. If Alex still fills events at a reasonable rate, the playbook system is the real knowledge carrier — the senior stack is nice-to-have, not the sine qua non. Demo personas mapped: Maria (senior, 48mo, full) James (mid, 14mo, local) Sam (junior, 4mo, basic) Alex (trainee, 1mo, minimal) Same 3 contracts (Nashville downtown, Joliet warehouse, Indianapolis assembly) across all four → 12 runs. KB + kb_staffer_report.py leaderboard already wired; competence_score will now reflect real tool asymmetry instead of LLM sampling variance. 
--- tests/multi-agent/gen_staffer_demo.ts | 12 +- tests/multi-agent/scenario.ts | 113 ++++++++++++++---- .../S-001_indianapolis_assembly.json | 3 +- .../staffer_demo/S-001_joliet_warehouse.json | 3 +- .../S-001_nashville_downtown.json | 3 +- .../S-002_indianapolis_assembly.json | 3 +- .../staffer_demo/S-002_joliet_warehouse.json | 3 +- .../S-002_nashville_downtown.json | 3 +- .../S-003_indianapolis_assembly.json | 3 +- .../staffer_demo/S-003_joliet_warehouse.json | 3 +- .../S-003_nashville_downtown.json | 3 +- .../S-004_indianapolis_assembly.json | 3 +- .../staffer_demo/S-004_joliet_warehouse.json | 3 +- .../S-004_nashville_downtown.json | 3 +- 14 files changed, 119 insertions(+), 42 deletions(-) diff --git a/tests/multi-agent/gen_staffer_demo.ts b/tests/multi-agent/gen_staffer_demo.ts index 847cd6b..2a45fb7 100644 --- a/tests/multi-agent/gen_staffer_demo.ts +++ b/tests/multi-agent/gen_staffer_demo.ts @@ -8,11 +8,15 @@ import { mkdir, writeFile } from "node:fs/promises"; import { join } from "node:path"; +// Per-staffer tool_level mirrors the real-world asymmetry: senior gets +// the most powerful stack, trainee gets the least. The architectural +// question is whether the playbook inheritance is strong enough to let +// the trainee still produce usable outcomes when the big tools are off. 
const STAFFERS = [ - { id: "S-001", name: "Maria Chen", tenure_months: 48, role: "senior" as const }, - { id: "S-002", name: "James Park", tenure_months: 14, role: "mid" as const }, - { id: "S-003", name: "Sam Torres", tenure_months: 4, role: "junior" as const }, - { id: "S-004", name: "Alex Rivera", tenure_months: 1, role: "trainee" as const }, + { id: "S-001", name: "Maria Chen", tenure_months: 48, role: "senior" as const, tool_level: "full" as const }, + { id: "S-002", name: "James Park", tenure_months: 14, role: "mid" as const, tool_level: "local" as const }, + { id: "S-003", name: "Sam Torres", tenure_months: 4, role: "junior" as const, tool_level: "basic" as const }, + { id: "S-004", name: "Alex Rivera", tenure_months: 1, role: "trainee" as const, tool_level: "minimal" as const }, ]; // Three contract shapes — one downtown assembly, one warehouse ramp, diff --git a/tests/multi-agent/scenario.ts b/tests/multi-agent/scenario.ts index 6ca3cba..597fd86 100644 --- a/tests/multi-agent/scenario.ts +++ b/tests/multi-agent/scenario.ts @@ -85,6 +85,49 @@ const T3_DISABLED = process.env.LH_T3_DISABLE === "1"; // LH_RETRY_ON_FAIL=0 to compare baseline outcomes without rescue. const RETRY_ON_FAIL = process.env.LH_RETRY_ON_FAIL !== "0"; +// Phase 23 refinement — per-staffer tool_level overrides. Evaluated +// per run in main() once we know the spec's staffer. These are +// package-scoped mutable slots intentionally — the primary constant +// above is the DEFAULT; main() flips them for the duration of the run +// based on staffer.tool_level before calling anything else. +let ACTIVE_EXECUTOR = EXECUTOR_MODEL; +let ACTIVE_REVIEWER = REVIEWER_MODEL; +let ACTIVE_T3_DISABLED = T3_DISABLED; +let ACTIVE_OVERVIEW_CLOUD = OVERVIEW_CLOUD; +let ACTIVE_RETRY_ON_FAIL = RETRY_ON_FAIL; + +function applyToolLevel(level: Staffer["tool_level"] | undefined): void { + // Start from env defaults each time so previous staffer's overrides + // don't leak. 
+ ACTIVE_EXECUTOR = EXECUTOR_MODEL; + ACTIVE_REVIEWER = REVIEWER_MODEL; + ACTIVE_T3_DISABLED = T3_DISABLED; + ACTIVE_OVERVIEW_CLOUD = OVERVIEW_CLOUD; + ACTIVE_RETRY_ON_FAIL = RETRY_ON_FAIL; + if (!level) return; + switch (level) { + case "full": + ACTIVE_OVERVIEW_CLOUD = true; + break; + case "local": + ACTIVE_OVERVIEW_CLOUD = false; + break; + case "basic": + ACTIVE_EXECUTOR = "qwen2.5:latest"; + ACTIVE_REVIEWER = "qwen2.5:latest"; + ACTIVE_OVERVIEW_CLOUD = false; + ACTIVE_RETRY_ON_FAIL = false; + break; + case "minimal": + ACTIVE_EXECUTOR = "qwen2.5:latest"; + ACTIVE_REVIEWER = "qwen2.5:latest"; + ACTIVE_T3_DISABLED = true; + ACTIVE_OVERVIEW_CLOUD = false; + ACTIVE_RETRY_ON_FAIL = false; + break; + } +} + // Dispatcher: route T3 calls to local sidecar or Ollama Cloud depending // on the LH_OVERVIEW_CLOUD flag. Hot-path T1/T2 always stay local. // T3 outputs are free-form prose (lesson/hint), so shape=text — the @@ -95,7 +138,7 @@ async function overviewGenerate(prompt: string, opts: { temperature?: number; ma max_tokens: opts.max_tokens ?? 1000, shape: "text", max_continuations: 2, - cloud: OVERVIEW_CLOUD, + cloud: ACTIVE_OVERVIEW_CLOUD, }); } @@ -148,6 +191,19 @@ interface Staffer { name: string; // "Maria Chen" tenure_months: number; role: "senior" | "mid" | "junior" | "trainee"; + // Phase 23 refinement — tool_level controls which subsystems this + // staffer's runs get to use. The mechanism always leaves + // playbook_memory ON so inherited playbooks drive the outcome even + // when T3 / cloud rescue / the bigger executor are disabled. + // + // full — qwen3.5 executor + qwen3 reviewer + cloud T3 + rescue + // local — qwen3.5 + qwen3 + local gpt-oss:20b T3 + rescue + // basic — qwen2.5 + qwen2.5 + local T3, no rescue + // minimal — qwen2.5 + qwen2.5, NO T3, NO rescue. Only playbook + // inheritance to lean on. This is the honest test of + // whether the playbook system carries knowledge on its + // own. 
+ tool_level?: "full" | "local" | "basic" | "minimal"; } interface ScenarioSpec { @@ -450,7 +506,7 @@ async function runAgentFill( // reasoning. Burning ~650 thinking tokens on a 400-token JSON was // exactly the bug we just solved. const execRaw = await generateContinuable( - EXECUTOR_MODEL, + ACTIVE_EXECUTOR, withExtras(executorPrompt(task, log)), { temperature: 0.2, @@ -459,7 +515,7 @@ async function runAgentFill( max_continuations: 3, think: false, on_continuation: (n, len) => - append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "note", + append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: "note", content: { continuation: n, combined_chars: len } }), }, ); @@ -467,11 +523,11 @@ async function runAgentFill( try { execAction = parseAction(execRaw, "executor"); } catch (e) { - append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "error", + append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: "error", content: { message: (e as Error).message, raw: execRaw.slice(0, 300) } }); throw e; } - append({ turn, role: "executor", model: EXECUTOR_MODEL, + append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: execAction.kind as any, content: execAction }); if (execAction.kind === "tool_call") { @@ -490,7 +546,7 @@ async function runAgentFill( } } const trimmed = trimResult(filtered); - append({ turn, role: "executor", model: EXECUTOR_MODEL, + append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: "tool_result", content: trimmed }); // Accumulate playbook citations from any hybrid result that @@ -503,7 +559,7 @@ async function runAgentFill( } } } catch (e) { - append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", + append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: "tool_result", content: { error: (e as Error).message, tool: execAction.tool } }); consecutiveDrifts += 1; if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) { @@ -513,7 +569,7 @@ async function runAgentFill( } const revRaw = 
await generateContinuable( - REVIEWER_MODEL, + ACTIVE_REVIEWER, withExtras(reviewerPrompt(task, log)), { temperature: 0.1, @@ -522,7 +578,7 @@ async function runAgentFill( max_continuations: 3, think: false, on_continuation: (n, len) => - append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "note", + append({ turn, role: "reviewer", model: ACTIVE_REVIEWER, kind: "note", content: { continuation: n, combined_chars: len } }), }, ); @@ -530,11 +586,11 @@ async function runAgentFill( try { revAction = parseAction(revRaw, "reviewer"); } catch (e) { - append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "error", + append({ turn, role: "reviewer", model: ACTIVE_REVIEWER, kind: "error", content: { message: (e as Error).message, raw: revRaw.slice(0, 300) } }); throw e; } - append({ turn, role: "reviewer", model: REVIEWER_MODEL, + append({ turn, role: "reviewer", model: ACTIVE_REVIEWER, kind: "critique", content: revAction }); if (revAction.kind !== "critique") throw new Error(`reviewer emitted non-critique: ${revAction.kind}`); @@ -559,7 +615,7 @@ async function runAgentFill( throw new Error(`consensus proposed excluded worker ${f.candidate_id}`); } } - append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", + append({ turn, role: "reviewer", model: ACTIVE_REVIEWER, kind: "consensus_done", content: { fills: execAction.fills } }); sealed = { fills: execAction.fills, approach: execAction.rationale ?? 
"multi-agent hybrid" }; } @@ -1008,7 +1064,7 @@ async function runOverviewCheckpoint( prior: EventResult[], contract?: ContractTerms, ): Promise { - if (T3_DISABLED) return null; + if (ACTIVE_T3_DISABLED) return null; const start = Date.now(); const priorSummary = prior.slice(-3).map(p => @@ -1084,7 +1140,7 @@ async function requestCloudRemediation( result: EventResult, contract?: ContractTerms, ): Promise<{ remediation: CloudRemediation; duration_secs: number } | null> { - if (T3_DISABLED) return null; + if (ACTIVE_T3_DISABLED) return null; const start = Date.now(); const diag = extractDiagnostics(result.diagnostic_log); @@ -1144,7 +1200,7 @@ ${contract ? `- CONTRACT AWARENESS: fill_requirement=${contract.fill_requirement } async function runCrossDayLesson(ctx: ScenarioContext, checkpoints: OverviewCheckpoint[]): Promise { - if (T3_DISABLED) return null; + if (ACTIVE_T3_DISABLED) return null; const eventDigest = ctx.results.map(r => { const diag = extractDiagnostics(r.diagnostic_log); @@ -1188,7 +1244,7 @@ async function writeRetrospective(ctx: ScenarioContext): Promise { const lines: string[] = []; lines.push(`# Scenario retrospective — ${ctx.spec.client}, ${ctx.spec.date}`); lines.push(""); - lines.push(`Executor: \`${EXECUTOR_MODEL}\` Reviewer: \`${REVIEWER_MODEL}\` Draft: \`${DRAFT_MODEL}\` Overview(T3): \`${T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (OVERVIEW_CLOUD ? " (cloud)" : "")}\``); + lines.push(`Executor: \`${ACTIVE_EXECUTOR}\` Reviewer: \`${ACTIVE_REVIEWER}\` Draft: \`${DRAFT_MODEL}\` Overview(T3): \`${ACTIVE_T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (ACTIVE_OVERVIEW_CLOUD ? " (cloud)" : "")}\``); lines.push(`Prior lessons loaded into executor context: **${ctx.prior_lessons.length}**${ctx.prior_lessons.length > 0 ? 
" (from " + ctx.prior_lessons.map(p => p.date).join(", ") + ")" : " (baseline — no prior T3 history)"}`); lines.push(""); @@ -1376,15 +1432,20 @@ async function main() { const checkpoints: OverviewCheckpoint[] = []; + // Phase 23 refinement — per-staffer tool_level override. Fires once + // per run. If no staffer or no tool_level, defaults hold. + applyToolLevel(spec.staffer?.tool_level); + console.log(`▶ scenario: ${spec.client}, ${spec.date}, ${spec.events.length} events`); if (spec.staffer) { - console.log(`▶ staffer: ${spec.staffer.id} ${spec.staffer.name} (${spec.staffer.role}, ${spec.staffer.tenure_months}mo)`); + const level = spec.staffer.tool_level ?? "(default)"; + console.log(`▶ staffer: ${spec.staffer.id} ${spec.staffer.name} (${spec.staffer.role}, ${spec.staffer.tenure_months}mo, tools=${level})`); } if (spec.contract) { const c = spec.contract; console.log(`▶ contract: deadline=${c.deadline} fill=${c.fill_requirement ?? "preferred"}${c.budget_per_hour_max ? ` budget=$${c.budget_per_hour_max}/hr` : ""}${c.local_bonus_radius_mi ? ` local_radius=${c.local_bonus_radius_mi}mi+$${c.local_bonus_per_hour ?? 0}` : ""}`); } - console.log(`▶ models: exec=${EXECUTOR_MODEL} review=${REVIEWER_MODEL} overview=${T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (OVERVIEW_CLOUD ? " (cloud)" : "")}`); + console.log(`▶ models: exec=${ACTIVE_EXECUTOR} review=${ACTIVE_REVIEWER} overview=${ACTIVE_T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (ACTIVE_OVERVIEW_CLOUD ? " (cloud)" : "")}`); console.log(`▶ out: ${out_dir}\n`); for (let i = 0; i < spec.events.length; i++) { @@ -1419,7 +1480,7 @@ async function main() { // (city, role, count). Capped at 1 retry per event to keep the // budget bounded and avoid infinite loops on genuinely-impossible // scenarios. 
- if (!result.ok && RETRY_ON_FAIL && !T3_DISABLED) { + if (!result.ok && ACTIVE_RETRY_ON_FAIL && !ACTIVE_T3_DISABLED) { console.log(` ▶ cloud rescue requested for ${event.at} ${event.kind}…`); const rescue = await requestCloudRemediation(event, result, spec.contract); if (rescue && rescue.remediation.retry) { @@ -1506,7 +1567,7 @@ async function main() { // Option B — T3 checkpoint after every misplacement, and every N-th event. const isLast = i === spec.events.length - 1; const nthHit = T3_CHECKPOINT_EVERY > 0 && ((i + 1) % T3_CHECKPOINT_EVERY === 0); - const shouldCheckpoint = !T3_DISABLED && (event.kind === "misplacement" || nthHit || isLast); + const shouldCheckpoint = !ACTIVE_T3_DISABLED && (event.kind === "misplacement" || nthHit || isLast); if (shouldCheckpoint) { const cp = await runOverviewCheckpoint(event, result, ctx.results.slice(0, -1), spec.contract); if (cp) { @@ -1527,7 +1588,7 @@ async function main() { // Option A — T3 cross-day lesson. One final call distills the whole run. // Saved to lesson.md and also seeded into playbook_memory so tomorrow's // agent can retrieve it on similar setups. 
- if (!T3_DISABLED) { + if (!ACTIVE_T3_DISABLED) { console.log(`\n▶ T3 cross-day lesson via ${OVERVIEW_MODEL}…`); const tLesson = Date.now(); const lesson = await runCrossDayLesson(ctx, checkpoints); @@ -1564,7 +1625,7 @@ async function main() { events_ok: ctx.results.filter(r => r.ok).length, checkpoint_count: checkpoints.length, model: OVERVIEW_MODEL, - cloud: OVERVIEW_CLOUD, + cloud: ACTIVE_OVERVIEW_CLOUD, lesson: lesson.trim(), checkpoints: checkpoints.map(c => ({ after: c.after_event, risk: c.risk, hint: c.hint })), created_at: new Date().toISOString(), @@ -1589,17 +1650,17 @@ async function main() { out_dir, { client: spec.client, date: spec.date, events: spec.events, staffer: spec.staffer }, { - executor: EXECUTOR_MODEL, - reviewer: REVIEWER_MODEL, + executor: ACTIVE_EXECUTOR, + reviewer: ACTIVE_REVIEWER, overview: OVERVIEW_MODEL, - overview_cloud: OVERVIEW_CLOUD, + overview_cloud: ACTIVE_OVERVIEW_CLOUD, }, elapsed, ); console.log(`▶ KB indexed: sig=${sig_hash} (${elapsed.toFixed(1)}s)`); const newRec = await recommendFor(spec, { overview_model: OVERVIEW_MODEL, - cloud: OVERVIEW_CLOUD, + cloud: ACTIVE_OVERVIEW_CLOUD, k: 5, }); if (newRec) { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-001_indianapolis_assembly.json b/tests/multi-agent/scenarios/staffer_demo/S-001_indianapolis_assembly.json index b41a028..b0a8501 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-001_indianapolis_assembly.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-001_indianapolis_assembly.json @@ -13,7 +13,8 @@ "id": "S-001", "name": "Maria Chen", "tenure_months": 48, - "role": "senior" + "role": "senior", + "tool_level": "full" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-001_joliet_warehouse.json b/tests/multi-agent/scenarios/staffer_demo/S-001_joliet_warehouse.json index 30df52d..f1f3bab 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-001_joliet_warehouse.json +++ 
b/tests/multi-agent/scenarios/staffer_demo/S-001_joliet_warehouse.json @@ -13,7 +13,8 @@ "id": "S-001", "name": "Maria Chen", "tenure_months": 48, - "role": "senior" + "role": "senior", + "tool_level": "full" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-001_nashville_downtown.json b/tests/multi-agent/scenarios/staffer_demo/S-001_nashville_downtown.json index 529816a..a93f988 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-001_nashville_downtown.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-001_nashville_downtown.json @@ -13,7 +13,8 @@ "id": "S-001", "name": "Maria Chen", "tenure_months": 48, - "role": "senior" + "role": "senior", + "tool_level": "full" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-002_indianapolis_assembly.json b/tests/multi-agent/scenarios/staffer_demo/S-002_indianapolis_assembly.json index f8aeddb..fb54d1a 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-002_indianapolis_assembly.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-002_indianapolis_assembly.json @@ -13,7 +13,8 @@ "id": "S-002", "name": "James Park", "tenure_months": 14, - "role": "mid" + "role": "mid", + "tool_level": "local" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-002_joliet_warehouse.json b/tests/multi-agent/scenarios/staffer_demo/S-002_joliet_warehouse.json index dcba63a..bb0de14 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-002_joliet_warehouse.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-002_joliet_warehouse.json @@ -13,7 +13,8 @@ "id": "S-002", "name": "James Park", "tenure_months": 14, - "role": "mid" + "role": "mid", + "tool_level": "local" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-002_nashville_downtown.json b/tests/multi-agent/scenarios/staffer_demo/S-002_nashville_downtown.json index e560270..3e5b94b 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-002_nashville_downtown.json +++ 
b/tests/multi-agent/scenarios/staffer_demo/S-002_nashville_downtown.json @@ -13,7 +13,8 @@ "id": "S-002", "name": "James Park", "tenure_months": 14, - "role": "mid" + "role": "mid", + "tool_level": "local" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-003_indianapolis_assembly.json b/tests/multi-agent/scenarios/staffer_demo/S-003_indianapolis_assembly.json index f49da0f..1d00ef0 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-003_indianapolis_assembly.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-003_indianapolis_assembly.json @@ -13,7 +13,8 @@ "id": "S-003", "name": "Sam Torres", "tenure_months": 4, - "role": "junior" + "role": "junior", + "tool_level": "basic" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-003_joliet_warehouse.json b/tests/multi-agent/scenarios/staffer_demo/S-003_joliet_warehouse.json index b276ac9..4393f8b 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-003_joliet_warehouse.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-003_joliet_warehouse.json @@ -13,7 +13,8 @@ "id": "S-003", "name": "Sam Torres", "tenure_months": 4, - "role": "junior" + "role": "junior", + "tool_level": "basic" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-003_nashville_downtown.json b/tests/multi-agent/scenarios/staffer_demo/S-003_nashville_downtown.json index 9969ca0..fdfd8ce 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-003_nashville_downtown.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-003_nashville_downtown.json @@ -13,7 +13,8 @@ "id": "S-003", "name": "Sam Torres", "tenure_months": 4, - "role": "junior" + "role": "junior", + "tool_level": "basic" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-004_indianapolis_assembly.json b/tests/multi-agent/scenarios/staffer_demo/S-004_indianapolis_assembly.json index 0b5790b..1296a1d 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-004_indianapolis_assembly.json +++ 
b/tests/multi-agent/scenarios/staffer_demo/S-004_indianapolis_assembly.json @@ -13,7 +13,8 @@ "id": "S-004", "name": "Alex Rivera", "tenure_months": 1, - "role": "trainee" + "role": "trainee", + "tool_level": "minimal" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-004_joliet_warehouse.json b/tests/multi-agent/scenarios/staffer_demo/S-004_joliet_warehouse.json index fcf570e..3e75b5a 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-004_joliet_warehouse.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-004_joliet_warehouse.json @@ -13,7 +13,8 @@ "id": "S-004", "name": "Alex Rivera", "tenure_months": 1, - "role": "trainee" + "role": "trainee", + "tool_level": "minimal" }, "events": [ { diff --git a/tests/multi-agent/scenarios/staffer_demo/S-004_nashville_downtown.json b/tests/multi-agent/scenarios/staffer_demo/S-004_nashville_downtown.json index 8b32392..f65e50b 100644 --- a/tests/multi-agent/scenarios/staffer_demo/S-004_nashville_downtown.json +++ b/tests/multi-agent/scenarios/staffer_demo/S-004_nashville_downtown.json @@ -13,7 +13,8 @@ "id": "S-004", "name": "Alex Rivera", "tenure_months": 1, - "role": "trainee" + "role": "trainee", + "tool_level": "minimal" }, "events": [ {