From 5e89407939f07aebbe788d951715c1e24ceaf380 Mon Sep 17 00:00:00 2001
From: root <root@island37.com>
Date: Mon, 20 Apr 2026 22:50:05 -0500
Subject: [PATCH] =?UTF-8?q?Phase=2023=20refinement=20=E2=80=94=20per-staff?=
 =?UTF-8?q?er=20tool=5Flevel=20variance?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Staffer.tool_level now controls which subsystems a specific run gets:

  full     — qwen3.5 + qwen3 + cloud T3 + cloud rescue
  local    — qwen3.5 + qwen3 + local gpt-oss:20b T3 + rescue
  basic    — qwen2.5 + qwen2.5 + local T3, no rescue
  minimal  — qwen2.5 + qwen2.5, NO T3, NO rescue. Playbook
             inheritance only.

applyToolLevel() resets the module-scoped ACTIVE_* slots to the env
defaults at the start of each run and then applies the staffer's
overrides, so a prior staffer's overrides never leak. Hot-path code
reads ACTIVE_EXECUTOR / ACTIVE_REVIEWER / ACTIVE_T3_DISABLED /
ACTIVE_OVERVIEW_CLOUD / ACTIVE_RETRY_ON_FAIL instead of the baked
constants.

The architectural question this answers: does playbook_memory
inheritance carry enough knowledge to let a weakly-tooled coordinator
still produce usable outcomes? "Minimal" Alex runs a qwen2.5 executor
and a qwen2.5 reviewer with no T3 overseer and no cloud rescue. If Alex
still fills events at a reasonable rate, the playbook system is the
real knowledge carrier — the senior stack is nice-to-have, not the
sine qua non.

Demo personas mapped:
  Maria (senior, 48mo, full)
  James (mid, 14mo, local)
  Sam (junior, 4mo, basic)
  Alex (trainee, 1mo, minimal)

Same 3 contracts (Nashville downtown, Joliet warehouse, Indianapolis
assembly) across all four → 12 runs. KB + kb_staffer_report.py
leaderboard already wired; competence_score will now reflect real tool
asymmetry instead of LLM sampling variance.
---
 tests/multi-agent/gen_staffer_demo.ts         |  12 +-
 tests/multi-agent/scenario.ts                 | 113 ++++++++++++++----
 .../S-001_indianapolis_assembly.json          |   3 +-
 .../staffer_demo/S-001_joliet_warehouse.json  |   3 +-
 .../S-001_nashville_downtown.json             |   3 +-
 .../S-002_indianapolis_assembly.json          |   3 +-
 .../staffer_demo/S-002_joliet_warehouse.json  |   3 +-
 .../S-002_nashville_downtown.json             |   3 +-
 .../S-003_indianapolis_assembly.json          |   3 +-
 .../staffer_demo/S-003_joliet_warehouse.json  |   3 +-
 .../S-003_nashville_downtown.json             |   3 +-
 .../S-004_indianapolis_assembly.json          |   3 +-
 .../staffer_demo/S-004_joliet_warehouse.json  |   3 +-
 .../S-004_nashville_downtown.json             |   3 +-
 14 files changed, 119 insertions(+), 42 deletions(-)

diff --git a/tests/multi-agent/gen_staffer_demo.ts b/tests/multi-agent/gen_staffer_demo.ts
index 847cd6b..2a45fb7 100644
--- a/tests/multi-agent/gen_staffer_demo.ts
+++ b/tests/multi-agent/gen_staffer_demo.ts
@@ -8,11 +8,15 @@
 import { mkdir, writeFile } from "node:fs/promises";
 import { join } from "node:path";
 
+// Per-staffer tool_level mirrors the real-world asymmetry: senior gets
+// the most powerful stack, trainee gets the least. The architectural
+// question is whether the playbook inheritance is strong enough to let
+// the trainee still produce usable outcomes when the big tools are off.
 const STAFFERS = [
-  { id: "S-001", name: "Maria Chen",   tenure_months: 48, role: "senior"  as const },
-  { id: "S-002", name: "James Park",   tenure_months: 14, role: "mid"     as const },
-  { id: "S-003", name: "Sam Torres",   tenure_months: 4,  role: "junior"  as const },
-  { id: "S-004", name: "Alex Rivera",  tenure_months: 1,  role: "trainee" as const },
+  { id: "S-001", name: "Maria Chen",   tenure_months: 48, role: "senior"  as const, tool_level: "full"    as const },
+  { id: "S-002", name: "James Park",   tenure_months: 14, role: "mid"     as const, tool_level: "local"   as const },
+  { id: "S-003", name: "Sam Torres",   tenure_months: 4,  role: "junior"  as const, tool_level: "basic"   as const },
+  { id: "S-004", name: "Alex Rivera",  tenure_months: 1,  role: "trainee" as const, tool_level: "minimal" as const },
 ];
 
 // Three contract shapes — one downtown assembly, one warehouse ramp,
diff --git a/tests/multi-agent/scenario.ts b/tests/multi-agent/scenario.ts
index 6ca3cba..597fd86 100644
--- a/tests/multi-agent/scenario.ts
+++ b/tests/multi-agent/scenario.ts
@@ -85,6 +85,49 @@ const T3_DISABLED = process.env.LH_T3_DISABLE === "1";
 // LH_RETRY_ON_FAIL=0 to compare baseline outcomes without rescue.
 const RETRY_ON_FAIL = process.env.LH_RETRY_ON_FAIL !== "0";
 
+// Phase 23 refinement — per-staffer tool_level overrides. Evaluated
+// per run in main() once we know the spec's staffer. These are
+// module-scoped mutable slots intentionally — the env-derived constants
+// above are the DEFAULTS; main() sets them for the duration of the run
+// based on staffer.tool_level before the event loop starts.
+let ACTIVE_EXECUTOR = EXECUTOR_MODEL;
+let ACTIVE_REVIEWER = REVIEWER_MODEL;
+let ACTIVE_T3_DISABLED = T3_DISABLED;
+let ACTIVE_OVERVIEW_CLOUD = OVERVIEW_CLOUD;
+let ACTIVE_RETRY_ON_FAIL = RETRY_ON_FAIL;
+
+function applyToolLevel(level: Staffer["tool_level"] | undefined): void {
+  // Sets the ACTIVE_* slots for one run: reset all to env defaults (so a
+  // prior staffer's overrides can't leak), then apply `level`'s overrides.
+  ACTIVE_EXECUTOR = EXECUTOR_MODEL;
+  ACTIVE_REVIEWER = REVIEWER_MODEL;
+  ACTIVE_T3_DISABLED = T3_DISABLED;
+  ACTIVE_OVERVIEW_CLOUD = OVERVIEW_CLOUD;
+  ACTIVE_RETRY_ON_FAIL = RETRY_ON_FAIL;
+  if (!level) return; // no tool_level given: the env defaults stand
+  switch (level) { // NOTE(review): no default arm — an unlisted value keeps env defaults
+    case "full": // only forces cloud T3; models, T3 on/off and retry stay at env defaults
+      ACTIVE_OVERVIEW_CLOUD = true;
+      break;
+    case "local": // only forces non-cloud T3; everything else stays at env defaults
+      ACTIVE_OVERVIEW_CLOUD = false;
+      break;
+    case "basic": // qwen2.5 executor + reviewer, T3 not sent to cloud, rescue retry off
+      ACTIVE_EXECUTOR = "qwen2.5:latest";
+      ACTIVE_REVIEWER = "qwen2.5:latest";
+      ACTIVE_OVERVIEW_CLOUD = false;
+      ACTIVE_RETRY_ON_FAIL = false;
+      break;
+    case "minimal": // as "basic", and additionally turns T3 off entirely
+      ACTIVE_EXECUTOR = "qwen2.5:latest";
+      ACTIVE_REVIEWER = "qwen2.5:latest";
+      ACTIVE_T3_DISABLED = true;
+      ACTIVE_OVERVIEW_CLOUD = false;
+      ACTIVE_RETRY_ON_FAIL = false;
+      break;
+  }
+}
+
 // Dispatcher: route T3 calls to local sidecar or Ollama Cloud depending
 // on the LH_OVERVIEW_CLOUD flag. Hot-path T1/T2 always stay local.
 // T3 outputs are free-form prose (lesson/hint), so shape=text — the
@@ -95,7 +138,7 @@ async function overviewGenerate(prompt: string, opts: { temperature?: number; ma
     max_tokens: opts.max_tokens ?? 1000,
     shape: "text",
     max_continuations: 2,
-    cloud: OVERVIEW_CLOUD,
+    cloud: ACTIVE_OVERVIEW_CLOUD,
   });
 }
 
@@ -148,6 +191,19 @@ interface Staffer {
   name: string;                                      // "Maria Chen"
   tenure_months: number;
   role: "senior" | "mid" | "junior" | "trainee";
+  // Phase 23 refinement — tool_level controls which subsystems this
+  // staffer's runs get to use. The mechanism always leaves
+  // playbook_memory ON so inherited playbooks drive the outcome even
+  // when T3 / cloud rescue / the bigger executor are disabled.
+  //
+  //   full     — qwen3.5 executor + qwen3 reviewer + cloud T3 + rescue
+  //   local    — qwen3.5 + qwen3 + local gpt-oss:20b T3 + rescue
+  //   basic    — qwen2.5 + qwen2.5 + local T3, no rescue
+  //   minimal  — qwen2.5 + qwen2.5, NO T3, NO rescue. Only playbook
+  //              inheritance to lean on. This is the honest test of
+  //              whether the playbook system carries knowledge on its
+  //              own.
+  tool_level?: "full" | "local" | "basic" | "minimal";
 }
 
 interface ScenarioSpec {
@@ -450,7 +506,7 @@ async function runAgentFill(
     // reasoning. Burning ~650 thinking tokens on a 400-token JSON was
     // exactly the bug we just solved.
     const execRaw = await generateContinuable(
-      EXECUTOR_MODEL,
+      ACTIVE_EXECUTOR,
       withExtras(executorPrompt(task, log)),
       {
         temperature: 0.2,
@@ -459,7 +515,7 @@ async function runAgentFill(
         max_continuations: 3,
         think: false,
         on_continuation: (n, len) =>
-          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "note",
+          append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: "note",
             content: { continuation: n, combined_chars: len } }),
       },
     );
@@ -467,11 +523,11 @@ async function runAgentFill(
     try {
       execAction = parseAction(execRaw, "executor");
     } catch (e) {
-      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "error",
+      append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: "error",
         content: { message: (e as Error).message, raw: execRaw.slice(0, 300) } });
       throw e;
     }
-    append({ turn, role: "executor", model: EXECUTOR_MODEL,
+    append({ turn, role: "executor", model: ACTIVE_EXECUTOR,
       kind: execAction.kind as any, content: execAction });
 
     if (execAction.kind === "tool_call") {
@@ -490,7 +546,7 @@ async function runAgentFill(
           }
         }
         const trimmed = trimResult(filtered);
-        append({ turn, role: "executor", model: EXECUTOR_MODEL,
+        append({ turn, role: "executor", model: ACTIVE_EXECUTOR,
           kind: "tool_result", content: trimmed });
 
         // Accumulate playbook citations from any hybrid result that
@@ -503,7 +559,7 @@ async function runAgentFill(
           }
         }
       } catch (e) {
-        append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
+        append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: "tool_result",
           content: { error: (e as Error).message, tool: execAction.tool } });
         consecutiveDrifts += 1;
         if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
@@ -513,7 +569,7 @@ async function runAgentFill(
     }
 
     const revRaw = await generateContinuable(
-      REVIEWER_MODEL,
+      ACTIVE_REVIEWER,
       withExtras(reviewerPrompt(task, log)),
       {
         temperature: 0.1,
@@ -522,7 +578,7 @@ async function runAgentFill(
         max_continuations: 3,
         think: false,
         on_continuation: (n, len) =>
-          append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "note",
+          append({ turn, role: "reviewer", model: ACTIVE_REVIEWER, kind: "note",
             content: { continuation: n, combined_chars: len } }),
       },
     );
@@ -530,11 +586,11 @@ async function runAgentFill(
     try {
       revAction = parseAction(revRaw, "reviewer");
     } catch (e) {
-      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "error",
+      append({ turn, role: "reviewer", model: ACTIVE_REVIEWER, kind: "error",
         content: { message: (e as Error).message, raw: revRaw.slice(0, 300) } });
       throw e;
     }
-    append({ turn, role: "reviewer", model: REVIEWER_MODEL,
+    append({ turn, role: "reviewer", model: ACTIVE_REVIEWER,
       kind: "critique", content: revAction });
 
     if (revAction.kind !== "critique") throw new Error(`reviewer emitted non-critique: ${revAction.kind}`);
@@ -559,7 +615,7 @@ async function runAgentFill(
           throw new Error(`consensus proposed excluded worker ${f.candidate_id}`);
         }
       }
-      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done",
+      append({ turn, role: "reviewer", model: ACTIVE_REVIEWER, kind: "consensus_done",
         content: { fills: execAction.fills } });
       sealed = { fills: execAction.fills, approach: execAction.rationale ?? "multi-agent hybrid" };
     }
@@ -1008,7 +1064,7 @@ async function runOverviewCheckpoint(
   prior: EventResult[],
   contract?: ContractTerms,
 ): Promise<OverviewCheckpoint | null> {
-  if (T3_DISABLED) return null;
+  if (ACTIVE_T3_DISABLED) return null;
   const start = Date.now();
 
   const priorSummary = prior.slice(-3).map(p =>
@@ -1084,7 +1140,7 @@ async function requestCloudRemediation(
   result: EventResult,
   contract?: ContractTerms,
 ): Promise<{ remediation: CloudRemediation; duration_secs: number } | null> {
-  if (T3_DISABLED) return null;
+  if (ACTIVE_T3_DISABLED) return null;
   const start = Date.now();
   const diag = extractDiagnostics(result.diagnostic_log);
 
@@ -1144,7 +1200,7 @@ ${contract ? `- CONTRACT AWARENESS: fill_requirement=${contract.fill_requirement
 }
 
 async function runCrossDayLesson(ctx: ScenarioContext, checkpoints: OverviewCheckpoint[]): Promise<string | null> {
-  if (T3_DISABLED) return null;
+  if (ACTIVE_T3_DISABLED) return null;
 
   const eventDigest = ctx.results.map(r => {
     const diag = extractDiagnostics(r.diagnostic_log);
@@ -1188,7 +1244,7 @@ async function writeRetrospective(ctx: ScenarioContext): Promise<void> {
   const lines: string[] = [];
   lines.push(`# Scenario retrospective — ${ctx.spec.client}, ${ctx.spec.date}`);
   lines.push("");
-  lines.push(`Executor: \`${EXECUTOR_MODEL}\`   Reviewer: \`${REVIEWER_MODEL}\`   Draft: \`${DRAFT_MODEL}\`   Overview(T3): \`${T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (OVERVIEW_CLOUD ? " (cloud)" : "")}\``);
+  lines.push(`Executor: \`${ACTIVE_EXECUTOR}\`   Reviewer: \`${ACTIVE_REVIEWER}\`   Draft: \`${DRAFT_MODEL}\`   Overview(T3): \`${ACTIVE_T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (ACTIVE_OVERVIEW_CLOUD ? " (cloud)" : "")}\``);
   lines.push(`Prior lessons loaded into executor context: **${ctx.prior_lessons.length}**${ctx.prior_lessons.length > 0 ? " (from " + ctx.prior_lessons.map(p => p.date).join(", ") + ")" : " (baseline — no prior T3 history)"}`);
   lines.push("");
 
@@ -1376,15 +1432,20 @@ async function main() {
 
   const checkpoints: OverviewCheckpoint[] = [];
 
+  // Phase 23 refinement — per-staffer tool_level override. Fires once
+  // per run. If no staffer or no tool_level, defaults hold.
+  applyToolLevel(spec.staffer?.tool_level);
+
   console.log(`▶ scenario: ${spec.client}, ${spec.date}, ${spec.events.length} events`);
   if (spec.staffer) {
-    console.log(`▶ staffer: ${spec.staffer.id} ${spec.staffer.name} (${spec.staffer.role}, ${spec.staffer.tenure_months}mo)`);
+    const level = spec.staffer.tool_level ?? "(default)";
+    console.log(`▶ staffer: ${spec.staffer.id} ${spec.staffer.name} (${spec.staffer.role}, ${spec.staffer.tenure_months}mo, tools=${level})`);
   }
   if (spec.contract) {
     const c = spec.contract;
     console.log(`▶ contract: deadline=${c.deadline} fill=${c.fill_requirement ?? "preferred"}${c.budget_per_hour_max ? ` budget=$${c.budget_per_hour_max}/hr` : ""}${c.local_bonus_radius_mi ? ` local_radius=${c.local_bonus_radius_mi}mi+$${c.local_bonus_per_hour ?? 0}` : ""}`);
   }
-  console.log(`▶ models: exec=${EXECUTOR_MODEL} review=${REVIEWER_MODEL} overview=${T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (OVERVIEW_CLOUD ? " (cloud)" : "")}`);
+  console.log(`▶ models: exec=${ACTIVE_EXECUTOR} review=${ACTIVE_REVIEWER} overview=${ACTIVE_T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (ACTIVE_OVERVIEW_CLOUD ? " (cloud)" : "")}`);
   console.log(`▶ out: ${out_dir}\n`);
 
   for (let i = 0; i < spec.events.length; i++) {
@@ -1419,7 +1480,7 @@ async function main() {
     // (city, role, count). Capped at 1 retry per event to keep the
     // budget bounded and avoid infinite loops on genuinely-impossible
     // scenarios.
-    if (!result.ok && RETRY_ON_FAIL && !T3_DISABLED) {
+    if (!result.ok && ACTIVE_RETRY_ON_FAIL && !ACTIVE_T3_DISABLED) {
       console.log(`   ▶ cloud rescue requested for ${event.at} ${event.kind}…`);
       const rescue = await requestCloudRemediation(event, result, spec.contract);
       if (rescue && rescue.remediation.retry) {
@@ -1506,7 +1567,7 @@ async function main() {
     // Option B — T3 checkpoint after every misplacement, and every N-th event.
     const isLast = i === spec.events.length - 1;
     const nthHit = T3_CHECKPOINT_EVERY > 0 && ((i + 1) % T3_CHECKPOINT_EVERY === 0);
-    const shouldCheckpoint = !T3_DISABLED && (event.kind === "misplacement" || nthHit || isLast);
+    const shouldCheckpoint = !ACTIVE_T3_DISABLED && (event.kind === "misplacement" || nthHit || isLast);
     if (shouldCheckpoint) {
       const cp = await runOverviewCheckpoint(event, result, ctx.results.slice(0, -1), spec.contract);
       if (cp) {
@@ -1527,7 +1588,7 @@ async function main() {
   // Option A — T3 cross-day lesson. One final call distills the whole run.
   // Saved to lesson.md and also seeded into playbook_memory so tomorrow's
   // agent can retrieve it on similar setups.
-  if (!T3_DISABLED) {
+  if (!ACTIVE_T3_DISABLED) {
     console.log(`\n▶ T3 cross-day lesson via ${OVERVIEW_MODEL}…`);
     const tLesson = Date.now();
     const lesson = await runCrossDayLesson(ctx, checkpoints);
@@ -1564,7 +1625,7 @@ async function main() {
           events_ok: ctx.results.filter(r => r.ok).length,
           checkpoint_count: checkpoints.length,
           model: OVERVIEW_MODEL,
-          cloud: OVERVIEW_CLOUD,
+          cloud: ACTIVE_OVERVIEW_CLOUD,
           lesson: lesson.trim(),
           checkpoints: checkpoints.map(c => ({ after: c.after_event, risk: c.risk, hint: c.hint })),
           created_at: new Date().toISOString(),
@@ -1589,17 +1650,17 @@ async function main() {
       out_dir,
       { client: spec.client, date: spec.date, events: spec.events, staffer: spec.staffer },
       {
-        executor: EXECUTOR_MODEL,
-        reviewer: REVIEWER_MODEL,
+        executor: ACTIVE_EXECUTOR,
+        reviewer: ACTIVE_REVIEWER,
         overview: OVERVIEW_MODEL,
-        overview_cloud: OVERVIEW_CLOUD,
+        overview_cloud: ACTIVE_OVERVIEW_CLOUD,
       },
       elapsed,
     );
     console.log(`▶ KB indexed: sig=${sig_hash} (${elapsed.toFixed(1)}s)`);
     const newRec = await recommendFor(spec, {
       overview_model: OVERVIEW_MODEL,
-      cloud: OVERVIEW_CLOUD,
+      cloud: ACTIVE_OVERVIEW_CLOUD,
       k: 5,
     });
     if (newRec) {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-001_indianapolis_assembly.json b/tests/multi-agent/scenarios/staffer_demo/S-001_indianapolis_assembly.json
index b41a028..b0a8501 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-001_indianapolis_assembly.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-001_indianapolis_assembly.json
@@ -13,7 +13,8 @@
     "id": "S-001",
     "name": "Maria Chen",
     "tenure_months": 48,
-    "role": "senior"
+    "role": "senior",
+    "tool_level": "full"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-001_joliet_warehouse.json b/tests/multi-agent/scenarios/staffer_demo/S-001_joliet_warehouse.json
index 30df52d..f1f3bab 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-001_joliet_warehouse.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-001_joliet_warehouse.json
@@ -13,7 +13,8 @@
     "id": "S-001",
     "name": "Maria Chen",
     "tenure_months": 48,
-    "role": "senior"
+    "role": "senior",
+    "tool_level": "full"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-001_nashville_downtown.json b/tests/multi-agent/scenarios/staffer_demo/S-001_nashville_downtown.json
index 529816a..a93f988 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-001_nashville_downtown.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-001_nashville_downtown.json
@@ -13,7 +13,8 @@
     "id": "S-001",
     "name": "Maria Chen",
     "tenure_months": 48,
-    "role": "senior"
+    "role": "senior",
+    "tool_level": "full"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-002_indianapolis_assembly.json b/tests/multi-agent/scenarios/staffer_demo/S-002_indianapolis_assembly.json
index f8aeddb..fb54d1a 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-002_indianapolis_assembly.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-002_indianapolis_assembly.json
@@ -13,7 +13,8 @@
     "id": "S-002",
     "name": "James Park",
     "tenure_months": 14,
-    "role": "mid"
+    "role": "mid",
+    "tool_level": "local"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-002_joliet_warehouse.json b/tests/multi-agent/scenarios/staffer_demo/S-002_joliet_warehouse.json
index dcba63a..bb0de14 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-002_joliet_warehouse.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-002_joliet_warehouse.json
@@ -13,7 +13,8 @@
     "id": "S-002",
     "name": "James Park",
     "tenure_months": 14,
-    "role": "mid"
+    "role": "mid",
+    "tool_level": "local"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-002_nashville_downtown.json b/tests/multi-agent/scenarios/staffer_demo/S-002_nashville_downtown.json
index e560270..3e5b94b 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-002_nashville_downtown.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-002_nashville_downtown.json
@@ -13,7 +13,8 @@
     "id": "S-002",
     "name": "James Park",
     "tenure_months": 14,
-    "role": "mid"
+    "role": "mid",
+    "tool_level": "local"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-003_indianapolis_assembly.json b/tests/multi-agent/scenarios/staffer_demo/S-003_indianapolis_assembly.json
index f49da0f..1d00ef0 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-003_indianapolis_assembly.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-003_indianapolis_assembly.json
@@ -13,7 +13,8 @@
     "id": "S-003",
     "name": "Sam Torres",
     "tenure_months": 4,
-    "role": "junior"
+    "role": "junior",
+    "tool_level": "basic"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-003_joliet_warehouse.json b/tests/multi-agent/scenarios/staffer_demo/S-003_joliet_warehouse.json
index b276ac9..4393f8b 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-003_joliet_warehouse.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-003_joliet_warehouse.json
@@ -13,7 +13,8 @@
     "id": "S-003",
     "name": "Sam Torres",
     "tenure_months": 4,
-    "role": "junior"
+    "role": "junior",
+    "tool_level": "basic"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-003_nashville_downtown.json b/tests/multi-agent/scenarios/staffer_demo/S-003_nashville_downtown.json
index 9969ca0..fdfd8ce 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-003_nashville_downtown.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-003_nashville_downtown.json
@@ -13,7 +13,8 @@
     "id": "S-003",
     "name": "Sam Torres",
     "tenure_months": 4,
-    "role": "junior"
+    "role": "junior",
+    "tool_level": "basic"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-004_indianapolis_assembly.json b/tests/multi-agent/scenarios/staffer_demo/S-004_indianapolis_assembly.json
index 0b5790b..1296a1d 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-004_indianapolis_assembly.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-004_indianapolis_assembly.json
@@ -13,7 +13,8 @@
     "id": "S-004",
     "name": "Alex Rivera",
     "tenure_months": 1,
-    "role": "trainee"
+    "role": "trainee",
+    "tool_level": "minimal"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-004_joliet_warehouse.json b/tests/multi-agent/scenarios/staffer_demo/S-004_joliet_warehouse.json
index fcf570e..3e75b5a 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-004_joliet_warehouse.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-004_joliet_warehouse.json
@@ -13,7 +13,8 @@
     "id": "S-004",
     "name": "Alex Rivera",
     "tenure_months": 1,
-    "role": "trainee"
+    "role": "trainee",
+    "tool_level": "minimal"
   },
   "events": [
     {
diff --git a/tests/multi-agent/scenarios/staffer_demo/S-004_nashville_downtown.json b/tests/multi-agent/scenarios/staffer_demo/S-004_nashville_downtown.json
index 8b32392..f65e50b 100644
--- a/tests/multi-agent/scenarios/staffer_demo/S-004_nashville_downtown.json
+++ b/tests/multi-agent/scenarios/staffer_demo/S-004_nashville_downtown.json
@@ -13,7 +13,8 @@
     "id": "S-004",
     "name": "Alex Rivera",
     "tenure_months": 1,
-    "role": "trainee"
+    "role": "trainee",
+    "tool_level": "minimal"
   },
   "events": [
     {