lakehouse/tests/battery/tasks.json

{
  "description": "Compounding stress battery tasks. Each iteration runs three phases: α (alpha_baseline), β (beta_drift), and γ (gamma_impossible). The SAME tasks repeat across iterations so that compounding can be measured from one iteration to the next (metrics: turns_used, overseer_called_rate, correction_effective).",
  "phases": {
    "alpha_baseline": [
      {
        "task_class": "staffing.fill",
        "operation": "fill: Warehouse Associate x3 in Columbus, OH",
        "spec": { "target_role": "Warehouse Associate", "target_count": 3, "target_city": "Columbus", "target_state": "OH", "approach_hint": "hybrid search against workers_500k_v1" }
      },
      {
        "task_class": "staffing.fill",
        "operation": "fill: Forklift Operator x2 in Toledo, OH",
        "spec": { "target_role": "Forklift Operator", "target_count": 2, "target_city": "Toledo", "target_state": "OH", "approach_hint": "hybrid search against workers_500k_v1" }
      },
      {
        "task_class": "staffing.fill",
        "operation": "fill: Packer x4 in Cleveland, OH",
        "spec": { "target_role": "Packer", "target_count": 4, "target_city": "Cleveland", "target_state": "OH", "approach_hint": "hybrid search against workers_500k_v1" }
      }
    ],
    "beta_drift": [
      {
        "task_class": "staffing.fill",
        "operation": "fill: Machine Operator x2 in Youngstown, OH (requires OSHA 30 + bilingual Spanish)",
        "spec": { "target_role": "Machine Operator", "target_count": 2, "target_city": "Youngstown", "target_state": "OH", "approach_hint": "hybrid search against workers_500k_v1; prefer candidates with OSHA certification and Spanish" }
      },
      {
        "task_class": "staffing.fill",
        "operation": "fill: Welder x2 in Dayton, OH (AWS D1.1 certified, night shift)",
        "spec": { "target_role": "Welder", "target_count": 2, "target_city": "Dayton", "target_state": "OH", "approach_hint": "hybrid search against workers_500k_v1; filter by certification and shift flexibility" }
      },
      {
        "task_class": "staffing.fill",
        "operation": "fill: Assembler x5 in Akron, OH (SMT experience, cleanroom)",
        "spec": { "target_role": "Assembler", "target_count": 5, "target_city": "Akron", "target_state": "OH", "approach_hint": "hybrid search against workers_500k_v1" }
      }
    ],
    "gamma_impossible": [
      {
        "task_class": "staffing.fill",
        "operation": "fill: Underwater Welder x2 in Toledo, OH",
        "spec": { "target_role": "Underwater Welder", "target_count": 2, "target_city": "Toledo", "target_state": "OH", "approach_hint": "hybrid search against workers_500k_v1 (expected to fail — no supply)" }
      },
      {
        "task_class": "staffing.fill",
        "operation": "fill: Astronaut x1 in Springfield, OH",
        "spec": { "target_role": "Astronaut", "target_count": 1, "target_city": "Springfield", "target_state": "OH", "approach_hint": "(expected to fail — out-of-domain role)" }
      }
    ]
  },
  "models": {
    "executor_cloud": "gpt-oss:20b",
    "reviewer_cloud": "gpt-oss:20b",
    "overseer_cloud": "gpt-oss:120b",
    "notes": "gpt-oss:20b for the hot path (executor and reviewer; faster and cheaper per call), gpt-oss:120b for overseer meta-reviews. All models are cloud-hosted per the 2026-04-23 'cloud modes are on' directive."
  }
}