#!/usr/bin/env bash
# Run the 12 staffer-demo scenarios (4 personas × 3 contracts) with
# cloud T3 enabled. After each run, KB indexes the outcome and
# recomputes that staffer's competence_score. By the end, the top
# staffer's playbooks will be surfacing first in neighbor retrieval.
#
# Requires: bash, python3, bun, GNU grep (-P) and GNU date (-Iseconds).
# Reads:    tests/multi-agent/scenarios/staffer_demo/manifest.json
#           /root/llm_team_config.json (optional; source of OLLAMA_CLOUD_KEY)
# Writes:   one log per scenario in a fresh /tmp/lakehouse_staffer_demo_* dir.

set -euo pipefail

readonly MANIFEST="tests/multi-agent/scenarios/staffer_demo/manifest.json"
readonly SCENARIO_DIR="tests/multi-agent/scenarios/staffer_demo"

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the last PCRE match of a pattern in a file, or a fallback.
# Arguments: $1 - PCRE pattern (passed to grep -oP)
#            $2 - file to search
#            $3 - fallback text printed when nothing matches
# Outputs:   the last match, or the fallback, without a trailing newline
#######################################
last_match() {
  local pattern=$1 file=$2 fallback=$3 hit
  # grep exits 1 on "no match"; under pipefail that must not abort the run.
  hit=$(grep -oP -- "$pattern" "$file" | tail -n 1) || true
  printf '%s' "${hit:-$fallback}"
}

#######################################
# Print how many lines of a file match a pattern.
# Arguments: $1 - grep pattern, $2 - file to search
# Outputs:   the count (0 when nothing matches or grep prints nothing)
#######################################
count_matches() {
  local pattern=$1 file=$2 n
  # grep -c still prints "0" when nothing matches, but exits 1.
  n=$(grep -c -- "$pattern" "$file") || true
  printf '%s' "${n:-0}"
}

#######################################
# Emit one tab-separated row (file, staffer, contract) per manifest scenario.
# Arguments: $1 - path to the manifest JSON
# Returns:   non-zero when the manifest cannot be read or parsed
#######################################
list_scenarios() {
  # The path is passed as argv so it is never interpreted as Python source.
  python3 -c '
import json, sys
with open(sys.argv[1]) as fh:
    manifest = json.load(fh)
for s in manifest["scenarios"]:
    print(*(str(s[k]).strip() for k in ("file", "staffer", "contract")), sep="\t")
' "$1"
}

main() {
  cd -- "$(dirname -- "$0")/.." || die "✗ cannot cd to repository root"

  # Best effort: a missing or unreadable config yields an empty key.
  OLLAMA_CLOUD_KEY="$(python3 -c '
import json
with open("/root/llm_team_config.json") as fh:
    print(json.load(fh)["providers"]["ollama_cloud"]["api_key"])
' 2>/dev/null || true)"
  export OLLAMA_CLOUD_KEY
  if [[ -z "$OLLAMA_CLOUD_KEY" ]]; then
    printf '%s\n' "warning: OLLAMA_CLOUD_KEY is empty (could not read /root/llm_team_config.json)" >&2
  fi

  if [[ ! -f "$MANIFEST" ]]; then
    die "✗ no manifest — run: bun tests/multi-agent/gen_staffer_demo.ts"
  fi

  local start_ts log_dir rows
  start_ts=$(date -Iseconds)
  # mktemp avoids reusing (or following a symlink at) a guessable /tmp path;
  # the timestamp prefix keeps the original naming scheme.
  log_dir=$(mktemp -d "/tmp/lakehouse_staffer_demo_$(date +%s)_XXXXXX") \
    || die "✗ cannot create log directory"
  echo "▶ Staffer demo start: $start_ts, logs → $log_dir"

  rows=$(list_scenarios "$MANIFEST") || die "✗ cannot parse $MANIFEST"

  local scen staffer contract spec base log ok rescues rescue_ok sig
  # Rows are read from fd 3 so the scenario runner cannot consume them via
  # stdin. Fields are assumed non-empty (tab is a whitespace IFS character,
  # so empty fields would collapse).
  while IFS=$'\t' read -r -u 3 scen staffer contract; do
    [[ -n "$scen" ]] || continue
    spec="$SCENARIO_DIR/$scen"
    base=$(basename -- "$spec" .json)
    log="$log_dir/${base}.log"
    printf ' ▶ %s × %s\n' "$staffer" "$contract"

    # A failing scenario must not stop the batch; its outcome is read back
    # from the log below.
    LH_OVERVIEW_CLOUD=1 bun tests/multi-agent/scenario.ts "$spec" > "$log" 2>&1 || true

    ok=$(last_match '\d+/\d+ events succeeded' "$log" "no-result")
    rescues=$(count_matches "cloud rescue requested" "$log")
    rescue_ok=$(count_matches "retry outcome: ✓" "$log")
    sig=$(last_match 'KB indexed: sig=\K[a-f0-9]+' "$log" "-")
    printf ' → %s; rescues=%s (%s succeeded); sig=%s\n' \
      "$ok" "$rescues" "$rescue_ok" "$sig"
  done 3<<< "$rows"

  echo "▶ Staffer demo done: $(date -Iseconds)"
  echo "▶ Staffer competence leaderboard:"
  # The report is optional; if it fails, point the user at it instead.
  python3 scripts/kb_staffer_report.py 2>/dev/null \
    || echo "(run scripts/kb_staffer_report.py after batch completes)"
}

main "$@"