#!/usr/bin/env bash
# Playbook-lift reality test — measure whether the 5-loop substrate
# (matrix retrieve+merge + playbook + small-model judge) actually beats
# raw cosine on staffing queries.
#
# Pipeline:
#   1. Boot the Go stack (storaged, embedd, vectord, matrixd, gateway)
#   2. Ingest workers (default 5000) + candidates corpora
#   3. Run the playbook_lift driver: cold pass → judge → record →
#      warm pass → measure
#   4. Generate markdown report from the JSON evidence
#
# Output:
#   reports/reality-tests/playbook_lift_<RUN_ID>.json — raw evidence
#   reports/reality-tests/playbook_lift_<RUN_ID>.md   — human report
#
# Requires: Ollama on :11434 with nomic-embed-text + the judge model
# loaded, plus curl and jq on PATH. Skips (exit 0) if Ollama is absent.
# Only the judge model is verified up front; nomic-embed-text is not
# checked by this preflight.
#
# Usage:
#   ./scripts/playbook_lift.sh                             # run #001 with defaults
#   RUN_ID=002 ./scripts/playbook_lift.sh                  # explicit run id
#   JUDGE_MODEL=qwen2.5:latest ./scripts/playbook_lift.sh
#   WORKERS_LIMIT=2000 ./scripts/playbook_lift.sh
#
# QUERIES_FILE, CORPORA and K can be overridden from the environment in
# the same way.

set -euo pipefail

# Run from the repository root (this script's parent directory) so the
# relative bin/, reports/ and tests/ paths below resolve.
cd "$(dirname "$0")/.."

export PATH="$PATH:/usr/local/go/bin"

# Tunables — each keeps its environment value when set and non-empty.
RUN_ID="${RUN_ID:-001}"
JUDGE_MODEL="${JUDGE_MODEL:-qwen3.5:latest}"
WORKERS_LIMIT="${WORKERS_LIMIT:-5000}"
QUERIES_FILE="${QUERIES_FILE:-tests/reality/playbook_lift_queries.txt}"
CORPORA="${CORPORA:-workers,candidates}"
K="${K:-10}"

OUT_JSON="reports/reality-tests/playbook_lift_${RUN_ID}.json"
OUT_MD="reports/reality-tests/playbook_lift_${RUN_ID}.md"

# Preflight 1: Ollama must answer within 3 seconds. An unreachable Ollama
# is a skip (exit 0), not a failure. The response is kept so the model
# check below does not need a second, unbounded request.
if ! OLLAMA_TAGS_JSON="$(curl -sS --max-time 3 http://localhost:11434/api/tags 2>/dev/null)"; then
  echo "[lift] Ollama not reachable on :11434 — skipping"
  exit 0
fi

# Preflight 2: jq is needed for the model check and for the report step.
# Checked explicitly so a missing jq is not misreported as a missing model.
if ! command -v jq >/dev/null 2>&1; then
  echo "[lift] jq not found on PATH — install it first"
  exit 1
fi

# Preflight 3: the judge model must appear by exact name in Ollama's tag
# list. `jq -e` exits non-zero when the filter produces no output.
if ! jq -e --arg m "$JUDGE_MODEL" \
    '.models[] | select(.name == $m)' <<<"$OLLAMA_TAGS_JSON" >/dev/null 2>&1; then
  echo "[lift] judge model '$JUDGE_MODEL' not loaded in Ollama — pull it first"
  exit 1
fi

echo "[lift] building binaries..."
go build -o bin/ ./cmd/storaged ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway \ ./scripts/staffing_workers ./scripts/staffing_candidates \ ./scripts/playbook_lift pkill -f "bin/(storaged|embedd|vectord|matrixd|gateway)" 2>/dev/null || true sleep 0.3 PIDS=() TMP="$(mktemp -d)" CFG="$TMP/lift.toml" cleanup() { echo "[lift] cleanup" for p in "${PIDS[@]:-}"; do [ -n "${p:-}" ] && kill "$p" 2>/dev/null || true; done rm -rf "$TMP" } trap cleanup EXIT INT TERM cat > "$CFG" </dev/null 2>&1; then return 0; fi sleep 0.05 done return 1 } echo "[lift] launching stack..." ./bin/storaged -config "$CFG" > /tmp/storaged.log 2>&1 & PIDS+=($!) poll_health 3211 || { echo "storaged failed"; exit 1; } ./bin/embedd -config "$CFG" > /tmp/embedd.log 2>&1 & PIDS+=($!) poll_health 3216 || { echo "embedd failed"; exit 1; } ./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!) poll_health 3215 || { echo "vectord failed"; exit 1; } ./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!) poll_health 3218 || { echo "matrixd failed"; exit 1; } ./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!) poll_health 3110 || { echo "gateway failed"; exit 1; } echo echo "[lift] ingest workers (limit=$WORKERS_LIMIT)..." ./bin/staffing_workers -limit "$WORKERS_LIMIT" echo echo "[lift] ingest candidates..." 
./bin/staffing_candidates -skip-populate=false -query "warmup" 2>&1 \ | grep -v "^\[candidates\]\(matrix\|reality\)" || true echo echo "[lift] running driver — judge=$JUDGE_MODEL · queries=$QUERIES_FILE · k=$K" ./bin/playbook_lift \ -gateway "http://127.0.0.1:3110" \ -ollama "http://localhost:11434" \ -queries "$QUERIES_FILE" \ -corpora "$CORPORA" \ -judge "$JUDGE_MODEL" \ -k "$K" \ -out "$OUT_JSON" echo echo "[lift] generating markdown report → $OUT_MD" generate_md() { local json="$1" md="$2" local total discovery lift no_change boosted mean_delta gen_at total=$(jq -r '.summary.total' "$json") discovery=$(jq -r '.summary.with_discovery' "$json") lift=$(jq -r '.summary.lift_count' "$json") no_change=$(jq -r '.summary.no_change' "$json") boosted=$(jq -r '.summary.playbook_boosted_total' "$json") mean_delta=$(jq -r '.summary.mean_top1_delta_distance' "$json") gen_at=$(jq -r '.summary.generated_at' "$json") cat > "$md" <> "$md" cat >> "$md" <