golangLAKEHOUSE/scripts/replay_smoke.sh

#!/usr/bin/env bash
# replay smoke — Go port of scripts/distillation/replay.ts.
# Validates that the replay tool:
#   - Builds a context bundle from a synthetic playbooks corpus
#   - Runs --dry-run end-to-end without an LLM
#   - Logs a row to data/_kb/replay_runs.jsonl with schema=replay_run.v1
#   - Honors --no-retrieval (no bundle, empty rag_ids)
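#   - Escalates qwen3.5:latest → deepseek-v3.1:671b when validation fails
#     under --allow-escalation, and appends another row to the run log
#   - Exits non-zero when validation fails (NOTE: not asserted by this
#     script — every replay invocation below is followed by `|| true`)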

# Strict mode: abort on command failure, unset variables, and failed
# pipeline stages.
set -euo pipefail
# Run from the parent of this script's directory so the relative paths used
# below (bin/replay, ./cmd/replay) resolve.
cd -- "$(dirname -- "$0")/.."

# Append the conventional Go install location in case `go` is not on PATH.
export PATH="$PATH:/usr/local/go/bin"

echo "[replay-smoke] building bin/replay..."
go build -o bin/replay ./cmd/replay

# Scratch directory handed to the tool via -root.
ROOT="$(mktemp -d)"

# Remove the scratch directory on every exit path. `${ROOT:?}` refuses to run
# `rm -rf` with an empty path.
cleanup() { rm -rf -- "${ROOT:?}"; }
trap cleanup EXIT
# INT/TERM must exit explicitly: a signal handler that only cleans up returns
# control to the script, which would then keep running against a deleted
# $ROOT. Exiting here fires the EXIT trap above, so cleanup still happens.
trap 'exit 130' INT
trap 'exit 143' TERM

# Seed the scratch root with a three-record playbooks corpus: two records
# scored "accepted" and one "partially_accepted". The quoted heredoc
# delimiter keeps the JSON (including the literal \n in p1) unexpanded.
rag_dir="$ROOT/exports/rag"
mkdir -p "$rag_dir"
cat <<'EOF' > "$rag_dir/playbooks.jsonl"
{"id":"p1","title":"build verification","content":"verify the build, check tests pass before merge\nensure no regressions in suites","tags":["scrum"],"source_run_id":"r-1","success_score":"accepted","source_category":"scrum_review"}
{"id":"p2","title":"merge cleanup","content":"verify the build, then assert tests passed, then merge","tags":["scrum"],"source_run_id":"r-2","success_score":"accepted","source_category":"scrum_review"}
{"id":"p3","title":"partial fix","content":"verify the build, sometimes assert tests passed","tags":["scrum"],"source_run_id":"r-3","success_score":"partially_accepted","source_category":"scrum_review"}
EOF

echo "[replay-smoke] dry-run (with retrieval)"
# Capture the tool's output inside the private scratch root instead of a
# fixed /tmp name: a predictable path can collide with a concurrent run or be
# pre-created by another user, and it is removed together with $ROOT on exit.
out_a="$ROOT/replay_smoke_a.txt"
# `|| true`: the exit status is intentionally ignored here; the greps below
# are the assertions for this scenario.
./bin/replay -task "verify the build before merging" -dry-run -root "$ROOT" > "$out_a" 2>&1 || true
# -F matches the expected text literally, so the "." in the model name is not
# treated as a regex wildcard.
grep -qF -- "retrieval: " "$out_a" || {
  echo "missing retrieval line"; cat "$out_a"; exit 1;
}
grep -qF -- "escalation_path: qwen3.5:latest" "$out_a" || {
  echo "missing escalation_path line"; cat "$out_a"; exit 1;
}

# Run log expected under the scratch root after the first replay invocation;
# later checks reuse $LOG.
LOG="$ROOT/data/_kb/replay_runs.jsonl"
# -s: the file must exist and be non-empty.
[[ -s "$LOG" ]] || { echo "expected $LOG to be written"; exit 1; }
# -F: match the schema tag literally; as a regex the "." would accept any
# character in that position.
grep -qF -- "replay_run.v1" "$LOG" || {
  echo "schema=replay_run.v1 missing in log";
  cat "$LOG";
  exit 1;
}

echo "[replay-smoke] dry-run (no retrieval)"
# Capture output under the private scratch root rather than a predictable
# /tmp path, so concurrent runs cannot clobber each other and the file is
# removed together with $ROOT on exit.
out_b="$ROOT/replay_smoke_b.txt"
# `|| true`: exit status intentionally ignored; the grep is the assertion.
./bin/replay -task "verify build" -dry-run -no-retrieval -root "$ROOT" > "$out_b" 2>&1 || true
# With -no-retrieval the tool is expected to report retrieval as disabled.
grep -qF -- "retrieval: DISABLED" "$out_b" || {
  echo "expected retrieval: DISABLED";
  cat "$out_b";
  exit 1;
}

# Baseline row count, taken before the forced-fail run so log growth can be
# asserted afterwards. Arithmetic expansion normalises the count, since some
# `wc` implementations pad it with leading spaces.
LINES_BEFORE=$(( $(wc -l < "$LOG") ))

echo "[replay-smoke] forced-fail with escalation"
# Force validation failure by putting a hedge phrase as the FIRST
# accepted sample's first verify line. extractValidationSteps walks
# corpus order, and the dry-run synthesizer surfaces the first 3 steps,
# so the hedge phrase needs to be in an early-corpus accepted sample.
cat > "$ROOT/exports/rag/playbooks.jsonl" <<'EOF'
{"id":"p9","title":"hedged step","content":"verify auth as an AI and proceed without checking","tags":["security"],"source_run_id":"r-9","success_score":"accepted","source_category":"audit"}
{"id":"p1","title":"build verification","content":"verify the build, check tests pass before merge","tags":["scrum"],"source_run_id":"r-1","success_score":"accepted","source_category":"scrum_review"}
EOF
# Capture output under the private scratch root rather than a predictable
# /tmp path; it is removed together with $ROOT on exit.
out_c="$ROOT/replay_smoke_c.txt"
# `|| true`: exit status intentionally ignored; the checks below are the
# assertions for this scenario.
./bin/replay -task "verify auth proceed" -dry-run -allow-escalation -root "$ROOT" > "$out_c" 2>&1 || true
# -F: match the escalation chain literally (it contains "." and a non-ASCII
# arrow, neither of which should be interpreted as regex syntax).
grep -qF -- "escalation_path: qwen3.5:latest → deepseek-v3.1:671b" "$out_c" || {
  echo "expected escalation path to deepseek when validation fails";
  cat "$out_c";
  exit 1;
}

# The forced-fail run must have appended at least one row to the log.
LINES_AFTER=$(( $(wc -l < "$LOG") ))
(( LINES_AFTER > LINES_BEFORE )) || {
  echo "expected log file to grow: before=$LINES_BEFORE after=$LINES_AFTER";
  exit 1;
}

# Reached only when every check above succeeded.
printf '%s\n' "[replay-smoke] PASS"