#!/usr/bin/env bash
# replay smoke — Go port of scripts/distillation/replay.ts.
#
# Validates that the replay tool:
#   - Builds a context bundle from a synthetic playbooks corpus
#   - Runs --dry-run end-to-end without an LLM
#   - Logs a row to data/_kb/replay_runs.jsonl with schema=replay_run.v1
#   - Honors --no-retrieval (no bundle, empty rag_ids)
#   - Exits non-zero when validation fails
#
# NOTE(review): the replay invocations below deliberately ignore the tool's
# exit status (`|| true`) and assert on its output and log file instead, so
# the "exits non-zero" item above is not actually asserted here — confirm the
# intended exit contract before adding a status check.
#
# Usage: run from anywhere; the script cd's to the repository root (the
# parent of the directory containing this file). Requires `go` on PATH or
# under /usr/local/go/bin.
set -euo pipefail

cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"

# Scratch locations: ROOT is the sandbox handed to the tool via -root,
# OUT holds captured tool output. Both are private mktemp directories so
# concurrent runs cannot collide and nothing is left behind in /tmp.
ROOT=""
OUT=""

# Remove whatever scratch directories were created so far.
cleanup() {
  [ -z "$ROOT" ] || rm -rf -- "$ROOT"
  [ -z "$OUT" ] || rm -rf -- "$OUT"
}
trap cleanup EXIT
# On INT/TERM, exit explicitly (which fires the EXIT trap) instead of
# resuming the script against an already-deleted sandbox.
trap 'exit 130' INT
trap 'exit 143' TERM

# fail MESSAGE [FILE] — print MESSAGE, dump FILE if given, exit 1.
fail() {
  echo "$1"
  if [ "$#" -ge 2 ]; then
    cat -- "$2"
  fi
  exit 1
}

# expect_in LITERAL FILE MESSAGE — require LITERAL (fixed string, not a
# regex) to occur in FILE; otherwise fail with MESSAGE and dump FILE.
expect_in() {
  grep -qF -- "$1" "$2" || fail "$3" "$2"
}

echo "[replay-smoke] building bin/replay..."
go build -o bin/replay ./cmd/replay

ROOT="$(mktemp -d)"
OUT="$(mktemp -d)"
LOG="$ROOT/data/_kb/replay_runs.jsonl"

# Synthetic playbooks corpus. The delimiter is quoted so the JSON (including
# its literal \n escape) is written verbatim.
mkdir -p "$ROOT/exports/rag"
cat > "$ROOT/exports/rag/playbooks.jsonl" <<'EOF'
{"id":"p1","title":"build verification","content":"verify the build, check tests pass before merge\nensure no regressions in suites","tags":["scrum"],"source_run_id":"r-1","success_score":"accepted","source_category":"scrum_review"}
{"id":"p2","title":"merge cleanup","content":"verify the build, then assert tests passed, then merge","tags":["scrum"],"source_run_id":"r-2","success_score":"accepted","source_category":"scrum_review"}
{"id":"p3","title":"partial fix","content":"verify the build, sometimes assert tests passed","tags":["scrum"],"source_run_id":"r-3","success_score":"partially_accepted","source_category":"scrum_review"}
EOF

echo "[replay-smoke] dry-run (with retrieval)"
# Exit status intentionally ignored; assertions are on output and log.
./bin/replay -task "verify the build before merging" -dry-run -root "$ROOT" \
  > "$OUT/a.txt" 2>&1 || true
expect_in "retrieval: " "$OUT/a.txt" "missing retrieval line"
expect_in "escalation_path: qwen3.5:latest" "$OUT/a.txt" \
  "missing escalation_path line"

[ -s "$LOG" ] || fail "expected $LOG to be written"
expect_in "replay_run.v1" "$LOG" "schema=replay_run.v1 missing in log"

echo "[replay-smoke] dry-run (no retrieval)"
./bin/replay -task "verify build" -dry-run -no-retrieval -root "$ROOT" \
  > "$OUT/b.txt" 2>&1 || true
expect_in "retrieval: DISABLED" "$OUT/b.txt" "expected retrieval: DISABLED"

LINES_BEFORE=$(wc -l < "$LOG")

echo "[replay-smoke] forced-fail with escalation"
# Force validation failure by putting a hedge phrase as the FIRST
# accepted sample's first verify line. extractValidationSteps walks
# corpus order, and the dry-run synthesizer surfaces the first 3 steps,
# so the hedge phrase needs to be in an early-corpus accepted sample.
cat > "$ROOT/exports/rag/playbooks.jsonl" <<'EOF'
{"id":"p9","title":"hedged step","content":"verify auth as an AI and proceed without checking","tags":["security"],"source_run_id":"r-9","success_score":"accepted","source_category":"audit"}
{"id":"p1","title":"build verification","content":"verify the build, check tests pass before merge","tags":["scrum"],"source_run_id":"r-1","success_score":"accepted","source_category":"scrum_review"}
EOF

./bin/replay -task "verify auth proceed" -dry-run -allow-escalation -root "$ROOT" \
  > "$OUT/c.txt" 2>&1 || true
expect_in "escalation_path: qwen3.5:latest → deepseek-v3.1:671b" "$OUT/c.txt" \
  "expected escalation path to deepseek when validation fails"

# The forced-fail run must have appended at least one more row to the log.
# Arithmetic comparison tolerates the padded output of some wc implementations.
LINES_AFTER=$(wc -l < "$LOG")
if ! (( LINES_AFTER > LINES_BEFORE )); then
  fail "expected log file to grow: before=$LINES_BEFORE after=$LINES_AFTER"
fi

echo "[replay-smoke] PASS"