#!/usr/bin/env bash
# run_proof.sh — orchestrator for the proof harness.
#
# Usage:
#   tests/proof/run_proof.sh --mode contract
#   tests/proof/run_proof.sh --mode integration
#   tests/proof/run_proof.sh --mode performance
#   tests/proof/run_proof.sh --mode integration --no-bootstrap   # assume services up
#   tests/proof/run_proof.sh --regenerate-rankings                # rebuild expected/rankings.json
#   tests/proof/run_proof.sh --regenerate-baseline                # exports PROOF_REGENERATE_BASELINE=1
#
# Bootstraps services (storaged → catalogd → ingestd → queryd →
# vectord → embedd → gateway) once at the start unless --no-bootstrap.
# Iterates matching cases in numerical order. Aggregates per-case JSONL
# evidence into summary.md + summary.json under tests/proof/reports/proof-<ts>/.
#
# Designed per CLAUDE_REFACTOR_GUARDRAILS.md: bash + curl + jq only,
# no Go test framework, no DSL. Each case is a thin shell script that
# sources lib/*.sh and writes evidence; this harness orchestrates them.

# No `-e` here: nothing in this script relies on abort-on-error; failures
# are handled explicitly where they matter.
set -uo pipefail

# ── arg parsing ────────────────────────────────────────────────────────────
# Defaults; parse_args overwrites them from the command line.
MODE="contract"
NO_BOOTSTRAP=0
REGENERATE_RANKINGS=0
REGENERATE_BASELINE=0

#######################################
# Print the header comment block of this script as help text.
# Skips the shebang, strips the leading "# " from each comment line and
# stops at the first non-comment line, so the output always matches the
# header regardless of how many lines it has.
# Outputs: help text on stdout.
#######################################
print_usage() {
  local line
  while IFS= read -r line; do
    case "$line" in
      '#!'*) continue ;;
      '#'*)
        line="${line#\#}"
        printf '%s\n' "${line# }"
        ;;
      *) break ;;
    esac
  done < "$0"
}

#######################################
# Parse command-line flags into the globals above and validate MODE.
# Globals:   MODE, NO_BOOTSTRAP, REGENERATE_RANKINGS, REGENERATE_BASELINE (written)
# Arguments: the script's argv ("$@")
# Exits:     0 after printing help; 2 on an unknown flag, a --mode flag
#            with no value, or a MODE outside contract|integration|performance.
#######################################
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --mode)
        # Without this guard "$2" trips `set -u` and the user gets a raw
        # "unbound variable" error instead of a usage error.
        if [ $# -lt 2 ]; then
          echo "[run_proof] --mode requires a value (contract|integration|performance)" >&2
          exit 2
        fi
        MODE="$2"
        shift 2
        ;;
      --mode=*)
        MODE="${1#--mode=}"
        shift
        ;;
      --no-bootstrap)
        NO_BOOTSTRAP=1
        shift
        ;;
      --regenerate-rankings)
        REGENERATE_RANKINGS=1
        shift
        ;;
      --regenerate-baseline)
        REGENERATE_BASELINE=1
        shift
        ;;
      -h|--help)
        print_usage
        exit 0
        ;;
      *)
        echo "unknown arg: $1" >&2
        exit 2
        ;;
    esac
  done

  case "$MODE" in
    contract|integration|performance) ;;
    *)
      echo "[run_proof] invalid --mode '$MODE' (must be contract|integration|performance)" >&2
      exit 2
      ;;
  esac
}

parse_args "$@"

# Exported so the case scripts (run as child processes) can read them.
export PROOF_MODE="$MODE"
export PROOF_REGENERATE_RANKINGS="$REGENERATE_RANKINGS"
export PROOF_REGENERATE_BASELINE="$REGENERATE_BASELINE"

# ── env setup ─────────────────────────────────────────────────────────────
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# All relative paths below (bin/, ./cmd/..., tests/proof/...) are resolved
# from the directory two levels above this script, so a failed cd is fatal.
cd "$SCRIPT_DIR/../.." || {
  echo "[run_proof] cannot cd to ${SCRIPT_DIR}/../.." >&2
  exit 1
}

# Establish the report directory before sourcing env.sh so cases see it.
ts="$(date -u +%Y%m%d-%H%M%SZ)"
PROOF_REPORT_DIR="$(pwd)/tests/proof/reports/proof-${ts}"
export PROOF_REPORT_DIR
# raw/logs and raw/cases are written to / read from below. lib/env.sh may
# also create them (not visible from here); mkdir -p is idempotent, so
# creating them up front is safe either way.
if ! mkdir -p "${PROOF_REPORT_DIR}/raw/logs" "${PROOF_REPORT_DIR}/raw/cases"; then
  echo "[run_proof] FATAL — cannot create ${PROOF_REPORT_DIR}" >&2
  exit 1
fi

# shellcheck source=lib/env.sh
source "${SCRIPT_DIR}/lib/env.sh"
# shellcheck source=lib/http.sh
source "${SCRIPT_DIR}/lib/http.sh"
# shellcheck source=lib/assert.sh
source "${SCRIPT_DIR}/lib/assert.sh"
# shellcheck source=lib/metrics.sh
source "${SCRIPT_DIR}/lib/metrics.sh"

echo "[run_proof] mode=${MODE} report=${PROOF_REPORT_DIR}"
# PROOF_GIT_SHA is not set in this script; presumably lib/env.sh provides it.
echo "[run_proof] git_sha=${PROOF_GIT_SHA}"

# ── service lifecycle ────────────────────────────────────────────────────
# PIDs of the services this run launched. Services that were already up
# are never added, so cleanup only touches what we started.
PIDS=()

#######################################
# Kill every service this run launched and reap it.
# Keyed on PIDS alone, so a service that was started but never became
# healthy is killed too instead of being leaked.
# Globals: PIDS (read, then emptied so a second invocation is a no-op)
#######################################
cleanup() {
  if [ "${#PIDS[@]}" -gt 0 ]; then
    echo "[run_proof] cleanup: killing ${#PIDS[@]} services we started"
    # Best effort: a service may already have exited on its own.
    kill "${PIDS[@]}" 2>/dev/null || true
    wait 2>/dev/null || true
    PIDS=()
  fi
}
trap cleanup EXIT
# A signal handler that merely returns lets the script carry on after
# Ctrl-C. Exit instead (128 + signal number); the EXIT trap then cleans up.
trap 'exit 130' INT
trap 'exit 143' TERM

#######################################
# Probe a service's /health endpoint once.
# Arguments: $1 - TCP port on 127.0.0.1
# Returns:   0 if curl succeeded within 1 second, non-zero otherwise
#######################################
service_healthy() {
  curl -sS --max-time 1 "http://127.0.0.1:${1}/health" >/dev/null 2>&1
}

#######################################
# Wait for a service to answer on /health.
# Arguments: $1 - service name (not used by the probe itself)
#            $2 - TCP port on 127.0.0.1
#            $3 - optional timeout in seconds (default 8)
# Returns:   0 once the service answers, 1 if the timeout elapses first
#######################################
poll_health() {
  local port="$2"
  local timeout_s="${3:-8}"
  local deadline=$(( $(date +%s) + timeout_s ))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    if service_healthy "$port"; then
      return 0
    fi
    sleep 0.1
  done
  return 1
}

#######################################
# Build all binaries, then launch each service in dependency order and
# wait for it to answer on /health before starting the next one.
# Services that already answer are left untouched.
# Globals: PROOF_REPORT_DIR (read), PIDS (appended), PATH (extended)
# Returns: 0 when every service is up, 1 on build or boot failure
#######################################
bootstrap_services() {
  local spec name port
  local log_dir="${PROOF_REPORT_DIR}/raw/logs"

  echo "[run_proof] bootstrap: building binaries..."
  export PATH="/usr/local/go/bin:${PATH}"
  if ! go build -o bin/ ./cmd/... > "${log_dir}/build.log" 2>&1; then
    echo "[run_proof] BUILD FAILED — see raw/logs/build.log"
    return 1
  fi

  echo "[run_proof] bootstrap: launching services in dep order..."
  for spec in "storaged:3211" "catalogd:3212" "ingestd:3213" "queryd:3214" \
              "vectord:3215" "embedd:3216" "gateway:3110"; do
    name="${spec%:*}"
    port="${spec#*:}"

    # Skip if already up.
    if service_healthy "$port"; then
      echo "  ✓ ${name} (:${port}) already up — leaving as-is"
      continue
    fi

    ./bin/"$name" > "${log_dir}/${name}.log" 2>&1 &
    # Record the PID before polling so cleanup can kill the process even
    # when it never becomes healthy.
    PIDS+=("$!")

    if poll_health "$name" "$port"; then
      echo "  ✓ ${name} (:${port}) booted"
    else
      echo "  ✗ ${name} (:${port}) failed to bind in 8s — see raw/logs/${name}.log"
      tail -20 "${log_dir}/${name}.log" | sed 's/^/      /'
      return 1
    fi
  done
}

if [ "$NO_BOOTSTRAP" -eq 0 ]; then
  if ! bootstrap_services; then
    echo "[run_proof] FATAL — bootstrap failed"
    exit 1
  fi
else
  echo "[run_proof] --no-bootstrap — assuming services already up"
fi

# ── case discovery + filtering ───────────────────────────────────────────
#######################################
# Print the case files that apply to the current mode, one per line, in
# glob (lexical) order, which sorts them by their NN prefix.
# Each case declares CASE_TYPE; it is read by sourcing the file with
# --metadata-only in a throwaway child bash.
# Globals: SCRIPT_DIR, MODE (read)
# Outputs: matching case file paths on stdout
#######################################
discover_cases() {
  local f case_type
  for f in "${SCRIPT_DIR}/cases/"*.sh; do
    [ -e "$f" ] || continue
    # The path is passed as a positional argument rather than spliced into
    # the command string, so quotes or other metacharacters in it are inert.
    case_type=$(bash -c 'source "$1" --metadata-only 2>/dev/null; echo "${CASE_TYPE:-}"' _ "$f" 2>/dev/null || echo "")
    # contract mode runs contract cases only
    # integration mode runs contract + integration
    # performance mode runs contract + integration + performance
    case "$MODE:$case_type" in
      contract:contract|\
      integration:contract|integration:integration|\
      performance:contract|performance:integration|performance:performance)
        echo "$f"
        ;;
    esac
  done
}

CASES=()
while IFS= read -r line; do
  CASES+=("$line")
done < <(discover_cases)
echo "[run_proof] cases for mode=${MODE}: ${#CASES[@]}"

# ── case execution ───────────────────────────────────────────────────────
# CASE_PASS / CASE_FAIL count case *scripts* by exit status. They are kept
# separate from assertion counts because a case that crashes before writing
# any evidence leaves no "fail" record and would otherwise look green.
CASE_PASS=0
CASE_FAIL=0
FAILED_CASES=()

# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a bare
# "${CASES[@]}" aborts under `set -u` on bash older than 4.4.
for case_file in ${CASES[@]+"${CASES[@]}"}; do
  case_name=$(basename "$case_file" .sh)
  echo ""
  echo "[run_proof] running ${case_name} ..."
  SECONDS=0   # bash built-in timer: reads back as seconds elapsed since reset
  if bash "$case_file" >> "${PROOF_REPORT_DIR}/raw/logs/${case_name}.log" 2>&1; then
    CASE_PASS=$((CASE_PASS + 1))
    echo "  → wrapper exit 0 (${SECONDS}s)"
  else
    CASE_FAIL=$((CASE_FAIL + 1))
    FAILED_CASES+=("$case_name")
    echo "  → wrapper exit non-zero (${SECONDS}s) — see raw/logs/${case_name}.log"
  fi
done

# ── aggregation ──────────────────────────────────────────────────────────
echo ""
echo "[run_proof] aggregating evidence..."

ALL_RECORDS_FILE="${PROOF_REPORT_DIR}/raw/all_records.jsonl"
: > "$ALL_RECORDS_FILE"
for f in "${PROOF_REPORT_DIR}/raw/cases/"*.jsonl; do
  [ -e "$f" ] || continue
  cat "$f" >> "$ALL_RECORDS_FILE"
done

#######################################
# Count the lines of a file that contain a literal string.
# grep -c exits 1 with output "0" when no matches; the `|| echo 0` form
# concatenates "0\n0" and breaks jq --argjson + arithmetic. Capture the
# count and force a clean integer fallback on non-zero exit.
# Arguments: $1 - literal string to look for, $2 - file
# Outputs:   the count on stdout (0 if the file is missing or unreadable)
#######################################
_count() {
  local pattern="$1" file="$2" n
  n=$(grep -c -F -- "$pattern" "$file" 2>/dev/null) || n=0
  echo "$n"
}

if [ -s "$ALL_RECORDS_FILE" ]; then
  pass=$(_count '"result":"pass"' "$ALL_RECORDS_FILE")
  fail=$(_count '"result":"fail"' "$ALL_RECORDS_FILE")
  skip=$(_count '"result":"skip"' "$ALL_RECORDS_FILE")
else
  pass=0; fail=0; skip=0
fi

# One timestamp shared by summary.json and summary.md. The explicit format
# matches GNU `date -u -Iseconds` output and also works where -I is absent.
report_ts="$(date -u +%Y-%m-%dT%H:%M:%S+00:00)"

# summary.json
jq -n \
  --arg mode "$MODE" \
  --arg ts "$report_ts" \
  --arg sha "$PROOF_GIT_SHA" \
  --argjson pass "$pass" \
  --argjson fail "$fail" \
  --argjson skip "$skip" \
  --argjson cases "${#CASES[@]}" \
  --argjson wrappers_ok "$CASE_PASS" \
  --argjson wrappers_failed "$CASE_FAIL" \
  '{mode: $mode, timestamp_utc: $ts, git_sha: $sha,
    counts: {pass: $pass, fail: $fail, skip: $skip},
    cases_run: $cases,
    case_wrappers: {ok: $wrappers_ok, failed: $wrappers_failed},
    evidence_dir: "raw/"}' \
  > "${PROOF_REPORT_DIR}/summary.json"

# summary.md
{
  echo "# proof-${ts} — ${MODE} mode"
  echo ""
  echo "- git_sha: \`${PROOF_GIT_SHA}\`"
  echo "- timestamp: ${report_ts}"
  echo "- cases run: ${#CASES[@]}"
  echo "- case wrappers: ${CASE_PASS} ok · ${CASE_FAIL} non-zero exit"
  echo "- assertions: ${pass} pass · ${fail} fail · ${skip} skip"
  echo ""
  echo "## per-case-id"
  echo ""
  echo "| case_id | pass | fail | skip |"
  echo "|---|---:|---:|---:|"
  # Iterate JSONL files (one per CASE_ID), not case scripts — a single
  # case file may emit under multiple CASE_IDs and this preserves the
  # mapping faithfully.
  for jsonl in "${PROOF_REPORT_DIR}/raw/cases/"*.jsonl; do
    [ -e "$jsonl" ] || continue
    cid=$(basename "$jsonl" .jsonl)
    cp=$(_count '"result":"pass"' "$jsonl")
    cfl=$(_count '"result":"fail"' "$jsonl")
    cs=$(_count '"result":"skip"' "$jsonl")
    echo "| ${cid} | ${cp} | ${cfl} | ${cs} |"
  done
  echo ""
  if [ "$fail" -gt 0 ]; then
    echo "## failed assertions"
    echo ""
    grep -F -- '"result":"fail"' "$ALL_RECORDS_FILE" | jq -r '"- **\(.case_id)** — \(.claim) — expected: \(.expected) actual: \(.actual)"'
  fi
  if [ "$CASE_FAIL" -gt 0 ]; then
    echo ""
    echo "## case scripts that exited non-zero"
    echo ""
    for failed_case in "${FAILED_CASES[@]}"; do
      echo "- ${failed_case} — see raw/logs/${failed_case}.log"
    done
  fi
} > "${PROOF_REPORT_DIR}/summary.md"

# ── exit ─────────────────────────────────────────────────────────────────
echo ""
echo "[run_proof] DONE — summary: ${PROOF_REPORT_DIR}/summary.md"
echo "           ${pass} pass · ${fail} fail · ${skip} skip"
if [ "$CASE_FAIL" -gt 0 ]; then
  echo "           ${CASE_FAIL} case script(s) exited non-zero"
fi

# Fail on any failed assertion, and also on any case script that exited
# non-zero: a case that crashes before recording evidence must not yield
# a green run.
if [ "$fail" -gt 0 ] || [ "$CASE_FAIL" -gt 0 ]; then
  exit 1
fi
exit 0