LAKEHOUSE/tests/proof/run_proof.sh
root a81291e38c proof harness Phase A: scaffolding + canary case green
Per docs/TEST_PROOF_SCOPE.md, building the claims-verification tier
above the smoke chain. This commit lays the scaffolding and proves
the orchestrator end-to-end with one canary case (00_health).

What landed:

  tests/proof/
    README.md             how to read a report, layout, modes
    claims.yaml           24 claims enumerated (GOLAKE-001..100)
    run_proof.sh          orchestrator with --mode {contract|integration|performance}
                          and --no-bootstrap / --regenerate-{rankings,baseline}
    lib/
      env.sh              service URLs, report dir, mode, git context
      http.sh             curl wrappers writing per-probe JSON + body + headers
      assert.sh           proof_assert_{eq,ne,contains,lt,gt,status,json_eq} +
                          proof_skip — each emits one JSONL record per call
      metrics.sh          start/stop timers, value capture, RSS sampling,
                          percentile compute (for Phase D)
    cases/
      00_health.sh        canary — gateway + 6 services /health → 200,
                          body identifies service, latency < 500ms (21 assertions)
    fixtures/
      csv/workers.csv     spec's 5-row deterministic CSV
      text/docs.txt       4 deterministic vector docs
      expected/queries.json  expected results for the 5 SQL assertions

Wired into the task runner:

  just proof contract       # canary only this commit
  just proof integration    # Phase C
  just proof performance    # Phase D

.gitignore: /tests/proof/reports/* with !.gitkeep — same pattern as
reports/scrum/_evidence/. Per-run output is a runtime artifact.

Specs landed alongside (J's drops):
  docs/TEST_PROOF_SCOPE.md           the harness contract this implements
  docs/CLAUDE_REFACTOR_GUARDRAILS.md process discipline this harness obeys

Verified end-to-end (cached binaries):
  just proof contract        wall < 2s, 21 pass / 0 fail / 0 skip
  just verify                wall 31s, vet + test + 9 smokes still green

Two bugs fixed during canary run, both in run_proof.sh aggregation:
- grep -c exits 1 on zero matches; the `|| echo 0` form concatenated
  "0\n0" and broke jq --argjson + integer comparison. Fixed via a
  _count helper that captures count-or-zero cleanly.
- per-case table iterated case scripts (filename-based) but cases
  write evidence under CASE_ID. Switched to JSONL-file iteration so
  multi-case scripts work and the mapping is faithful.

Phase B (contract cases) lands next: 05_embedding, 06_vector_add,
08_gateway_contracts, 09_failure_modes. Each sourcing the same lib
helpers and writing to the same report shape.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 05:08:51 -05:00

258 lines
9.7 KiB
Bash
Executable File

#!/usr/bin/env bash
# run_proof.sh — orchestrator for the proof harness.
#
# Usage:
# tests/proof/run_proof.sh --mode contract
# tests/proof/run_proof.sh --mode integration
# tests/proof/run_proof.sh --mode performance
# tests/proof/run_proof.sh --mode integration --no-bootstrap # assume services up
# tests/proof/run_proof.sh --regenerate-rankings # rebuild expected/rankings.json
#
# Bootstraps services (storaged → catalogd → ingestd → queryd →
# vectord → embedd → gateway) once at the start unless --no-bootstrap.
# Iterates matching cases in numerical order. Aggregates per-case JSONL
# evidence into summary.md + summary.json under tests/proof/reports/proof-<ts>/.
#
# Designed per CLAUDE_REFACTOR_GUARDRAILS.md: bash + curl + jq only,
# no Go test framework, no DSL. Each case is a thin shell script that
# sources lib/*.sh and writes evidence; this harness orchestrates them.
# Strictness: -u (unset vars are errors) + pipefail. NOTE(review): -e is
# deliberately omitted, it seems — failing case wrappers and aggregation
# commands are reported rather than aborting the run (see the execution
# loop below) — confirm before adding -e.
set -uo pipefail
# ── arg parsing ────────────────────────────────────────────────────────────
# Defaults: contract mode, full bootstrap, no fixture regeneration.
MODE="contract"
NO_BOOTSTRAP=0
REGENERATE_RANKINGS=0
REGENERATE_BASELINE=0
while [ $# -gt 0 ]; do
  case "$1" in
    --mode)
      # Require a value; the original shifted past the end of argv when
      # --mode was the last argument.
      [ $# -ge 2 ] || { echo "unknown arg: $1" >&2; exit 2; }
      MODE="$2"; shift 2 ;;
    --mode=*) MODE="${1#--mode=}"; shift ;;
    --no-bootstrap) NO_BOOTSTRAP=1; shift ;;
    --regenerate-rankings) REGENERATE_RANKINGS=1; shift ;;
    --regenerate-baseline) REGENERATE_BASELINE=1; shift ;;
    -h|--help)
      # Print the usage header. Start at line 2: line 1 is the shebang, and
      # `s/^# *//` would render it as the garbage line "!/usr/bin/env bash".
      sed -n '2,16p' "$0" | sed 's/^# *//'
      exit 0 ;;
    *) echo "unknown arg: $1" >&2; exit 2 ;;
  esac
done
# Validate mode before anything touches the filesystem.
case "$MODE" in
  contract|integration|performance) ;;
  *) echo "[run_proof] invalid --mode '$MODE' (must be contract|integration|performance)" >&2; exit 2 ;;
esac
# Exported so case scripts and sourced libs can branch on them.
export PROOF_MODE="$MODE"
export PROOF_REGENERATE_RANKINGS="$REGENERATE_RANKINGS"
export PROOF_REGENERATE_BASELINE="$REGENERATE_BASELINE"
# ── env setup ─────────────────────────────────────────────────────────────
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR/../.."
# Establish the report directory before sourcing env.sh so cases see it.
ts="$(date -u +%Y%m%d-%H%M%SZ)"
# Declaration split from export so a failed command substitution is not
# masked by export's own exit status (ShellCheck SC2155).
PROOF_REPORT_DIR="$(pwd)/tests/proof/reports/proof-${ts}"
export PROOF_REPORT_DIR
# Pre-create the evidence subtree: bootstrap/build logs and per-case JSONL
# are written under raw/ before any case runs. lib/env.sh may also create
# these — TODO confirm — but the mkdir is idempotent either way.
mkdir -p "${PROOF_REPORT_DIR}/raw/logs" "${PROOF_REPORT_DIR}/raw/cases"
# shellcheck source=lib/env.sh
source "${SCRIPT_DIR}/lib/env.sh"
# shellcheck source=lib/http.sh
source "${SCRIPT_DIR}/lib/http.sh"
# shellcheck source=lib/assert.sh
source "${SCRIPT_DIR}/lib/assert.sh"
# shellcheck source=lib/metrics.sh
source "${SCRIPT_DIR}/lib/metrics.sh"
echo "[run_proof] mode=${MODE} report=${PROOF_REPORT_DIR}"
echo "[run_proof] git_sha=${PROOF_GIT_SHA}"
# ── service lifecycle ────────────────────────────────────────────────────
PIDS=()
WE_BOOTED=0
# Tear down only services this run launched itself. A stack that was already
# running (WE_BOOTED=0, or nothing recorded in PIDS) is left untouched.
cleanup() {
  [ "$WE_BOOTED" -eq 1 ] || return 0
  [ "${#PIDS[@]}" -gt 0 ] || return 0
  echo "[run_proof] cleanup: killing ${#PIDS[@]} services we started"
  kill "${PIDS[@]}" 2>/dev/null || true
  wait 2>/dev/null || true
}
trap cleanup EXIT INT TERM
# Poll a service's /health endpoint until it answers or a deadline passes.
# $1 service name (not used by the probe itself; kept for call-site symmetry)
# $2 port on 127.0.0.1
# $3 timeout in whole seconds (optional; default 8 — previously hard-coded)
# Returns 0 as soon as /health responds, 1 on timeout.
poll_health() {
  local name="$1" port="$2" timeout="${3:-8}" deadline
  # Assignment split from `local` so a failing $(date) is not masked.
  deadline=$(($(date +%s) + timeout))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    if curl -sS --max-time 1 "http://127.0.0.1:${port}/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.1
  done
  return 1
}
# Build all service binaries, then launch any service not already answering
# /health, in dependency order. PIDs of services we start are recorded in
# PIDS (global) so cleanup() can tear them down; WE_BOOTED is set only if we
# launched at least one. Returns non-zero on build failure or a service that
# never binds within the poll_health deadline.
bootstrap_services() {
  local spec name port
  # Defensive: the build log is the first thing written under raw/logs —
  # make sure the directory exists even if lib/env.sh did not create it.
  mkdir -p "${PROOF_REPORT_DIR}/raw/logs"
  echo "[run_proof] bootstrap: building binaries..."
  export PATH="/usr/local/go/bin:${PATH}"
  if ! go build -o bin/ ./cmd/... > "${PROOF_REPORT_DIR}/raw/logs/build.log" 2>&1; then
    echo "[run_proof] BUILD FAILED — see raw/logs/build.log"
    return 1
  fi
  echo "[run_proof] bootstrap: launching services in dep order..."
  for spec in "storaged:3211" "catalogd:3212" "ingestd:3213" "queryd:3214" "vectord:3215" "embedd:3216" "gateway:3110"; do
    name="${spec%:*}"
    port="${spec#*:}"
    # Skip if already up — an externally-managed service stays unmanaged.
    if curl -sS --max-time 1 "http://127.0.0.1:${port}/health" >/dev/null 2>&1; then
      echo "${name} (:${port}) already up — leaving as-is"
      continue
    fi
    ./bin/"$name" > "${PROOF_REPORT_DIR}/raw/logs/${name}.log" 2>&1 &
    PIDS+=("$!")
    if poll_health "$name" "$port"; then
      echo "${name} (:${port}) booted"
      WE_BOOTED=1
    else
      echo "${name} (:${port}) failed to bind in 8s — see raw/logs/${name}.log"
      tail -20 "${PROOF_REPORT_DIR}/raw/logs/${name}.log" | sed 's/^/ /'
      return 1
    fi
  done
}
# Boot the stack unless the caller asserted it is already running.
if [ "$NO_BOOTSTRAP" -ne 0 ]; then
  echo "[run_proof] --no-bootstrap — assuming services already up"
elif ! bootstrap_services; then
  echo "[run_proof] FATAL — bootstrap failed"
  exit 1
fi
# ── case discovery + filtering ───────────────────────────────────────────
discover_cases() {
  # Emit case files matching the current mode, in NN-prefix (glob) order.
  # Each case declares CASE_TYPE; read it by sourcing the file in a
  # throwaway shell with --metadata-only so the case itself does not run.
  local script declared
  for script in "${SCRIPT_DIR}/cases/"*.sh; do
    [ -e "$script" ] || continue
    declared=$(bash -c "source '$script' --metadata-only 2>/dev/null; echo \${CASE_TYPE:-}" 2>/dev/null || echo "")
    # Modes are cumulative: contract ⊂ integration ⊂ performance.
    case "$MODE:$declared" in
      contract:contract|\
      integration:contract|integration:integration|\
      performance:contract|performance:integration|performance:performance)
        echo "$script" ;;
    esac
  done
}
CASES=()
while IFS= read -r line; do CASES+=("$line"); done < <(discover_cases)
echo "[run_proof] cases for mode=${MODE}: ${#CASES[@]}"
# ── case execution ───────────────────────────────────────────────────────
# Wrapper-exit counters. These were declared but never updated; track them
# here so the counts are real. (Only assertion-level counts currently drive
# the exit code — see aggregation below.)
CASE_PASS=0
CASE_FAIL=0
CASE_SKIP=0
REQUIRED_FAIL=0  # NOTE(review): reserved for required-case gating — not yet wired
for case_file in "${CASES[@]}"; do
  case_name=$(basename "$case_file" .sh)
  echo ""
  echo "[run_proof] running ${case_name} ..."
  # SECONDS is bash's built-in elapsed-time counter; reset per case.
  SECONDS=0
  if bash "$case_file" >> "${PROOF_REPORT_DIR}/raw/logs/${case_name}.log" 2>&1; then
    echo " → wrapper exit 0 (${SECONDS}s)"
    CASE_PASS=$((CASE_PASS + 1))
  else
    # A failing wrapper does not abort the run; evidence aggregation decides.
    echo " → wrapper exit non-zero (${SECONDS}s) — see raw/logs/${case_name}.log"
    CASE_FAIL=$((CASE_FAIL + 1))
  fi
done
# ── aggregation ──────────────────────────────────────────────────────────
echo ""
echo "[run_proof] aggregating evidence..."
ALL_RECORDS_FILE="${PROOF_REPORT_DIR}/raw/all_records.jsonl"
> "$ALL_RECORDS_FILE"
for f in "${PROOF_REPORT_DIR}/raw/cases/"*.jsonl; do
  [ -e "$f" ] || continue
  cat "$f" >> "$ALL_RECORDS_FILE"
done
# grep -c prints "0" yet exits 1 when nothing matches (and exits 2 when the
# file is unreadable, printing nothing). Capture whatever it printed and
# fall back to a clean "0" so callers can feed the result straight into
# jq --argjson and shell arithmetic.
_count() {
  local needle="$1" path="$2" hits
  if hits=$(grep -c "$needle" "$path" 2>/dev/null); then
    echo "$hits"
  else
    echo "${hits:-0}"
  fi
}
if [ -s "$ALL_RECORDS_FILE" ]; then
  pass=$(_count '"result":"pass"' "$ALL_RECORDS_FILE")
  fail=$(_count '"result":"fail"' "$ALL_RECORDS_FILE")
  skip=$(_count '"result":"skip"' "$ALL_RECORDS_FILE")
else
  pass=0
  fail=0
  skip=0
fi
# summary.json — machine-readable counterpart to summary.md: mode, UTC
# timestamp, git sha, assertion counts, and the number of cases run.
summary_args=(
  --arg mode "$MODE"
  --arg ts "$(date -u -Iseconds)"
  --arg sha "$PROOF_GIT_SHA"
  --argjson pass "$pass"
  --argjson fail "$fail"
  --argjson skip "$skip"
  --argjson cases "${#CASES[@]}"
)
jq -n "${summary_args[@]}" \
  '{mode: $mode, timestamp_utc: $ts, git_sha: $sha,
    counts: {pass: $pass, fail: $fail, skip: $skip},
    cases_run: $cases, evidence_dir: "raw/"}' \
  > "${PROOF_REPORT_DIR}/summary.json"
# summary.md — human-readable run report: header, per-case-id table, and
# (when anything failed) the list of failed assertions.
{
  # Separate report id from mode: the original "# proof-${ts}${MODE} mode"
  # fused timestamp and mode into one token (e.g. "proof-...Zcontract").
  echo "# proof-${ts} — ${MODE} mode"
  echo ""
  echo "- git_sha: \`${PROOF_GIT_SHA}\`"
  echo "- timestamp: $(date -u -Iseconds)"
  echo "- cases run: ${#CASES[@]}"
  echo "- assertions: ${pass} pass · ${fail} fail · ${skip} skip"
  echo ""
  echo "## per-case-id"
  echo ""
  echo "| case_id | pass | fail | skip |"
  echo "|---|---:|---:|---:|"
  # Iterate JSONL files (one per CASE_ID), not case scripts — a single
  # case file may emit under multiple CASE_IDs and this preserves the
  # mapping faithfully.
  for jsonl in "${PROOF_REPORT_DIR}/raw/cases/"*.jsonl; do
    [ -e "$jsonl" ] || continue
    cid=$(basename "$jsonl" .jsonl)
    cp=$(_count '"result":"pass"' "$jsonl")
    cfl=$(_count '"result":"fail"' "$jsonl")
    cs=$(_count '"result":"skip"' "$jsonl")
    echo "| ${cid} | ${cp} | ${cfl} | ${cs} |"
  done
  echo ""
  if [ "$fail" -gt 0 ]; then
    echo "## failed assertions"
    echo ""
    # One bullet per failing record, rendered by jq from the JSONL evidence.
    grep '"result":"fail"' "$ALL_RECORDS_FILE" | jq -r '"- **\(.case_id)** — \(.claim) — expected: \(.expected) actual: \(.actual)"'
  fi
} > "${PROOF_REPORT_DIR}/summary.md"
# ── exit ─────────────────────────────────────────────────────────────────
echo ""
echo "[run_proof] DONE — summary: ${PROOF_REPORT_DIR}/summary.md"
echo " ${pass} pass · ${fail} fail · ${skip} skip"
# Non-zero exit iff any assertion failed.
[ "$fail" -gt 0 ] && exit 1
exit 0