Adds the integration tier: the full CSV→Parquet→SQL chain and the full
text→embed→vector→search chain. All 10 cases (4 contract + 6 integration)
run end-to-end deterministically; 8s wall total.
Cases added:
01_storage_roundtrip.sh
GOLAKE-010-012. PUT 1KiB → GET sha256-equal → LIST contains key
→ DELETE 200/204 → GET 404. Deterministic key under
proof/<case_id>/ so concurrent runs don't collide.
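An offline sketch of the roundtrip's byte-equality core. The curl calls (commented) show an assumed shape for the storaged API, and the transport is stubbed with a local copy so the sketch runs without services; STORAGE_URL and the object route are assumptions, not the real lib/http.sh helpers.

```shell
# Byte-equality core of the storage roundtrip, transport stubbed.
key="proof/GOLAKE-010/blob-$$"          # deterministic per-case prefix
payload="$(mktemp)"; fetched="$(mktemp)"
head -c 1024 /dev/urandom > "$payload"  # the 1KiB PUT body

# real case, roughly (route is an assumption):
#   curl -sf -X PUT --data-binary @"$payload" "$STORAGE_URL/v1/objects/$key"
#   curl -sf "$STORAGE_URL/v1/objects/$key" -o "$fetched"
cp "$payload" "$fetched"

put_sha=$(sha256sum "$payload" | awk '{print $1}')
get_sha=$(sha256sum "$fetched" | awk '{print $1}')
[ "$put_sha" = "$get_sha" ] && echo "GET bytes sha256-equal to PUT"
```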
02_catalog_manifest.sh
GOLAKE-020-022. Fresh register existing=false → manifest read
matches → list contains dataset_id → idempotent re-register
existing=true with stable dataset_id → schema-drift register
409 (the ADR-020 contract). Per-run unique name via
PROOF_RUN_ID so existing=false is meaningful.
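The idempotency check reduces to comparing two register responses. A sketch, assuming the field names above (dataset_id, existing) are the literal wire shape:

```shell
# Idempotent re-register: same dataset_id, second call flags existing=true.
first='{"dataset_id":"ds-42","existing":false}'   # fresh register
second='{"dataset_id":"ds-42","existing":true}'   # re-register

id1=$(echo "$first"  | jq -r .dataset_id)
id2=$(echo "$second" | jq -r .dataset_id)
ex2=$(echo "$second" | jq -r .existing)

[ "$id1" = "$id2" ] && [ "$ex2" = "true" ] && echo "re-register idempotent"
```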
03_ingest_csv_to_parquet.sh
GOLAKE-030. workers.csv (5 rows) via /v1/ingest multipart →
parquet object on storaged → catalog manifest with row_count=5.
Verifies content-addressed key shape (datasets/<n>/<fp>.parquet).
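The key-shape check reduces to one grep; the hex fingerprint alphabet and the sample key below are assumptions about the content-addressing scheme.

```shell
# Content-addressed key shape: datasets/<name>/<fingerprint>.parquet
key="datasets/workers/3f9a1c.parquet"   # illustrative manifest key
echo "$key" | grep -Eq '^datasets/[^/]+/[0-9a-f]+\.parquet$' \
  && echo "key shape ok: $key"
```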
04_query_correctness.sh
GOLAKE-040. The 5 SQL assertions from fixtures/expected/queries.json
against the workers fixture: count=5, Chicago=2, max=95,
safety→Barbara, Houston avg=89.5. Iterates the claims, runs
each query, compares response columns to expected values.
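The iterate-and-compare loop, sketched offline with an inlined fixture. The real case reads fixtures/expected/queries.json and runs each SQL through queryd; here the query step is stubbed so only the loop shape is shown, and the fixture field names are assumptions.

```shell
# Iterate expected-value claims and count passing assertions.
expected='[
  {"claim": "count=5",   "want": "5"},
  {"claim": "Chicago=2", "want": "2"},
  {"claim": "max=95",    "want": "95"}
]'
pass=0
while IFS=$'\t' read -r claim want; do
  got="$want"   # stub; the real case extracts the cell from the query response
  [ "$got" = "$want" ] && pass=$((pass + 1))
done < <(echo "$expected" | jq -r '.[] | [.claim, .want] | @tsv')
echo "pass=$pass assertions"
```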
06_vector_add_search.sh (integration extension)
GOLAKE-051. text → /v1/embed (4 docs from fixtures/text/docs.txt)
→ vectord add → search by query embedding. Top-1 ID per query
asserted against fixtures/expected/rankings.json. First run (or
--regenerate-rankings) writes the fixture and emits a skip with
explicit reason; subsequent runs assert against it.
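The write-on-first-run fixture pattern, sketched with stand-in paths and content: if the fixture is missing (or regeneration was requested) the case writes it and skips; otherwise it asserts against it.

```shell
# First run writes the fixture and skips; later runs assert against it.
fixture="$(mktemp -d)/rankings.json"
current='{"q1":"doc-2"}'   # would come from the live search top-1 IDs

if [ ! -s "$fixture" ]; then
  echo "$current" > "$fixture"
  echo "skip: rankings fixture written on first run"
else
  [ "$(cat "$fixture")" = "$current" ] && echo "rankings match fixture"
fi
```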
07_vector_persistence_restart.sh
GOLAKE-070. add 4 unit-basis vectors → search → record top-1
distance → SIGTERM vectord → restart with the same --config →
poll /health for 8s → search again → top-1 ID and distance match
bit-identically. Skips with reason if vectord PID can't be found
or post-restart bind times out.
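The bit-identity assertion is a string compare on the serialized distance, not a float compare with tolerance, so any drift in the persisted vectors fails. The response shapes below are assumptions.

```shell
# Compare top-1 ID and serialized distance across the restart.
before='{"id":"v3","distance":"0.1339745962155614"}'   # pre-SIGTERM search
after='{"id":"v3","distance":"0.1339745962155614"}'    # post-restart search

d1=$(echo "$before" | jq -r .distance)
d2=$(echo "$after"  | jq -r .distance)
[ "$(echo "$before" | jq -r .id)" = "$(echo "$after" | jq -r .id)" ] \
  && [ "$d1" = "$d2" ] && echo "top-1 bit-identical across restart"
```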
Four harness improvements landed alongside:
run_proof.sh writes a temp lakehouse_proof.toml with
refresh_every="500ms" override and passes --config to all booted
binaries. Production default is 30s; 04_query_correctness needs
queryd to pick up the new view within a tick. Production config
unchanged.
cleanup() now pgreps for any orphan bin/<svc> processes (anchored
to start-of-argv per memory feedback_pkill_scope.md) so a case
that restarts a service mid-run still gets cleaned up.
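The anchored pattern can be sanity-checked offline with grep: a real service argv matches, while a bystander process that merely mentions a similarly named file does not.

```shell
# Start-of-argv anchor for one service (vectord as the example).
pattern='^[./]*bin/vectord($| )'
echo "./bin/vectord --config x.toml" | grep -Eq "$pattern" && echo "match: service argv"
echo "vim bin/vectord.log" | grep -Eq "$pattern" || echo "no match: bystander argv"
```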
lib/http.sh adds proof_call(case_id, probe, method, url, args...)
— escape hatch for cases that need raw curl args (multipart -F,
custom headers). Used by 03_ingest for the multipart upload that
conflicts with proof_post's --data + Content-Type defaults.
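A sketch of the proof_call shape. Evidence recording is elided and the curl invocation is printed rather than executed, so the raw-argument forwarding that proof_post can't do stays visible; the signature follows the description above but is otherwise an assumption.

```shell
# Escape-hatch wrapper: forwards raw curl args after the fixed prefix.
proof_call() {
  local case_id="$1" probe="$2" method="$3" url="$4"; shift 4
  echo curl -sS -X "$method" "$url" "$@"   # real helper would exec curl here
}

cmd=$(proof_call GOLAKE-030 ingest POST http://127.0.0.1:3110/v1/ingest -F file=@workers.csv)
echo "$cmd"
```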
lib/env.sh exports PROOF_RUN_ID — short unique id derived from the
report directory timestamp. Used by 02 and 07 for fresh-each-run
state isolation.
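One plausible derivation of PROOF_RUN_ID (lib/env.sh's exact recipe isn't shown here, so treat this as a sketch): take the timestamp suffix of the report directory and strip separators.

```shell
# Derive a short unique run id from the report directory timestamp.
PROOF_REPORT_DIR="/repo/tests/proof/reports/proof-20250101-120000Z"
PROOF_RUN_ID="${PROOF_REPORT_DIR##*proof-}"    # longest-prefix strip, leaves the timestamp
PROOF_RUN_ID="${PROOF_RUN_ID//[^0-9A-Za-z]/}"  # safe for dataset names / object keys
echo "$PROOF_RUN_ID"   # → 20250101120000Z
```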
Two real findings recorded as evidence (no code changes):
- rankings.json fixture pinned: 4 queries → 4 distinct top-1 docs
via nomic-embed-text. A model swap that changes ranking now
fails the harness loudly; --regenerate-rankings is the override.
- vectord persistence kill+restart preserves top-1 distance
bit-identically — the LHV1 single-Put framed format from
G1P round-trips exactly through Save/Load.
Verified end-to-end:
just proof contract — 53 pass (4 cases)
just proof integration — 104 pass (10 cases) · 8s wall
just verify — 9 smokes still green · 33s wall
Phase D (performance baseline) lands next: 10_perf_baseline measures
rows/sec ingest, vectors/sec add, p50/p95 query+search latency, RSS,
CPU. First run writes tests/proof/baseline.json; later runs diff
against it.
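For the p50/p95 numbers, a nearest-rank percentile over a recorded latency sample is enough; the sample, units, and helper name below are illustrative, not the future case's layout.

```shell
# Nearest-rank percentile: sort the sample, pick element ceil(p/100 * N).
latencies="12 15 11 14 90 13 12 16 13 14"   # ms, one per request
percentile() {
  local p="$1"
  echo $latencies | tr ' ' '\n' | sort -n | awk -v p="$p" '
    { a[NR] = $1 }
    END { i = int(p / 100 * NR + 0.999999); if (i < 1) i = 1; print a[i] }'
}
echo "p50=$(percentile 50)ms p95=$(percentile 95)ms"
```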
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
277 lines
11 KiB
Bash
Executable File
#!/usr/bin/env bash
# run_proof.sh — orchestrator for the proof harness.
#
# Usage:
# tests/proof/run_proof.sh --mode contract
# tests/proof/run_proof.sh --mode integration
# tests/proof/run_proof.sh --mode performance
# tests/proof/run_proof.sh --mode integration --no-bootstrap # assume services up
# tests/proof/run_proof.sh --regenerate-rankings # rebuild expected/rankings.json
#
# Bootstraps services (storaged → catalogd → ingestd → queryd →
# vectord → embedd → gateway) once at the start unless --no-bootstrap.
# Iterates matching cases in numerical order. Aggregates per-case JSONL
# evidence into summary.md + summary.json under tests/proof/reports/proof-<ts>/.
#
# Designed per CLAUDE_REFACTOR_GUARDRAILS.md: bash + curl + jq only,
# no Go test framework, no DSL. Each case is a thin shell script that
# sources lib/*.sh and writes evidence; this harness orchestrates them.

set -uo pipefail

# ── arg parsing ────────────────────────────────────────────────────────────
MODE="contract"
NO_BOOTSTRAP=0
REGENERATE_RANKINGS=0
REGENERATE_BASELINE=0

while [ $# -gt 0 ]; do
  case "$1" in
    --mode) MODE="$2"; shift 2 ;;
    --mode=*) MODE="${1#--mode=}"; shift ;;
    --no-bootstrap) NO_BOOTSTRAP=1; shift ;;
    --regenerate-rankings) REGENERATE_RANKINGS=1; shift ;;
    --regenerate-baseline) REGENERATE_BASELINE=1; shift ;;
    -h|--help)
      sed -n '1,16p' "$0" | sed 's/^# *//'
      exit 0 ;;
    *) echo "unknown arg: $1" >&2; exit 2 ;;
  esac
done

case "$MODE" in
  contract|integration|performance) ;;
  *) echo "[run_proof] invalid --mode '$MODE' (must be contract|integration|performance)" >&2; exit 2 ;;
esac

export PROOF_MODE="$MODE"
export PROOF_REGENERATE_RANKINGS="$REGENERATE_RANKINGS"
export PROOF_REGENERATE_BASELINE="$REGENERATE_BASELINE"

# ── env setup ─────────────────────────────────────────────────────────────
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR/../.."

# Establish the report directory before sourcing env.sh so cases see it.
ts="$(date -u +%Y%m%d-%H%M%SZ)"
export PROOF_REPORT_DIR="$(pwd)/tests/proof/reports/proof-${ts}"
mkdir -p "$PROOF_REPORT_DIR"

# shellcheck source=lib/env.sh
source "${SCRIPT_DIR}/lib/env.sh"
# shellcheck source=lib/http.sh
source "${SCRIPT_DIR}/lib/http.sh"
# shellcheck source=lib/assert.sh
source "${SCRIPT_DIR}/lib/assert.sh"
# shellcheck source=lib/metrics.sh
source "${SCRIPT_DIR}/lib/metrics.sh"

echo "[run_proof] mode=${MODE} report=${PROOF_REPORT_DIR}"
echo "[run_proof] git_sha=${PROOF_GIT_SHA}"

# ── service lifecycle ────────────────────────────────────────────────────
PIDS=()
WE_BOOTED=0

cleanup() {
  if [ "$WE_BOOTED" -eq 1 ]; then
    # Kill the original PIDs we recorded plus any restarts a case
    # might have done (07_vector_persistence_restart kills+restarts
    # vectord mid-case, which orphans the original PID and creates
    # a new one we never tracked). pgrep pattern is anchored to
    # bin/<name> at start-of-argv per memory feedback_pkill_scope.md.
    echo "[run_proof] cleanup: stopping services we started (incl. any restarts)"
    if [ "${#PIDS[@]}" -gt 0 ]; then
      kill "${PIDS[@]}" 2>/dev/null || true
    fi
    for svc in storaged catalogd ingestd queryd vectord embedd gateway; do
      pgrep -f "^[./]*bin/${svc}($| )" 2>/dev/null \
        | xargs -r kill 2>/dev/null || true
    done
    wait 2>/dev/null || true
  fi
}
trap cleanup EXIT INT TERM

poll_health() {
  local name="$1" port="$2" deadline=$(($(date +%s) + 8))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    if curl -sS --max-time 1 "http://127.0.0.1:${port}/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.1
  done
  return 1
}

bootstrap_services() {
  echo "[run_proof] bootstrap: building binaries..."
  export PATH="/usr/local/go/bin:${PATH}"
  if ! go build -o bin/ ./cmd/... > "${PROOF_REPORT_DIR}/raw/logs/build.log" 2>&1; then
    echo "[run_proof] BUILD FAILED — see raw/logs/build.log"
    return 1
  fi

  # Override queryd's refresh_every to 500ms so cases see new
  # manifests within a tick — production default is 30s, which races
  # against ingest→query cases. Default config left alone for prod.
  local CFG_OVERRIDE="${PROOF_REPORT_DIR}/raw/lakehouse_proof.toml"
  sed 's/^refresh_every *=.*/refresh_every = "500ms"/' lakehouse.toml > "$CFG_OVERRIDE"
  export PROOF_LAKEHOUSE_CONFIG="$CFG_OVERRIDE"

  echo "[run_proof] bootstrap: launching services in dep order..."
  for SPEC in "storaged:3211" "catalogd:3212" "ingestd:3213" "queryd:3214" "vectord:3215" "embedd:3216" "gateway:3110"; do
    local NAME="${SPEC%:*}" PORT="${SPEC#*:}"
    # Skip if already up.
    if curl -sS --max-time 1 "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
      echo " ✓ ${NAME} (:${PORT}) already up — leaving as-is"
      continue
    fi
    ./bin/"$NAME" --config "$CFG_OVERRIDE" \
      > "${PROOF_REPORT_DIR}/raw/logs/${NAME}.log" 2>&1 &
    PIDS+=("$!")
    if poll_health "$NAME" "$PORT"; then
      echo " ✓ ${NAME} (:${PORT}) booted"
      WE_BOOTED=1
    else
      echo " ✗ ${NAME} (:${PORT}) failed to bind in 8s — see raw/logs/${NAME}.log"
      tail -20 "${PROOF_REPORT_DIR}/raw/logs/${NAME}.log" | sed 's/^/ /'
      return 1
    fi
  done
}

if [ "$NO_BOOTSTRAP" -eq 0 ]; then
  if ! bootstrap_services; then
    echo "[run_proof] FATAL — bootstrap failed"
    exit 1
  fi
else
  echo "[run_proof] --no-bootstrap — assuming services already up"
fi

# ── case discovery + filtering ───────────────────────────────────────────
discover_cases() {
  # Returns case files matching the current mode, sorted by NN prefix.
  # Each case declares CASE_TYPE; we re-source in a subshell to read it.
  local f case_type
  for f in "${SCRIPT_DIR}/cases/"*.sh; do
    [ -e "$f" ] || continue
    case_type=$(bash -c "source '$f' --metadata-only 2>/dev/null; echo \${CASE_TYPE:-}" 2>/dev/null || echo "")
    # contract mode runs contract cases only
    # integration mode runs contract + integration
    # performance mode runs contract + integration + performance
    case "$MODE:$case_type" in
      contract:contract|\
      integration:contract|integration:integration|\
      performance:contract|performance:integration|performance:performance)
        echo "$f" ;;
    esac
  done
}

CASES=()
while IFS= read -r line; do CASES+=("$line"); done < <(discover_cases)

echo "[run_proof] cases for mode=${MODE}: ${#CASES[@]}"

# ── case execution ───────────────────────────────────────────────────────
CASE_PASS=0
CASE_FAIL=0
CASE_SKIP=0
REQUIRED_FAIL=0

for case_file in "${CASES[@]}"; do
  case_name=$(basename "$case_file" .sh)
  echo ""
  echo "[run_proof] running ${case_name} ..."
  SECONDS=0
  if bash "$case_file" >> "${PROOF_REPORT_DIR}/raw/logs/${case_name}.log" 2>&1; then
    echo " → wrapper exit 0 (${SECONDS}s)"
  else
    echo " → wrapper exit non-zero (${SECONDS}s) — see raw/logs/${case_name}.log"
  fi
done

# ── aggregation ──────────────────────────────────────────────────────────
echo ""
echo "[run_proof] aggregating evidence..."

ALL_RECORDS_FILE="${PROOF_REPORT_DIR}/raw/all_records.jsonl"
> "$ALL_RECORDS_FILE"
for f in "${PROOF_REPORT_DIR}/raw/cases/"*.jsonl; do
  [ -e "$f" ] || continue
  cat "$f" >> "$ALL_RECORDS_FILE"
done

# grep -c exits 1 with output "0" when no matches; the `|| echo 0` form
# concatenates "0\n0" and breaks jq --argjson + arithmetic. Capture the
# count and force a clean integer fallback on non-zero exit.
_count() {
  local pattern="$1" file="$2" n
  n=$(grep -c "$pattern" "$file" 2>/dev/null) || n=0
  echo "$n"
}

if [ -s "$ALL_RECORDS_FILE" ]; then
  pass=$(_count '"result":"pass"' "$ALL_RECORDS_FILE")
  fail=$(_count '"result":"fail"' "$ALL_RECORDS_FILE")
  skip=$(_count '"result":"skip"' "$ALL_RECORDS_FILE")
else
  pass=0; fail=0; skip=0
fi

# summary.json
jq -n \
  --arg mode "$MODE" \
  --arg ts "$(date -u -Iseconds)" \
  --arg sha "$PROOF_GIT_SHA" \
  --argjson pass "$pass" \
  --argjson fail "$fail" \
  --argjson skip "$skip" \
  --argjson cases "${#CASES[@]}" \
  '{mode: $mode, timestamp_utc: $ts, git_sha: $sha,
    counts: {pass: $pass, fail: $fail, skip: $skip},
    cases_run: $cases, evidence_dir: "raw/"}' \
  > "${PROOF_REPORT_DIR}/summary.json"

# summary.md
{
  echo "# proof-${ts} — ${MODE} mode"
  echo ""
  echo "- git_sha: \`${PROOF_GIT_SHA}\`"
  echo "- timestamp: $(date -u -Iseconds)"
  echo "- cases run: ${#CASES[@]}"
  echo "- assertions: ${pass} pass · ${fail} fail · ${skip} skip"
  echo ""
  echo "## per-case-id"
  echo ""
  echo "| case_id | pass | fail | skip |"
  echo "|---|---:|---:|---:|"
  # Iterate JSONL files (one per CASE_ID), not case scripts — a single
  # case file may emit under multiple CASE_IDs and this preserves the
  # mapping faithfully.
  for jsonl in "${PROOF_REPORT_DIR}/raw/cases/"*.jsonl; do
    [ -e "$jsonl" ] || continue
    cid=$(basename "$jsonl" .jsonl)
    cp=$(_count '"result":"pass"' "$jsonl")
    cfl=$(_count '"result":"fail"' "$jsonl")
    cs=$(_count '"result":"skip"' "$jsonl")
    echo "| ${cid} | ${cp} | ${cfl} | ${cs} |"
  done
  echo ""
  if [ "$fail" -gt 0 ]; then
    echo "## failed assertions"
    echo ""
    grep '"result":"fail"' "$ALL_RECORDS_FILE" | jq -r '"- **\(.case_id)** — \(.claim) — expected: \(.expected) actual: \(.actual)"'
  fi
} > "${PROOF_REPORT_DIR}/summary.md"

# ── exit ─────────────────────────────────────────────────────────────────
echo ""
echo "[run_proof] DONE — summary: ${PROOF_REPORT_DIR}/summary.md"
echo " ${pass} pass · ${fail} fail · ${skip} skip"

if [ "$fail" -gt 0 ]; then exit 1; fi
exit 0