#!/usr/bin/env bash
# 07_vector_persistence_restart.sh — GOLAKE-070.
# Verifies vectord persistence: add vectors, search, kill vectord,
# restart, search again — top-1 ID and distance must match within
# float-noise tolerance. The orchestrator's cleanup uses pgrep so the
# restarted vectord gets cleaned up regardless of PID tracking.
#
# Environment read by this case: PROOF_RUN_ID, PROOF_REPORT_DIR,
# PROOF_VECTORD_URL, PROOF_GATEWAY_URL, PROOF_REPO_ROOT,
# PROOF_LAKEHOUSE_CONFIG.

# NOTE: -e is intentionally absent here, so a failing step does not abort
# the case (presumably so later assertions are still recorded — confirm
# against the harness conventions).
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/../lib/env.sh"
source "${SCRIPT_DIR}/../lib/http.sh"
source "${SCRIPT_DIR}/../lib/assert.sh"

# Case metadata. With --metadata-only the script stops right after these
# assignments; `return` covers the sourced case, `exit` the executed one.
CASE_ID="GOLAKE-070"
CASE_NAME="Vector persistence — kill+restart preserves state"
CASE_TYPE="integration"
if [ "${1:-}" = "--metadata-only" ]; then return 0 2>/dev/null || exit 0; fi

INDEX_NAME="proof_persist_${PROOF_RUN_ID}"
VECTORD_LOG="${PROOF_REPORT_DIR}/raw/logs/vectord_restart.log"

# Pre-flight: vectord must be reachable. The skip detail reports the URL
# that was actually probed instead of a hard-coded port.
if ! curl -sf -m 1 "${PROOF_VECTORD_URL}/health" >/dev/null 2>&1; then
  proof_skip "$CASE_ID" "Persistence test — vectord unreachable" \
    "vectord not responding at ${PROOF_VECTORD_URL}; harness bootstrap may have failed"
  return 0 2>/dev/null || exit 0
fi

# Build deterministic vectors. Unit basis vectors so search is unambiguous.
proof_post "$CASE_ID" "create_index" "${PROOF_GATEWAY_URL}/v1/vectors/index" \
  "application/json" \
  "{\"name\":\"${INDEX_NAME}\",\"dimension\":4}" >/dev/null
proof_assert_eq "$CASE_ID" "create index → 201" "201" \
  "$(proof_status_of "$CASE_ID" "create_index")"

# Four orthogonal 4-d vectors, one per axis.
add_body='{"items":[
  {"id":"p1","vector":[1,0,0,0]},
  {"id":"p2","vector":[0,1,0,0]},
  {"id":"p3","vector":[0,0,1,0]},
  {"id":"p4","vector":[0,0,0,1]}
]}'
proof_post "$CASE_ID" "add_vectors" \
  "${PROOF_GATEWAY_URL}/v1/vectors/index/${INDEX_NAME}/add" \
  "application/json" "$add_body" >/dev/null
proof_assert_eq "$CASE_ID" "add 4 vectors → 200" "200" \
  "$(proof_status_of "$CASE_ID" "add_vectors")"

# Pre-restart search — record top-1 as the canonical reference.
search_body='{"vector":[1,0,0,0],"k":2}'
proof_post "$CASE_ID" "pre_restart_search" \
  "${PROOF_GATEWAY_URL}/v1/vectors/index/${INDEX_NAME}/search" \
  "application/json" "$search_body" >/dev/null
# Response body file for the request above (presumably written by
# proof_post — confirm in lib/http.sh).
pre_body="${PROOF_REPORT_DIR}/raw/http/${CASE_ID}/pre_restart_search.body"
pre_top1=$(jq -r '.results[0].id' "$pre_body")
pre_dist=$(jq -r '.results[0].distance' "$pre_body")
proof_assert_eq "$CASE_ID" "pre-restart top-1 = p1" "p1" "$pre_top1"

# Succeeds while vectord answers its /health endpoint.
_case07_vectord_up() {
  curl -sf -m 1 "${PROOF_VECTORD_URL}/health" >/dev/null 2>&1
}

# Polls until vectord stops answering /health or $1 seconds elapse.
# Returns 0 once it is down, non-zero if it is still up at the deadline.
_case07_wait_down() {
  local limit
  limit=$(( $(date +%s) + $1 ))
  while [ "$(date +%s)" -lt "$limit" ]; do
    if ! _case07_vectord_up; then
      return 0
    fi
    sleep 0.1
  done
  ! _case07_vectord_up
}

# Removes the index created by this case; used on early-exit paths where
# vectord is still serving, so the index is not left behind.
_case07_drop_index() {
  proof_delete "$CASE_ID" "skip_clean" \
    "${PROOF_GATEWAY_URL}/v1/vectors/index/${INDEX_NAME}" >/dev/null
}

# ── kill vectord ────────────────────────────────────────────────
# Make sure the log directory exists before the first append.
mkdir -p -- "$(dirname -- "$VECTORD_LOG")"
echo "[case-07] killing vectord..." >> "$VECTORD_LOG"

# Matches a command line that starts with bin/vectord, optionally preceded
# by any mix of '.' and '/' (e.g. ./bin/vectord), with or without arguments.
vectord_pattern='^[./]*bin/vectord($| )'
old_pid=$(pgrep -f "$vectord_pattern" | head -n 1)
if [ -z "$old_pid" ]; then
  _case07_drop_index
  proof_skip "$CASE_ID" "vectord PID not found — can't test restart" \
    "pgrep returned no match for ${vectord_pattern}"
  return 0 2>/dev/null || exit 0
fi

# Try a plain kill (SIGTERM) first and wait for vectord to actually go down
# (so the restart path is exercised); escalate to SIGKILL only if needed.
kill "$old_pid" 2>/dev/null || true
if ! _case07_wait_down 5; then
  kill -9 "$old_pid" 2>/dev/null || true
  if ! _case07_wait_down 2; then
    # Something is still serving /health, so a post-restart search could be
    # answered by a process that never restarted. Bail out rather than
    # record a vacuous pass.
    _case07_drop_index
    proof_skip "$CASE_ID" "vectord still reachable after SIGKILL — can't test restart" \
      "pid ${old_pid} was killed but ${PROOF_VECTORD_URL}/health still responds"
    return 0 2>/dev/null || exit 0
  fi
fi

# ── restart vectord ─────────────────────────────────────────────
# Launch from the repo root inside a subshell: a failed cd aborts the launch
# instead of running ./bin/vectord from the wrong directory, and the caller's
# working directory is left untouched. exec makes $! the PID of vectord.
(
  cd "$PROOF_REPO_ROOT" || exit 1
  exec ./bin/vectord --config "$PROOF_LAKEHOUSE_CONFIG"
) >> "$VECTORD_LOG" 2>&1 &
new_pid=$!

# Poll for readiness — give it 8s like the bootstrap does.
deadline=$(( $(date +%s) + 8 ))
ready=0
restart_exited=0
while [ "$(date +%s)" -lt "$deadline" ]; do
  # If the relaunched process is already gone, stop polling: waiting out the
  # deadline is pointless, and any /health answer would come from some other
  # process rather than the one just started.
  if ! kill -0 "$new_pid" 2>/dev/null; then
    restart_exited=1
    break
  fi
  if curl -sf -m 1 "${PROOF_VECTORD_URL}/health" >/dev/null 2>&1; then
    ready=1
    break
  fi
  sleep 0.1
done

if [ "$ready" -eq 0 ]; then
  if [ "$restart_exited" -eq 1 ]; then
    _proof_record "$CASE_ID" "vectord restart binds within 8s" \
      fail "ready" "exited" \
      "vectord process exited before answering /health; pid=${new_pid}; see ${VECTORD_LOG}"
  else
    _proof_record "$CASE_ID" "vectord restart binds within 8s" \
      fail "ready" "timeout" \
      "vectord did not respond to /health after restart; pid=${new_pid}"
  fi
  return 0 2>/dev/null || exit 0
fi
_proof_record "$CASE_ID" "vectord restart binds within 8s" \
  pass "ready" "ready" "old_pid=${old_pid} new_pid=${new_pid}"

# ── post-restart search ─────────────────────────────────────────
# Same query as before the restart, so the two results are comparable.
proof_post "$CASE_ID" "post_restart_search" \
  "${PROOF_GATEWAY_URL}/v1/vectors/index/${INDEX_NAME}/search" \
  "application/json" "$search_body" >/dev/null
post_status=$(proof_status_of "$CASE_ID" "post_restart_search")
proof_assert_eq "$CASE_ID" "post-restart search → 200" "200" "$post_status"

if [ "$post_status" != "200" ]; then
  # Without a successful response there is nothing meaningful to compare.
  proof_skip "$CASE_ID" "value assertions skipped — search failed" \
    "post-restart search returned ${post_status}; index may not have rehydrated"
else
  post_body="${PROOF_REPORT_DIR}/raw/http/${CASE_ID}/post_restart_search.body"
  post_top1=$(jq -r '.results[0].id' "$post_body")
  post_dist=$(jq -r '.results[0].distance' "$post_body")
  proof_assert_eq "$CASE_ID" "post-restart top-1 ID matches pre-restart" \
    "$pre_top1" "$post_top1"
  # Distances should be bit-identical (same float32 graph reloaded).
  proof_assert_eq "$CASE_ID" "post-restart top-1 distance matches pre-restart" \
    "$pre_dist" "$post_dist"
fi

# Cleanup: drop the index this case created.
proof_delete "$CASE_ID" "post_clean" \
  "${PROOF_GATEWAY_URL}/v1/vectors/index/${INDEX_NAME}" >/dev/null