#!/usr/bin/env bash
# 10_perf_baseline.sh — GOLAKE-100.
# Performance baseline: rows/sec ingest, vectors/sec add, p50/p95
# query latency, p50/p95 search latency, peak RSS per service.
#
# First run (or --regenerate-baseline) writes tests/proof/baseline.json.
# Subsequent runs diff against it; >10% regression emits a SKIP record
# with REGRESSION detail (not a fail — perf claim is required:false in
# claims.yaml so the gate stays green; the human summary tells the
# regression story honestly).
#
# Skipped with loud reason if any earlier case in this run failed,
# per spec: "performance mode runs only after contract+integration pass."

# -u: unset variables are errors; pipefail: a pipeline fails if any stage
# fails. -e is deliberately absent — failed measurements are turned into
# explicit skip records below instead of aborting the script.
set -uo pipefail

# Locate our own directory so the shared proof libraries resolve no matter
# what the caller's CWD is.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/../lib/env.sh"
source "${SCRIPT_DIR}/../lib/http.sh"
source "${SCRIPT_DIR}/../lib/assert.sh"
source "${SCRIPT_DIR}/../lib/metrics.sh"

# Case identity — presumably read by the harness via --metadata-only; verify
# against the runner before renaming.
CASE_ID="GOLAKE-100"
CASE_NAME="Performance baseline — rows/sec, vectors/sec, p50/p95 latencies"
CASE_TYPE="performance"

# Metadata-only probe: stop before doing any work. `return` covers being
# sourced; `exit` covers direct execution.
if [[ "${1:-}" == "--metadata-only" ]]; then
  return 0 2>/dev/null || exit 0
fi

BASELINE_FILE="${PROOF_REPO_ROOT}/tests/proof/baseline.json"
PERF_DATASET="proof_perf_${PROOF_RUN_ID}"
PERF_INDEX="proof_perf_${PROOF_RUN_ID}"

# ── pre-flight: any earlier case fail? then skip ────────────────
earlier_failed=0
for case_log in "${PROOF_REPORT_DIR}/raw/cases/"*.jsonl; do
  [[ -e "$case_log" ]] || continue   # unmatched glob leaves the literal pattern
  if grep -q '"result":"fail"' "$case_log" 2>/dev/null; then
    earlier_failed=1
    break
  fi
done
if (( earlier_failed )); then
  proof_skip "$CASE_ID" "Performance baseline — earlier case failed" \
    "perf measurements are only meaningful after contract+integration green; see prior cases for failures"
  return 0 2>/dev/null || exit 0
fi

# ── measurement: rows/sec ingest ─────────────────────────────────
# Generate a deterministic 1000-row CSV inline. Using ID-derived field
# values so SHA is stable across runs and parquet_size is reproducible.
PERF_CSV="${PROOF_REPORT_DIR}/raw/outputs/${CASE_ID}_perf.csv"
mkdir -p "${PERF_CSV%/*}"   # parameter expansion in place of dirname(1)

# Header line plus 1000 rows from a single awk pass; every field is derived
# from the row id, so the file bytes are identical on every run.
{
  printf '%s\n' "id,name,role,city,score"
  awk 'BEGIN{
    roles[0]="welder"; roles[1]="electrician"; roles[2]="operator"; roles[3]="pipefitter"; roles[4]="safety";
    cities[0]="Chicago"; cities[1]="Detroit"; cities[2]="Houston"; cities[3]="Cleveland"; cities[4]="St Louis";
    for (i=1; i<=1000; i++) {
      r = roles[(i-1)%5]
      c = cities[(i-1)%5]
      s = 50 + (i*7) % 50
      printf "%d,Worker%04d,%s,%s,%d\n", i, i, r, c, s
    }
  }'
} > "$PERF_CSV"

# Time the multipart upload of the whole file through the gateway.
proof_metric_start "$CASE_ID" "ingest"
proof_call "$CASE_ID" "perf_ingest" POST \
  "${PROOF_GATEWAY_URL}/v1/ingest?name=${PERF_DATASET}" \
  -F "file=@${PERF_CSV}" >/dev/null
ingest_ms=$(proof_metric_stop "$CASE_ID" "ingest")
ingest_status=$(proof_status_of "$CASE_ID" "perf_ingest")

# A failed ingest invalidates every downstream number — skip loudly and stop.
if [[ "$ingest_status" != "200" ]]; then
  proof_skip "$CASE_ID" "Performance baseline — perf ingest failed" \
    "ingest of 1000-row CSV returned ${ingest_status}; cannot baseline downstream metrics"
  return 0 2>/dev/null || exit 0
fi

# rows/s = rows * 1000 / elapsed_ms; a 0 ms reading is clamped to 1 ms so the
# division cannot blow up.
ingest_rows_per_sec=$(awk -v ms="$ingest_ms" -v rows=1000 \
  'BEGIN{ if (ms == 0) ms = 1; printf "%.0f", rows * 1000 / ms }')
proof_metric_value "$CASE_ID" "ingest_rows_per_sec" "$ingest_rows_per_sec" "rows/s"

# ── measurement: query p50/p95 latency ──────────────────────────
# Run the same SELECT 20 times; collect latencies; compute percentiles.
# Scratch file accumulating one latency sample per line; truncate any
# leftover content before appending.
QUERY_LATENCIES="${PROOF_REPORT_DIR}/raw/metrics/_query_latencies"
> "$QUERY_LATENCIES"
# Build the request body with jq --arg so the SQL text is JSON-escaped.
sql_body=$(jq -nc --arg s "SELECT count(*) AS n FROM ${PERF_DATASET}" '{sql:$s}')
for i in $(seq 1 20); do
  proof_post "$CASE_ID" "query_${i}" "${PROOF_GATEWAY_URL}/v1/sql" \
    "application/json" "$sql_body" >/dev/null
  # proof_latency_of presumably prints the call's elapsed ms on stdout —
  # TODO confirm against lib/metrics.sh.
  proof_latency_of "$CASE_ID" "query_${i}" >> "$QUERY_LATENCIES"
done
query_p50=$(proof_compute_percentile "$QUERY_LATENCIES" 50)
query_p95=$(proof_compute_percentile "$QUERY_LATENCIES" 95)
proof_metric_value "$CASE_ID" "query_p50_ms" "$query_p50" "ms"
proof_metric_value "$CASE_ID" "query_p95_ms" "$query_p95" "ms"

# ── measurement: vectors/sec add ────────────────────────────────
# 200 deterministic dim=4 vectors. Pure throughput metric — no
# embedding in the loop (we already measured embedding contract
# latency separately).
proof_post "$CASE_ID" "perf_create_index" \
  "${PROOF_GATEWAY_URL}/v1/vectors/index" \
  "application/json" "{\"name\":\"${PERF_INDEX}\",\"dimension\":4}" >/dev/null

# Build add body via jq — 200 items, vector[i] = [i*0.01, (i*0.01)+1, (i*0.01)+2, (i*0.01)+3].
add_body=$(jq -nc '
  {items: [range(0; 200) | {
    id: ("perf-" + (. | tostring)),
    vector: [(. * 0.01), (. * 0.01 + 1), (. * 0.01 + 2), (.
* 0.01 + 3)]
  }]}
')

# Time the single bulk add of all 200 vectors.
proof_metric_start "$CASE_ID" "vector_add"
proof_post "$CASE_ID" "perf_add" \
  "${PROOF_GATEWAY_URL}/v1/vectors/index/${PERF_INDEX}/add" \
  "application/json" "$add_body" >/dev/null
add_ms=$(proof_metric_stop "$CASE_ID" "vector_add")
add_status=$(proof_status_of "$CASE_ID" "perf_add")
# Only publish the throughput metric when the add succeeded; unlike ingest,
# a failure here does not abort the remaining measurements.
if [ "$add_status" = "200" ]; then
  vectors_per_sec=$(awk -v ms="$add_ms" -v n=200 \
    'BEGIN{ if (ms == 0) ms = 1; printf "%.0f", n * 1000 / ms }')
  proof_metric_value "$CASE_ID" "vectors_per_sec_add" "$vectors_per_sec" "vec/s"
fi

# ── measurement: search p50/p95 ─────────────────────────────────
# Same shape as the query-latency loop: 20 identical searches, one latency
# sample per line, then percentiles.
SEARCH_LATENCIES="${PROOF_REPORT_DIR}/raw/metrics/_search_latencies"
> "$SEARCH_LATENCIES"
search_body='{"vector":[1,2,3,4],"k":5}'
for i in $(seq 1 20); do
  proof_post "$CASE_ID" "search_${i}" \
    "${PROOF_GATEWAY_URL}/v1/vectors/index/${PERF_INDEX}/search" \
    "application/json" "$search_body" >/dev/null
  proof_latency_of "$CASE_ID" "search_${i}" >> "$SEARCH_LATENCIES"
done
search_p50=$(proof_compute_percentile "$SEARCH_LATENCIES" 50)
search_p95=$(proof_compute_percentile "$SEARCH_LATENCIES" 95)
proof_metric_value "$CASE_ID" "search_p50_ms" "$search_p50" "ms"
proof_metric_value "$CASE_ID" "search_p95_ms" "$search_p95" "ms"

# ── measurement: peak RSS per service ───────────────────────────
# One RSS sample per service binary; a sampling failure degrades to 0 rather
# than killing the run. Units are whatever proof_sample_rss emits —
# presumably MB given the rss_*_mb baseline keys; TODO confirm against
# lib/metrics.sh.
declare -A rss_now
for svc in storaged catalogd ingestd queryd vectord embedd gateway; do
  rss=$(proof_sample_rss "$CASE_ID" "bin/${svc}" 2>/dev/null || echo 0)
  rss_now[$svc]="${rss:-0}"
done

# Cleanup the perf index. Dataset stays — small, harmless.
proof_delete "$CASE_ID" "perf_clean" \
  "${PROOF_GATEWAY_URL}/v1/vectors/index/${PERF_INDEX}" >/dev/null

# ── baseline write or diff ──────────────────────────────────────
# NOTE(review): the line below is corrupted in this copy of the file —
# write_baseline's heredoc body, its closing brace, and the first-run-vs-diff
# `if`/`else` (whose `fi` survives at the end of the file) appear to have
# been lost; the text jumps from `<` straight into a comment fragment
# ("…>10% regression = SKIP…"). Restore this span from version control
# before shipping. Left byte-for-byte as found:
write_baseline() { cat > "$BASELINE_FILE" <10% regression = SKIP with REGRESSION detail. # Faster-than-baseline always passes (no upper bound on improvement). # For RSS and latency: higher = worse. For throughput: lower = worse.
diff_metric() { local name="$1" actual="$2" direction="$3" # "lower_is_better" or "higher_is_better" local baseline_val baseline_val=$(jq -r ".metrics.${name} // 0" "$BASELINE_FILE") if awk -v b="$baseline_val" 'BEGIN{exit !(b == 0)}'; then proof_skip "$CASE_ID" "${name}: baseline missing or zero" \ "actual=${actual} ${direction}; baseline.json has no value to compare" return fi local pct pct=$(awk -v a="$actual" -v b="$baseline_val" \ 'BEGIN{printf "%.1f", (a - b) * 100.0 / b}') local detail="actual=${actual} baseline=${baseline_val} delta=${pct}%" if [ "$direction" = "higher_is_better" ]; then # Throughput: actual < baseline*0.9 = regression. if awk -v a="$actual" -v b="$baseline_val" 'BEGIN{exit !(a < b * 0.9)}'; then proof_skip "$CASE_ID" "REGRESSION: ${name}" "$detail" else _proof_record "$CASE_ID" "${name}: within 10% of baseline" pass "≥90% of baseline" "$actual" "$detail" fi else # Latency / RSS: actual > baseline*1.1 = regression. if awk -v a="$actual" -v b="$baseline_val" 'BEGIN{exit !(a > b * 1.1)}'; then proof_skip "$CASE_ID" "REGRESSION: ${name}" "$detail" else _proof_record "$CASE_ID" "${name}: within 10% of baseline" pass "≤110% of baseline" "$actual" "$detail" fi fi } diff_metric "ingest_rows_per_sec" "${ingest_rows_per_sec:-0}" "higher_is_better" diff_metric "query_p50_ms" "${query_p50:-0}" "lower_is_better" diff_metric "query_p95_ms" "${query_p95:-0}" "lower_is_better" diff_metric "vectors_per_sec_add" "${vectors_per_sec:-0}" "higher_is_better" diff_metric "search_p50_ms" "${search_p50:-0}" "lower_is_better" diff_metric "search_p95_ms" "${search_p95:-0}" "lower_is_better" diff_metric "rss_vectord_mb" "${rss_now[vectord]:-0}" "lower_is_better" diff_metric "rss_queryd_mb" "${rss_now[queryd]:-0}" "lower_is_better" fi