#!/usr/bin/env bash
# D5 smoke — proves the Day 5 acceptance gate end-to-end.
#
# Validates:
#   - Ingest CSV via D4 → catalog manifest registered
#   - queryd starts AFTER ingest; initial Refresh picks up the dataset
#   - POST /sql with SELECT count(*) FROM <name> → matches CSV row count
#   - POST /sql with SELECT * FROM <name> LIMIT 3 → rows have expected
#     column names + types + values
#   - Re-ingest same CSV → updated_at bumps → next Refresh re-creates
#     the view (we restart queryd to verify the fresh-side path works)
#   - Schema-drift CSV ingest → catalogd 409s → queryd's view stays
#     pointing at the original (no DuckDB session corruption)
#
# NOTE(review): no re-ingest / queryd-restart step is visible in this
# script — confirm whether that bullet is still planned or stale.
#
# Requires storaged + catalogd + ingestd + queryd binaries. MinIO at
# :9000 with bucket lakehouse-go-primary already created (D2/D4 setup).
#
# Usage: ./scripts/d5_smoke.sh

set -euo pipefail

# Run from the repository root (the parent of this script's directory).
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"

echo "[d5-smoke] building all 4 backing services..."
go build -o bin/ ./cmd/storaged ./cmd/catalogd ./cmd/ingestd ./cmd/queryd

# Cleanup any prior processes. pkill exits non-zero when nothing matched,
# which is expected here, hence the deliberate `|| true`.
pkill -f "bin/storaged" 2>/dev/null || true
pkill -f "bin/catalogd" 2>/dev/null || true
pkill -f "bin/ingestd" 2>/dev/null || true
pkill -f "bin/queryd" 2>/dev/null || true
sleep 0.3

# PIDs start empty so cleanup() is safe under `set -u` even when the
# script dies before a given service has been launched.
STORAGED_PID=""; CATALOGD_PID=""; INGESTD_PID=""; QUERYD_PID=""
TMP="$(mktemp -d)"

# cleanup: kill every service launched so far (queryd first, storaged
# last) and remove the scratch directory. Runs from the EXIT trap.
cleanup() {
  local pid
  echo "[d5-smoke] cleanup"
  for pid in "$QUERYD_PID" "$INGESTD_PID" "$CATALOGD_PID" "$STORAGED_PID"; do
    if [[ -n "$pid" ]]; then
      # The process may already be gone; that is not an error.
      kill "$pid" 2>/dev/null || true
    fi
  done
  # `:?` refuses to run rm -rf with an empty path.
  rm -rf -- "${TMP:?}"
}

# cleanup runs exactly once, via EXIT. INT/TERM must terminate the script
# explicitly: a trap handler that merely returns lets bash resume the
# interrupted script wherever `set -e` is suppressed (e.g. inside
# `poll_health ... || { ... }`).
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# poll_health PORT
#   Polls http://127.0.0.1:PORT/health every 50ms for up to ~5 seconds.
#   Returns 0 as soon as curl succeeds, 1 if the deadline passes first.
poll_health() {
  local port="$1"
  local deadline
  deadline=$(( $(date +%s) + 5 ))
  while [[ "$(date +%s)" -lt "$deadline" ]]; do
    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.05
  done
  return 1
}

echo "[d5-smoke] launching storaged → catalogd → ingestd..."
./bin/storaged > /tmp/storaged.log 2>&1 &
STORAGED_PID=$!
poll_health 3211 || { echo "storaged failed"; tail -10 /tmp/storaged.log; exit 1; }

# Dataset name used for the ingest, the SQL view and the storage prefix.
NAME="d5_workers"

# purge_prefix PREFIX
#   Lists objects under PREFIX via storaged and deletes each one.
#   Best-effort: list/delete failures are tolerated so a cold store does
#   not fail the smoke. Keys are read line-by-line so a key containing
#   whitespace or glob characters is passed through intact.
purge_prefix() {
  local prefix="$1" key
  while IFS= read -r key; do
    [[ -n "$key" ]] || continue
    # stdin is redirected so curl can never consume the key list.
    curl -sS -o /dev/null -X DELETE \
      "http://127.0.0.1:3211/storage/delete/$key" </dev/null || true
  done < <(curl -sS "http://127.0.0.1:3211/storage/list?prefix=$prefix" \
    | jq -r '.objects[]?.Key // empty' 2>/dev/null)
}

# Clean any prior catalog manifests + datasets so smoke starts fresh.
# Note: this removes every manifest under _catalog/manifests/, not just
# the one belonging to $NAME.
purge_prefix "_catalog/manifests/"
purge_prefix "datasets/$NAME/"

./bin/catalogd > /tmp/catalogd.log 2>&1 &
CATALOGD_PID=$!
poll_health 3212 || { echo "catalogd failed"; tail -10 /tmp/catalogd.log; exit 1; }

./bin/ingestd > /tmp/ingestd.log 2>&1 &
INGESTD_PID=$!
poll_health 3213 || { echo "ingestd failed"; tail -10 /tmp/ingestd.log; exit 1; }

# Build a small CSV (5 rows × 5 cols) — same shape as D4 smoke so the
# inferred types are familiar.
cat > "$TMP/workers.csv" <<'EOF'
id,name,salary,active,weight
1,Alice,50000,true,165.5
2,Bob,60000,false,180.0
3,Carol,55000,true,135.2
4,Dave,75000,false,200.0
5,Eve,80000,true,150.5
EOF

echo "[d5-smoke] ingest 5-row CSV via D4 path:"
INGEST="$(curl -sS -X POST -F "file=@$TMP/workers.csv" "http://127.0.0.1:3213/ingest?name=$NAME")"
# A non-JSON body must not abort the script under `set -e` before the
# diagnostic below is printed, so a jq failure just yields an empty RC.
RC="$(jq -r '.row_count' <<<"$INGEST" 2>/dev/null || true)"
if [[ "$RC" == "5" ]]; then
  echo "  ✓ ingest row_count=5"
else
  echo "  ✗ ingest → $INGEST"; exit 1
fi

# Launch queryd LAST — its initial Refresh picks up the dataset.
echo "[d5-smoke] launching queryd (initial Refresh picks up $NAME)..."
./bin/queryd > /tmp/queryd.log 2>&1 &
QUERYD_PID=$!
poll_health 3214 || { echo "queryd failed"; tail -20 /tmp/queryd.log; exit 1; }

# Checks below accumulate into FAILED instead of exiting, so one run
# reports every broken assertion. For that to hold under
# `set -euo pipefail`, a failed curl or a jq parse error must not abort
# the script: each capture falls back to an empty value (`|| true`) and
# the comparison that follows reports the failure.
FAILED=0

echo "[d5-smoke] POST /sql SELECT count(*) FROM $NAME:"
COUNT_RESP="$(curl -sS -X POST http://127.0.0.1:3214/sql \
  -H 'Content-Type: application/json' \
  -d "{\"sql\":\"SELECT count(*) AS n FROM \\\"$NAME\\\"\"}")" || true
N="$(jq -r '.rows[0][0]' <<<"$COUNT_RESP" 2>/dev/null || true)"
if [[ "$N" == "5" ]]; then
  echo "  ✓ count(*)=5"
else
  echo "  ✗ count → $COUNT_RESP"
  echo "  queryd log tail:"; tail -20 /tmp/queryd.log
  FAILED=1
fi

echo "[d5-smoke] POST /sql SELECT * FROM $NAME LIMIT 3:"
# The query actually sent projects id, name, salary ordered by id, so
# row 0 is deterministic.
ROWS_RESP="$(curl -sS -X POST http://127.0.0.1:3214/sql \
  -H 'Content-Type: application/json' \
  -d "{\"sql\":\"SELECT id, name, salary FROM \\\"$NAME\\\" ORDER BY id LIMIT 3\"}")" || true
ROW_COUNT="$(jq -r '.row_count' <<<"$ROWS_RESP" 2>/dev/null || true)"
COL0_NAME="$(jq -r '.columns[0].name' <<<"$ROWS_RESP" 2>/dev/null || true)"
COL1_NAME="$(jq -r '.columns[1].name' <<<"$ROWS_RESP" 2>/dev/null || true)"
ROW0_ID="$(jq -r '.rows[0][0]' <<<"$ROWS_RESP" 2>/dev/null || true)"
ROW0_NAME="$(jq -r '.rows[0][1]' <<<"$ROWS_RESP" 2>/dev/null || true)"
if [[ "$ROW_COUNT" == "3" && "$COL0_NAME" == "id" && "$COL1_NAME" == "name" \
   && "$ROW0_ID" == "1" && "$ROW0_NAME" == "Alice" ]]; then
  echo "  ✓ rows[0] = (id=1, name=Alice), columns=[id, name, salary]"
else
  echo "  ✗ rows → $ROWS_RESP"
  FAILED=1
fi

echo "[d5-smoke] schema-drift ingest 409s; existing view still queries:"
# Same dataset name, but the first column is renamed (id → user_id).
cat > "$TMP/workers_drift.csv" <<'EOF'
user_id,name,salary,active,weight
99,Mallory,99999,true,99.9
EOF
# On a transport failure curl still prints 000 for %{http_code}, which
# then fails the comparison below.
HTTP="$(curl -sS -o "$TMP/conflict.out" -w '%{http_code}' -X POST \
  -F "file=@$TMP/workers_drift.csv" "http://127.0.0.1:3213/ingest?name=$NAME")" || true
if [[ "$HTTP" == "409" ]]; then
  echo "  ✓ drift → 409"
else
  echo "  ✗ drift → $HTTP"; FAILED=1
fi

# Verify the original view is still queryable.
# Post-drift count: the rejected ingest must not have changed the view.
# A curl/jq failure falls through to the ✗ branch (empty N) instead of
# aborting under `set -e`, so the verdict and artifact cleanup still run.
COUNT_RESP="$(curl -sS -X POST http://127.0.0.1:3214/sql \
  -H 'Content-Type: application/json' \
  -d "{\"sql\":\"SELECT count(*) FROM \\\"$NAME\\\"\"}")" || true
N="$(jq -r '.rows[0][0]' <<<"$COUNT_RESP" 2>/dev/null || true)"
if [[ "$N" == "5" ]]; then
  echo "  ✓ post-drift count(*)=5 (view unchanged)"
else
  echo "  ✗ post-drift count → $COUNT_RESP"
  FAILED=1
fi

echo "[d5-smoke] error path: SELECT FROM nonexistent → 400:"
HTTP="$(curl -sS -o "$TMP/err.out" -w '%{http_code}' -X POST http://127.0.0.1:3214/sql \
  -H 'Content-Type: application/json' \
  -d '{"sql":"SELECT * FROM no_such_table"}')" || true
if [[ "$HTTP" == "400" ]]; then
  echo "  ✓ unknown table → 400"
else
  echo "  ✗ unknown table → $HTTP body=$(cat "$TMP/err.out")"
  FAILED=1
fi

# Cleanup smoke artifacts (best-effort). Keys are read line-by-line so a
# key containing whitespace or glob characters is not split or expanded.
while IFS= read -r k; do
  [[ -n "$k" ]] || continue
  # stdin is redirected so curl can never consume the key list.
  curl -sS -o /dev/null -X DELETE \
    "http://127.0.0.1:3211/storage/delete/$k" </dev/null || true
done < <(curl -sS "http://127.0.0.1:3211/storage/list?prefix=datasets/$NAME/" \
  | jq -r '.objects[]?.Key // empty' 2>/dev/null)
curl -sS -o /dev/null -X DELETE "http://127.0.0.1:3211/storage/delete/_catalog/manifests/$NAME.parquet" || true

# Final verdict: exit status mirrors the FAILED accumulator.
if [[ "$FAILED" -eq 0 ]]; then
  echo "[d5-smoke] D5 acceptance gate: PASSED"
  exit 0
else
  echo "[d5-smoke] D5 acceptance gate: FAILED"
  exit 1
fi