#!/usr/bin/env bash
# lance smoke — gates the 5 /vectors/lance/* HTTP routes (search, doc,
# index, append, migrate). Only the read paths are exercised here so a
# CI run doesn't mutate state. Migrate + index + append have shape
# probes (request bodies are well-formed) but ride the not-found path
# that the 2026-05-02 audit added.
#
# Targets the live gateway at $LH_GATEWAY (default :3100). Uses an
# existing on-disk Lance dataset — `workers_500k_v1`, overridable via
# $LH_LANCE_DATASET — so no migration setup is needed. If the dataset
# is missing, the search and doc probes fail.
#
# Surfaced 2026-05-02: the lance crates had zero tests + no smoke;
# substrate change to lance_backend.rs would silently break the live
# surface. This smoke is the regression gate.
#
# Requires: curl, jq.
#
# Usage:
#   ./scripts/lance_smoke.sh
#   LH_GATEWAY=http://127.0.0.1:3100 ./scripts/lance_smoke.sh
#
# Exit status: 0 when every probe passes, non-zero otherwise.

set -euo pipefail

readonly GATEWAY="${LH_GATEWAY:-http://127.0.0.1:3100}"
readonly DATASET="${LH_LANCE_DATASET:-workers_500k_v1}"
readonly PREFIX="$GATEWAY/vectors/lance"
# Filesystem fragments that must never show up in an error body.
readonly LEAK_PATTERN='/home/|/root/\.cargo/'

PASS=0
FAIL=0
TMP_DIR=""

# Removes the scratch directory; installed as the EXIT trap by main.
cleanup() {
  if [[ -n "$TMP_DIR" ]]; then
    rm -rf -- "$TMP_DIR"
  fi
}

#######################################
# Runs a check command and records the outcome.
# Globals:   PASS, FAIL (incremented)
# Arguments: $1 - label to print; remaining args - command to run
# Outputs:   one ✓/✗ line on stdout
# Returns:   0 always, so a failed check never trips `set -e`
#######################################
probe() {
  local label="$1"
  shift
  if "$@"; then
    echo " ✓ $label"
    PASS=$((PASS + 1))
  else
    echo " ✗ $label"
    FAIL=$((FAIL + 1))
  fi
}

#######################################
# Performs one HTTP request (5s timeout) and reports its status code.
# Arguments: $1 - file that receives the response body
#            remaining args - passed to curl (method, URL, headers, data)
# Outputs:   the HTTP status code on stdout; "000" when curl itself failed
# Returns:   0 always — callers judge the code, the script must not abort
#######################################
http_status() {
  local out="$1"
  shift
  local code
  # Pre-create the body file so later reads never hit a missing path.
  : >"$out"
  code=$(curl -sS -m 5 -o "$out" -w '%{http_code}' "$@") || code="000"
  printf '%s' "$code"
}

# Succeeds when the search response in $1 used lance_ivf_pq and holds
# exactly three results. The payload goes to jq on stdin, never through
# a shell string, so quotes inside documents cannot break the check.
search_has_top3() {
  jq -e '.method == "lance_ivf_pq" and (.results | length) == 3' \
    <<<"$1" >/dev/null
}

# Succeeds when fetching doc id $1 returns a row whose doc_id equals $1.
# An empty id (the search gave nothing to fetch) is a failure.
doc_roundtrips() {
  local id="$1"
  [[ -n "$id" ]] || return 1
  curl -sf -m 5 "$PREFIX/doc/$DATASET/$id" \
    | jq -e --arg id "$id" '.row.doc_id == $id' >/dev/null
}

# Succeeds when a response was received (status $1 is not "000") and NO
# line of body file $2 contains a leaked filesystem path.
response_is_sanitized() {
  local status="$1" body_file="$2"
  [[ "$status" != "000" ]] || return 1
  if grep -qE -- "$LEAK_PATTERN" "$body_file"; then
    return 1
  fi
  return 0
}

# Succeeds when status $1 shows the migrate route answered with something
# other than 404 or 200. "000" (no response at all) is rejected too.
migrate_route_registered() {
  [[ "$1" != "000" && "$1" != "404" && "$1" != "200" ]]
}

main() {
  local resp doc_id status body_file

  TMP_DIR=$(mktemp -d) || {
    echo "[lance-smoke] mktemp failed" >&2
    exit 1
  }
  trap cleanup EXIT

  echo "[lance-smoke] gateway=$GATEWAY dataset=$DATASET"

  # ── 0. Gateway alive ─────────────────────────────────────────────
  probe "gateway /v1/health responds" \
    curl -sf -m 3 "$GATEWAY/v1/health" -o /dev/null

  # ── 1. Search returns IVF_PQ results on existing dataset ────────
  resp=$(curl -sS -m 30 -X POST "$PREFIX/search/$DATASET" \
    -H 'Content-Type: application/json' \
    -d '{"query":"forklift operator","top_k":3}' 2>/dev/null) || resp='{}'
  probe "search/$DATASET returns top-3 lance_ivf_pq results" \
    search_has_top3 "$resp"

  # Capture one doc_id from those results so the next probe has something
  # real to fetch. A body jq cannot read yields an empty id, not an abort.
  doc_id=$(jq -r '.results[0].doc_id // ""' <<<"$resp" 2>/dev/null) || doc_id=""

  # ── 2. get_doc by id returns the row ────────────────────────────
  probe "doc/$DATASET/ returns full row" \
    doc_roundtrips "$doc_id"

  # ── 3. get_doc with bogus id returns 404 (not 500) ──────────────
  status=$(http_status /dev/null "$PREFIX/doc/$DATASET/W500K-NOT-A-REAL-ID-00000")
  probe "doc/$DATASET/ → 404" \
    test "$status" = "404"

  # ── 4. search on missing dataset returns 404 + sanitized message ─
  body_file="$TMP_DIR/search_missing.json"
  status=$(http_status "$body_file" \
    -X POST "$PREFIX/search/no-such-dataset-${RANDOM}" \
    -H 'Content-Type: application/json' \
    -d '{"query":"x","top_k":1}')
  probe "search/ → 404 (was 500 pre-2026-05-02)" \
    test "$status" = "404"
  # The sanitizer fix specifically: no /home/ or /root/.cargo/ in body.
  probe "search/ body sanitized — no filesystem leak" \
    response_is_sanitized "$status" "$body_file"

  # ── 5. build_index on missing dataset also sanitized ────────────
  body_file="$TMP_DIR/index_missing.json"
  status=$(http_status "$body_file" \
    -X POST "$PREFIX/index/no-such-dataset-${RANDOM}" \
    -H 'Content-Type: application/json' \
    -d '{}')
  probe "index/ body sanitized" \
    response_is_sanitized "$status" "$body_file"

  # ── 6. append validates input shape (rejects empty rows array) ──
  status=$(http_status /dev/null \
    -X POST "$PREFIX/append/$DATASET" \
    -H 'Content-Type: application/json' \
    -d '{"rows":[]}')
  probe "append with empty rows[] → 400" \
    test "$status" = "400"

  # ── 7. migrate route is reachable (POST without body returns a real error, not 404) ──
  status=$(http_status /dev/null \
    -X POST "$PREFIX/migrate/probe-not-real-${RANDOM}?bucket=primary")
  # Should be 4xx (bad request shape), NOT 404 (route registered) and NOT 200.
  probe "migrate route registered (non-404, non-200 on empty body)" \
    migrate_route_registered "$status"

  echo "[lance-smoke] $PASS PASS / $FAIL FAIL"
  [[ "$FAIL" -eq 0 ]]
}

main "$@"