#!/usr/bin/env bash # Matrix smoke — multi-corpus retrieve+merge via matrixd (SPEC §3.4). # All assertions go through gateway :3110. # # Validates: # - Multi-corpus search returns hits from BOTH corpora # - Each result carries its corpus attribution (load-bearing — losing # it defeats the matrix's purpose) # - Merged top-k is ordered by distance across corpora # - /matrix/corpora lists known indexes # - Empty corpora list → 400 # - Bad corpus name → 502 (matrix bubbles vectord's 404 as upstream error) # # Uses query_vector (not query_text) to skip the embedd dependency so # this smoke runs without Ollama. End-to-end embed→matrix→search has # its own integration test (next commit). # # Usage: ./scripts/matrix_smoke.sh set -euo pipefail cd "$(dirname "$0")/.." export PATH="$PATH:/usr/local/go/bin" echo "[matrix-smoke] building matrixd + vectord + gateway..." go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true sleep 0.3 PIDS=() TMP="$(mktemp -d)" CFG="$TMP/matrix.toml" cleanup() { echo "[matrix-smoke] cleanup" for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done rm -rf "$TMP" } trap cleanup EXIT INT TERM # Custom toml: vectord persistence disabled (don't pollute storaged # state with the test corpora). cat > "$CFG" </dev/null 2>&1; then return 0; fi sleep 0.05 done return 1 } echo "[matrix-smoke] launching vectord → matrixd → gateway..." ./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!) poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; } ./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!) poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; } ./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!) 
# Gate on the gateway being healthy before issuing any request through it.
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }

# FAILED flips to 1 on the first failed assertion and decides the exit code.
FAILED=0
# Dimension of every test vector and of both indexes.
DIM=4

# Create two corpora — corpus_a and corpus_b — each with a few
# vectors at known distances from a chosen query vector.
echo "[matrix-smoke] create two corpora:"
CREATED_OK=1
for c in corpus_a corpus_b; do
  HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/vectors/index \
    -H 'Content-Type: application/json' \
    -d "{\"name\":\"$c\",\"dimension\":$DIM,\"distance\":\"euclidean\"}")"
  if [ "$HTTP" != "201" ]; then
    echo " ✗ create $c → $HTTP"
    FAILED=1
    CREATED_OK=0
  fi
done
# Only claim success when every create actually returned 201.
if [ "$CREATED_OK" -eq 1 ]; then
  echo " ✓ corpus_a and corpus_b created"
fi

#######################################
# POST a batch of items to one corpus through the gateway.
# Arguments: $1 - corpus (index) name
#            $2 - JSON request body ({"items":[...]})
# Outputs:   a " ✗ ..." line on stdout when the request is not accepted
# Returns:   0 on any 2xx status, 1 otherwise (including transport
#            errors, where curl reports status 000)
#######################################
add_vectors() {
  local corpus="$1" payload="$2" status
  # `|| true`: a transport failure must reach the status check below
  # instead of aborting the whole script under `set -e`.
  status="$(curl -sS -o /dev/null -w '%{http_code}' -X POST \
    "http://127.0.0.1:3110/v1/vectors/index/$corpus/add" \
    -H 'Content-Type: application/json' \
    -d "$payload")" || true
  case "$status" in
    2??) return 0 ;;
    *)
      echo " ✗ add to $corpus → $status"
      return 1
      ;;
  esac
}

# Add vectors. Use euclidean distance for predictable arithmetic.
# Query vector will be [1,0,0,0]. Distances from it:
#   corpus_a/a-near : [1.1,  0,   0, 0] ≈ 0.1
#   corpus_a/a-mid  : [1,    0.5, 0, 0] ≈ 0.5
#   corpus_a/a-far  : [3,    0,   0, 0] ≈ 2.0
#   corpus_b/b-near : [1.05, 0,   0, 0] ≈ 0.05 (closest globally)
#   corpus_b/b-mid  : [1,    0.7, 0, 0] ≈ 0.7
#   corpus_b/b-far  : [4,    0,   0, 0] ≈ 3.0
echo "[matrix-smoke] add vectors to both corpora:"
LOADED_OK=1
add_vectors corpus_a '{"items":[
  {"id":"a-near","vector":[1.1,0,0,0],"metadata":{"label":"a near"}},
  {"id":"a-mid","vector":[1,0.5,0,0],"metadata":{"label":"a mid"}},
  {"id":"a-far","vector":[3,0,0,0],"metadata":{"label":"a far"}}
]}' || { FAILED=1; LOADED_OK=0; }
add_vectors corpus_b '{"items":[
  {"id":"b-near","vector":[1.05,0,0,0],"metadata":{"label":"b near"}},
  {"id":"b-mid","vector":[1,0.7,0,0],"metadata":{"label":"b mid"}},
  {"id":"b-far","vector":[4,0,0,0],"metadata":{"label":"b far"}}
]}' || { FAILED=1; LOADED_OK=0; }
# A rejected load would otherwise only surface later as confusing
# search-assertion failures, so report it here.
if [ "$LOADED_OK" -eq 1 ]; then
  echo " ✓ 3 + 3 vectors loaded"
fi

# ── 1.
# /matrix/corpora lists both ─────────────────────────────────
# The listing must report a count of 2 and contain both test corpora.
echo "[matrix-smoke] /matrix/corpora lists both:"
RESP="$(curl -sS http://127.0.0.1:3110/v1/matrix/corpora)"
COUNT="$(jq -r '.count' <<<"$RESP")"
HAS_A="$(jq -r '.corpora | index("corpus_a") != null' <<<"$RESP")"
HAS_B="$(jq -r '.corpora | index("corpus_b") != null' <<<"$RESP")"
if [[ "$COUNT" == "2" && "$HAS_A" == "true" && "$HAS_B" == "true" ]]; then
  echo " ✓ count=2, both corpora listed"
else
  echo " ✗ resp: $RESP"
  FAILED=1
fi

# ── 2. multi-corpus search returns hits from BOTH ─────────────────
# Ask for the 4 best hits overall, drawing up to 3 from each corpus.
# RESP keeps this search response; the checks that follow read it.
echo "[matrix-smoke] /matrix/search multi-corpus retrieve+merge:"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
  -H 'Content-Type: application/json' \
  -d '{"query_vector":[1,0,0,0],"corpora":["corpus_a","corpus_b"],"k":4,"per_corpus_k":3}')"
RESULTS_LEN="$(jq -r '.results | length' <<<"$RESP")"
A_COUNT="$(jq -r '.per_corpus_counts.corpus_a' <<<"$RESP")"
B_COUNT="$(jq -r '.per_corpus_counts.corpus_b' <<<"$RESP")"
HAS_A_RESULT="$(jq -r '[.results[] | select(.corpus=="corpus_a")] | length > 0' <<<"$RESP")"
HAS_B_RESULT="$(jq -r '[.results[] | select(.corpus=="corpus_b")] | length > 0' <<<"$RESP")"
if [[ "$RESULTS_LEN" == "4" && "$A_COUNT" == "3" && "$B_COUNT" == "3" \
  && "$HAS_A_RESULT" == "true" && "$HAS_B_RESULT" == "true" ]]; then
  echo " ✓ 4 merged results · 3+3 per-corpus · both corpora represented"
else
  echo " ✗ len=$RESULTS_LEN per_corpus={a:$A_COUNT b:$B_COUNT} a_hit=$HAS_A_RESULT b_hit=$HAS_B_RESULT"
  echo " full: $RESP"
  FAILED=1
fi

# ── 3.
# distance-merged top-k correct across corpora ───────────────
# The first merged result must be b-near, attributed to corpus_b.
echo "[matrix-smoke] top hit comes from corpus_b (b-near is globally closest):"
TOP_ID="$(jq -r '.results[0].id' <<<"$RESP")"
TOP_CORPUS="$(jq -r '.results[0].corpus' <<<"$RESP")"
if [[ "$TOP_ID" == "b-near" && "$TOP_CORPUS" == "corpus_b" ]]; then
  echo " ✓ top hit: id=b-near corpus=corpus_b (closer than corpus_a's a-near)"
else
  echo " ✗ top: id=$TOP_ID corpus=$TOP_CORPUS (expected b-near/corpus_b)"
  FAILED=1
fi

# ── 4. corpus attribution preserved in metadata ───────────────────
# The label stored with b-near must come back on the merged top hit.
echo "[matrix-smoke] metadata preserved on merged results:"
TOP_LABEL="$(jq -r '.results[0].metadata.label' <<<"$RESP")"
if [[ "$TOP_LABEL" == "b near" ]]; then
  echo " ✓ metadata.label round-trips through matrix"
else
  echo " ✗ label=$TOP_LABEL"
  FAILED=1
fi

# ── 5. distances ascending in result list ─────────────────────────
# jq compares the distance list against its own sorted copy.
echo "[matrix-smoke] results sorted by distance ascending:"
ASCENDING="$(jq -r '[.results[].distance] | . == (sort)' <<<"$RESP")"
if [[ "$ASCENDING" == "true" ]]; then
  echo " ✓ distances ascending"
else
  echo " ✗ distances not sorted: $(jq -c '[.results[].distance]' <<<"$RESP")"
  FAILED=1
fi

# ── 6.
# negative paths ─────────────────────────────────────────────

# POST the JSON body in $1 to /v1/matrix/search via the gateway and
# print only the HTTP status code; the response body is discarded.
matrix_search_status() {
  curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/search \
    -H 'Content-Type: application/json' \
    -d "$1"
}

echo "[matrix-smoke] empty corpora → 400:"
HTTP_400="$(matrix_search_status '{"query_vector":[1,0,0,0],"corpora":[],"k":4}')"

echo "[matrix-smoke] missing corpus name → 502:"
HTTP_502="$(matrix_search_status '{"query_vector":[1,0,0,0],"corpora":["does_not_exist"],"k":4}')"

echo "[matrix-smoke] no query (empty text and vector) → 400:"
HTTP_400b="$(matrix_search_status '{"corpora":["corpus_a"],"k":4}')"

# All three rejections are judged together and reported on one line.
if [[ "$HTTP_400" == "400" && "$HTTP_502" == "502" && "$HTTP_400b" == "400" ]]; then
  echo " ✓ empty=400, missing-corpus=502, no-query=400"
else
  echo " ✗ empty=$HTTP_400 missing=$HTTP_502 noquery=$HTTP_400b"
  FAILED=1
fi

# Final verdict: any recorded failure makes the script exit non-zero.
if (( FAILED != 0 )); then
  echo "[matrix-smoke] Matrix acceptance gate: FAILED"
  exit 1
fi
echo "[matrix-smoke] Matrix acceptance gate: PASSED"
exit 0