#!/usr/bin/env bash # Playbook smoke — learning-loop integration end-to-end. # All assertions go through gateway :3110. # # Validates the full boost cycle: # 1. Build a test corpus with 3 items # 2. Query → get baseline ranking # 3. Record a playbook: query → bottom-ranked answer with score=1.0 # 4. Re-query with use_playbook=true # 5. Assert: the recorded answer's distance ≈ 0.5 × baseline (boost # math: distance' = distance × (1 - 0.5×score)) # 6. Assert: PlaybookBoosted >= 1 in the response # # Requires Ollama on :11434 with nomic-embed-text loaded — Record # embeds the query_text. Skips (exit 0) when Ollama is absent. set -euo pipefail cd "$(dirname "$0")/.." export PATH="$PATH:/usr/local/go/bin" if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then echo "[playbook-smoke] Ollama not reachable on :11434 — skipping" exit 0 fi echo "[playbook-smoke] building stack..." go build -o bin/ ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway pkill -f "bin/(embedd|vectord|matrixd|gateway)" 2>/dev/null || true sleep 0.3 PIDS=() TMP="$(mktemp -d)" CFG="$TMP/playbook.toml" cleanup() { echo "[playbook-smoke] cleanup" for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done rm -rf "$TMP" } trap cleanup EXIT INT TERM cat > "$CFG" </dev/null 2>&1; then return 0; fi sleep 0.05 done return 1 } echo "[playbook-smoke] launching embedd → vectord → matrixd → gateway..." ./bin/embedd -config "$CFG" > /tmp/embedd.log 2>&1 & PIDS+=($!) poll_health 3216 || { echo "embedd failed"; tail /tmp/embedd.log; exit 1; } ./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!) poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; } ./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!) poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; } ./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!) 
# Gateway is the last service launched; every request below goes through :3110.
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }

# Sticky failure flag: assertions set it to 1; the final gate reads it.
FAILED=0

# Embed three corpus items + the query, all via /v1/embed.
echo "[playbook-smoke] embedding 3 corpus items + query..."
EMBEDS="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' \
  -d '{"texts":["alpha staffing query test","bravo distinct content","charlie unrelated topic","alpha staffing query test full prompt"]}')"

# Validate the response before slicing it. Without this, a missing vector
# becomes the JSON literal `null` and is silently posted into the index.
N_VECS="$(jq -r 'if (.vectors | type) == "array" then (.vectors | length) else 0 end' \
  <<<"$EMBEDS" 2>/dev/null || echo 0)"
if ! [[ "$N_VECS" =~ ^[0-9]+$ ]] || [ "$N_VECS" -lt 4 ]; then
  echo " ✗ /v1/embed did not return 4 vectors: $EMBEDS" >&2
  exit 1
fi

V_A="$(jq -c '.vectors[0]' <<<"$EMBEDS")"
V_B="$(jq -c '.vectors[1]' <<<"$EMBEDS")"
V_C="$(jq -c '.vectors[2]' <<<"$EMBEDS")"
V_Q="$(jq -c '.vectors[3]' <<<"$EMBEDS")"

# Take the index dimension from the vectors actually returned rather than
# hard-coding it, so the index always matches what gets added to it.
DIM="$(jq -r 'if type == "array" then length else 0 end' <<<"$V_A")"
if ! [[ "$DIM" =~ ^[0-9]+$ ]] || [ "$DIM" -lt 1 ]; then
  echo " ✗ /v1/embed returned an empty first vector: $V_A" >&2
  exit 1
fi

# Build corpus
echo "[playbook-smoke] create corpus widgets + add 3 items..."
# Response bodies are discarded, so capture the HTTP status and report
# non-2xx answers; curl alone exits 0 on HTTP-level errors. These are
# diagnostics only: the assertions further down decide pass/fail.
CREATE_CODE="$(curl -sS -o /dev/null -w '%{http_code}' \
  -X POST http://127.0.0.1:3110/v1/vectors/index \
  -H 'Content-Type: application/json' \
  -d "$(jq -cn --argjson dim "$DIM" \
        '{name:"widgets", dimension:$dim, distance:"cosine"}')")"
if [[ "$CREATE_CODE" != 2* ]]; then
  echo " ! create index widgets returned HTTP $CREATE_CODE" >&2
fi

ADD_CODE="$(curl -sS -o /dev/null -w '%{http_code}' \
  -X POST http://127.0.0.1:3110/v1/vectors/index/widgets/add \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --argjson va "$V_A" --argjson vb "$V_B" --argjson vc "$V_C" \
        '{items:[
           {id:"widget-a", vector:$va, metadata:{label:"a"}},
           {id:"widget-b", vector:$vb, metadata:{label:"b"}},
           {id:"widget-c", vector:$vc, metadata:{label:"c"}}
         ]}')")"
if [[ "$ADD_CODE" != 2* ]]; then
  echo " ! add items to widgets returned HTTP $ADD_CODE" >&2
fi

# Baseline matrix search (no playbook) — using query_vector to skip
# embedd round-trip and keep the test deterministic on the geometry
# we know.
echo "[playbook-smoke] baseline search (no playbook):"
BASELINE="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --argjson v "$V_Q" '{query_vector:$v, corpora:["widgets"], k:3}')")"

# Parse defensively. Under `set -e`/pipefail a jq failure inside a command
# substitution (error body without .results, non-JSON reply) would abort the
# script before the response is ever printed. `.results[]?` tolerates a
# missing array and the fallbacks turn parse failures into "not found".
BASE_ORDER="$(jq -r '[.results[]?.id] | join(",")' <<<"$BASELINE" 2>/dev/null || true)"
BASE_C_DIST="$(jq -r '[.results[]? | select(.id=="widget-c")] | .[0].distance // -1' \
  <<<"$BASELINE" 2>/dev/null || true)"
# -1 is the "widget-c not in results" sentinel; also used when jq printed nothing.
BASE_C_DIST="${BASE_C_DIST:--1}"
echo " baseline order: $BASE_ORDER widget-c distance=$BASE_C_DIST"
if [ "$BASE_C_DIST" = "-1" ]; then
  # No baseline distance means the later ratio check cannot succeed;
  # show the raw reply so the cause is visible.
  echo " ✗ baseline response has no widget-c result: $BASELINE"; FAILED=1
fi

# Record a playbook entry for the query → widget-c (use the same
# query_text that the playbook will be re-queried by, exact match).
QUERY_TEXT="alpha staffing query test full prompt"
echo "[playbook-smoke] record playbook: ($QUERY_TEXT) → widget-c score=1.0"
RECORD_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/record \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --arg q "$QUERY_TEXT" \
        '{query_text:$q, answer_id:"widget-c", answer_corpus:"widgets", score:1.0, tags:["smoke"]}')")"
# An unparsable reply yields an empty PB_ID, reported by the branch below.
PB_ID="$(jq -r '.playbook_id // empty' <<<"$RECORD_RESP" 2>/dev/null || true)"
if [ -z "$PB_ID" ]; then
  echo " ✗ no playbook_id in response: $RECORD_RESP"; FAILED=1
else
  echo " ✓ playbook_id=$PB_ID"
fi

# Re-search with use_playbook=true. Use query_text so matrixd embeds
# it again (proves end-to-end). The newly-recorded playbook entry has
# the SAME query_text → cosine distance ~0 → boost applies to widget-c.
echo "[playbook-smoke] boosted search (use_playbook=true):"
BOOSTED="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --arg q "$QUERY_TEXT" \
        '{query_text:$q, corpora:["widgets"], k:3, use_playbook:true, playbook_max_distance:0.5}')")"

# Parse defensively: a jq failure inside a command substitution would abort
# the script under `set -e` before any assertion could report. Fallbacks map
# unparsable or incomplete replies to values the assertions below reject.
BOOST_ORDER="$(jq -r '[.results[]?.id] | join(",")' <<<"$BOOSTED" 2>/dev/null || true)"
BOOST_C_DIST="$(jq -r '[.results[]? | select(.id=="widget-c")] | .[0].distance // -1' \
  <<<"$BOOSTED" 2>/dev/null || true)"
BOOST_C_DIST="${BOOST_C_DIST:--1}"
PB_BOOSTED="$(jq -r '.playbook_boosted // 0' <<<"$BOOSTED" 2>/dev/null || true)"
PB_BOOSTED="${PB_BOOSTED:-0}"
echo " boosted order: $BOOST_ORDER widget-c distance=$BOOST_C_DIST playbook_boosted=$PB_BOOSTED"
if [ "$BOOST_C_DIST" = "-1" ]; then
  # Diagnostic only; the ratio assertion below records the failure.
  echo " full: $BOOSTED"
fi

# ── Assertion 1: PlaybookBoosted >= 1 ────────────────────────────
# Check the value is a plain integer first so `[ -ge ]` never sees garbage.
if [[ "$PB_BOOSTED" =~ ^[0-9]+$ ]] && [ "$PB_BOOSTED" -ge 1 ]; then
  echo " ✓ playbook_boosted=$PB_BOOSTED ≥ 1"
else
  echo " ✗ playbook_boosted=$PB_BOOSTED (expected ≥ 1)"; FAILED=1
fi

# ── Assertion 2: widget-c distance halved (score=1.0 → 0.5× factor)
# Allow some tolerance because the query and recorded query may not
# be byte-identical depending on Ollama's tokenization stability.
# A non-positive baseline (including the -1 "missing" sentinel) yields
# ratio -1, which falls outside the accepted band.
RATIO="$(awk -v b="$BASE_C_DIST" -v c="$BOOST_C_DIST" \
  'BEGIN{ if (b<=0) print -1; else print c/b }')"
echo " widget-c distance ratio (boosted/baseline) = $RATIO (expect ≈ 0.5)"
WITHIN="$(awk -v r="$RATIO" 'BEGIN{ print (r>=0.40 && r<=0.60) ? "true" : "false" }')"
if [ "$WITHIN" = "true" ]; then
  echo " ✓ ratio in [0.40, 0.60] — boost applied correctly"
else
  echo " ✗ ratio out of band: $RATIO"; FAILED=1
fi

# ── 4. /matrix/playbooks/bulk — component C (operational rating wiring)
# Two valid entries plus one with an empty query_text; the check below
# expects the reply to count 2 recorded and 1 failed, with per-entry details.
echo "[playbook-smoke] bulk record 3 entries:"
BULK_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/bulk \
  -H 'Content-Type: application/json' \
  -d "$(jq -n '{
        entries: [
          {query_text: "alpha test query", answer_id: "widget-a", answer_corpus: "widgets", score: 0.9},
          {query_text: "bravo test query", answer_id: "widget-b", answer_corpus: "widgets", score: 0.8},
          {query_text: "", answer_id: "x", answer_corpus: "widgets", score: 0.5}
        ]
      }')")"
# Unparsable replies leave these empty, which the comparison below rejects.
RECORDED="$(jq -r '.recorded' <<<"$BULK_RESP" 2>/dev/null || true)"
FAIL="$(jq -r '.failed' <<<"$BULK_RESP" 2>/dev/null || true)"
GOT_PB_A="$(jq -r '.results[0].playbook_id // empty' <<<"$BULK_RESP" 2>/dev/null || true)"
ERR_BAD="$(jq -r '.results[2].error // empty' <<<"$BULK_RESP" 2>/dev/null || true)"
if [ "$RECORDED" = "2" ] && [ "$FAIL" = "1" ] && [ -n "$GOT_PB_A" ] && [ -n "$ERR_BAD" ]; then
  echo " ✓ 2 recorded, 1 failed (empty query_text caught), per-entry IDs/errors returned"
else
  echo " ✗ recorded=$RECORDED failed=$FAIL pb_a=$GOT_PB_A err=$ERR_BAD"
  echo " full: $BULK_RESP"
  FAILED=1
fi

# Final gate: the exit status mirrors the sticky FAILED flag.
if [ "$FAILED" -eq 0 ]; then
  echo "[playbook-smoke] Playbook acceptance gate: PASSED"
  exit 0
else
  echo "[playbook-smoke] Playbook acceptance gate: FAILED"
  exit 1
fi