POST /v1/matrix/playbooks/bulk accepts an array of playbook entries
and records each independently — a per-entry failure does not abort
the batch. Designed for two operational use cases:
1. Backfilling historical placement data into the playbook
substrate (the Rust system has 4,701 fill operations recorded
with embeddings; that data deserves to feed the Go learning
loop without a 4,701-call procedural script).
2. Batched click-tracking from a session's worth of coordinator
interactions, posted once at idle rather than per-click.
Per-entry response shape: {index, playbook_id} on success or
{index, error} on failure. Caller can inspect failures without
diffing.
Smoke (scripts/playbook_smoke.sh, new assertion #4):
Bulk POST 3 entries: 2 valid (alpha→widget-a, bravo→widget-b) +
1 invalid (empty query_text). Verifies recorded=2, failed=1,
the 2 valid ones get playbook_ids back, and the invalid one
surfaces its validation error in-line.
Single-record /matrix/playbooks/record from 06e7152 still works
unchanged; bulk is additive. The corpus field can be set per-
entry or once at the batch level (entry-level wins on collision).
Per the small-model autonomous pipeline framing, this is the
"playbook gets denser with each iteration" mechanism. Click
tracking → bulk POST → playbook entries → future similar queries
get those answers boosted via the existing /matrix/search
use_playbook path. The learning loop now has both inflows wired
(single + bulk) — what remains is the demo UI shim that calls
/feedback on result interaction (deferred — no Go demo UI yet).
15-smoke regression all green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
199 lines
8.2 KiB
Bash
Executable File
199 lines
8.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# Playbook smoke — learning-loop integration end-to-end.
# All assertions go through gateway :3110.
#
# Validates the full boost cycle:
#   1. Build a test corpus with 3 items
#   2. Query → get baseline ranking
#   3. Record a playbook: query → bottom-ranked answer with score=1.0
#   4. Re-query with use_playbook=true
#   5. Assert: the recorded answer's distance ≈ 0.5 × baseline (boost
#      math: distance' = distance × (1 - 0.5×score))
#   6. Assert: PlaybookBoosted >= 1 in the response
#   7. Bulk-record 3 entries (2 valid, 1 with an empty query_text) and
#      assert recorded=2, failed=1 with per-entry ids/errors
#
# Requires Ollama on :11434 with nomic-embed-text loaded — Record
# embeds the query_text. Skips (exit 0) when Ollama is absent.

# Strict mode: abort on unhandled errors, unset variables and failures
# anywhere in a pipeline.
set -euo pipefail

# Work from the parent of this script's directory so the relative
# ./cmd and bin/ paths used below resolve.
cd "$(dirname "$0")/.."

# Append the /usr/local/go/bin directory so `go` is found when it is
# installed there but not already on PATH.
export PATH="$PATH:/usr/local/go/bin"
# Probe Ollama's /api/tags with a 3 s cap. When the probe fails the whole
# smoke test is skipped with a success status rather than reported as a
# failure.
if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
  echo "[playbook-smoke] Ollama not reachable on :11434 — skipping"
  exit 0
fi
# Compile the four daemons this test drives into bin/.
echo "[playbook-smoke] building stack..."
go build -o bin/ ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway

# Kill any already-running copies of those binaries. pkill exits non-zero
# when nothing matched, so "|| true" keeps set -e from aborting here.
pkill -f "bin/(embedd|vectord|matrixd|gateway)" 2>/dev/null || true
# Short pause before relaunching — presumably to let the old processes
# release their ports (TODO confirm).
sleep 0.3
# PIDs of the daemons started by this script; appended to at launch time
# and read by cleanup.
PIDS=()
# Private scratch directory; it holds the generated config file and is
# removed by cleanup.
TMP="$(mktemp -d)"
CFG="$TMP/playbook.toml"
#######################################
# Stop every daemon recorded in PIDS and delete the scratch directory.
# Globals:   PIDS (read), TMP (read)
# Arguments: none
# Outputs:   one progress line on stdout
# Returns:   exit status of the final rm
#######################################
cleanup() {
  local pid
  echo "[playbook-smoke] cleanup"
  # ${PIDS[@]+...} expands to nothing when the array is empty. A bare
  # "${PIDS[@]}" is an "unbound variable" error under set -u on bash < 4.4,
  # which would abort cleanup before the scratch directory is removed.
  for pid in ${PIDS[@]+"${PIDS[@]}"}; do
    if [ -n "$pid" ]; then
      # Best effort: the process may already have exited.
      kill "$pid" 2>/dev/null || true
    fi
  done
  rm -rf -- "$TMP"
}
# Run cleanup exactly once, on every exit path. INT and TERM are turned
# into ordinary exits (128 + signal number) so that the EXIT trap does the
# cleanup and the script stops; a handler that merely returns would let
# execution continue after the daemons have been killed.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# Write the config file passed to every daemon via -config. The heredoc
# delimiter is quoted so the TOML body is taken literally; it contains
# nothing that needs shell expansion.
cat > "$CFG" <<'EOF'
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"

[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""

[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
#######################################
# Poll http://127.0.0.1:PORT/health until curl succeeds or time runs out.
# curl is called without --fail, so any HTTP response counts as success;
# only connection-level failures keep the loop going.
# Arguments:
#   $1 - TCP port on 127.0.0.1
#   $2 - optional overall budget in whole seconds (default: 5)
# Returns:
#   0 as soon as one probe succeeds, 1 once the deadline has passed
#######################################
poll_health() {
  local port="$1"
  local budget="${2:-5}"
  local deadline
  deadline=$(( $(date +%s) + budget ))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.05
  done
  return 1
}
echo "[playbook-smoke] launching embedd → vectord → matrixd → gateway..."
# Start the daemons one at a time, in the order announced above. Each entry
# is "<binary>:<health port>". A daemon's output goes to /tmp/<binary>.log,
# its PID is recorded for cleanup, and the next daemon is not started until
# this one answers on /health. On timeout the log tail is printed and the
# script exits 1.
for svc in embedd:3216 vectord:3215 matrixd:3218 gateway:3110; do
  svc_name="${svc%%:*}"
  svc_port="${svc##*:}"
  "./bin/$svc_name" -config "$CFG" > "/tmp/$svc_name.log" 2>&1 &
  PIDS+=("$!")
  if ! poll_health "$svc_port"; then
    echo "$svc_name failed"
    tail "/tmp/$svc_name.log"
    exit 1
  fi
done
# Sticky failure flag: each assertion sets it to 1 on failure and the final
# gate turns it into the exit status.
FAILED=0

# Embed three corpus items + the query, all via /v1/embed.
echo "[playbook-smoke] embedding 3 corpus items + query..."
EMBEDS="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' \
  -d '{"texts":["alpha staffing query test","bravo distinct content","charlie unrelated topic","alpha staffing query test full prompt"]}')"
# Relies on .vectors being in request order: 0-2 are the corpus items,
# 3 is the query.
V_A="$(jq -c '.vectors[0]' <<<"$EMBEDS")"
V_B="$(jq -c '.vectors[1]' <<<"$EMBEDS")"
V_C="$(jq -c '.vectors[2]' <<<"$EMBEDS")"
V_Q="$(jq -c '.vectors[3]' <<<"$EMBEDS")"
# Build corpus: create the "widgets" index (768-dim, cosine), then add the
# three embedded items as widget-a/b/c. Response bodies are discarded.
echo "[playbook-smoke] create corpus widgets + add 3 items..."
curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index \
  -H 'Content-Type: application/json' \
  -d '{"name":"widgets","dimension":768,"distance":"cosine"}'
curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index/widgets/add \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --argjson va "$V_A" --argjson vb "$V_B" --argjson vc "$V_C" \
    '{items:[
      {id:"widget-a", vector:$va, metadata:{label:"a"}},
      {id:"widget-b", vector:$vb, metadata:{label:"b"}},
      {id:"widget-c", vector:$vc, metadata:{label:"c"}}
    ]}')"
# Baseline matrix search (no playbook) — using query_vector to skip
# embedd round-trip and keep the test deterministic on the geometry
# we know.
echo "[playbook-smoke] baseline search (no playbook):"
BASELINE="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --argjson v "$V_Q" '{query_vector:$v, corpora:["widgets"], k:3}')")"
# Result ids in ranked order, comma-joined, for the log line below.
BASE_ORDER="$(jq -r '[.results[].id] | join(",")' <<<"$BASELINE")"
# Distance reported for widget-c; -1 when widget-c is not in the results.
BASE_C_DIST="$(jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1' <<<"$BASELINE")"
echo " baseline order: $BASE_ORDER widget-c distance=$BASE_C_DIST"
# Record a playbook entry for the query → widget-c (use the same
# query_text that the playbook will be re-queried by, exact match).
QUERY_TEXT="alpha staffing query test full prompt"
echo "[playbook-smoke] record playbook: ($QUERY_TEXT) → widget-c score=1.0"
RECORD_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/record \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --arg q "$QUERY_TEXT" \
    '{query_text:$q, answer_id:"widget-c", answer_corpus:"widgets", score:1.0, tags:["smoke"]}')")"
# A response without a playbook_id counts as a failed record.
PB_ID="$(jq -r '.playbook_id // empty' <<<"$RECORD_RESP")"
if [ -n "$PB_ID" ]; then
  echo " ✓ playbook_id=$PB_ID"
else
  echo " ✗ no playbook_id in response: $RECORD_RESP"
  FAILED=1
fi
# Re-search with use_playbook=true. Use query_text so matrixd embeds
# it again (proves end-to-end). The newly-recorded playbook entry has
# the SAME query_text → cosine distance ~0 → boost applies to widget-c.
echo "[playbook-smoke] boosted search (use_playbook=true):"
BOOSTED="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --arg q "$QUERY_TEXT" \
    '{query_text:$q, corpora:["widgets"], k:3, use_playbook:true, playbook_max_distance:0.5}')")"
BOOST_ORDER="$(jq -r '[.results[].id] | join(",")' <<<"$BOOSTED")"
# Same extraction as for the baseline: -1 when widget-c is absent.
BOOST_C_DIST="$(jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1' <<<"$BOOSTED")"
# A missing playbook_boosted field is treated as 0.
PB_BOOSTED="$(jq -r '.playbook_boosted // 0' <<<"$BOOSTED")"
echo " boosted order: $BOOST_ORDER widget-c distance=$BOOST_C_DIST playbook_boosted=$PB_BOOSTED"
# ── Assertion 1: PlaybookBoosted >= 1 ────────────────────────────
#######################################
# Report whether a playbook_boosted count is at least 1.
# The value is checked to be a plain non-negative integer first, so a
# non-numeric value is reported as a normal failure instead of making
# `[ ... -ge ... ]` print "integer expression expected".
# Arguments: $1 - the playbook_boosted value from the search response
# Outputs:   one ✓/✗ line on stdout
# Returns:   0 when $1 is an integer >= 1, 1 otherwise
#######################################
check_playbook_boosted() {
  local boosted="$1"
  if [[ "$boosted" =~ ^[0-9]+$ ]] && [ "$boosted" -ge 1 ]; then
    echo " ✓ playbook_boosted=$boosted ≥ 1"
    return 0
  fi
  echo " ✗ playbook_boosted=$boosted (expected ≥ 1)"
  return 1
}
check_playbook_boosted "$PB_BOOSTED" || FAILED=1
# ── Assertion 2: widget-c distance halved (score=1.0 → 0.5× factor)
# Allow some tolerance because the query and recorded query may not
# be byte-identical depending on Ollama's tokenization stability.

# distance_ratio BASE BOOSTED — print BOOSTED/BASE, or -1 when BASE <= 0
# (which covers the -1 "widget-c not found" sentinel).
distance_ratio() {
  awk -v b="$1" -v c="$2" 'BEGIN{ if (b<=0) print -1; else print c/b }'
}

# ratio_in_band RATIO — print "true" when 0.40 <= RATIO <= 0.60, else "false".
ratio_in_band() {
  awk -v r="$1" 'BEGIN{ print ((r>=0.40 && r<=0.60) ? "true" : "false") }'
}

RATIO="$(distance_ratio "$BASE_C_DIST" "$BOOST_C_DIST")"
echo " widget-c distance ratio (boosted/baseline) = $RATIO (expect ≈ 0.5)"
WITHIN="$(ratio_in_band "$RATIO")"
if [ "$WITHIN" = "true" ]; then
  echo " ✓ ratio in [0.40, 0.60] — boost applied correctly"
else
  echo " ✗ ratio out of band: $RATIO"
  FAILED=1
fi
# ── 4. /matrix/playbooks/bulk — component C (operational rating wiring)
# Post three entries in one request: two valid ones and one with an empty
# query_text that is expected to be rejected.
echo "[playbook-smoke] bulk record 3 entries:"
BULK_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/bulk \
  -H 'Content-Type: application/json' \
  -d "$(jq -n '{
    entries: [
      {query_text: "alpha test query", answer_id: "widget-a", answer_corpus: "widgets", score: 0.9},
      {query_text: "bravo test query", answer_id: "widget-b", answer_corpus: "widgets", score: 0.8},
      {query_text: "", answer_id: "x", answer_corpus: "widgets", score: 0.5}
    ]
  }')")"
RECORDED="$(jq -r '.recorded' <<<"$BULK_RESP")"
FAIL="$(jq -r '.failed' <<<"$BULK_RESP")"
# Results are read by position, matching the order of the request entries.
# Both valid entries must carry a playbook_id, and the invalid one an error.
GOT_PB_A="$(jq -r '.results[0].playbook_id // empty' <<<"$BULK_RESP")"
GOT_PB_B="$(jq -r '.results[1].playbook_id // empty' <<<"$BULK_RESP")"
ERR_BAD="$(jq -r '.results[2].error // empty' <<<"$BULK_RESP")"
if [ "$RECORDED" = "2" ] && [ "$FAIL" = "1" ] \
  && [ -n "$GOT_PB_A" ] && [ -n "$GOT_PB_B" ] && [ -n "$ERR_BAD" ]; then
  echo " ✓ 2 recorded, 1 failed (empty query_text caught), per-entry IDs/errors returned"
else
  echo " ✗ recorded=$RECORDED failed=$FAIL pb_a=$GOT_PB_A pb_b=$GOT_PB_B err=$ERR_BAD"
  echo " full: $BULK_RESP"
  FAILED=1
fi
# Final gate: exit 0 only when no assertion set FAILED.
if [ "$FAILED" -eq 0 ]; then
  echo "[playbook-smoke] Playbook acceptance gate: PASSED"
  exit 0
fi
echo "[playbook-smoke] Playbook acceptance gate: FAILED"
exit 1