#!/usr/bin/env bash
# Multi-coordinator stress harness — Phase 1 of the 48-hour mock.
#
# Scenario: three coordinators (Alice / Bob / Carol) own three distinct
# contracts (Milwaukee distribution, Indianapolis manufacturing, Chicago
# construction). The driver fires these phases:
#   1. baseline  — each coord runs their contract's role queries
#   2. surge     — each contract's demand doubles (URGENT phrasing)
#   3. merge     — alpha + beta combined under alice
#   4. handover  — bob takes alpha, USING alice's playbook namespace
#   5. split     — alpha surge re-distributed across all 3 coords
#   6. reissue   — non-determinism check: same baselines reissued
#   7. analysis  — diversity + determinism + learning metrics
#
# Out of scope for Phase 1 (deferred to Phase 2/3): the 48-hour clock,
# email/SMS endpoints, and Langfuse wiring.
#
# Usage:
#   ./scripts/multi_coord_stress.sh              # run #001
#   RUN_ID=002 ./scripts/multi_coord_stress.sh
#   K=12 ./scripts/multi_coord_stress.sh
#
# Environment overrides (default in parentheses):
#   RUN_ID          (001)    suffix used in the report file names
#   WORKERS_LIMIT   (5000)   -limit for the 'workers' ingest
#   ETHEREAL_LIMIT  (0)      -limit for the 'ethereal_workers' ingest (0=all)
#   CORPORA         (workers,ethereal_workers)  -corpora given to the driver
#   K               (8)      -k given to the driver

set -euo pipefail

# Every relative path below is resolved from the parent of this script's
# directory.
repo_root="$(dirname "$0")/.."
cd "$repo_root"

# Append the conventional Go install location so `go build` can be found.
PATH="${PATH}:/usr/local/go/bin"
export PATH

# Caller-supplied values win; anything unset or empty gets its default.
: "${RUN_ID:=001}"
: "${WORKERS_LIMIT:=5000}"
: "${ETHEREAL_LIMIT:=0}"
: "${CORPORA:=workers,ethereal_workers}"
: "${K:=8}"

# JSON evidence and the markdown report share one per-run stem.
report_stem="reports/reality-tests/multi_coord_stress_${RUN_ID}"
OUT_JSON="${report_stem}.json"
OUT_MD="${report_stem}.md"

# Preflight: if Ollama does not answer within 3 seconds, exit 0 so the
# run counts as skipped rather than failed.
ollama_tags_url="http://localhost:11434/api/tags"
curl -sS --max-time 3 "$ollama_tags_url" >/dev/null 2>&1 || {
  printf '%s\n' "[stress] Ollama not reachable on :11434 — skipping (need it for embeddings)"
  exit 0
}

printf '%s\n' "[stress] building binaries..."
go build -o bin/ ./cmd/storaged ./cmd/catalogd ./cmd/ingestd ./cmd/queryd \ ./cmd/embedd ./cmd/vectord ./cmd/pathwayd ./cmd/observerd \ ./cmd/matrixd ./cmd/gateway \ ./scripts/staffing_workers ./scripts/multi_coord_stress pkill -f "bin/(storaged|catalogd|ingestd|queryd|embedd|vectord|pathwayd|observerd|matrixd|gateway)" 2>/dev/null || true sleep 0.3 PIDS=() TMP="$(mktemp -d)" CFG="$TMP/stress.toml" cleanup() { echo "[stress] cleanup" for p in "${PIDS[@]:-}"; do [ -n "${p:-}" ] && kill "$p" 2>/dev/null || true; done rm -rf "$TMP" } trap cleanup EXIT INT TERM cat > "$CFG" </dev/null 2>&1; then return 0; fi sleep 0.05 done return 1 } echo "[stress] launching stack..." ./bin/storaged -config "$CFG" > /tmp/stress_storaged.log 2>&1 & PIDS+=($!); poll_health 3211 || { echo "storaged failed"; exit 1; } ./bin/catalogd -config "$CFG" > /tmp/stress_catalogd.log 2>&1 & PIDS+=($!); poll_health 3212 || { echo "catalogd failed"; exit 1; } ./bin/ingestd -config "$CFG" > /tmp/stress_ingestd.log 2>&1 & PIDS+=($!); poll_health 3213 || { echo "ingestd failed"; exit 1; } ./bin/queryd -config "$CFG" > /tmp/stress_queryd.log 2>&1 & PIDS+=($!); poll_health 3214 || { echo "queryd failed"; exit 1; } ./bin/embedd -config "$CFG" > /tmp/stress_embedd.log 2>&1 & PIDS+=($!); poll_health 3216 || { echo "embedd failed"; exit 1; } ./bin/vectord -config "$CFG" > /tmp/stress_vectord.log 2>&1 & PIDS+=($!); poll_health 3215 || { echo "vectord failed"; exit 1; } ./bin/pathwayd -config "$CFG" > /tmp/stress_pathwayd.log 2>&1 & PIDS+=($!); poll_health 3217 || { echo "pathwayd failed"; exit 1; } ./bin/observerd -config "$CFG" > /tmp/stress_observerd.log 2>&1 & PIDS+=($!); poll_health 3219 || { echo "observerd failed"; exit 1; } ./bin/matrixd -config "$CFG" > /tmp/stress_matrixd.log 2>&1 & PIDS+=($!); poll_health 3218 || { echo "matrixd failed"; exit 1; } ./bin/gateway -config "$CFG" > /tmp/stress_gateway.log 2>&1 & PIDS+=($!); poll_health 3110 || { echo "gateway failed"; exit 1; } echo echo "[stress] ingest 
workers (limit=$WORKERS_LIMIT) into 'workers' corpus..."
# NOTE: the line above is the tail of the `echo "[stress] ingest ...` banner
# whose opening quote sits at the end of the preceding line; it has to stay
# first so that string is closed before anything else runs.

# Primary corpus: only the row limit is passed explicitly.
./bin/staffing_workers -limit "$WORKERS_LIMIT"

# Location of the ethereal_workers dataset. The default is the path this
# script has always used; export ETHEREAL_PARQUET to point at a different
# checkout without editing the script.
ETHEREAL_PARQUET="${ETHEREAL_PARQUET:-/home/profit/lakehouse/data/datasets/ethereal_workers.parquet}"

echo
echo "[stress] ingest ethereal_workers (limit=$ETHEREAL_LIMIT, 0=all) into 'ethereal_workers' corpus..."
./bin/staffing_workers \
  -parquet "$ETHEREAL_PARQUET" \
  -index-name ethereal_workers \
  -id-prefix "e-" \
  -limit "$ETHEREAL_LIMIT"

echo
echo "[stress] running multi-coord stress driver..."

# Optional driver flags are collected in an array so each flag is passed as
# exactly one argument, with no word-splitting or globbing on expansion.
# WITH_PARAPHRASE_HANDOVER defaults to 1 (enabled); any other value omits
# the flag.
EXTRA_FLAGS=()
if [[ "${WITH_PARAPHRASE_HANDOVER:-1}" == "1" ]]; then
  EXTRA_FLAGS+=(-with-paraphrase-handover)
fi

# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array,
# which keeps `set -u` from tripping on bash releases older than 4.4.
./bin/multi_coord_stress \
  -gateway "http://127.0.0.1:3110" \
  -contracts tests/reality/contracts \
  -corpora "$CORPORA" \
  -k "$K" \
  -out "$OUT_JSON" \
  -ollama "http://localhost:11434" \
  -judge "${JUDGE_MODEL:-qwen2.5:latest}" \
  ${EXTRA_FLAGS[@]+"${EXTRA_FLAGS[@]}"}

echo
echo "[stress] generating markdown report → $OUT_MD"

# Render compact markdown from the JSON. Same shape as the lift harness
# reports so reviewers can compare format.

# Print one value from the driver's JSON output.
# Globals:   OUT_JSON (read)
# Arguments: $1 - jq filter to evaluate
# Outputs:   the raw (unquoted) filter result on stdout
# Returns:   jq's exit status; under `set -e` a failed lookup used in a
#            plain assignment aborts the script
json_field() {
  jq -r "$1" "$OUT_JSON"
}

total=$(json_field '.events | length')
gen_at=$(json_field '.generated_at')

# Diversity metrics.
div_role=$(json_field '.diversity.same_role_across_contracts_mean_jaccard')
div_role_n=$(json_field '.diversity.num_pairs_same_role_across_contracts')
div_xrole=$(json_field '.diversity.different_roles_same_contract_mean_jaccard')
div_xrole_n=$(json_field '.diversity.num_pairs_different_roles_same_contract')

# Determinism metrics.
det_jacc=$(json_field '.determinism.mean_jaccard')
det_n=$(json_field '.determinism.num_reissued_pairs')

# Learning metrics for the verbatim handover.
hand_run=$(json_field '.learning.handover_queries_run')
hand_top1=$(json_field '.learning.recorded_answers_top1_count')
hand_topk=$(json_field '.learning.recorded_answers_topk_count')
hand_rate=$(json_field '.learning.handover_hit_rate')

# Paraphrase counters fall back to 0 when the key is missing or null
# (jq's `//` alternative operator).
ph_run=$(json_field '.learning.paraphrase_handover_run // 0')
ph_top1=$(json_field '.learning.paraphrase_top1_count // 0')
ph_topk=$(json_field '.learning.paraphrase_topk_count // 0')
ph_rate=$(jq -r '.learning.paraphrase_handover_hit_rate // 0' "$OUT_JSON") cat > "$OUT_MD" < 0.50, the system is "cycling" the same handful of workers regardless of query intent. --- ## Determinism — same query reissued, top-K stability | Metric | Value | |---|---:| | Mean Jaccard on retrieval-only reissue | ${det_jacc} | | Number of reissue pairs | ${det_n} | **Interpretation:** - ≥ 0.95: HNSW retrieval is highly deterministic; reissues land on near-identical top-K. Good — system locks into a stable view of "best workers for this query." - 0.80 – 0.95: Some HNSW or embed variance, acceptable. - < 0.80: Retrieval is unstable — reissues see substantially different results, suggesting either embed nondeterminism (Ollama returning slightly different vectors) or vectord nondeterminism (HNSW insertion order affecting recall). --- ## Learning — handover hit rate Bob takes Alice's contract using Alice's playbook namespace. Did Alice's recorded answers surface in Bob's results? | Metric | Value | |---|---:| | Verbatim handover queries run | ${hand_run} | | Alice's recorded answer at Bob's top-1 (verbatim) | ${hand_top1} | | Alice's recorded answer in Bob's top-K (verbatim) | ${hand_topk} | | **Verbatim handover hit rate (top-1)** | **${hand_rate}** | | Paraphrase handover queries run | ${ph_run} | | Alice's recorded answer at Bob's top-1 (paraphrase) | ${ph_top1} | | Alice's recorded answer in Bob's top-K (paraphrase) | ${ph_topk} | | **Paraphrase handover hit rate (top-1)** | **${ph_rate}** | **Interpretation:** - Verbatim hit rate ≈ 1.0: trivial case — Bob runs identical queries; should always hit. - Paraphrase hit rate ≥ 0.5: institutional memory survives wording change — the harder learning property. - Paraphrase hit rate ≈ 0.0: Bob's paraphrases drift past the inject threshold, so Alice's recordings don't activate. Same caveat as the playbook_lift paraphrase pass. 
--- ## Per-event capture All matrix.search responses live in the JSON — top-K with worker IDs, distances, and per-corpus counts. Search by phase: \`\`\`bash jq '.events[] | select(.phase == "merge")' ${OUT_JSON} jq '.events[] | select(.coordinator == "alice" and .phase == "baseline")' ${OUT_JSON} jq '.events[] | select(.role == "warehouse worker") | {phase, contract, top_k_ids: [.top_k[].id]}' ${OUT_JSON} \`\`\` --- ## What's NOT in this run (Phase 1 deliberately defers) - **48-hour clock.** Events fire as discrete steps, not on a timeline. - **Email / SMS ingest.** No endpoints exist on the Go side yet. - **New-resume injection mid-run.** The corpus is fixed at the start. - **Langfuse traces.** Need Go-side wiring. These are Phase 2/3. The Phase 1 substrate is what the time-based runner will mount on top of. MDEOF echo echo "[stress] DONE" echo "[stress] evidence: $OUT_JSON" echo "[stress] report: $OUT_MD"