#!/usr/bin/env bash
# scripts/cutover/embed_parity.sh
#
# G5 cutover prep — first-flip probe on the cleanest endpoint.
#
# Brings up the Go embedd + gateway on :3216/:3110, then for a fixed
# corpus of texts, hits both:
#   - Rust: POST localhost:3100/ai/embed {texts:[...], model:"nomic-embed-text"}
#   - Go:   POST localhost:3110/v1/embed {texts:[...], model:"nomic-embed-text"}
# and computes cosine similarity + L2 norm + max abs component delta.
#
# Verdict goes to reports/cutover/embed_parity_YYYYMMDD.md
# (YYYYMMDD = the date the probe was run).
#
# IMPORTANT: model is forced to "nomic-embed-text" on both sides so
# we isolate "is the gateway-plumbing equivalent?" from "is the
# default model the same?" (Rust default = v1, Go default = v2-moe;
# different models = different vectors by design).
#
# Why this is the first flip: vectors have a single trivially-
# measurable parity invariant (cosine sim + L2 norm). Retrieve has
# thousands of edge cases. If embed parity holds, all downstream
# vector-using endpoints inherit confidence. If it doesn't, we catch
# the issue in 30 seconds instead of after a flip.
#
# Environment overrides (each falls back to the default shown):
#   RUST_URL  base URL of the Rust gateway   (http://127.0.0.1:3100)
#   GO_URL    base URL of the Go gateway     (http://127.0.0.1:3110)
#   MODEL     model name sent to both sides  (nomic-embed-text)
#
# Requires on PATH: curl, jq, python3, awk.

set -euo pipefail

# Fail fast on missing tools. Without this, a missing curl would be
# misreported below as "Rust gateway not up", and a missing jq/python3/awk
# would only surface after the Go daemons had already been started.
for tool in curl jq python3 awk; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "[cutover] required tool not found on PATH: ${tool}" >&2
    exit 1
  fi
done

# Run from the repository root (two levels above this script) so the
# relative paths used throughout (reports/, ./bin/) resolve.
cd "$(dirname "$0")/../.."
REPO="$(pwd)"
DATE="$(date +%Y%m%d)"
REPORT="reports/cutover/embed_parity_${DATE}.md"
mkdir -p reports/cutover

RUST_URL="${RUST_URL:-http://127.0.0.1:3100}"
GO_URL="${GO_URL:-http://127.0.0.1:3110}"
MODEL="${MODEL:-nomic-embed-text}"

echo "[cutover] embed parity probe — Rust ${RUST_URL}/ai/embed vs Go ${GO_URL}/v1/embed"
echo "[cutover] model forced to: ${MODEL}"

# Verify Rust side is up before we bother launching Go.
# -f turns HTTP error statuses into a non-zero exit; -m 3 caps the wait.
if ! curl -sSf -m 3 "${RUST_URL}/health" >/dev/null 2>&1; then
  echo "[cutover] Rust gateway not up at ${RUST_URL} — start lakehouse.service first" >&2
  exit 1
fi

# Anchored pkill — bin/(name)$ never matches /bin/ system tools
# (per feedback_pkill_scope; took out MinIO once with a bare pattern).
# Stop leftover embedd/gateway processes before relaunching. -f matches the
# pattern against the full command line; pkill exits non-zero when nothing
# matched, so `|| true` keeps that from being treated as a failure.
# NOTE(review): the trailing `$` requires the command line to END with the
# binary name. The daemons started below are launched with `-config <path>`
# arguments, so instances left over from this script would presumably not
# match — confirm whether that is intended.
pkill -f "bin/(embedd|gateway)$" 2>/dev/null || true
sleep 0.3

# PIDs of the daemons this run starts, plus a private scratch directory that
# holds the generated config and the in-progress report.
PIDS=()
TMP="$(mktemp -d)"
CFG="$TMP/cutover.toml"

# cleanup: kill every PID recorded in PIDS and remove the scratch directory.
# "${PIDS[@]:-}" expands to a single empty word when the array is empty,
# which is why each entry is checked with -n before kill.
cleanup() {
  echo "[cutover] cleanup"
  for p in "${PIDS[@]:-}"; do [ -n "${p:-}" ] && kill "$p" 2>/dev/null || true; done
  rm -rf "$TMP"
}
# NOTE(review): cleanup is also registered for INT and TERM, and the handler
# does not exit. Bash resumes the script after a trap handler returns, so a
# signal can leave the script running with $TMP already removed, and cleanup
# runs again on EXIT. Consider `trap cleanup EXIT` together with
# `trap 'exit 130' INT` and `trap 'exit 143' TERM`.
trap cleanup EXIT INT TERM

# Minimal config — only the two daemons under test. Other daemons
# (storaged/catalogd/...) aren't required because the gateway proxies
# lazily and we never hit a non-embed path.
# NOTE(review): this view of the file is damaged here. The body of the config
# heredoc written to "$CFG" and the opening of the poll_health function
# (everything up to the redirect of its health-check command) are missing.
# The surviving text is reproduced verbatim; restore the missing span from
# version control before running.
cat > "$CFG" </dev/null 2>&1; then return 0; fi
    sleep 0.1
  done
  echo "[cutover] ${name} (port ${port}) failed to come up"
  return 1
}

# Start each daemon in the background, record its PID for cleanup, and wait
# for it via poll_health before moving on. The logs go to fixed paths under
# /tmp rather than under $TMP, so cleanup does not remove them.
echo "[cutover] launching embedd + gateway..."
./bin/embedd -config "$CFG" > /tmp/cutover_embedd.log 2>&1 &
PIDS+=($!)
poll_health 3216 embedd
./bin/gateway -config "$CFG" > /tmp/cutover_gateway.log 2>&1 &
PIDS+=($!)
poll_health 3110 gateway

# Sample corpus — short, medium, long, special chars, domain-flavored.
SAMPLES=(
  "hello"
  "forklift operator with OSHA cert"
  "Need 5 production workers in Aurora IL for night shift starting Monday"
  "résumé: 12 yrs warehouse — pick/pack, RF scanner, pallet jack — bilingual"
  "Q: who's available next Friday? A: Bob, Carol, Dan."
)

echo "[cutover] running ${#SAMPLES[@]} parity samples..."

# The report is assembled in the scratch directory and copied to $REPORT
# only after every section has been written.
REPORT_TMP="$TMP/report.md"
{
  echo "# Embed parity probe — $(date -Iminutes)"
  echo
  echo "Forced model: \`${MODEL}\` on both sides (isolates plumbing from"
  echo "default-model drift; Rust default = v1, Go default = v2-moe)."
  echo
  echo "| # | Sample (head) | Dim R/G | Cosine | L2 R | L2 G | Max\\|Δ\\| |"
  echo "|---|---|---|---|---|---|---|"
} > "$REPORT_TMP"

PASS=0
FAIL=0
i=0
for text in "${SAMPLES[@]}"; do
  i=$((i+1))
  # Build the request with jq so the sample text is JSON-escaped.
  body=$(jq -nc --arg t "$text" --arg m "$MODEL" '{texts:[$t], model:$m}')
  # The same body is sent to both gateways; -m 30 caps each request at 30s.
  # NOTE(review): curl runs without -f, so an HTTP error response body is
  # passed on to the comparison step as if it were an embedding response.
  rust_resp=$(curl -sS -m 30 -X POST "${RUST_URL}/ai/embed" \
    -H 'content-type: application/json' --data "$body")
  go_resp=$(curl -sS -m 30 -X POST "${GO_URL}/v1/embed" \
    -H 'content-type: application/json' --data "$body")
  # Hand to python3 for vector math — bash can't.
  # NOTE(review): this view of the file is damaged here. The opening of the
  # python heredoc (its delimiter line and everything up to the tail of what
  # appears to be the cosine helper) is missing. The surviving text is
  # reproduced verbatim; Python indentation was lost and is re-created.
  # The visible part prints one pipe-separated line:
  #   status|dim_rust|dim_go|cosine|l2_rust|l2_go|max_abs_delta
  result=$(python3 - < 0 and nb > 0 else 0.0
if rd != gd:
    print(f"DIM_MISMATCH|{rd}|{gd}|0.0|0.0|0.0|0.0")
else:
    c = cos(rv, gv)
    nr = l2(rv)
    ng = l2(gv)
    md = max(abs(x - y) for x, y in zip(rv, gv))
    print(f"OK|{rd}|{gd}|{c:.6f}|{nr:.6f}|{ng:.6f}|{md:.6f}")
PYEOF
)
  # Split the pipe-separated result line into its seven fields.
  status=$(echo "$result" | cut -d'|' -f1)
  rd=$(echo "$result" | cut -d'|' -f2)
  gd=$(echo "$result" | cut -d'|' -f3)
  cosv=$(echo "$result" | cut -d'|' -f4)
  l2r=$(echo "$result" | cut -d'|' -f5)
  l2g=$(echo "$result" | cut -d'|' -f6)
  maxd=$(echo "$result" | cut -d'|' -f7)
  # Shortened sample text for the report table.
  # NOTE(review): some cut implementations count bytes for -c, which could
  # split a multi-byte character in non-ASCII samples — confirm on target.
  head=$(echo "$text" | cut -c1-40)
  # A sample passes only if dimensions matched (status OK) and cosine is at
  # least 0.9990; awk does the floating-point comparison.
  if [ "$status" = "OK" ] && [ "$(awk -v c="$cosv" 'BEGIN{print (c>=0.9990)?1:0}')" = "1" ]; then
    PASS=$((PASS+1))
    verdict_row="✅"
  else
    FAIL=$((FAIL+1))
    verdict_row="❌"
  fi
  echo "[cutover] sample $i: status=$status cos=$cosv ${verdict_row}"
  echo "| $i | \`$head\` | $rd / $gd | $cosv | $l2r | $l2g | $maxd |" >> "$REPORT_TMP"
done

# Append the verdict, the known Rust/Go differences, and repro instructions.
{
  echo
  echo "## Verdict"
  echo
  if [ "$FAIL" -eq 0 ]; then
    echo "**PASS** — ${PASS}/${#SAMPLES[@]} samples ≥ 0.9990 cosine similarity. Gateway plumbing is at-parity for embed."
    echo
    echo "First-flip ready: nginx-side or Bun-side routing of \`/ai/embed\` to Go's \`/v1/embed\`"
    echo "(with the wire-format remap noted in §Drift below) is safe to attempt."
  else
    echo "**FAIL** — ${FAIL}/${#SAMPLES[@]} samples below 0.9990 cosine. Investigate before flipping."
  fi
  echo
  echo "## Drift notes"
  echo
  echo "- **URL prefix**: Rust uses \`/ai/embed\` (nested under \`/ai\`); Go uses \`/v1/embed\` (gateway strips \`/v1\` then forwards to embedd at \`:3216/embed\`)."
  echo "- **Wire format**: Rust returns \`{embeddings, model, dimensions}\` (plural); Go returns \`{vectors, model, dimension}\` (singular). A flip needs either a wire-shape adapter on the Go side, or callers updated to handle both shapes."
  # NOTE(review): the line break inside the next string literal is reproduced
  # from this view of the file and is probably an extraction artifact.
  echo "- **Default model**: Rust default = \`nomic-embed-text\` (v1, 137M); Go default = \`nomic-embed-text-v2-moe\` (v2 MoE, 475M). This probe forces v1 on both to isolate plumbing parity. 
The v2-moe upgrade is intentional and a separate dimension."
  echo
  echo "## Repro"
  echo
  echo "\`\`\`bash"
  echo "cd $(realpath .)"
  echo "./scripts/cutover/embed_parity.sh # default: model=nomic-embed-text"
  echo "MODEL=nomic-embed-text-v2-moe ./scripts/cutover/embed_parity.sh # measure embedder drift"
  echo "\`\`\`"
} >> "$REPORT_TMP"

# Publish the finished report, then print the summary.
cp "$REPORT_TMP" "$REPORT"
echo "[cutover] report → $REPORT"
echo
echo "[cutover] verdict: ${PASS} pass / ${FAIL} fail (threshold cos ≥ 0.9990)"
# Last command: the script's exit status is 0 only when no sample failed.
[ "$FAIL" -eq 0 ]