From f971e647456557e835e3ab6a97d372024573112a Mon Sep 17 00:00:00 2001
From: root
Date: Thu, 30 Apr 2026 17:37:20 -0500
Subject: [PATCH] g2_smoke: accept nomic-embed-text* family members as default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-push hook caught the regression — the smoke hardcoded
MODEL = "nomic-embed-text" and the bump to nomic-embed-text-v2-moe
in 4da32ad failed the gate.

Fix: glob-match the family prefix (nomic-embed-text*). Both v1 and
v2-moe are 768d drop-ins; the property the smoke is locking is
dim + distinct-vectors, not the exact model variant. Operators swap
the variant in lakehouse.toml without needing to touch the smoke.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 scripts/g2_smoke.sh | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/scripts/g2_smoke.sh b/scripts/g2_smoke.sh
index cc28ab6..f45c12b 100755
--- a/scripts/g2_smoke.sh
+++ b/scripts/g2_smoke.sh
@@ -76,8 +76,14 @@ DIM="$(echo "$RESP" | jq -r '.dimension')"
 N="$(echo "$RESP" | jq -r '.vectors | length')"
 MODEL="$(echo "$RESP" | jq -r '.model')"
 SAME="$(echo "$RESP" | jq -r '.vectors[0][0] == .vectors[1][0]')"
-if [ "$DIM" = "768" ] && [ "$N" = "2" ] && [ "$MODEL" = "nomic-embed-text" ] && [ "$SAME" = "false" ]; then
-  echo " ✓ dim=768, model=nomic-embed-text, 2 distinct vectors"
+# Accept any nomic-embed-text* family member as the default — v1
+# (137M, 768d) and v2-moe (475M MoE, 768d) are both supported drop-ins.
+# The smoke locks the dimension + the distinct-vectors property, NOT
+# the exact model name (operators bump the model in lakehouse.toml
+# without changing this smoke).
+case "$MODEL" in nomic-embed-text*) MODEL_OK=1 ;; *) MODEL_OK=0 ;; esac
+if [ "$DIM" = "768" ] && [ "$N" = "2" ] && [ "$MODEL_OK" = "1" ] && [ "$SAME" = "false" ]; then
+  echo " ✓ dim=768, model=$MODEL, 2 distinct vectors"
 else
   echo " ✗ resp: dim=$DIM n=$N model=$MODEL same=$SAME"; FAILED=1
 fi