golangLAKEHOUSE/scripts/downgrade_smoke.sh
root 3968ec8a7b matrix: strong-model downgrade gate — SPEC §3.4 component 4 of 5
Pure-Go port of mode.rs::execute's pass5 downgrade gate (Rust
2026-04-26). Adds POST /v1/matrix/downgrade endpoint via matrixd.

The gate captures the pass5 finding: composing matrix corpora into
codereview_lakehouse on a strong model LOST 5/5 head-to-head reps
against matrix-free codereview_isolation on grok-4.1-fast (p=0.031).
Strong models have enough native capacity that bug fingerprints +
adversarial framing + file content carry them; matrix chunks
displace depth-of-analysis.

Logic (matches Rust mode.rs:614-632):
  if mode == codereview_lakehouse
     && !forced_mode
     && !LH_FORCE_FULL_ENRICHMENT
     && !is_weak_model(model)
  → flip to codereview_isolation, record downgraded_from

is_weak_model captures the empirical weak-list:
  - `:free` suffix or `:free/` infix (OpenRouter free tier)
  - qwen3.5:latest, qwen3:latest (local last-resort rungs)
  - everything else → strong by default

Tests:
  - 3 unit tests in internal/matrix/downgrade_test.go: IsWeakModel
    coverage, MaybeDowngrade truth table (5 rows), forced-mode
    precedence (forced beats every other bypass)
  - scripts/downgrade_smoke.sh: 6 assertions through gateway covering
    all 5 truth-table rows + empty-mode 400

14-smoke regression sweep all green (D1-D6, G1, G1P, G2,
storaged_cap, pathway, matrix, relevance, downgrade).

SPEC §3.4 progress: 4 of 5 components shipped (corpus builders,
multi-corpus retrieve+merge, relevance filter, downgrade gate).
Last component is learning-loop integration.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 19:17:55 -05:00

160 lines
5.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# Smoke test for the strong-model auto-downgrade gate served by matrixd.
# Every request goes through the gateway on :3110 → /v1/matrix/downgrade.
#
# Cases exercised. Rows 1–5 are the truth table from mode.rs::execute
# pass5; case 6 is an input-validation check, not a truth-table row:
#   1. Lakehouse + strong + no force            → DOWNGRADE
#   2. Lakehouse + strong + forced_mode=true    → keep
#   3. Lakehouse + strong + force_full_override → keep
#   4. Lakehouse + weak (qwen3.5:latest)        → keep
#   5. Non-lakehouse mode                       → gate not applicable
#   6. Negative path: empty mode                → 400
set -o errexit -o nounset -o pipefail

# Work from the repository root (this script lives one level below it).
cd "$(dirname "$0")/.."
PATH="$PATH:/usr/local/go/bin"
export PATH

echo "[downgrade-smoke] building matrixd + vectord + gateway..."
go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway

# Stop daemons left over from an earlier run. `|| true` keeps a
# non-zero pkill status from tripping errexit.
pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true
sleep 0.3

# PIDS collects the daemons started by this run; TMP holds the
# throwaway config file written to CFG.
declare -a PIDS=()
TMP="$(mktemp -d)"
CFG="${TMP}/downgrade.toml"
# cleanup
#   Stops every daemon recorded in the global PIDS array and removes the
#   temp directory named by the global TMP.
#   - Signals all PIDs first, then waits for each one, so the daemons have
#     actually exited (and released their ports) before this returns.
#   - Safe with an empty PIDS array under `set -u`, and safe to call more
#     than once: already-dead PIDs and a missing TMP are ignored.
#   Always returns 0 so it cannot change the script's exit status when
#   run from a trap.
cleanup() {
  local pid
  echo "[downgrade-smoke] cleanup"
  # ${arr[@]+…} expands to nothing for an empty array instead of raising
  # "unbound variable" on bash < 4.4.
  for pid in ${PIDS[@]+"${PIDS[@]}"}; do
    if [ -n "$pid" ]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
  for pid in ${PIDS[@]+"${PIDS[@]}"}; do
    if [ -n "$pid" ]; then
      # Reap the child; its non-zero "killed" status is expected.
      wait "$pid" 2>/dev/null || true
    fi
  done
  if [ -n "${TMP:-}" ]; then
    rm -rf -- "$TMP"
  fi
  return 0
}
# Teardown runs once, from the EXIT trap, on every exit path.
# INT/TERM only turn the signal into an exit (128 + signal number).
# If they ran cleanup directly, bash would resume the script after the
# handler returned, against daemons that were just killed.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# Write the one TOML file that all three daemons are started with.
# The [gateway] section also names services this script never launches
# (storaged, catalogd, ingestd, queryd, embedd, pathwayd); only vectord,
# matrixd and the gateway itself are started below.
# The delimiter is quoted because the payload is meant literally.
cat <<'TOML' > "$CFG"
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""
[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
TOML
# poll_health PORT [TIMEOUT_SECS]
#   Probes http://127.0.0.1:PORT/health every 50 ms until curl succeeds
#   or TIMEOUT_SECS (whole seconds, default 5) have passed.
#   Returns: 0 on the first successful probe, 1 on timeout.
#   A TIMEOUT_SECS of 0 performs no probe and returns 1 immediately.
#   NOTE(review): curl is run without --fail, so any HTTP response,
#   including an error status, counts as healthy. Confirm that is intended.
poll_health() {
  local port="$1" timeout="${2:-5}"
  local deadline
  # Assigned separately from `local` so a failing date is not masked.
  deadline=$(( $(date +%s) + timeout ))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    if curl -sS --max-time 1 "http://127.0.0.1:${port}/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.05
  done
  return 1
}
# start_daemon NAME PORT
#   Runs ./bin/NAME with the shared config in the background, logging to
#   /tmp/NAME.log, records its PID in PIDS, and exits the script with
#   status 1 if poll_health PORT does not succeed.
start_daemon() {
  local name="$1" port="$2"
  "./bin/${name}" -config "$CFG" > "/tmp/${name}.log" 2>&1 &
  PIDS+=($!)
  poll_health "$port" || { echo "${name} failed"; exit 1; }
}

echo "[downgrade-smoke] launching vectord → matrixd → gateway..."
# Each daemon must report healthy before the next one is started.
start_daemon vectord 3215
start_daemon matrixd 3218
start_daemon gateway 3110

# FAILED becomes 1 as soon as any assertion below fails.
FAILED=0
URL=http://127.0.0.1:3110/v1/matrix/downgrade
# post JSON_BODY
#   POSTs JSON_BODY to $URL with a JSON content type and writes whatever
#   curl prints (the response body) to stdout. Returns curl's exit status.
#   Field extraction is left to the caller.
post() {
  local payload="$1"
  local -a curl_args=(
    -sS
    -X POST
    "$URL"
    -H 'Content-Type: application/json'
    -d "$payload"
  )
  curl "${curl_args[@]}"
}
# ── Truth-table rows 1–5 ─────────────────────────────────────────
# check_gate LABEL BODY WANT_MODE WANT_FROM OK_MSG [WANT_REASON]
#   POSTs BODY via post() and compares the reply's .mode and
#   .downgraded_from (missing/null → "") with WANT_MODE / WANT_FROM.
#   When WANT_REASON is given, .reason must also contain it as a substring.
#   Prints " ✓ OK_MSG" on a match, otherwise " ✗ …" with the observed
#   values, and sets the global FAILED=1.
#   Always returns 0. A transport error or a reply jq cannot parse is
#   reported as a failed row rather than aborting the script under
#   `set -e` before the verdict and the final summary are printed.
check_gate() {
  local label="$1" body="$2" want_mode="$3" want_from="$4" ok_msg="$5"
  local want_reason="${6:-}"
  local resp got_mode got_from got_reason detail

  echo "[downgrade-smoke] ${label}:"

  resp="$(post "$body")" || resp=""
  got_mode="$(jq -r '.mode' <<<"$resp")" || got_mode="<jq error>"
  got_from="$(jq -r '.downgraded_from // ""' <<<"$resp")" || got_from="<jq error>"
  got_reason="$(jq -r '.reason // ""' <<<"$resp")" || got_reason="<jq error>"

  detail="mode=${got_mode} downgraded_from=${got_from}"
  if [ -n "$want_reason" ]; then
    detail="${detail} reason='${got_reason}'"
  fi

  # An empty WANT_REASON makes the pattern match any reason.
  if [ "$got_mode" = "$want_mode" ] && [ "$got_from" = "$want_from" ] \
    && [[ "$got_reason" == *"$want_reason"* ]]; then
    echo " ✓ ${ok_msg}"
  else
    echo " ✗ ${detail}"
    FAILED=1
  fi
  return 0
}

# ── 1. Downgrade fires ───────────────────────────────────────────
check_gate "strong model + no force → downgrade fires" \
  '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast"}' \
  "codereview_isolation" "codereview_lakehouse" \
  "codereview_lakehouse → codereview_isolation (downgraded_from=lakehouse)"

# ── 2. Forced mode bypasses ──────────────────────────────────────
check_gate "forced_mode=true bypasses" \
  '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast","forced_mode":true}' \
  "codereview_lakehouse" "" \
  "caller-forced mode preserved, no downgrade"

# ── 3. force_full_override bypasses ──────────────────────────────
check_gate "force_full_override=true bypasses" \
  '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast","force_full_override":true}' \
  "codereview_lakehouse" "" \
  "env-override bypass, no downgrade"

# ── 4. Weak model bypasses ───────────────────────────────────────
check_gate "weak model (qwen3.5:latest) bypasses" \
  '{"mode":"codereview_lakehouse","model":"qwen3.5:latest"}' \
  "codereview_lakehouse" "" \
  "weak model keeps lakehouse"

# ── 5. Non-lakehouse mode → gate not applicable ──────────────────
check_gate "non-lakehouse mode → gate not applicable" \
  '{"mode":"codereview_isolation","model":"x-ai/grok-4.1-fast"}' \
  "codereview_isolation" "" \
  "codereview_isolation passes through unchanged" \
  "not applicable"
# ── 6. Negative: empty mode → 400 ────────────────────────────────
# Only the status code is checked: the response body is discarded and
# curl's -w format prints the HTTP status, which must be exactly 400.
echo "[downgrade-smoke] empty mode → 400:"
HTTP="$(
  curl -sS -o /dev/null -w '%{http_code}' -X POST "$URL" \
    -H 'Content-Type: application/json' -d '{"mode":"","model":"x"}'
)"
case "$HTTP" in
  400)
    echo " ✓ empty mode → 400"
    ;;
  *)
    echo " ✗ got $HTTP"
    FAILED=1
    ;;
esac
# Final verdict: exit 1 if any assertion set FAILED, otherwise exit 0.
# The test is negated rather than rewritten with -ne so that a
# non-numeric FAILED still lands on the failure path.
if ! [ "$FAILED" -eq 0 ]; then
  echo "[downgrade-smoke] Downgrade gate acceptance: FAILED"
  exit 1
fi
echo "[downgrade-smoke] Downgrade gate acceptance: PASSED"
exit 0