Pure-Go port of mode.rs::execute's pass5 downgrade gate (Rust
2026-04-26). Adds POST /v1/matrix/downgrade endpoint via matrixd.
The gate captures the pass5 finding: composing matrix corpora into
codereview_lakehouse on a strong model LOST 5/5 head-to-head reps
against matrix-free codereview_isolation on grok-4.1-fast (p=0.031).
Strong models have enough native capacity that bug fingerprints +
adversarial framing + file content carry them; matrix chunks
displace depth-of-analysis.
Logic (matches Rust mode.rs:614-632):
  if mode == codereview_lakehouse
     && !forced_mode
     && !LH_FORCE_FULL_ENRICHMENT
     && !is_weak_model(model)
  → flip to codereview_isolation, record downgraded_from
is_weak_model captures the empirical weak-list:
- `:free` suffix or `:free/` infix (OpenRouter free tier)
- qwen3.5:latest, qwen3:latest (local last-resort rungs)
- everything else → strong by default
Tests:
- 3 unit tests in internal/matrix/downgrade_test.go: IsWeakModel
coverage, MaybeDowngrade truth table (5 rows), forced-mode
precedence (forced beats every other bypass)
- scripts/downgrade_smoke.sh: 6 assertions through gateway covering
all 5 truth-table rows + empty-mode 400
14-smoke regression sweep all green (D1-D6, G1, G1P, G2,
storaged_cap, pathway, matrix, relevance, downgrade).
SPEC §3.4 progress: 4 of 5 components shipped (corpus builders,
multi-corpus retrieve+merge, relevance filter, downgrade gate).
Last component is learning-loop integration.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
160 lines
5.9 KiB
Bash
Executable File
160 lines
5.9 KiB
Bash
Executable File
#!/usr/bin/env bash
# Downgrade smoke — strong-model auto-downgrade gate via matrixd.
# All assertions go through gateway :3110 → /v1/matrix/downgrade.
#
# Validates the 5-row truth table from mode.rs::execute pass5, plus one
# negative path:
#   1. Lakehouse + strong + no force → DOWNGRADE
#   2. Lakehouse + strong + forced_mode=true → keep
#   3. Lakehouse + strong + force_full_override → keep
#   4. Lakehouse + weak (qwen3.5:latest) → keep
#   5. Non-lakehouse mode → gate not applicable
#   6. Negative path: empty mode → 400
#
# Requires: go, curl, jq.

set -euo pipefail

# Work from the parent of this script's directory so the relative
# ./cmd/... and bin/ paths below resolve.
cd -- "$(dirname -- "$0")/.."

# Also look for the Go toolchain under /usr/local/go.
export PATH="$PATH:/usr/local/go/bin"

# Fail early with a clear message instead of a mid-run "command not found".
for tool in go curl jq; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "[downgrade-smoke] missing required tool: $tool" >&2
    exit 1
  fi
done

echo "[downgrade-smoke] building matrixd + vectord + gateway..."
go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway

# Best-effort: stop any already-running instances of these binaries.
# pkill exits non-zero when nothing matched, hence the `|| true`.
pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true
sleep 0.3

# PIDs of the background services this script starts.
PIDS=()
# Scratch directory holding the generated config file.
TMP="$(mktemp -d)"
CFG="$TMP/downgrade.toml"

cleanup() {
|
|
echo "[downgrade-smoke] cleanup"
|
|
for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
|
|
rm -rf "$TMP"
|
|
}
|
|
trap cleanup EXIT INT TERM
|
|
|
|
# Generate the single config file passed to all three daemons via -config.
# Only vectord (:3215), matrixd (:3218) and gateway (:3110) are started by
# this script; the remaining *_url entries in [gateway] point at ports that
# nothing here launches.
# NOTE(review): vectord's empty storaged_url presumably runs it without a
# storage backend — confirm against vectord's config handling.
# The delimiter is quoted because the body is literal TOML with nothing to
# expand.
cat > "$CFG" <<'EOF'
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"

[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""

[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF

# Wait for a local service to answer on its /health endpoint.
# Arguments:
#   $1 - TCP port on 127.0.0.1 to probe
#   $2 - optional timeout in whole seconds (default: 5)
# Returns: 0 as soon as a curl probe succeeds, 1 if the deadline passes.
# NOTE(review): curl runs without --fail, so any completed HTTP exchange
# (a 4xx/5xx reply included) counts as healthy — confirm that is intended.
poll_health() {
  local port="$1"
  local timeout_s="${2:-5}"
  local deadline
  deadline=$(($(date +%s) + timeout_s))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    # Each probe is capped at one second so a stuck listener cannot
    # overrun the deadline by much.
    if curl -sS --max-time 1 "http://127.0.0.1:${port}/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.05
  done
  return 1
}

echo "[downgrade-smoke] launching vectord → matrixd → gateway..."

# Each service is started in the background with its output captured in
# /tmp/<name>.log and its PID appended to PIDS; the script aborts if the
# service does not pass poll_health on its configured port.
./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 &
PIDS+=("$!")
poll_health 3215 || { echo "vectord failed (see /tmp/vectord.log)" >&2; exit 1; }

./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 &
PIDS+=("$!")
poll_health 3218 || { echo "matrixd failed (see /tmp/matrixd.log)" >&2; exit 1; }

./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
PIDS+=("$!")
poll_health 3110 || { echo "gateway failed (see /tmp/gateway.log)" >&2; exit 1; }

# Set to 1 by any failing assertion below; decides the final exit status.
FAILED=0
# Every assertion goes through the gateway, not matrixd directly.
URL=http://127.0.0.1:3110/v1/matrix/downgrade

# POST a JSON document to the downgrade endpoint and print the raw response
# body on stdout; callers pull fields out of it with jq.
# Globals:   URL (read)
# Arguments: $1 - JSON request body
# Returns:   curl's exit status
post() {
  local body="$1"
  # --max-time keeps a hung service from stalling the whole smoke run.
  curl -sS --max-time 10 -X POST "$URL" \
    -H 'Content-Type: application/json' \
    -d "$body"
}

# ── 1. Downgrade fires ───────────────────────────────────────────
# Truth-table row 1: lakehouse mode, strong model, no bypass flag. The
# response must carry the isolation mode and name the original mode in
# downgraded_from.
echo "[downgrade-smoke] strong model + no force → downgrade fires:"
RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast"}')"
M="$(jq -r '.mode' <<<"$RESP")"
# `// ""` turns a missing or null field into an empty string, matching how
# the other checks read downgraded_from.
D="$(jq -r '.downgraded_from // ""' <<<"$RESP")"
if [[ "$M" == "codereview_isolation" && "$D" == "codereview_lakehouse" ]]; then
  echo " ✓ codereview_lakehouse → codereview_isolation (downgraded_from=lakehouse)"
else
  echo " ✗ mode=$M downgraded_from=$D"
  FAILED=1
fi

# ── 2. Forced mode bypasses ──────────────────────────────────────
# Truth-table row 2: same request as row 1 plus forced_mode=true. The mode
# must come back unchanged and downgraded_from must be absent or null.
echo "[downgrade-smoke] forced_mode=true bypasses:"
RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast","forced_mode":true}')"
M="$(jq -r '.mode' <<<"$RESP")"
# `// ""` turns a missing or null field into an empty string.
D="$(jq -r '.downgraded_from // ""' <<<"$RESP")"
if [[ "$M" == "codereview_lakehouse" && -z "$D" ]]; then
  echo " ✓ caller-forced mode preserved, no downgrade"
else
  echo " ✗ mode=$M downgraded_from=$D"
  FAILED=1
fi

# ── 3. force_full_override bypasses ──────────────────────────────
# Truth-table row 3: same request as row 1 plus force_full_override=true.
# The mode must come back unchanged and downgraded_from must be absent or
# null.
echo "[downgrade-smoke] force_full_override=true bypasses:"
RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast","force_full_override":true}')"
M="$(jq -r '.mode' <<<"$RESP")"
# `// ""` turns a missing or null field into an empty string.
D="$(jq -r '.downgraded_from // ""' <<<"$RESP")"
if [[ "$M" == "codereview_lakehouse" && -z "$D" ]]; then
  echo " ✓ env-override bypass, no downgrade"
else
  echo " ✗ mode=$M downgraded_from=$D"
  FAILED=1
fi

# ── 4. Weak model bypasses ───────────────────────────────────────
# Truth-table row 4: lakehouse mode with a model the request names as
# qwen3.5:latest. The mode must come back unchanged and downgraded_from
# must be absent or null.
echo "[downgrade-smoke] weak model (qwen3.5:latest) bypasses:"
RESP="$(post '{"mode":"codereview_lakehouse","model":"qwen3.5:latest"}')"
M="$(jq -r '.mode' <<<"$RESP")"
# `// ""` turns a missing or null field into an empty string.
D="$(jq -r '.downgraded_from // ""' <<<"$RESP")"
if [[ "$M" == "codereview_lakehouse" && -z "$D" ]]; then
  echo " ✓ weak model keeps lakehouse"
else
  echo " ✗ mode=$M downgraded_from=$D"
  FAILED=1
fi

# ── 5. Non-lakehouse mode → gate not applicable ──────────────────
# Truth-table row 5: a mode other than codereview_lakehouse. The mode must
# come back unchanged, downgraded_from must be absent or null, and the
# reason text must contain "not applicable".
echo "[downgrade-smoke] non-lakehouse mode → gate not applicable:"
RESP="$(post '{"mode":"codereview_isolation","model":"x-ai/grok-4.1-fast"}')"
M="$(jq -r '.mode' <<<"$RESP")"
# `// ""` turns a missing or null field into an empty string.
D="$(jq -r '.downgraded_from // ""' <<<"$RESP")"
R="$(jq -r '.reason' <<<"$RESP")"
# Substring match done in the shell; no echo|grep pipeline is needed.
if [[ "$M" == "codereview_isolation" && -z "$D" && "$R" == *"not applicable"* ]]; then
  echo " ✓ codereview_isolation passes through unchanged"
else
  echo " ✗ mode=$M downgraded_from=$D reason='$R'"
  FAILED=1
fi

# ── 6. Negative: empty mode → 400 ────────────────────────────────
# Only the HTTP status matters here: the body is discarded (-o /dev/null)
# and -w prints the status code, which is what HTTP captures.
echo "[downgrade-smoke] empty mode → 400:"
HTTP="$(curl -sS --max-time 10 -o /dev/null -w '%{http_code}' -X POST "$URL" \
  -H 'Content-Type: application/json' -d '{"mode":"","model":"x"}')"
if [[ "$HTTP" == "400" ]]; then
  echo " ✓ empty mode → 400"
else
  echo " ✗ got $HTTP"
  FAILED=1
fi

# Final verdict: exit 0 only when no assertion above set FAILED.
if [[ "$FAILED" -eq 0 ]]; then
  echo "[downgrade-smoke] Downgrade gate acceptance: PASSED"
  exit 0
fi
echo "[downgrade-smoke] Downgrade gate acceptance: FAILED"
exit 1