Pure-Go port of mode.rs::execute's pass5 downgrade gate (Rust
2026-04-26). Adds POST /v1/matrix/downgrade endpoint via matrixd.
The gate captures the pass5 finding: composing matrix corpora into
codereview_lakehouse on a strong model LOST 5/5 head-to-head reps
against matrix-free codereview_isolation on grok-4.1-fast (p=0.031).
Strong models have enough native capacity that bug fingerprints +
adversarial framing + file content carry them; matrix chunks
displace depth-of-analysis.
Logic (matches Rust mode.rs:614-632):
    if mode == codereview_lakehouse
       && !forced_mode
       && !LH_FORCE_FULL_ENRICHMENT
       && !is_weak_model(model)
    → flip to codereview_isolation, record downgraded_from
is_weak_model captures the empirical weak-list:
- `:free` suffix or `:free/` infix (OpenRouter free tier)
- qwen3.5:latest, qwen3:latest (local last-resort rungs)
- everything else → strong by default
Tests:
- 3 unit tests in internal/matrix/downgrade_test.go: IsWeakModel
coverage, MaybeDowngrade truth table (5 rows), forced-mode
precedence (forced beats every other bypass)
- scripts/downgrade_smoke.sh: 6 assertions through gateway covering
all 5 truth-table rows + empty-mode 400
14-smoke regression sweep all green (D1-D6, G1, G1P, G2,
storaged_cap, pathway, matrix, relevance, downgrade).
SPEC §3.4 progress: 4 of 5 components shipped (corpus builders,
multi-corpus retrieve+merge, relevance filter, downgrade gate).
Last component is learning-loop integration.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
101 lines
3.2 KiB
Go
101 lines
3.2 KiB
Go
package matrix
|
|
|
|
import "testing"
|
|
|
|
func TestIsWeakModel(t *testing.T) {
|
|
cases := []struct {
|
|
model string
|
|
weak bool
|
|
}{
|
|
// :free suffix → weak
|
|
{"openai/gpt-4o:free", true},
|
|
{"meta-llama/llama-3-8b:free", true},
|
|
// :free/ infix (routing-prefixed names)
|
|
{"openrouter:free/anthropic/claude-3.5-sonnet", true},
|
|
// Local last-resort rungs
|
|
{"qwen3.5:latest", true},
|
|
{"qwen3:latest", true},
|
|
// Strong by default
|
|
{"x-ai/grok-4.1-fast", false},
|
|
{"opencode/claude-opus-4-7", false},
|
|
{"openai/gpt-5", false},
|
|
{"qwen3-coder:480b", false}, // not the :latest tag
|
|
{"", false},
|
|
}
|
|
for _, c := range cases {
|
|
got := IsWeakModel(c.model)
|
|
if got != c.weak {
|
|
t.Errorf("IsWeakModel(%q): want %v, got %v", c.model, c.weak, got)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestMaybeDowngrade_TruthTable(t *testing.T) {
|
|
cases := []struct {
|
|
name string
|
|
in DowngradeInput
|
|
want DowngradeDecision
|
|
}{
|
|
{
|
|
name: "downgrade fires: lakehouse mode + strong model + no force",
|
|
in: DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast"},
|
|
want: DowngradeDecision{
|
|
Mode: ModeCodeReviewIsolation,
|
|
DowngradedFrom: ModeCodeReviewLakehouse,
|
|
},
|
|
},
|
|
{
|
|
name: "no downgrade: forced mode bypasses gate",
|
|
in: DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast", ForcedMode: true},
|
|
want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
|
|
},
|
|
{
|
|
name: "no downgrade: env override bypasses gate",
|
|
in: DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast", ForceFullOverride: true},
|
|
want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
|
|
},
|
|
{
|
|
name: "no downgrade: weak model keeps lakehouse",
|
|
in: DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "openai/gpt-4o:free"},
|
|
want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
|
|
},
|
|
{
|
|
name: "no downgrade: non-lakehouse mode (gate not applicable)",
|
|
in: DowngradeInput{Mode: "codereview_isolation", Model: "x-ai/grok-4.1-fast"},
|
|
want: DowngradeDecision{Mode: "codereview_isolation"},
|
|
},
|
|
}
|
|
for _, c := range cases {
|
|
got := MaybeDowngrade(c.in)
|
|
if got.Mode != c.want.Mode {
|
|
t.Errorf("%s: Mode want %q, got %q", c.name, c.want.Mode, got.Mode)
|
|
}
|
|
if got.DowngradedFrom != c.want.DowngradedFrom {
|
|
t.Errorf("%s: DowngradedFrom want %q, got %q", c.name, c.want.DowngradedFrom, got.DowngradedFrom)
|
|
}
|
|
if got.Reason == "" {
|
|
t.Errorf("%s: Reason should be non-empty", c.name)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestMaybeDowngrade_ForcedTrumpsOthers verifies precedence: when
|
|
// multiple bypass conditions hit, ForcedMode wins (explicit caller
|
|
// intent always overrides). Caught a subtle ordering bug in the
|
|
// original Rust code where this was tested only by happy path.
|
|
func TestMaybeDowngrade_ForcedTrumpsOthers(t *testing.T) {
|
|
in := DowngradeInput{
|
|
Mode: ModeCodeReviewLakehouse,
|
|
Model: "qwen3.5:latest", // weak — would otherwise hit weak-bypass
|
|
ForcedMode: true,
|
|
ForceFullOverride: true,
|
|
}
|
|
got := MaybeDowngrade(in)
|
|
if got.Mode != ModeCodeReviewLakehouse {
|
|
t.Errorf("forced mode should keep mode: got %q", got.Mode)
|
|
}
|
|
if got.DowngradedFrom != "" {
|
|
t.Errorf("no downgrade expected; got DowngradedFrom=%q", got.DowngradedFrom)
|
|
}
|
|
}
|