Migrate the strong-model auto-downgrade gate from a hardcoded weak list to cfg.Models.WeakModels.

Backward compatible: existing API preserved; callers that don't migrate keep using DefaultWeakModels.

Changes:
- internal/matrix/downgrade.go: split IsWeakModel into a rule-based base (`:free` suffix/infix) + literal-list lookup. New IsWeakModelInList(model, list) takes the config-supplied list. DowngradeInput grows a WeakModels field; nil falls back to DefaultWeakModels (preserves pre-phase-2 behavior).
- internal/workflow/modes.go: add MatrixDowngradeWithWeakList(list) factory mirroring MatrixSearch's pattern. Plain MatrixDowngrade kept for backward compat.
- cmd/matrixd/main.go: handlers struct holds weakModels populated from cfg.Models.WeakModels at startup; handleDowngrade threads it into every DowngradeInput.
- cmd/observerd/main.go: registerBuiltinModes accepts weakModels and uses the factory variant. observerd reads cfg.Models.WeakModels in main().

End-to-end verified: downgrade + matrix + observer + workflow smokes all pass.

Existing TestMaybeDowngrade_TruthTable + TestIsWeakModel unchanged (backward compat). Two new tests cover the config path:
- TestIsWeakModelInList — covers rule + literal + empty + nil
- TestMaybeDowngrade_WithConfigList — verifies cfg list overrides default

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
163 lines
5.5 KiB
Go
163 lines
5.5 KiB
Go
package matrix
|
|
|
|
import "testing"
|
|
|
|
func TestIsWeakModel(t *testing.T) {
|
|
cases := []struct {
|
|
model string
|
|
weak bool
|
|
}{
|
|
// :free suffix → weak
|
|
{"openai/gpt-4o:free", true},
|
|
{"meta-llama/llama-3-8b:free", true},
|
|
// :free/ infix (routing-prefixed names)
|
|
{"openrouter:free/anthropic/claude-3.5-sonnet", true},
|
|
// Local last-resort rungs
|
|
{"qwen3.5:latest", true},
|
|
{"qwen3:latest", true},
|
|
// Strong by default
|
|
{"x-ai/grok-4.1-fast", false},
|
|
{"opencode/claude-opus-4-7", false},
|
|
{"openai/gpt-5", false},
|
|
{"qwen3-coder:480b", false}, // not the :latest tag
|
|
{"", false},
|
|
}
|
|
for _, c := range cases {
|
|
got := IsWeakModel(c.model)
|
|
if got != c.weak {
|
|
t.Errorf("IsWeakModel(%q): want %v, got %v", c.model, c.weak, got)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestMaybeDowngrade_TruthTable(t *testing.T) {
|
|
cases := []struct {
|
|
name string
|
|
in DowngradeInput
|
|
want DowngradeDecision
|
|
}{
|
|
{
|
|
name: "downgrade fires: lakehouse mode + strong model + no force",
|
|
in: DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast"},
|
|
want: DowngradeDecision{
|
|
Mode: ModeCodeReviewIsolation,
|
|
DowngradedFrom: ModeCodeReviewLakehouse,
|
|
},
|
|
},
|
|
{
|
|
name: "no downgrade: forced mode bypasses gate",
|
|
in: DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast", ForcedMode: true},
|
|
want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
|
|
},
|
|
{
|
|
name: "no downgrade: env override bypasses gate",
|
|
in: DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast", ForceFullOverride: true},
|
|
want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
|
|
},
|
|
{
|
|
name: "no downgrade: weak model keeps lakehouse",
|
|
in: DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "openai/gpt-4o:free"},
|
|
want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
|
|
},
|
|
{
|
|
name: "no downgrade: non-lakehouse mode (gate not applicable)",
|
|
in: DowngradeInput{Mode: "codereview_isolation", Model: "x-ai/grok-4.1-fast"},
|
|
want: DowngradeDecision{Mode: "codereview_isolation"},
|
|
},
|
|
}
|
|
for _, c := range cases {
|
|
got := MaybeDowngrade(c.in)
|
|
if got.Mode != c.want.Mode {
|
|
t.Errorf("%s: Mode want %q, got %q", c.name, c.want.Mode, got.Mode)
|
|
}
|
|
if got.DowngradedFrom != c.want.DowngradedFrom {
|
|
t.Errorf("%s: DowngradedFrom want %q, got %q", c.name, c.want.DowngradedFrom, got.DowngradedFrom)
|
|
}
|
|
if got.Reason == "" {
|
|
t.Errorf("%s: Reason should be non-empty", c.name)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestIsWeakModelInList covers the config-driven variant added in
|
|
// Phase 2 — callers pass cfg.Models.WeakModels instead of relying on
|
|
// the package-level DefaultWeakModels.
|
|
func TestIsWeakModelInList(t *testing.T) {
|
|
custom := []string{"my-custom-7b:latest", "qwen3:latest"}
|
|
cases := []struct {
|
|
model string
|
|
list []string
|
|
weak bool
|
|
}{
|
|
// Custom literal hits (config-driven)
|
|
{"my-custom-7b:latest", custom, true},
|
|
{"qwen3:latest", custom, true},
|
|
// qwen3.5:latest is in DefaultWeakModels but NOT in custom list →
|
|
// not weak when caller supplies custom list (intentional — operator
|
|
// owns the list).
|
|
{"qwen3.5:latest", custom, false},
|
|
// :free/free-tier rules apply regardless of list
|
|
{"openai/gpt-4o:free", custom, true},
|
|
{"openrouter:free/anthropic/claude", custom, true},
|
|
// Empty list → only rule-based checks apply
|
|
{"qwen3.5:latest", []string{}, false},
|
|
{"openai/gpt-4o:free", []string{}, true},
|
|
// nil list behaves like empty (caller hasn't migrated yet)
|
|
{"qwen3.5:latest", nil, false},
|
|
}
|
|
for _, c := range cases {
|
|
got := IsWeakModelInList(c.model, c.list)
|
|
if got != c.weak {
|
|
t.Errorf("IsWeakModelInList(%q, %v): want %v, got %v", c.model, c.list, c.weak, got)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestMaybeDowngrade_WithConfigList verifies the DowngradeInput.WeakModels
|
|
// field overrides DefaultWeakModels when populated, so callers reading
|
|
// cfg.Models.WeakModels at startup get the configured behavior end-to-end.
|
|
func TestMaybeDowngrade_WithConfigList(t *testing.T) {
|
|
// Config says "only my-custom is weak". qwen3.5:latest is now strong
|
|
// (operator opted out of the local-hot-path treatment).
|
|
configList := []string{"my-custom:latest"}
|
|
in := DowngradeInput{
|
|
Mode: ModeCodeReviewLakehouse,
|
|
Model: "qwen3.5:latest",
|
|
WeakModels: configList,
|
|
}
|
|
got := MaybeDowngrade(in)
|
|
if got.Mode != ModeCodeReviewIsolation {
|
|
t.Errorf("qwen3.5:latest with config list excluding it should downgrade; got Mode=%q", got.Mode)
|
|
}
|
|
if got.DowngradedFrom != ModeCodeReviewLakehouse {
|
|
t.Errorf("DowngradedFrom should be lakehouse; got %q", got.DowngradedFrom)
|
|
}
|
|
|
|
// Same input but weak by config — gate must NOT fire.
|
|
in.Model = "my-custom:latest"
|
|
got = MaybeDowngrade(in)
|
|
if got.Mode != ModeCodeReviewLakehouse {
|
|
t.Errorf("my-custom:latest is in config list — should stay lakehouse; got %q", got.Mode)
|
|
}
|
|
}
|
|
|
|
// TestMaybeDowngrade_ForcedTrumpsOthers verifies precedence: when
|
|
// multiple bypass conditions hit, ForcedMode wins (explicit caller
|
|
// intent always overrides). Caught a subtle ordering bug in the
|
|
// original Rust code where this was tested only by happy path.
|
|
func TestMaybeDowngrade_ForcedTrumpsOthers(t *testing.T) {
|
|
in := DowngradeInput{
|
|
Mode: ModeCodeReviewLakehouse,
|
|
Model: "qwen3.5:latest", // weak — would otherwise hit weak-bypass
|
|
ForcedMode: true,
|
|
ForceFullOverride: true,
|
|
}
|
|
got := MaybeDowngrade(in)
|
|
if got.Mode != ModeCodeReviewLakehouse {
|
|
t.Errorf("forced mode should keep mode: got %q", got.Mode)
|
|
}
|
|
if got.DowngradedFrom != "" {
|
|
t.Errorf("no downgrade expected; got DowngradedFrom=%q", got.DowngradedFrom)
|
|
}
|
|
}
|