phase 2: matrix.downgrade reads WeakModels from config

Migrate the strong-model auto-downgrade gate from a hardcoded weak list to cfg.Models.WeakModels. Backward compatible: existing API preserved, callers that don't migrate keep using DefaultWeakModels.

Changes:
- internal/matrix/downgrade.go: split IsWeakModel into rule-based base (`:free` suffix/infix) + literal-list lookup. New IsWeakModelInList(model, list) takes the config-supplied list. DowngradeInput grows a WeakModels field; nil falls back to DefaultWeakModels (preserves pre-phase-2 behavior).
- internal/workflow/modes.go: add MatrixDowngradeWithWeakList(list) factory mirroring MatrixSearch's pattern. Plain MatrixDowngrade kept for backward compat.
- cmd/matrixd/main.go: handlers struct holds weakModels populated from cfg.Models.WeakModels at startup; handleDowngrade threads it into every DowngradeInput.
- cmd/observerd/main.go: registerBuiltinModes accepts weakModels and uses the factory variant. observerd reads cfg.Models.WeakModels in main().

End-to-end verified: downgrade + matrix + observer + workflow smokes all pass. Existing TestMaybeDowngrade_TruthTable + TestIsWeakModel unchanged (backward compat).

Two new tests cover the config path:
- TestIsWeakModelInList — covers rule + literal + empty + nil
- TestMaybeDowngrade_WithConfigList — verifies cfg list overrides default

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 23:52:18 -05:00 · 2026-04-29 23:52:18 -05:00 · 622e124b8f
commit 622e124b8f
parent ec1d031996
5 changed files with 142 additions and 11 deletions
--- a/cmd/matrixd/main.go
+++ b/cmd/matrixd/main.go
@ -52,7 +52,7 @@ func main() {
 	}

 	retriever := matrix.New(cfg.Matrixd.EmbeddURL, cfg.Matrixd.VectordURL)
-	h := &handlers{r: retriever}
+	h := &handlers{r: retriever, weakModels: cfg.Models.WeakModels}

 	if err := shared.Run("matrixd", cfg.Matrixd.Bind, h.register, cfg.Auth); err != nil {
 		slog.Error("server", "err", err)
@ -62,6 +62,11 @@ func main() {

 type handlers struct {
 	r *matrix.Retriever
+	// weakModels comes from cfg.Models.WeakModels at startup. Threaded
+	// into every DowngradeInput so the gate uses the configured list
+	// instead of matrix.DefaultWeakModels. nil falls back to the package
+	// default (pre-Phase 2 behavior); an empty non-nil list does not.
+	weakModels []string
 }

 func (h *handlers) register(r chi.Router) {
@ -240,6 +245,7 @@ func (h *handlers) handleDowngrade(w http.ResponseWriter, r *http.Request) {
 		// without env pollution.
 		in.ForceFullOverride = *req.ForceFullOverride
 	}
+	in.WeakModels = h.weakModels
 	writeJSON(w, http.StatusOK, matrix.MaybeDowngrade(in))
 }

--- a/cmd/observerd/main.go
+++ b/cmd/observerd/main.go
@ -74,7 +74,7 @@ func main() {
 	// to gateway's matrixd_url so a single-toml deploy works without
 	// duplicating the address.
 	matrixdURL := cfg.Gateway.MatrixdURL
-	registerBuiltinModes(runner, matrixdURL)
+	registerBuiltinModes(runner, matrixdURL, cfg.Models.WeakModels)

 	h := &handlers{store: store, runner: runner}
 	if err := shared.Run("observerd", cfg.Observerd.Bind, h.register, cfg.Auth); err != nil {
@ -204,7 +204,7 @@ func summarizeOutput(output map[string]any) string {
 //   - playbook.record (HTTP to matrixd)
 //   - playbook.lookup (HTTP to matrixd)
 //   - llm.chat (HTTP to gateway /v1/chat)
-func registerBuiltinModes(r *workflow.Runner, matrixdURL string) {
+func registerBuiltinModes(r *workflow.Runner, matrixdURL string, weakModels []string) {
 	// Fixture modes for runner mechanics smokes.
 	r.RegisterMode("fixture.echo", func(_ workflow.Context, input map[string]any) (map[string]any, error) {
 		out := make(map[string]any, len(input))
@ -220,7 +220,10 @@ func registerBuiltinModes(r *workflow.Runner, matrixdURL string) {

 	// Real modes — pure-function wrappers (no I/O).
 	r.RegisterMode("matrix.relevance", workflow.MatrixRelevance)
-	r.RegisterMode("matrix.downgrade", workflow.MatrixDowngrade)
+	// matrix.downgrade reads weakModels from config — Phase 2.
+	// Only a nil list falls back to matrix.DefaultWeakModels; an empty
+	// non-nil list keeps just the rule-based `:free` checks.
+	r.RegisterMode("matrix.downgrade", workflow.MatrixDowngradeWithWeakList(weakModels))
 	r.RegisterMode("distillation.score", workflow.DistillationScore)
 	r.RegisterMode("drift.scorer", workflow.DriftScorer)

--- a/internal/matrix/downgrade.go
+++ b/internal/matrix/downgrade.go
@ -37,6 +37,12 @@ const (
 // diagnostic runs ("LH_FORCE_FULL_ENRICHMENT=1" or "true").
 const EnvForceFullEnrichment = "LH_FORCE_FULL_ENRICHMENT"

+// DefaultWeakModels is the fallback weak-model list used when no
+// config-driven list is provided. Matches the pre-config hardcoded
+// switch — preserved for backward compatibility with callers that
+// haven't migrated to the [models] tier config (Phase 1).
+var DefaultWeakModels = []string{"qwen3.5:latest", "qwen3:latest"}
+
 // IsWeakModel returns true for models matrix-corpus composition
 // demonstrably helped during the 2026-04-26 pass5 bake-off. Strong
 // models (default) get matrix dropped to avoid the "composed lost
@ -45,17 +51,29 @@ const EnvForceFullEnrichment = "LH_FORCE_FULL_ENRICHMENT"
 // Weak signals:
 //   - `:free` suffix (OpenRouter free tier, e.g. `gpt-oss-120b:free`)
 //   - `:free/` infix (handles routing-prefixed names like `or:free/x`)
-//   - `qwen3.5:latest` / `qwen3:latest` — local last-resort rung
+//   - DefaultWeakModels list — local-hot-path eligible.
 //
-// Add new weak models by extending this function alongside variance
-// data that justifies it.
+// For config-driven lists, use IsWeakModelInList.
 func IsWeakModel(model string) bool {
+	return IsWeakModelInList(model, DefaultWeakModels)
+}
+
+// IsWeakModelInList is the config-aware variant. Callers pass the
+// configured weak-model list (typically cfg.Models.WeakModels) and
+// the function applies the same rule-based checks (`:free` suffix /
+// infix) plus the literal list match.
+//
+// Empty list still applies the rule-based checks — only the literal
+// match becomes a no-op. That preserves "free-tier always weak"
+// regardless of what the operator configured.
+func IsWeakModelInList(model string, weakList []string) bool {
 	if strings.HasSuffix(model, ":free") || strings.Contains(model, ":free/") {
 		return true
 	}
-	switch model {
-	case "qwen3.5:latest", "qwen3:latest":
-		return true
+	for _, w := range weakList {
+		if w == model {
+			return true
+		}
 	}
 	return false
 }
@ -75,6 +93,10 @@ type DowngradeInput struct {
 	Model             string
 	ForcedMode        bool
 	ForceFullOverride bool
+	// WeakModels overrides the default weak-model list (DefaultWeakModels)
+	// when non-nil. Typically populated from cfg.Models.WeakModels at
+	// startup. nil = use DefaultWeakModels (backward compat).
+	WeakModels []string
 }

 // DowngradeDecision is the output. DowngradedFrom is non-empty
@ -106,7 +128,11 @@ func MaybeDowngrade(in DowngradeInput) DowngradeDecision {
 		out.Reason = EnvForceFullEnrichment + " bypass"
 		return out
 	}
-	if IsWeakModel(in.Model) {
+	weakList := in.WeakModels
+	if weakList == nil {
+		weakList = DefaultWeakModels
+	}
+	if IsWeakModelInList(in.Model, weakList) {
 		out.Reason = "weak model; matrix composition demonstrably helped (2026-04-26 free-tier bake-off)"
 		return out
 	}
--- a/internal/matrix/downgrade_test.go
+++ b/internal/matrix/downgrade_test.go
@ -79,6 +79,68 @@ func TestMaybeDowngrade_TruthTable(t *testing.T) {
 	}
 }

+// TestIsWeakModelInList covers the config-driven variant added in
+// Phase 2 — callers pass cfg.Models.WeakModels instead of relying on
+// the package-level DefaultWeakModels.
+func TestIsWeakModelInList(t *testing.T) {
+	custom := []string{"my-custom-7b:latest", "qwen3:latest"}
+	cases := []struct {
+		model string
+		list  []string
+		weak  bool
+	}{
+		// Custom literal hits (config-driven)
+		{"my-custom-7b:latest", custom, true},
+		{"qwen3:latest", custom, true},
+		// qwen3.5:latest is in DefaultWeakModels but NOT in custom list →
+		// not weak when caller supplies custom list (intentional — operator
+		// owns the list).
+		{"qwen3.5:latest", custom, false},
+		// :free/free-tier rules apply regardless of list
+		{"openai/gpt-4o:free", custom, true},
+		{"openrouter:free/anthropic/claude", custom, true},
+		// Empty list → only rule-based checks apply
+		{"qwen3.5:latest", []string{}, false},
+		{"openai/gpt-4o:free", []string{}, true},
+		// nil list behaves like empty here — the DefaultWeakModels fallback lives in MaybeDowngrade, not in IsWeakModelInList
+		{"qwen3.5:latest", nil, false},
+	}
+	for _, c := range cases {
+		got := IsWeakModelInList(c.model, c.list)
+		if got != c.weak {
+			t.Errorf("IsWeakModelInList(%q, %v): want %v, got %v", c.model, c.list, c.weak, got)
+		}
+	}
+}
+
+// TestMaybeDowngrade_WithConfigList verifies the DowngradeInput.WeakModels
+// field overrides DefaultWeakModels when populated, so callers reading
+// cfg.Models.WeakModels at startup get the configured behavior end-to-end.
+func TestMaybeDowngrade_WithConfigList(t *testing.T) {
+	// Config says "only my-custom is weak". qwen3.5:latest is now strong
+	// (operator opted out of the local-hot-path treatment).
+	configList := []string{"my-custom:latest"}
+	in := DowngradeInput{
+		Mode:       ModeCodeReviewLakehouse,
+		Model:      "qwen3.5:latest",
+		WeakModels: configList,
+	}
+	got := MaybeDowngrade(in)
+	if got.Mode != ModeCodeReviewIsolation {
+		t.Errorf("qwen3.5:latest with config list excluding it should downgrade; got Mode=%q", got.Mode)
+	}
+	if got.DowngradedFrom != ModeCodeReviewLakehouse {
+		t.Errorf("DowngradedFrom should be lakehouse; got %q", got.DowngradedFrom)
+	}
+
+	// Same input but weak by config — gate must NOT fire.
+	in.Model = "my-custom:latest"
+	got = MaybeDowngrade(in)
+	if got.Mode != ModeCodeReviewLakehouse {
+		t.Errorf("my-custom:latest is in config list — should stay lakehouse; got %q", got.Mode)
+	}
+}
+
 // TestMaybeDowngrade_ForcedTrumpsOthers verifies precedence: when
 // multiple bypass conditions hit, ForcedMode wins (explicit caller
 // intent always overrides). Caught a subtle ordering bug in the
--- a/internal/workflow/modes.go
+++ b/internal/workflow/modes.go
@ -96,6 +96,40 @@ func MatrixDowngrade(_ Context, input map[string]any) (map[string]any, error) {
 	}, nil
 }

+// MatrixDowngradeWithWeakList is the config-driven variant of
+// MatrixDowngrade — callers pass cfg.Models.WeakModels at startup
+// and the closure includes that list in every DowngradeInput.
+// A nil list falls back to matrix.DefaultWeakModels (matching the plain
+// MatrixDowngrade behavior); an empty non-nil list does not.
+func MatrixDowngradeWithWeakList(weakModels []string) Mode {
+	return func(_ Context, input map[string]any) (map[string]any, error) {
+		var req struct {
+			Mode              string `json:"mode"`
+			Model             string `json:"model"`
+			ForcedMode        bool   `json:"forced_mode"`
+			ForceFullOverride bool   `json:"force_full_override"`
+		}
+		if err := remarshalInput(input, &req); err != nil {
+			return nil, fmt.Errorf("matrix.downgrade: %w", err)
+		}
+		if req.Mode == "" || req.Model == "" {
+			return nil, fmt.Errorf("matrix.downgrade: mode and model are required")
+		}
+		dec := matrix.MaybeDowngrade(matrix.DowngradeInput{
+			Mode:              req.Mode,
+			Model:             req.Model,
+			ForcedMode:        req.ForcedMode,
+			ForceFullOverride: req.ForceFullOverride,
+			WeakModels:        weakModels,
+		})
+		return map[string]any{
+			"mode":            dec.Mode,
+			"downgraded_from": dec.DowngradedFrom,
+			"reason":          dec.Reason,
+		}, nil
+	}
+}
+
 // DistillationScore wraps distillation.ScoreRecord — re-runs the
 // scorer over a single EvidenceRecord. Useful as a workflow node
 // that grades a freshly-produced evidence row.