phase 2: matrix.downgrade reads WeakModels from config
migrate the strong-model auto-downgrade gate from a hardcoded weak list to cfg.Models.WeakModels. backward compatible: existing API preserved, callers that don't migrate keep using DefaultWeakModels.

changes:
- internal/matrix/downgrade.go: split IsWeakModel into rule-based base (`:free` suffix/infix) + literal-list lookup. New IsWeakModelInList(model, list) takes the config-supplied list. DowngradeInput grows a WeakModels field; nil falls back to DefaultWeakModels (preserves pre-phase-2 behavior).
- internal/workflow/modes.go: add MatrixDowngradeWithWeakList(list) factory mirroring MatrixSearch's pattern. Plain MatrixDowngrade kept for backward compat.
- cmd/matrixd/main.go: handlers struct holds weakModels populated from cfg.Models.WeakModels at startup; handleDowngrade threads it into every DowngradeInput.
- cmd/observerd/main.go: registerBuiltinModes accepts weakModels and uses the factory variant. observerd reads cfg.Models.WeakModels in main().

end-to-end verified: downgrade + matrix + observer + workflow smokes all pass. Existing TestMaybeDowngrade_TruthTable + TestIsWeakModel unchanged (backward compat).

Two new tests cover the config path:
- TestIsWeakModelInList — covers rule + literal + empty + nil
- TestMaybeDowngrade_WithConfigList — verifies cfg list overrides default

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ec1d031996
commit
622e124b8f
@ -52,7 +52,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
retriever := matrix.New(cfg.Matrixd.EmbeddURL, cfg.Matrixd.VectordURL)
|
retriever := matrix.New(cfg.Matrixd.EmbeddURL, cfg.Matrixd.VectordURL)
|
||||||
h := &handlers{r: retriever}
|
h := &handlers{r: retriever, weakModels: cfg.Models.WeakModels}
|
||||||
|
|
||||||
if err := shared.Run("matrixd", cfg.Matrixd.Bind, h.register, cfg.Auth); err != nil {
|
if err := shared.Run("matrixd", cfg.Matrixd.Bind, h.register, cfg.Auth); err != nil {
|
||||||
slog.Error("server", "err", err)
|
slog.Error("server", "err", err)
|
||||||
@ -62,6 +62,11 @@ func main() {
|
|||||||
|
|
||||||
type handlers struct {
|
type handlers struct {
|
||||||
r *matrix.Retriever
|
r *matrix.Retriever
|
||||||
|
// weakModels comes from cfg.Models.WeakModels at startup. Threaded
|
||||||
|
// into every DowngradeInput so the gate uses the configured list
|
||||||
|
// instead of matrix.DefaultWeakModels. nil (not empty) falls back to
|
||||||
|
// the package default — matches pre-Phase 2 behavior.
|
||||||
|
weakModels []string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handlers) register(r chi.Router) {
|
func (h *handlers) register(r chi.Router) {
|
||||||
@ -240,6 +245,7 @@ func (h *handlers) handleDowngrade(w http.ResponseWriter, r *http.Request) {
|
|||||||
// without env pollution.
|
// without env pollution.
|
||||||
in.ForceFullOverride = *req.ForceFullOverride
|
in.ForceFullOverride = *req.ForceFullOverride
|
||||||
}
|
}
|
||||||
|
in.WeakModels = h.weakModels
|
||||||
writeJSON(w, http.StatusOK, matrix.MaybeDowngrade(in))
|
writeJSON(w, http.StatusOK, matrix.MaybeDowngrade(in))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -74,7 +74,7 @@ func main() {
|
|||||||
// to gateway's matrixd_url so a single-toml deploy works without
|
// to gateway's matrixd_url so a single-toml deploy works without
|
||||||
// duplicating the address.
|
// duplicating the address.
|
||||||
matrixdURL := cfg.Gateway.MatrixdURL
|
matrixdURL := cfg.Gateway.MatrixdURL
|
||||||
registerBuiltinModes(runner, matrixdURL)
|
registerBuiltinModes(runner, matrixdURL, cfg.Models.WeakModels)
|
||||||
|
|
||||||
h := &handlers{store: store, runner: runner}
|
h := &handlers{store: store, runner: runner}
|
||||||
if err := shared.Run("observerd", cfg.Observerd.Bind, h.register, cfg.Auth); err != nil {
|
if err := shared.Run("observerd", cfg.Observerd.Bind, h.register, cfg.Auth); err != nil {
|
||||||
@ -204,7 +204,7 @@ func summarizeOutput(output map[string]any) string {
|
|||||||
// - playbook.record (HTTP to matrixd)
|
// - playbook.record (HTTP to matrixd)
|
||||||
// - playbook.lookup (HTTP to matrixd)
|
// - playbook.lookup (HTTP to matrixd)
|
||||||
// - llm.chat (HTTP to gateway /v1/chat)
|
// - llm.chat (HTTP to gateway /v1/chat)
|
||||||
func registerBuiltinModes(r *workflow.Runner, matrixdURL string) {
|
func registerBuiltinModes(r *workflow.Runner, matrixdURL string, weakModels []string) {
|
||||||
// Fixture modes for runner mechanics smokes.
|
// Fixture modes for runner mechanics smokes.
|
||||||
r.RegisterMode("fixture.echo", func(_ workflow.Context, input map[string]any) (map[string]any, error) {
|
r.RegisterMode("fixture.echo", func(_ workflow.Context, input map[string]any) (map[string]any, error) {
|
||||||
out := make(map[string]any, len(input))
|
out := make(map[string]any, len(input))
|
||||||
@ -220,7 +220,10 @@ func registerBuiltinModes(r *workflow.Runner, matrixdURL string) {
|
|||||||
|
|
||||||
// Real modes — pure-function wrappers (no I/O).
|
// Real modes — pure-function wrappers (no I/O).
|
||||||
r.RegisterMode("matrix.relevance", workflow.MatrixRelevance)
|
r.RegisterMode("matrix.relevance", workflow.MatrixRelevance)
|
||||||
r.RegisterMode("matrix.downgrade", workflow.MatrixDowngrade)
|
// matrix.downgrade reads weakModels from config — Phase 2.
|
||||||
|
// nil (not empty) falls back to matrix.DefaultWeakModels per the
|
||||||
|
// MatrixDowngradeWithWeakList factory contract.
|
||||||
|
r.RegisterMode("matrix.downgrade", workflow.MatrixDowngradeWithWeakList(weakModels))
|
||||||
r.RegisterMode("distillation.score", workflow.DistillationScore)
|
r.RegisterMode("distillation.score", workflow.DistillationScore)
|
||||||
r.RegisterMode("drift.scorer", workflow.DriftScorer)
|
r.RegisterMode("drift.scorer", workflow.DriftScorer)
|
||||||
|
|
||||||
|
|||||||
@ -37,6 +37,12 @@ const (
|
|||||||
// diagnostic runs ("LH_FORCE_FULL_ENRICHMENT=1" or "true").
|
// diagnostic runs ("LH_FORCE_FULL_ENRICHMENT=1" or "true").
|
||||||
const EnvForceFullEnrichment = "LH_FORCE_FULL_ENRICHMENT"
|
const EnvForceFullEnrichment = "LH_FORCE_FULL_ENRICHMENT"
|
||||||
|
|
||||||
|
// DefaultWeakModels is the fallback weak-model list used when no
|
||||||
|
// config-driven list is provided. Matches the pre-config hardcoded
|
||||||
|
// switch — preserved for backward compatibility with callers that
|
||||||
|
// haven't migrated to the [models] tier config (Phase 1).
|
||||||
|
var DefaultWeakModels = []string{"qwen3.5:latest", "qwen3:latest"}
|
||||||
|
|
||||||
// IsWeakModel returns true for models matrix-corpus composition
|
// IsWeakModel returns true for models matrix-corpus composition
|
||||||
// demonstrably helped during the 2026-04-26 pass5 bake-off. Strong
|
// demonstrably helped during the 2026-04-26 pass5 bake-off. Strong
|
||||||
// models (default) get matrix dropped to avoid the "composed lost
|
// models (default) get matrix dropped to avoid the "composed lost
|
||||||
@ -45,18 +51,30 @@ const EnvForceFullEnrichment = "LH_FORCE_FULL_ENRICHMENT"
|
|||||||
// Weak signals:
|
// Weak signals:
|
||||||
// - `:free` suffix (OpenRouter free tier, e.g. `gpt-oss-120b:free`)
|
// - `:free` suffix (OpenRouter free tier, e.g. `gpt-oss-120b:free`)
|
||||||
// - `:free/` infix (handles routing-prefixed names like `or:free/x`)
|
// - `:free/` infix (handles routing-prefixed names like `or:free/x`)
|
||||||
// - `qwen3.5:latest` / `qwen3:latest` — local last-resort rung
|
// - DefaultWeakModels list — local-hot-path eligible.
|
||||||
//
|
//
|
||||||
// Add new weak models by extending this function alongside variance
|
// For config-driven lists, use IsWeakModelInList.
|
||||||
// data that justifies it.
|
|
||||||
func IsWeakModel(model string) bool {
|
func IsWeakModel(model string) bool {
|
||||||
|
return IsWeakModelInList(model, DefaultWeakModels)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsWeakModelInList is the config-aware variant. Callers pass the
|
||||||
|
// configured weak-model list (typically cfg.Models.WeakModels) and
|
||||||
|
// the function applies the same rule-based checks (`:free` suffix /
|
||||||
|
// infix) plus the literal list match.
|
||||||
|
//
|
||||||
|
// Empty list still applies the rule-based checks — only the literal
|
||||||
|
// match becomes a no-op. That preserves "free-tier always weak"
|
||||||
|
// regardless of what the operator configured.
|
||||||
|
func IsWeakModelInList(model string, weakList []string) bool {
|
||||||
if strings.HasSuffix(model, ":free") || strings.Contains(model, ":free/") {
|
if strings.HasSuffix(model, ":free") || strings.Contains(model, ":free/") {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
switch model {
|
for _, w := range weakList {
|
||||||
case "qwen3.5:latest", "qwen3:latest":
|
if w == model {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -75,6 +93,10 @@ type DowngradeInput struct {
|
|||||||
Model string
|
Model string
|
||||||
ForcedMode bool
|
ForcedMode bool
|
||||||
ForceFullOverride bool
|
ForceFullOverride bool
|
||||||
|
// WeakModels overrides the default weak-model list (DefaultWeakModels)
|
||||||
|
// when non-nil. Typically populated from cfg.Models.WeakModels at
|
||||||
|
// startup. nil = use DefaultWeakModels (backward compat).
|
||||||
|
WeakModels []string
|
||||||
}
|
}
|
||||||
|
|
||||||
// DowngradeDecision is the output. DowngradedFrom is non-empty
|
// DowngradeDecision is the output. DowngradedFrom is non-empty
|
||||||
@ -106,7 +128,11 @@ func MaybeDowngrade(in DowngradeInput) DowngradeDecision {
|
|||||||
out.Reason = EnvForceFullEnrichment + " bypass"
|
out.Reason = EnvForceFullEnrichment + " bypass"
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
if IsWeakModel(in.Model) {
|
weakList := in.WeakModels
|
||||||
|
if weakList == nil {
|
||||||
|
weakList = DefaultWeakModels
|
||||||
|
}
|
||||||
|
if IsWeakModelInList(in.Model, weakList) {
|
||||||
out.Reason = "weak model; matrix composition demonstrably helped (2026-04-26 free-tier bake-off)"
|
out.Reason = "weak model; matrix composition demonstrably helped (2026-04-26 free-tier bake-off)"
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|||||||
@ -79,6 +79,68 @@ func TestMaybeDowngrade_TruthTable(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestIsWeakModelInList covers the config-driven variant added in
|
||||||
|
// Phase 2 — callers pass cfg.Models.WeakModels instead of relying on
|
||||||
|
// the package-level DefaultWeakModels.
|
||||||
|
func TestIsWeakModelInList(t *testing.T) {
|
||||||
|
custom := []string{"my-custom-7b:latest", "qwen3:latest"}
|
||||||
|
cases := []struct {
|
||||||
|
model string
|
||||||
|
list []string
|
||||||
|
weak bool
|
||||||
|
}{
|
||||||
|
// Custom literal hits (config-driven)
|
||||||
|
{"my-custom-7b:latest", custom, true},
|
||||||
|
{"qwen3:latest", custom, true},
|
||||||
|
// qwen3.5:latest is in DefaultWeakModels but NOT in custom list →
|
||||||
|
// not weak when caller supplies custom list (intentional — operator
|
||||||
|
// owns the list).
|
||||||
|
{"qwen3.5:latest", custom, false},
|
||||||
|
// :free/free-tier rules apply regardless of list
|
||||||
|
{"openai/gpt-4o:free", custom, true},
|
||||||
|
{"openrouter:free/anthropic/claude", custom, true},
|
||||||
|
// Empty list → only rule-based checks apply
|
||||||
|
{"qwen3.5:latest", []string{}, false},
|
||||||
|
{"openai/gpt-4o:free", []string{}, true},
|
||||||
|
// nil list behaves like empty here (the DefaultWeakModels fallback lives in MaybeDowngrade, not in this function)
|
||||||
|
{"qwen3.5:latest", nil, false},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
got := IsWeakModelInList(c.model, c.list)
|
||||||
|
if got != c.weak {
|
||||||
|
t.Errorf("IsWeakModelInList(%q, %v): want %v, got %v", c.model, c.list, c.weak, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMaybeDowngrade_WithConfigList verifies the DowngradeInput.WeakModels
|
||||||
|
// field overrides DefaultWeakModels when populated, so callers reading
|
||||||
|
// cfg.Models.WeakModels at startup get the configured behavior end-to-end.
|
||||||
|
func TestMaybeDowngrade_WithConfigList(t *testing.T) {
|
||||||
|
// Config says "only my-custom is weak". qwen3.5:latest is now strong
|
||||||
|
// (operator opted out of the local-hot-path treatment).
|
||||||
|
configList := []string{"my-custom:latest"}
|
||||||
|
in := DowngradeInput{
|
||||||
|
Mode: ModeCodeReviewLakehouse,
|
||||||
|
Model: "qwen3.5:latest",
|
||||||
|
WeakModels: configList,
|
||||||
|
}
|
||||||
|
got := MaybeDowngrade(in)
|
||||||
|
if got.Mode != ModeCodeReviewIsolation {
|
||||||
|
t.Errorf("qwen3.5:latest with config list excluding it should downgrade; got Mode=%q", got.Mode)
|
||||||
|
}
|
||||||
|
if got.DowngradedFrom != ModeCodeReviewLakehouse {
|
||||||
|
t.Errorf("DowngradedFrom should be lakehouse; got %q", got.DowngradedFrom)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same input but weak by config — gate must NOT fire.
|
||||||
|
in.Model = "my-custom:latest"
|
||||||
|
got = MaybeDowngrade(in)
|
||||||
|
if got.Mode != ModeCodeReviewLakehouse {
|
||||||
|
t.Errorf("my-custom:latest is in config list — should stay lakehouse; got %q", got.Mode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestMaybeDowngrade_ForcedTrumpsOthers verifies precedence: when
|
// TestMaybeDowngrade_ForcedTrumpsOthers verifies precedence: when
|
||||||
// multiple bypass conditions hit, ForcedMode wins (explicit caller
|
// multiple bypass conditions hit, ForcedMode wins (explicit caller
|
||||||
// intent always overrides). Caught a subtle ordering bug in the
|
// intent always overrides). Caught a subtle ordering bug in the
|
||||||
|
|||||||
@ -96,6 +96,40 @@ func MatrixDowngrade(_ Context, input map[string]any) (map[string]any, error) {
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatrixDowngradeWithWeakList is the config-driven variant of
|
||||||
|
// MatrixDowngrade — callers pass cfg.Models.WeakModels at startup
|
||||||
|
// and the closure includes that list in every DowngradeInput.
|
||||||
|
// A nil list (not an empty one) falls back to matrix.DefaultWeakModels (matching the
|
||||||
|
// plain MatrixDowngrade behavior).
|
||||||
|
func MatrixDowngradeWithWeakList(weakModels []string) Mode {
|
||||||
|
return func(_ Context, input map[string]any) (map[string]any, error) {
|
||||||
|
var req struct {
|
||||||
|
Mode string `json:"mode"`
|
||||||
|
Model string `json:"model"`
|
||||||
|
ForcedMode bool `json:"forced_mode"`
|
||||||
|
ForceFullOverride bool `json:"force_full_override"`
|
||||||
|
}
|
||||||
|
if err := remarshalInput(input, &req); err != nil {
|
||||||
|
return nil, fmt.Errorf("matrix.downgrade: %w", err)
|
||||||
|
}
|
||||||
|
if req.Mode == "" || req.Model == "" {
|
||||||
|
return nil, fmt.Errorf("matrix.downgrade: mode and model are required")
|
||||||
|
}
|
||||||
|
dec := matrix.MaybeDowngrade(matrix.DowngradeInput{
|
||||||
|
Mode: req.Mode,
|
||||||
|
Model: req.Model,
|
||||||
|
ForcedMode: req.ForcedMode,
|
||||||
|
ForceFullOverride: req.ForceFullOverride,
|
||||||
|
WeakModels: weakModels,
|
||||||
|
})
|
||||||
|
return map[string]any{
|
||||||
|
"mode": dec.Mode,
|
||||||
|
"downgraded_from": dec.DowngradedFrom,
|
||||||
|
"reason": dec.Reason,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// DistillationScore wraps distillation.ScoreRecord — re-runs the
|
// DistillationScore wraps distillation.ScoreRecord — re-runs the
|
||||||
// scorer over a single EvidenceRecord. Useful as a workflow node
|
// scorer over a single EvidenceRecord. Useful as a workflow node
|
||||||
// that grades a freshly-produced evidence row.
|
// that grades a freshly-produced evidence row.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user