package matrix // Strong-model auto-downgrade gate. Port of mode.rs::execute's // downgrade block (Rust system, 2026-04-26 pass5). // // What it does: if the caller resolves `codereview_lakehouse` against // a strong model and didn't force the mode, flip to // `codereview_isolation` so we don't pollute the prompt with matrix // chunks the model demonstrably does better without. // // Why: pass5 variance test on x-ai/grok-4.1-fast — composing matrix // corpora into codereview_lakehouse LOST 5/5 head-to-head reps // against matrix-free codereview_isolation, p=0.031. Strong models // have enough native capacity that bug fingerprints + adversarial // framing + file content carry them; matrix chunks displace // depth-of-analysis. // // Defaults: assume "strong" (downgrade matrix off). The explicit // IsWeakModel predicate keeps the weak-list small — anything // matching `:free` (OpenRouter free tier) or the local last-resort // rungs (qwen3.5/qwen3) stays on the full lakehouse path where // matrix demonstrably helped during the 2026-04-26 free-tier // bake-off. import ( "os" "strings" ) // Mode constants — exported so callers don't string-literal them. const ( ModeCodeReviewLakehouse = "codereview_lakehouse" ModeCodeReviewIsolation = "codereview_isolation" ) // EnvForceFullEnrichment is the env var that bypasses the gate for // diagnostic runs ("LH_FORCE_FULL_ENRICHMENT=1" or "true"). const EnvForceFullEnrichment = "LH_FORCE_FULL_ENRICHMENT" // DefaultWeakModels is the fallback weak-model list used when no // config-driven list is provided. Matches the pre-config hardcoded // switch — preserved for backward compatibility with callers that // haven't migrated to the [models] tier config (Phase 1). var DefaultWeakModels = []string{"qwen3.5:latest", "qwen3:latest"} // IsWeakModel returns true for models matrix-corpus composition // demonstrably helped during the 2026-04-26 pass5 bake-off. Strong // models (default) get matrix dropped to avoid the "composed lost // 5/5 vs isolation" effect. // // Weak signals: // - `:free` suffix (OpenRouter free tier, e.g. `gpt-oss-120b:free`) // - `:free/` infix (handles routing-prefixed names like `or:free/x`) // - DefaultWeakModels list — local-hot-path eligible. // // For config-driven lists, use IsWeakModelInList. func IsWeakModel(model string) bool { return IsWeakModelInList(model, DefaultWeakModels) } // IsWeakModelInList is the config-aware variant. Callers pass the // configured weak-model list (typically cfg.Models.WeakModels) and // the function applies the same rule-based checks (`:free` suffix / // infix) plus the literal list match. // // Empty list still applies the rule-based checks — only the literal // match becomes a no-op. That preserves "free-tier always weak" // regardless of what the operator configured. func IsWeakModelInList(model string, weakList []string) bool { if strings.HasSuffix(model, ":free") || strings.Contains(model, ":free/") { return true } for _, w := range weakList { if w == model { return true } } return false } // DowngradeInput is what MaybeDowngrade evaluates. // // ForcedMode: caller explicitly set their mode (mirrors Rust's // req.force_mode.is_some()) — treated as opt-in to the chosen mode, // skips the downgrade. Experiments need exact-mode control. // // ForceFullOverride: the LH_FORCE_FULL_ENRICHMENT escape hatch — // usually populated from the env var via NewDowngradeInputFromEnv, // but the field is explicit so callers can pass it from a config or // test deterministically. type DowngradeInput struct { Mode string Model string ForcedMode bool ForceFullOverride bool // WeakModels overrides the default weak-model list (DefaultWeakModels) // when non-nil. Typically populated from cfg.Models.WeakModels at // startup. nil = use DefaultWeakModels (backward compat). WeakModels []string } // DowngradeDecision is the output. DowngradedFrom is non-empty // only when a downgrade fired — callers should record it for audit // (matches the Rust EnrichmentSources.downgraded_from field). // // Reason is a short human-readable string for logs/responses; // useful for debugging "why did/didn't the gate fire." type DowngradeDecision struct { Mode string `json:"mode"` DowngradedFrom string `json:"downgraded_from,omitempty"` Reason string `json:"reason"` } // MaybeDowngrade applies the strong-model auto-downgrade gate. // Pure function; no env reads. For env-driven callers see // NewDowngradeInputFromEnv. func MaybeDowngrade(in DowngradeInput) DowngradeDecision { out := DowngradeDecision{Mode: in.Mode} if in.Mode != ModeCodeReviewLakehouse { out.Reason = "mode is not " + ModeCodeReviewLakehouse + "; gate not applicable" return out } if in.ForcedMode { out.Reason = "caller forced mode; skip downgrade" return out } if in.ForceFullOverride { out.Reason = EnvForceFullEnrichment + " bypass" return out } weakList := in.WeakModels if weakList == nil { weakList = DefaultWeakModels } if IsWeakModelInList(in.Model, weakList) { out.Reason = "weak model; matrix composition demonstrably helped (2026-04-26 free-tier bake-off)" return out } // Downgrade fires. out.Mode = ModeCodeReviewIsolation out.DowngradedFrom = ModeCodeReviewLakehouse out.Reason = "strong model; matrix composes anti-additively (pass5: composed lost 5/5 vs isolation on grok-4.1-fast, p=0.031)" return out } // NewDowngradeInputFromEnv is a convenience that reads // LH_FORCE_FULL_ENRICHMENT from the process environment and returns // a populated DowngradeInput. Most production callers want this; // tests should construct DowngradeInput directly to avoid env // pollution. func NewDowngradeInputFromEnv(mode, model string, forcedMode bool) DowngradeInput { return DowngradeInput{ Mode: mode, Model: model, ForcedMode: forcedMode, ForceFullOverride: envForceFullEnrichment(), } } func envForceFullEnrichment() bool { v := strings.ToLower(strings.TrimSpace(os.Getenv(EnvForceFullEnrichment))) return v == "1" || v == "true" }