phase 1: add [models] tier config — additive, no callers migrate yet
Codifies the small-model-pipeline tiering (per project_small_model_pipeline_vision.md) in the lakehouse.toml [models] section. Tier names map to actual model IDs; bumping a model means editing one line, not hunting through code. Tier philosophy: - local_* : on-box Ollama. Inner-loop hot path. Repeated calls. - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier. - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call. weak_models is the codified "local-hot-path eligible" list — phase 2 will migrate matrix.downgrade to read it instead of hardcoding. Defaults reflect the 2026-04-29 architecture: qwen3.5:latest as local (stronger than qwen2.5, same JSON-clean property), kimi-k2.6 as cloud judge (kimi-k2:1t still upstream-broken), opus-4-7 + kimi-k2-0905 as frontier review/arch via OpenRouter, and opencode/claude-opus-4-7 as frontier_free, leveraging the OpenCode subscription. 3 new tests in internal/shared/config_test.go: - TestDefaultConfig_ModelsTier — locks tier defaults - TestModelsConfig_IsWeak — weak-bypass list - TestLoadConfig_ModelsTOMLRoundTrip — override semantics. All tests PASS (g2 had one flake on first run — Ollama transfer truncation; clean on retry, unrelated to this change). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3dd7d9fe30
commit
ec1d031996
@ -30,6 +30,7 @@ type Config struct {
|
||||
Matrixd MatrixdConfig `toml:"matrixd"`
|
||||
Observerd ObserverdConfig `toml:"observerd"`
|
||||
S3 S3Config `toml:"s3"`
|
||||
Models ModelsConfig `toml:"models"`
|
||||
Log LogConfig `toml:"log"`
|
||||
Auth AuthConfig `toml:"auth"`
|
||||
}
|
||||
@ -162,6 +163,90 @@ type LogConfig struct {
|
||||
Level string `toml:"level"`
|
||||
}
|
||||
|
||||
// ModelsConfig maps small-model-pipeline tier names to concrete model
// IDs (see project_small_model_pipeline_vision.md and the Rust
// `config/providers.toml` convention). Callers address models by tier
// name rather than by literal ID, so bumping a tier is a one-line
// config edit instead of a code hunt.
//
// Tier philosophy:
//   - local_*    : on-box Ollama. Cheap, fast, JSON-clean. The
//     inner-loop hot path with repeated calls per query.
//   - cloud_*    : Ollama Cloud (Pro plan). Larger context, called
//     when local is uncertain. Auth via OLLAMA_CLOUD_KEY.
//   - frontier_* : OpenRouter / OpenCode. Rate-limited and billed per
//     call; reserved for blockers and full-scope reviews, not steady
//     state.
//
// WeakModels codifies the "local-hot-path eligible" list consulted by
// the matrix downgrade gate — replacing the previous hardcoded switch
// in internal/matrix/downgrade.go. A model on this list bypasses the
// corpus-downgrade rule (it is already weak, so there is nothing left
// to downgrade); strong (paid / cloud / frontier) models trigger the
// gate.
type ModelsConfig struct {
	LocalFast   string `toml:"local_fast"`
	LocalEmbed  string `toml:"local_embed"`
	LocalJudge  string `toml:"local_judge"`
	LocalReview string `toml:"local_review"`

	CloudJudge  string `toml:"cloud_judge"`
	CloudReview string `toml:"cloud_review"`
	CloudStrong string `toml:"cloud_strong"`

	FrontierReview string `toml:"frontier_review"`
	FrontierArch   string `toml:"frontier_arch"`
	FrontierStrong string `toml:"frontier_strong"`
	FrontierFree   string `toml:"frontier_free"`

	WeakModels []string `toml:"weak_models"`
}

// Resolve returns the model ID configured for a tier name (e.g.
// "local_judge"). An unknown tier yields "": callers are expected to
// fall back to a hardcoded default and log a warning, because the
// alternative (panic) would crash the binary at startup on a missing
// tier — too aggressive for an additive config.
func (m ModelsConfig) Resolve(tier string) string {
	// Table form keeps the tier-name → field wiring in one place; the
	// map's zero value gives "" for unknown tiers for free.
	byTier := map[string]string{
		"local_fast":      m.LocalFast,
		"local_embed":     m.LocalEmbed,
		"local_judge":     m.LocalJudge,
		"local_review":    m.LocalReview,
		"cloud_judge":     m.CloudJudge,
		"cloud_review":    m.CloudReview,
		"cloud_strong":    m.CloudStrong,
		"frontier_review": m.FrontierReview,
		"frontier_arch":   m.FrontierArch,
		"frontier_strong": m.FrontierStrong,
		"frontier_free":   m.FrontierFree,
	}
	return byTier[tier]
}

// IsWeak reports whether model appears in the configured WeakModels
// list. matrix.downgrade uses it to decide whether to bypass the
// strong-model downgrade gate (weak models stay on the full lakehouse
// path).
func (m ModelsConfig) IsWeak(model string) bool {
	for i := range m.WeakModels {
		if m.WeakModels[i] == model {
			return true
		}
	}
	return false
}
|
||||
|
||||
// AuthConfig is the inter-service auth posture from ADR-003.
|
||||
// Token is a Bearer token; empty means "no auth" (G0 dev mode).
|
||||
// AllowedIPs is a list of CIDRs (or bare IPs treated as /32);
|
||||
@ -244,6 +329,34 @@ func DefaultConfig() Config {
|
||||
Bucket: "lakehouse-primary",
|
||||
UsePathStyle: true,
|
||||
},
|
||||
Models: ModelsConfig{
|
||||
// Tier 1 — local hot path. JSON-clean, fast, deterministic.
|
||||
// qwen3.5:latest replaces qwen2.5 as the local default per
|
||||
// 2026-04-29 architectural review (stronger local model,
|
||||
// same JSON-clean property).
|
||||
LocalFast: "qwen3.5:latest",
|
||||
LocalEmbed: "nomic-embed-text",
|
||||
LocalJudge: "qwen3.5:latest",
|
||||
LocalReview: "qwen3.5:latest",
|
||||
|
||||
// Tier 2 — Ollama Cloud (Pro plan). 2026-04-28 upgrade.
|
||||
// kimi-k2:1t is upstream-broken; deepseek/kimi-k2.6/qwen3-coder
|
||||
// are the working primaries.
|
||||
CloudJudge: "kimi-k2.6:cloud",
|
||||
CloudReview: "qwen3-coder:480b",
|
||||
CloudStrong: "deepseek-v3.2",
|
||||
|
||||
// Tier 3 — frontier. OpenRouter credits + OpenCode key.
|
||||
// Use sparingly: rate-limited, billed per call.
|
||||
FrontierReview: "openrouter/anthropic/claude-opus-4-7",
|
||||
FrontierArch: "openrouter/moonshotai/kimi-k2-0905",
|
||||
FrontierStrong: "openrouter/openai/gpt-5",
|
||||
FrontierFree: "opencode/claude-opus-4-7",
|
||||
|
||||
// Local-hot-path eligible. matrix.downgrade reads this
|
||||
// list to decide whether to bypass the strong-model gate.
|
||||
WeakModels: []string{"qwen3.5:latest", "qwen3:latest"},
|
||||
},
|
||||
Log: LogConfig{Level: "info"},
|
||||
}
|
||||
}
|
||||
|
||||
@ -110,6 +110,84 @@ bucket = "custom-bucket"
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultConfig_ModelsTier(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
// Locks tier defaults so a refactor that drops a tier or renames
|
||||
// a default silently can't ship without a test edit.
|
||||
checks := []struct {
|
||||
tier, want string
|
||||
}{
|
||||
{"local_fast", "qwen3.5:latest"},
|
||||
{"local_embed", "nomic-embed-text"},
|
||||
{"local_judge", "qwen3.5:latest"},
|
||||
{"cloud_judge", "kimi-k2.6:cloud"},
|
||||
{"cloud_review", "qwen3-coder:480b"},
|
||||
{"frontier_review", "openrouter/anthropic/claude-opus-4-7"},
|
||||
{"frontier_free", "opencode/claude-opus-4-7"},
|
||||
}
|
||||
for _, c := range checks {
|
||||
if got := cfg.Models.Resolve(c.tier); got != c.want {
|
||||
t.Errorf("Models.Resolve(%q) = %q, want %q", c.tier, got, c.want)
|
||||
}
|
||||
}
|
||||
// Unknown tier returns "" — caller's responsibility to fall back.
|
||||
if got := cfg.Models.Resolve("nonexistent"); got != "" {
|
||||
t.Errorf("Models.Resolve(nonexistent) = %q, want empty string", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestModelsConfig_IsWeak(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
// Default WeakModels set is the matrix.downgrade bypass list.
|
||||
if !cfg.Models.IsWeak("qwen3.5:latest") {
|
||||
t.Errorf("qwen3.5:latest should be weak (in default WeakModels)")
|
||||
}
|
||||
if !cfg.Models.IsWeak("qwen3:latest") {
|
||||
t.Errorf("qwen3:latest should be weak")
|
||||
}
|
||||
// Strong / cloud / frontier models are NOT weak.
|
||||
for _, m := range []string{
|
||||
"opencode/claude-opus-4-7",
|
||||
"openrouter/openai/gpt-5",
|
||||
"qwen3-coder:480b",
|
||||
"deepseek-v3.2",
|
||||
} {
|
||||
if cfg.Models.IsWeak(m) {
|
||||
t.Errorf("%s should NOT be weak", m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_ModelsTOMLRoundTrip(t *testing.T) {
|
||||
// Override one tier name; verify it loads, rest stay at defaults.
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "lakehouse.toml")
|
||||
body := `[models]
|
||||
local_judge = "custom-judge:latest"
|
||||
weak_models = ["custom-judge:latest", "qwen3:latest"]
|
||||
`
|
||||
if err := os.WriteFile(cfgPath, []byte(body), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
cfg, err := LoadConfig(cfgPath)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadConfig: %v", err)
|
||||
}
|
||||
if cfg.Models.LocalJudge != "custom-judge:latest" {
|
||||
t.Errorf("local_judge = %q, want custom-judge:latest", cfg.Models.LocalJudge)
|
||||
}
|
||||
if !cfg.Models.IsWeak("custom-judge:latest") {
|
||||
t.Errorf("custom-judge:latest should be weak after override")
|
||||
}
|
||||
// Unspecified tiers keep defaults — TOML decoder doesn't zero
|
||||
// fields it didn't see, BUT slices are replaced wholesale. The
|
||||
// weak_models override above is intentional so callers know
|
||||
// they're trading the full default list when they set this.
|
||||
if cfg.Models.LocalFast != "qwen3.5:latest" {
|
||||
t.Errorf("local_fast drifted to %q, want default qwen3.5:latest", cfg.Models.LocalFast)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_InvalidTOML_ReturnsError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "bad.toml")
|
||||
|
||||
@ -80,3 +80,39 @@ use_path_style = true
|
||||
|
||||
[log]
|
||||
level = "info"
|
||||
|
||||
# Model tier registry — names map to actual model IDs per the small-
|
||||
# model pipeline architecture (project_small_model_pipeline_vision.md).
|
||||
# Bumping a model means editing one line here, not hunting through code.
|
||||
#
|
||||
# Tier philosophy:
|
||||
# - local_* : on-box Ollama. Inner-loop hot path. Repeated calls.
|
||||
# - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier.
|
||||
# - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
|
||||
# Reserved for blockers and full-scope reviews.
|
||||
#
|
||||
# weak_models is the codified "local-hot-path eligible" list that the
|
||||
# matrix.downgrade gate reads. A model in this list bypasses the
|
||||
# strong-model downgrade rule (it's already weak — no need to downgrade
|
||||
# corpora further).
|
||||
[models]
|
||||
# Tier 1 — local hot path
|
||||
local_fast = "qwen3.5:latest"
|
||||
local_embed = "nomic-embed-text"
|
||||
local_judge = "qwen3.5:latest"
|
||||
local_review = "qwen3.5:latest"
|
||||
|
||||
# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
|
||||
# deepseek/kimi-k2.6/qwen3-coder are the working primaries.
|
||||
cloud_judge = "kimi-k2.6:cloud"
|
||||
cloud_review = "qwen3-coder:480b"
|
||||
cloud_strong = "deepseek-v3.2"
|
||||
|
||||
# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
|
||||
frontier_review = "openrouter/anthropic/claude-opus-4-7"
|
||||
frontier_arch = "openrouter/moonshotai/kimi-k2-0905"
|
||||
frontier_strong = "openrouter/openai/gpt-5"
|
||||
frontier_free = "opencode/claude-opus-4-7"
|
||||
|
||||
# Local-hot-path eligible — matrix.downgrade bypass list.
|
||||
weak_models = ["qwen3.5:latest", "qwen3:latest"]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user