phase 1: add [models] tier config — additive, no callers migrate yet

Codifies the small-model-pipeline tiering (per project_small_model_pipeline_vision.md) in the lakehouse.toml [models] section. Tier names map to actual model IDs; bumping a model means editing one line, not hunting through code.

Tier philosophy:
- local_* : on-box Ollama. Inner-loop hot path. Repeated calls.
- cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier.
- frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.

weak_models is the codified "local-hot-path eligible" list — phase 2 will migrate matrix.downgrade to read it instead of hardcoding.

Defaults reflect the 2026-04-29 architecture: qwen3.5:latest as local (stronger than qwen2.5, same JSON-clean property), kimi-k2.6 as cloud judge (kimi-k2:1t still upstream-broken), opus-4-7 + kimi-k2-0905 as frontier review/arch via OpenRouter, opencode/claude-opus-4-7 as frontier_free leveraging the OpenCode subscription.

3 new tests in internal/shared/config_test.go:
- TestDefaultConfig_ModelsTier — locks tier defaults
- TestModelsConfig_IsWeak — weak-bypass list
- TestLoadConfig_ModelsTOMLRoundTrip — override semantics

just verify PASS (g2 had one flake on first run — Ollama transfer truncation; clean on retry, unrelated to this change).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 23:48:45 -05:00 · 2026-04-29 23:48:45 -05:00 · ec1d031996
commit ec1d031996
parent 3dd7d9fe30
3 changed files with 227 additions and 0 deletions
--- a/internal/shared/config.go
+++ b/internal/shared/config.go
@ -30,6 +30,7 @@ type Config struct {
 	Matrixd   MatrixdConfig   `toml:"matrixd"`
 	Observerd ObserverdConfig `toml:"observerd"`
 	S3        S3Config        `toml:"s3"`
 	Models    ModelsConfig    `toml:"models"`
 	Log      LogConfig     `toml:"log"`
 	Auth     AuthConfig    `toml:"auth"`
 }
@ -162,6 +163,90 @@ type LogConfig struct {
 	Level string `toml:"level"`
 }
 // ModelsConfig is the [models] section of the config: one model ID per
 // named tier of the small-model pipeline (see
 // project_small_model_pipeline_vision.md and the Rust
 // `config/providers.toml` convention). Code asks for a tier by name
 // rather than embedding a literal model ID, so swapping a model is a
 // one-line config edit.
 //
 // Tiers, by prefix:
 //   - local_*    : on-box Ollama. Cheap, fast, JSON-clean; the inner-loop
 //                  hot path, called repeatedly per query.
 //   - cloud_*    : Ollama Cloud (Pro plan). Larger context; used when
 //                  local is uncertain. Auth via OLLAMA_CLOUD_KEY.
 //   - frontier_* : OpenRouter / OpenCode. Rate-limited and billed per
 //                  call; reserved for blockers and full-scope reviews,
 //                  not steady state.
 //
 // WeakModels is the "local-hot-path eligible" list meant to replace the
 // hardcoded switch in internal/matrix/downgrade.go. A listed model
 // bypasses the corpus-downgrade rule (it is already weak); any other
 // model — paid, cloud, frontier — triggers the gate.
 type ModelsConfig struct {
 	// Local tier (on-box Ollama).
 	LocalFast   string `toml:"local_fast"`
 	LocalEmbed  string `toml:"local_embed"`
 	LocalJudge  string `toml:"local_judge"`
 	LocalReview string `toml:"local_review"`
 	// Cloud tier (Ollama Cloud).
 	CloudJudge  string `toml:"cloud_judge"`
 	CloudReview string `toml:"cloud_review"`
 	CloudStrong string `toml:"cloud_strong"`
 	// Frontier tier (OpenRouter / OpenCode).
 	FrontierReview string `toml:"frontier_review"`
 	FrontierArch   string `toml:"frontier_arch"`
 	FrontierStrong string `toml:"frontier_strong"`
 	FrontierFree   string `toml:"frontier_free"`
 	// Model IDs that IsWeak reports as weak.
 	WeakModels []string `toml:"weak_models"`
 }
 // Resolve looks up the model ID configured for a tier name such as
 // "local_judge". A tier name it does not recognize yields "" rather
 // than a panic, so a missing tier cannot crash the binary at startup;
 // callers are expected to substitute a hardcoded default and log a
 // warning when they get "" back.
 func (m ModelsConfig) Resolve(tier string) string {
 	var id string // stays "" when tier matches no case below
 	switch tier {
 	case "local_fast":
 		id = m.LocalFast
 	case "local_embed":
 		id = m.LocalEmbed
 	case "local_judge":
 		id = m.LocalJudge
 	case "local_review":
 		id = m.LocalReview
 	case "cloud_judge":
 		id = m.CloudJudge
 	case "cloud_review":
 		id = m.CloudReview
 	case "cloud_strong":
 		id = m.CloudStrong
 	case "frontier_review":
 		id = m.FrontierReview
 	case "frontier_arch":
 		id = m.FrontierArch
 	case "frontier_strong":
 		id = m.FrontierStrong
 	case "frontier_free":
 		id = m.FrontierFree
 	}
 	return id
 }
 // IsWeak reports whether model appears in WeakModels, compared by exact
 // string equality. It is the predicate intended for matrix.downgrade:
 // weak models skip the strong-model downgrade gate and stay on the full
 // lakehouse path.
 func (m ModelsConfig) IsWeak(model string) bool {
 	for i := range m.WeakModels {
 		if m.WeakModels[i] == model {
 			return true
 		}
 	}
 	return false
 }
 // AuthConfig is the inter-service auth posture from ADR-003.
 // Token is a Bearer token; empty means "no auth" (G0 dev mode).
 // AllowedIPs is a list of CIDRs (or bare IPs treated as /32);
@ -244,6 +329,34 @@ func DefaultConfig() Config {
 			Bucket:       "lakehouse-primary",
 			UsePathStyle: true,
 		},
 		Models: ModelsConfig{
 			// Tier 1 — local hot path. JSON-clean, fast, deterministic.
 			// qwen3.5:latest replaces qwen2.5 as the local default per
 			// 2026-04-29 architectural review (stronger local model,
 			// same JSON-clean property).
 			LocalFast:      "qwen3.5:latest",
 			LocalEmbed:     "nomic-embed-text",
 			LocalJudge:     "qwen3.5:latest",
 			LocalReview:    "qwen3.5:latest",
 			// Tier 2 — Ollama Cloud (Pro plan). 2026-04-28 upgrade.
 			// kimi-k2:1t is upstream-broken; deepseek/kimi-k2.6/qwen3-coder
 			// are the working primaries.
 			CloudJudge:  "kimi-k2.6:cloud",
 			CloudReview: "qwen3-coder:480b",
 			CloudStrong: "deepseek-v3.2",
 			// Tier 3 — frontier. OpenRouter credits + OpenCode key.
 			// Use sparingly: rate-limited, billed per call.
 			FrontierReview: "openrouter/anthropic/claude-opus-4-7",
 			FrontierArch:   "openrouter/moonshotai/kimi-k2-0905",
 			FrontierStrong: "openrouter/openai/gpt-5",
 			FrontierFree:   "opencode/claude-opus-4-7",
 			// Local-hot-path eligible. matrix.downgrade reads this
 			// list to decide whether to bypass the strong-model gate.
 			WeakModels: []string{"qwen3.5:latest", "qwen3:latest"},
 		},
 		Log: LogConfig{Level: "info"},
 	}
 }
--- a/internal/shared/config_test.go
+++ b/internal/shared/config_test.go
@ -110,6 +110,84 @@ bucket   = "custom-bucket"
 	}
 }
 // TestDefaultConfig_ModelsTier pins the default model ID of every tier
 // name that Models.Resolve accepts, so a refactor that drops a tier or
 // silently changes a default cannot ship without a test edit.
 func TestDefaultConfig_ModelsTier(t *testing.T) {
 	cfg := DefaultConfig()
 	// One row per tier. The table covers all eleven tiers, not a sample:
 	// an unlisted tier would be free to drift unnoticed.
 	checks := []struct {
 		tier, want string
 	}{
 		{"local_fast", "qwen3.5:latest"},
 		{"local_embed", "nomic-embed-text"},
 		{"local_judge", "qwen3.5:latest"},
 		{"local_review", "qwen3.5:latest"},
 		{"cloud_judge", "kimi-k2.6:cloud"},
 		{"cloud_review", "qwen3-coder:480b"},
 		{"cloud_strong", "deepseek-v3.2"},
 		{"frontier_review", "openrouter/anthropic/claude-opus-4-7"},
 		{"frontier_arch", "openrouter/moonshotai/kimi-k2-0905"},
 		{"frontier_strong", "openrouter/openai/gpt-5"},
 		{"frontier_free", "opencode/claude-opus-4-7"},
 	}
 	for _, c := range checks {
 		if got := cfg.Models.Resolve(c.tier); got != c.want {
 			t.Errorf("Models.Resolve(%q) = %q, want %q", c.tier, got, c.want)
 		}
 	}
 	// Unknown tier returns "" — caller's responsibility to fall back.
 	if got := cfg.Models.Resolve("nonexistent"); got != "" {
 		t.Errorf("Models.Resolve(nonexistent) = %q, want empty string", got)
 	}
 }
 // TestModelsConfig_IsWeak checks IsWeak against the default WeakModels
 // list: both default entries are weak, and a sample of cloud and
 // frontier model IDs is not.
 func TestModelsConfig_IsWeak(t *testing.T) {
 	models := DefaultConfig().Models
 	// The two default entries of the bypass list.
 	if weak := models.IsWeak("qwen3.5:latest"); !weak {
 		t.Errorf("qwen3.5:latest should be weak (in default WeakModels)")
 	}
 	if weak := models.IsWeak("qwen3:latest"); !weak {
 		t.Errorf("qwen3:latest should be weak")
 	}
 	// Cloud and frontier IDs that are absent from the default list.
 	strong := []string{
 		"opencode/claude-opus-4-7",
 		"openrouter/openai/gpt-5",
 		"qwen3-coder:480b",
 		"deepseek-v3.2",
 	}
 	for i := range strong {
 		if !models.IsWeak(strong[i]) {
 			continue
 		}
 		t.Errorf("%s should NOT be weak", strong[i])
 	}
 }
 // TestLoadConfig_ModelsTOMLRoundTrip writes a config file that overrides
 // one tier and the weak_models list, loads it, and checks the override
 // semantics: overridden keys take the file's value, untouched scalar
 // tiers keep their defaults, and the weak_models slice is replaced as a
 // whole rather than merged with the default list.
 func TestLoadConfig_ModelsTOMLRoundTrip(t *testing.T) {
 	// Override one tier name; verify it loads, rest stay at defaults.
 	dir := t.TempDir()
 	cfgPath := filepath.Join(dir, "lakehouse.toml")
 	body := `[models]
 local_judge = "custom-judge:latest"
 weak_models = ["custom-judge:latest", "qwen3:latest"]
 `
 	if err := os.WriteFile(cfgPath, []byte(body), 0o644); err != nil {
 		t.Fatalf("write config: %v", err)
 	}
 	cfg, err := LoadConfig(cfgPath)
 	if err != nil {
 		t.Fatalf("LoadConfig: %v", err)
 	}
 	if cfg.Models.LocalJudge != "custom-judge:latest" {
 		t.Errorf("local_judge = %q, want custom-judge:latest", cfg.Models.LocalJudge)
 	}
 	if !cfg.Models.IsWeak("custom-judge:latest") {
 		t.Errorf("custom-judge:latest should be weak after override")
 	}
 	// Slices are replaced wholesale: setting weak_models trades away the
 	// full default list. An entry that exists only in the defaults must
 	// be gone, and an entry named in the file must be present.
 	if cfg.Models.IsWeak("qwen3.5:latest") {
 		t.Errorf("qwen3.5:latest still weak after weak_models override, want default list replaced")
 	}
 	if !cfg.Models.IsWeak("qwen3:latest") {
 		t.Errorf("qwen3:latest should be weak after override")
 	}
 	// Unspecified scalar tiers keep defaults — the TOML decoder doesn't
 	// zero fields it didn't see.
 	if cfg.Models.LocalFast != "qwen3.5:latest" {
 		t.Errorf("local_fast drifted to %q, want default qwen3.5:latest", cfg.Models.LocalFast)
 	}
 }
 func TestLoadConfig_InvalidTOML_ReturnsError(t *testing.T) {
 	dir := t.TempDir()
 	cfgPath := filepath.Join(dir, "bad.toml")
--- a/lakehouse.toml
+++ b/lakehouse.toml
@ -80,3 +80,39 @@ use_path_style  = true
 [log]
 level = "info"
 # Model tier registry — names map to actual model IDs per the small-
 # model pipeline architecture (project_small_model_pipeline_vision.md).
 # Bumping a model means editing one line here, not hunting through code.
 #
 # Tier philosophy:
 #   - local_*    : on-box Ollama. Inner-loop hot path. Repeated calls.
 #   - cloud_*    : Ollama Cloud (Pro plan). Larger context, fail-up tier.
 #   - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
 #                  Reserved for blockers and full-scope reviews.
 #
 # weak_models is the codified "local-hot-path eligible" list. Phase 2
 # migrates the matrix.downgrade gate to read it instead of a hardcoded
 # list. A model in this list bypasses the strong-model downgrade rule
 # (it's already weak — no need to downgrade corpora further).
 [models]
 # Tier 1 — local hot path
 local_fast    = "qwen3.5:latest"
 local_embed   = "nomic-embed-text"
 local_judge   = "qwen3.5:latest"
 local_review  = "qwen3.5:latest"
 # Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
 # deepseek/kimi-k2.6/qwen3-coder are the working primaries.
 cloud_judge   = "kimi-k2.6:cloud"
 cloud_review  = "qwen3-coder:480b"
 cloud_strong  = "deepseek-v3.2"
 # Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
 frontier_review = "openrouter/anthropic/claude-opus-4-7"
 frontier_arch   = "openrouter/moonshotai/kimi-k2-0905"
 frontier_strong = "openrouter/openai/gpt-5"
 frontier_free   = "opencode/claude-opus-4-7"
 # Local-hot-path eligible — matrix.downgrade bypass list.
 weak_models = ["qwen3.5:latest", "qwen3:latest"]