diff --git a/internal/shared/config.go b/internal/shared/config.go index a3d8cdb..e0d2664 100644 --- a/internal/shared/config.go +++ b/internal/shared/config.go @@ -30,6 +30,7 @@ type Config struct { Matrixd MatrixdConfig `toml:"matrixd"` Observerd ObserverdConfig `toml:"observerd"` S3 S3Config `toml:"s3"` + Models ModelsConfig `toml:"models"` Log LogConfig `toml:"log"` Auth AuthConfig `toml:"auth"` } @@ -162,6 +163,90 @@ type LogConfig struct { Level string `toml:"level"` } +// ModelsConfig names the models used by each tier of the small-model +// pipeline (per project_small_model_pipeline_vision.md and the Rust +// `config/providers.toml` convention). Callers reference tier names, +// not literal model IDs — bumping a tier means editing this file, not +// hunting through code. +// +// Tier philosophy: +// - local_* : on-box Ollama. Cheap, fast, JSON-clean. Inner-loop +// hot path. Repeated calls per query. +// - cloud_* : Ollama Cloud (Pro plan). Larger context, called when +// local is uncertain. Auth via OLLAMA_CLOUD_KEY. +// - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call. +// Reserved for blockers and full-scope reviews, not +// steady state. +// +// WeakModels is the codified "local-hot-path eligible" list that the +// matrix downgrade gate consults — replacing the previous hardcoded +// switch in internal/matrix/downgrade.go. A model in this list bypasses +// the corpus-downgrade rule: it's already weak, no need to downgrade +// further. Strong (paid / cloud / frontier) models trigger the gate. 
type ModelsConfig struct {
	LocalFast   string `toml:"local_fast"`
	LocalEmbed  string `toml:"local_embed"`
	LocalJudge  string `toml:"local_judge"`
	LocalReview string `toml:"local_review"`

	CloudJudge  string `toml:"cloud_judge"`
	CloudReview string `toml:"cloud_review"`
	CloudStrong string `toml:"cloud_strong"`

	FrontierReview string `toml:"frontier_review"`
	FrontierArch   string `toml:"frontier_arch"`
	FrontierStrong string `toml:"frontier_strong"`
	FrontierFree   string `toml:"frontier_free"`

	WeakModels []string `toml:"weak_models"`
}

// Resolve translates a tier name (e.g. "local_judge") into the model ID
// configured for that tier. An unrecognized tier yields the empty string;
// callers are expected to treat "" as "use a hardcoded fallback and log a
// warning" rather than a fatal condition, so an additive config change
// cannot crash the binary at startup.
func (m ModelsConfig) Resolve(tier string) string {
	byTier := map[string]string{
		"local_fast":      m.LocalFast,
		"local_embed":     m.LocalEmbed,
		"local_judge":     m.LocalJudge,
		"local_review":    m.LocalReview,
		"cloud_judge":     m.CloudJudge,
		"cloud_review":    m.CloudReview,
		"cloud_strong":    m.CloudStrong,
		"frontier_review": m.FrontierReview,
		"frontier_arch":   m.FrontierArch,
		"frontier_strong": m.FrontierStrong,
		"frontier_free":   m.FrontierFree,
	}
	// A missing key reads as the zero value "" — exactly the
	// unknown-tier contract described above.
	return byTier[tier]
}

// IsWeak reports whether model appears in the configured WeakModels list.
// matrix.downgrade consults this to decide whether a model may bypass the
// strong-model downgrade gate (already-weak models stay on the full
// lakehouse path; there is nothing further to downgrade).
func (m ModelsConfig) IsWeak(model string) bool {
	for i := range m.WeakModels {
		if m.WeakModels[i] == model {
			return true
		}
	}
	return false
}

// AuthConfig is the inter-service auth posture from ADR-003.
// Token is a Bearer token; empty means "no auth" (G0 dev mode).
// AllowedIPs is a list of CIDRs (or bare IPs treated as /32); @@ -244,6 +329,34 @@ func DefaultConfig() Config { Bucket: "lakehouse-primary", UsePathStyle: true, }, + Models: ModelsConfig{ + // Tier 1 — local hot path. JSON-clean, fast, deterministic. + // qwen3.5:latest replaces qwen2.5 as the local default per + // 2026-04-29 architectural review (stronger local model, + // same JSON-clean property). + LocalFast: "qwen3.5:latest", + LocalEmbed: "nomic-embed-text", + LocalJudge: "qwen3.5:latest", + LocalReview: "qwen3.5:latest", + + // Tier 2 — Ollama Cloud (Pro plan). 2026-04-28 upgrade. + // kimi-k2:1t is upstream-broken; deepseek/kimi-k2.6/qwen3-coder + // are the working primaries. + CloudJudge: "kimi-k2.6:cloud", + CloudReview: "qwen3-coder:480b", + CloudStrong: "deepseek-v3.2", + + // Tier 3 — frontier. OpenRouter credits + OpenCode key. + // Use sparingly: rate-limited, billed per call. + FrontierReview: "openrouter/anthropic/claude-opus-4-7", + FrontierArch: "openrouter/moonshotai/kimi-k2-0905", + FrontierStrong: "openrouter/openai/gpt-5", + FrontierFree: "opencode/claude-opus-4-7", + + // Local-hot-path eligible. matrix.downgrade reads this + // list to decide whether to bypass the strong-model gate. + WeakModels: []string{"qwen3.5:latest", "qwen3:latest"}, + }, Log: LogConfig{Level: "info"}, } } diff --git a/internal/shared/config_test.go b/internal/shared/config_test.go index f35311b..48e7e1e 100644 --- a/internal/shared/config_test.go +++ b/internal/shared/config_test.go @@ -110,6 +110,84 @@ bucket = "custom-bucket" } } +func TestDefaultConfig_ModelsTier(t *testing.T) { + cfg := DefaultConfig() + // Locks tier defaults so a refactor that drops a tier or renames + // a default silently can't ship without a test edit. 
+ checks := []struct { + tier, want string + }{ + {"local_fast", "qwen3.5:latest"}, + {"local_embed", "nomic-embed-text"}, + {"local_judge", "qwen3.5:latest"}, + {"cloud_judge", "kimi-k2.6:cloud"}, + {"cloud_review", "qwen3-coder:480b"}, + {"frontier_review", "openrouter/anthropic/claude-opus-4-7"}, + {"frontier_free", "opencode/claude-opus-4-7"}, + } + for _, c := range checks { + if got := cfg.Models.Resolve(c.tier); got != c.want { + t.Errorf("Models.Resolve(%q) = %q, want %q", c.tier, got, c.want) + } + } + // Unknown tier returns "" — caller's responsibility to fall back. + if got := cfg.Models.Resolve("nonexistent"); got != "" { + t.Errorf("Models.Resolve(nonexistent) = %q, want empty string", got) + } +} + +func TestModelsConfig_IsWeak(t *testing.T) { + cfg := DefaultConfig() + // Default WeakModels set is the matrix.downgrade bypass list. + if !cfg.Models.IsWeak("qwen3.5:latest") { + t.Errorf("qwen3.5:latest should be weak (in default WeakModels)") + } + if !cfg.Models.IsWeak("qwen3:latest") { + t.Errorf("qwen3:latest should be weak") + } + // Strong / cloud / frontier models are NOT weak. + for _, m := range []string{ + "opencode/claude-opus-4-7", + "openrouter/openai/gpt-5", + "qwen3-coder:480b", + "deepseek-v3.2", + } { + if cfg.Models.IsWeak(m) { + t.Errorf("%s should NOT be weak", m) + } + } +} + +func TestLoadConfig_ModelsTOMLRoundTrip(t *testing.T) { + // Override one tier name; verify it loads, rest stay at defaults. 
+ dir := t.TempDir() + cfgPath := filepath.Join(dir, "lakehouse.toml") + body := `[models] +local_judge = "custom-judge:latest" +weak_models = ["custom-judge:latest", "qwen3:latest"] +` + if err := os.WriteFile(cfgPath, []byte(body), 0o644); err != nil { + t.Fatalf("write config: %v", err) + } + cfg, err := LoadConfig(cfgPath) + if err != nil { + t.Fatalf("LoadConfig: %v", err) + } + if cfg.Models.LocalJudge != "custom-judge:latest" { + t.Errorf("local_judge = %q, want custom-judge:latest", cfg.Models.LocalJudge) + } + if !cfg.Models.IsWeak("custom-judge:latest") { + t.Errorf("custom-judge:latest should be weak after override") + } + // Unspecified tiers keep defaults — TOML decoder doesn't zero + // fields it didn't see, BUT slices are replaced wholesale. The + // weak_models override above is intentional so callers know + // they're trading the full default list when they set this. + if cfg.Models.LocalFast != "qwen3.5:latest" { + t.Errorf("local_fast drifted to %q, want default qwen3.5:latest", cfg.Models.LocalFast) + } +} + func TestLoadConfig_InvalidTOML_ReturnsError(t *testing.T) { dir := t.TempDir() cfgPath := filepath.Join(dir, "bad.toml") diff --git a/lakehouse.toml b/lakehouse.toml index ae111cd..3ea71fc 100644 --- a/lakehouse.toml +++ b/lakehouse.toml @@ -80,3 +80,39 @@ use_path_style = true [log] level = "info" + +# Model tier registry — names map to actual model IDs per the small- +# model pipeline architecture (project_small_model_pipeline_vision.md). +# Bumping a model means editing one line here, not hunting through code. +# +# Tier philosophy: +# - local_* : on-box Ollama. Inner-loop hot path. Repeated calls. +# - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier. +# - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call. +# Reserved for blockers and full-scope reviews. +# +# weak_models is the codified "local-hot-path eligible" list that the +# matrix.downgrade gate reads. 
A model in this list bypasses the +# strong-model downgrade rule (it's already weak — no need to downgrade +# corpora further). +[models] +# Tier 1 — local hot path +local_fast = "qwen3.5:latest" +local_embed = "nomic-embed-text" +local_judge = "qwen3.5:latest" +local_review = "qwen3.5:latest" + +# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken; +# deepseek/kimi-k2.6/qwen3-coder are the working primaries. +cloud_judge = "kimi-k2.6:cloud" +cloud_review = "qwen3-coder:480b" +cloud_strong = "deepseek-v3.2" + +# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing. +frontier_review = "openrouter/anthropic/claude-opus-4-7" +frontier_arch = "openrouter/moonshotai/kimi-k2-0905" +frontier_strong = "openrouter/openai/gpt-5" +frontier_free = "opencode/claude-opus-4-7" + +# Local-hot-path eligible — matrix.downgrade bypass list. +weak_models = ["qwen3.5:latest", "qwen3:latest"]