phase 1: add [models] tier config — additive, no callers migrate yet
Codifies the small-model-pipeline tiering (per project_small_model_pipeline_vision.md) in the lakehouse.toml [models] section. Tier names map to actual model IDs; bumping a model means editing one line, not hunting through code. Tier philosophy: - local_* : on-box Ollama. Inner-loop hot path. Repeated calls. - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier. - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call. weak_models is the codified "local-hot-path eligible" list — phase 2 will migrate matrix.downgrade to read it instead of hardcoding. Defaults reflect the 2026-04-29 architecture: qwen3.5:latest as local (stronger than qwen2.5, same JSON-clean property), kimi-k2.6 as cloud judge (kimi-k2:1t still upstream-broken), opus-4-7 + kimi-k2-0905 as frontier review/arch via OpenRouter, and opencode/claude-opus-4-7 as frontier_free, leveraging the OpenCode subscription. 3 new tests in internal/shared/config_test.go: - TestDefaultConfig_ModelsTier — locks tier defaults - TestModelsConfig_IsWeak — weak-bypass list - TestLoadConfig_ModelsTOMLRoundTrip — override semantics. All tests PASS (g2 had one flake on first run — Ollama transfer truncation; clean on retry, unrelated to this change). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3dd7d9fe30
commit
ec1d031996
@ -30,6 +30,7 @@ type Config struct {
|
||||
Matrixd MatrixdConfig `toml:"matrixd"`
|
||||
Observerd ObserverdConfig `toml:"observerd"`
|
||||
S3 S3Config `toml:"s3"`
|
||||
Models ModelsConfig `toml:"models"`
|
||||
Log LogConfig `toml:"log"`
|
||||
Auth AuthConfig `toml:"auth"`
|
||||
}
|
||||
@ -162,6 +163,90 @@ type LogConfig struct {
|
||||
Level string `toml:"level"`
|
||||
}
|
||||
|
||||
// ModelsConfig maps small-model-pipeline tier names to concrete model
// IDs (see project_small_model_pipeline_vision.md and the Rust
// `config/providers.toml` convention). Callers address models by tier
// name rather than by literal ID, so bumping a tier is a one-line
// config edit instead of a code hunt.
//
// Tier philosophy:
//   - local_*    : on-box Ollama. Cheap, fast, JSON-clean. The
//     inner-loop hot path with repeated calls per query.
//   - cloud_*    : Ollama Cloud (Pro plan). Larger context, called
//     when local is uncertain. Auth via OLLAMA_CLOUD_KEY.
//   - frontier_* : OpenRouter / OpenCode. Rate-limited and billed per
//     call; reserved for blockers and full-scope reviews, not steady
//     state.
//
// WeakModels codifies the "local-hot-path eligible" list consulted by
// the matrix downgrade gate — replacing the previous hardcoded switch
// in internal/matrix/downgrade.go. A model on this list bypasses the
// corpus-downgrade rule (it is already weak, so there is nothing left
// to downgrade); strong (paid / cloud / frontier) models trigger the
// gate.
type ModelsConfig struct {
	LocalFast   string `toml:"local_fast"`
	LocalEmbed  string `toml:"local_embed"`
	LocalJudge  string `toml:"local_judge"`
	LocalReview string `toml:"local_review"`

	CloudJudge  string `toml:"cloud_judge"`
	CloudReview string `toml:"cloud_review"`
	CloudStrong string `toml:"cloud_strong"`

	FrontierReview string `toml:"frontier_review"`
	FrontierArch   string `toml:"frontier_arch"`
	FrontierStrong string `toml:"frontier_strong"`
	FrontierFree   string `toml:"frontier_free"`

	WeakModels []string `toml:"weak_models"`
}

// Resolve returns the model ID configured for a tier name (e.g.
// "local_judge"). An unknown tier yields "": callers are expected to
// fall back to a hardcoded default and log a warning, because the
// alternative (panic) would crash the binary at startup on a missing
// tier — too aggressive for an additive config.
func (m ModelsConfig) Resolve(tier string) string {
	// Table form keeps the tier-name → field wiring in one place; the
	// map's zero value gives "" for unknown tiers for free.
	byTier := map[string]string{
		"local_fast":      m.LocalFast,
		"local_embed":     m.LocalEmbed,
		"local_judge":     m.LocalJudge,
		"local_review":    m.LocalReview,
		"cloud_judge":     m.CloudJudge,
		"cloud_review":    m.CloudReview,
		"cloud_strong":    m.CloudStrong,
		"frontier_review": m.FrontierReview,
		"frontier_arch":   m.FrontierArch,
		"frontier_strong": m.FrontierStrong,
		"frontier_free":   m.FrontierFree,
	}
	return byTier[tier]
}

// IsWeak reports whether model appears in the configured WeakModels
// list. matrix.downgrade uses it to decide whether to bypass the
// strong-model downgrade gate (weak models stay on the full lakehouse
// path).
func (m ModelsConfig) IsWeak(model string) bool {
	for i := range m.WeakModels {
		if m.WeakModels[i] == model {
			return true
		}
	}
	return false
}
|
||||
|
||||
// AuthConfig is the inter-service auth posture from ADR-003.
|
||||
// Token is a Bearer token; empty means "no auth" (G0 dev mode).
|
||||
// AllowedIPs is a list of CIDRs (or bare IPs treated as /32);
|
||||
@ -244,6 +329,34 @@ func DefaultConfig() Config {
|
||||
Bucket: "lakehouse-primary",
|
||||
UsePathStyle: true,
|
||||
},
|
||||
Models: ModelsConfig{
|
||||
// Tier 1 — local hot path. JSON-clean, fast, deterministic.
|
||||
// qwen3.5:latest replaces qwen2.5 as the local default per
|
||||
// 2026-04-29 architectural review (stronger local model,
|
||||
// same JSON-clean property).
|
||||
LocalFast: "qwen3.5:latest",
|
||||
LocalEmbed: "nomic-embed-text",
|
||||
LocalJudge: "qwen3.5:latest",
|
||||
LocalReview: "qwen3.5:latest",
|
||||
|
||||
// Tier 2 — Ollama Cloud (Pro plan). 2026-04-28 upgrade.
|
||||
// kimi-k2:1t is upstream-broken; deepseek/kimi-k2.6/qwen3-coder
|
||||
// are the working primaries.
|
||||
CloudJudge: "kimi-k2.6:cloud",
|
||||
CloudReview: "qwen3-coder:480b",
|
||||
CloudStrong: "deepseek-v3.2",
|
||||
|
||||
// Tier 3 — frontier. OpenRouter credits + OpenCode key.
|
||||
// Use sparingly: rate-limited, billed per call.
|
||||
FrontierReview: "openrouter/anthropic/claude-opus-4-7",
|
||||
FrontierArch: "openrouter/moonshotai/kimi-k2-0905",
|
||||
FrontierStrong: "openrouter/openai/gpt-5",
|
||||
FrontierFree: "opencode/claude-opus-4-7",
|
||||
|
||||
// Local-hot-path eligible. matrix.downgrade reads this
|
||||
// list to decide whether to bypass the strong-model gate.
|
||||
WeakModels: []string{"qwen3.5:latest", "qwen3:latest"},
|
||||
},
|
||||
Log: LogConfig{Level: "info"},
|
||||
}
|
||||
}
|
||||
|
||||
@ -110,6 +110,84 @@ bucket = "custom-bucket"
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultConfig_ModelsTier(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
// Locks tier defaults so a refactor that drops a tier or renames
|
||||
// a default silently can't ship without a test edit.
|
||||
checks := []struct {
|
||||
tier, want string
|
||||
}{
|
||||
{"local_fast", "qwen3.5:latest"},
|
||||
{"local_embed", "nomic-embed-text"},
|
||||
{"local_judge", "qwen3.5:latest"},
|
||||
{"cloud_judge", "kimi-k2.6:cloud"},
|
||||
{"cloud_review", "qwen3-coder:480b"},
|
||||
{"frontier_review", "openrouter/anthropic/claude-opus-4-7"},
|
||||
{"frontier_free", "opencode/claude-opus-4-7"},
|
||||
}
|
||||
for _, c := range checks {
|
||||
if got := cfg.Models.Resolve(c.tier); got != c.want {
|
||||
t.Errorf("Models.Resolve(%q) = %q, want %q", c.tier, got, c.want)
|
||||
}
|
||||
}
|
||||
// Unknown tier returns "" — caller's responsibility to fall back.
|
||||
if got := cfg.Models.Resolve("nonexistent"); got != "" {
|
||||
t.Errorf("Models.Resolve(nonexistent) = %q, want empty string", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestModelsConfig_IsWeak(t *testing.T) {
|
||||
cfg := DefaultConfig()
|
||||
// Default WeakModels set is the matrix.downgrade bypass list.
|
||||
if !cfg.Models.IsWeak("qwen3.5:latest") {
|
||||
t.Errorf("qwen3.5:latest should be weak (in default WeakModels)")
|
||||
}
|
||||
if !cfg.Models.IsWeak("qwen3:latest") {
|
||||
t.Errorf("qwen3:latest should be weak")
|
||||
}
|
||||
// Strong / cloud / frontier models are NOT weak.
|
||||
for _, m := range []string{
|
||||
"opencode/claude-opus-4-7",
|
||||
"openrouter/openai/gpt-5",
|
||||
"qwen3-coder:480b",
|
||||
"deepseek-v3.2",
|
||||
} {
|
||||
if cfg.Models.IsWeak(m) {
|
||||
t.Errorf("%s should NOT be weak", m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_ModelsTOMLRoundTrip(t *testing.T) {
|
||||
// Override one tier name; verify it loads, rest stay at defaults.
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "lakehouse.toml")
|
||||
body := `[models]
|
||||
local_judge = "custom-judge:latest"
|
||||
weak_models = ["custom-judge:latest", "qwen3:latest"]
|
||||
`
|
||||
if err := os.WriteFile(cfgPath, []byte(body), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
cfg, err := LoadConfig(cfgPath)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadConfig: %v", err)
|
||||
}
|
||||
if cfg.Models.LocalJudge != "custom-judge:latest" {
|
||||
t.Errorf("local_judge = %q, want custom-judge:latest", cfg.Models.LocalJudge)
|
||||
}
|
||||
if !cfg.Models.IsWeak("custom-judge:latest") {
|
||||
t.Errorf("custom-judge:latest should be weak after override")
|
||||
}
|
||||
// Unspecified tiers keep defaults — TOML decoder doesn't zero
|
||||
// fields it didn't see, BUT slices are replaced wholesale. The
|
||||
// weak_models override above is intentional so callers know
|
||||
// they're trading the full default list when they set this.
|
||||
if cfg.Models.LocalFast != "qwen3.5:latest" {
|
||||
t.Errorf("local_fast drifted to %q, want default qwen3.5:latest", cfg.Models.LocalFast)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_InvalidTOML_ReturnsError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "bad.toml")
|
||||
|
||||
@ -80,3 +80,39 @@ use_path_style = true
|
||||
|
||||
[log]
|
||||
level = "info"
|
||||
|
||||
# Model tier registry — names map to actual model IDs per the small-
|
||||
# model pipeline architecture (project_small_model_pipeline_vision.md).
|
||||
# Bumping a model means editing one line here, not hunting through code.
|
||||
#
|
||||
# Tier philosophy:
|
||||
# - local_* : on-box Ollama. Inner-loop hot path. Repeated calls.
|
||||
# - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier.
|
||||
# - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
|
||||
# Reserved for blockers and full-scope reviews.
|
||||
#
|
||||
# weak_models is the codified "local-hot-path eligible" list that the
|
||||
# matrix.downgrade gate reads. A model in this list bypasses the
|
||||
# strong-model downgrade rule (it's already weak — no need to downgrade
|
||||
# corpora further).
|
||||
[models]
|
||||
# Tier 1 — local hot path
|
||||
local_fast = "qwen3.5:latest"
|
||||
local_embed = "nomic-embed-text"
|
||||
local_judge = "qwen3.5:latest"
|
||||
local_review = "qwen3.5:latest"
|
||||
|
||||
# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
|
||||
# deepseek/kimi-k2.6/qwen3-coder are the working primaries.
|
||||
cloud_judge = "kimi-k2.6:cloud"
|
||||
cloud_review = "qwen3-coder:480b"
|
||||
cloud_strong = "deepseek-v3.2"
|
||||
|
||||
# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
|
||||
frontier_review = "openrouter/anthropic/claude-opus-4-7"
|
||||
frontier_arch = "openrouter/moonshotai/kimi-k2-0905"
|
||||
frontier_strong = "openrouter/openai/gpt-5"
|
||||
frontier_free = "opencode/claude-opus-4-7"
|
||||
|
||||
# Local-hot-path eligible — matrix.downgrade bypass list.
|
||||
weak_models = ["qwen3.5:latest", "qwen3:latest"]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user