phase 1: add [models] tier config — additive, no callers migrate yet
Codifies the small-model-pipeline tiering (per project_small_model_pipeline_vision.md) in the [models] section of lakehouse.toml. Tier names map to actual model IDs; bumping a model means editing one line, not hunting through code.

Tier philosophy:
- local_* : on-box Ollama. Inner-loop hot path. Repeated calls.
- cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier.
- frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.

weak_models is the codified "local-hot-path eligible" list — phase 2 will migrate matrix.downgrade to read it instead of hardcoding.

Defaults reflect the 2026-04-29 architecture: qwen3.5:latest as local (stronger than qwen2.5, same JSON-clean property), kimi-k2.6 as cloud judge (kimi-k2:1t is still upstream-broken), opus-4-7 + kimi-k2-0905 as frontier review/arch via OpenRouter, and opencode/claude-opus-4-7 as frontier_free, leveraging the OpenCode subscription.

3 new tests in internal/shared/config_test.go:
- TestDefaultConfig_ModelsTier — locks tier defaults
- TestModelsConfig_IsWeak — weak-bypass list
- TestLoadConfig_ModelsTOMLRoundTrip — override semantics

`just verify`: PASS (g2 had one flake on the first run — Ollama transfer truncation; clean on retry, unrelated to this change).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3dd7d9fe30
commit
ec1d031996
@ -30,6 +30,7 @@ type Config struct {
|
|||||||
Matrixd MatrixdConfig `toml:"matrixd"`
|
Matrixd MatrixdConfig `toml:"matrixd"`
|
||||||
Observerd ObserverdConfig `toml:"observerd"`
|
Observerd ObserverdConfig `toml:"observerd"`
|
||||||
S3 S3Config `toml:"s3"`
|
S3 S3Config `toml:"s3"`
|
||||||
|
Models ModelsConfig `toml:"models"`
|
||||||
Log LogConfig `toml:"log"`
|
Log LogConfig `toml:"log"`
|
||||||
Auth AuthConfig `toml:"auth"`
|
Auth AuthConfig `toml:"auth"`
|
||||||
}
|
}
|
||||||
@ -162,6 +163,90 @@ type LogConfig struct {
|
|||||||
Level string `toml:"level"`
|
Level string `toml:"level"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ModelsConfig is the tier registry for the small-model pipeline (see
// project_small_model_pipeline_vision.md and the Rust
// `config/providers.toml` convention). Code asks for a tier by name and
// receives whichever model ID is configured for it, so swapping a model
// is a one-line config edit rather than a search through the code base.
//
// Tiers, grouped by prefix:
//   - local_*    : on-box Ollama. Cheap, fast, JSON-clean. The inner-loop
//     hot path, called repeatedly per query.
//   - cloud_*    : Ollama Cloud (Pro plan). Larger context; used when the
//     local tier is uncertain. Auth via OLLAMA_CLOUD_KEY.
//   - frontier_* : OpenRouter / OpenCode. Rate-limited and billed per
//     call; reserved for blockers and full-scope reviews, not steady
//     state.
//
// WeakModels is the "local-hot-path eligible" list. It is meant to be
// the data source for the matrix downgrade gate, taking over from the
// hardcoded switch in internal/matrix/downgrade.go: a model named here
// is already weak and bypasses the corpus-downgrade rule, while strong
// (paid / cloud / frontier) models trigger the gate.
type ModelsConfig struct {
	// Tier 1: local.
	LocalFast   string `toml:"local_fast"`
	LocalEmbed  string `toml:"local_embed"`
	LocalJudge  string `toml:"local_judge"`
	LocalReview string `toml:"local_review"`

	// Tier 2: cloud.
	CloudJudge  string `toml:"cloud_judge"`
	CloudReview string `toml:"cloud_review"`
	CloudStrong string `toml:"cloud_strong"`

	// Tier 3: frontier.
	FrontierReview string `toml:"frontier_review"`
	FrontierArch   string `toml:"frontier_arch"`
	FrontierStrong string `toml:"frontier_strong"`
	FrontierFree   string `toml:"frontier_free"`

	// Model IDs that IsWeak reports as weak.
	WeakModels []string `toml:"weak_models"`
}

// Resolve returns the model ID configured for the given tier name, where
// the name is the TOML key of the tier (e.g. "local_judge"). Matching is
// exact. A name that is not one of the known tiers yields "", and so
// does a known tier whose value is empty; callers are expected to fall
// back to a hardcoded default and log a warning in that case. Returning
// "" instead of panicking keeps a missing tier from crashing the binary
// at startup, which would be too aggressive for an additive config.
func (m ModelsConfig) Resolve(tier string) string {
	var modelID string // stays "" when tier is not recognized
	switch tier {
	case "local_fast":
		modelID = m.LocalFast
	case "local_embed":
		modelID = m.LocalEmbed
	case "local_judge":
		modelID = m.LocalJudge
	case "local_review":
		modelID = m.LocalReview
	case "cloud_judge":
		modelID = m.CloudJudge
	case "cloud_review":
		modelID = m.CloudReview
	case "cloud_strong":
		modelID = m.CloudStrong
	case "frontier_review":
		modelID = m.FrontierReview
	case "frontier_arch":
		modelID = m.FrontierArch
	case "frontier_strong":
		modelID = m.FrontierStrong
	case "frontier_free":
		modelID = m.FrontierFree
	}
	return modelID
}

// IsWeak reports whether model appears in WeakModels. The comparison is
// an exact, case-sensitive string match, and an empty or nil list means
// no model is weak. It is intended for matrix.downgrade, which uses the
// answer to decide whether the strong-model downgrade gate is bypassed
// (weak models stay on the full lakehouse path).
func (m ModelsConfig) IsWeak(model string) bool {
	for i := range m.WeakModels {
		if m.WeakModels[i] == model {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
// AuthConfig is the inter-service auth posture from ADR-003.
|
// AuthConfig is the inter-service auth posture from ADR-003.
|
||||||
// Token is a Bearer token; empty means "no auth" (G0 dev mode).
|
// Token is a Bearer token; empty means "no auth" (G0 dev mode).
|
||||||
// AllowedIPs is a list of CIDRs (or bare IPs treated as /32);
|
// AllowedIPs is a list of CIDRs (or bare IPs treated as /32);
|
||||||
@ -244,6 +329,34 @@ func DefaultConfig() Config {
|
|||||||
Bucket: "lakehouse-primary",
|
Bucket: "lakehouse-primary",
|
||||||
UsePathStyle: true,
|
UsePathStyle: true,
|
||||||
},
|
},
|
||||||
|
Models: ModelsConfig{
|
||||||
|
// Tier 1 — local hot path. JSON-clean, fast, deterministic.
|
||||||
|
// qwen3.5:latest replaces qwen2.5 as the local default per
|
||||||
|
// 2026-04-29 architectural review (stronger local model,
|
||||||
|
// same JSON-clean property).
|
||||||
|
LocalFast: "qwen3.5:latest",
|
||||||
|
LocalEmbed: "nomic-embed-text",
|
||||||
|
LocalJudge: "qwen3.5:latest",
|
||||||
|
LocalReview: "qwen3.5:latest",
|
||||||
|
|
||||||
|
// Tier 2 — Ollama Cloud (Pro plan). 2026-04-28 upgrade.
|
||||||
|
// kimi-k2:1t is upstream-broken; deepseek/kimi-k2.6/qwen3-coder
|
||||||
|
// are the working primaries.
|
||||||
|
CloudJudge: "kimi-k2.6:cloud",
|
||||||
|
CloudReview: "qwen3-coder:480b",
|
||||||
|
CloudStrong: "deepseek-v3.2",
|
||||||
|
|
||||||
|
// Tier 3 — frontier. OpenRouter credits + OpenCode key.
|
||||||
|
// Use sparingly: rate-limited, billed per call.
|
||||||
|
FrontierReview: "openrouter/anthropic/claude-opus-4-7",
|
||||||
|
FrontierArch: "openrouter/moonshotai/kimi-k2-0905",
|
||||||
|
FrontierStrong: "openrouter/openai/gpt-5",
|
||||||
|
FrontierFree: "opencode/claude-opus-4-7",
|
||||||
|
|
||||||
|
// Local-hot-path eligible. matrix.downgrade reads this
|
||||||
|
// list to decide whether to bypass the strong-model gate.
|
||||||
|
WeakModels: []string{"qwen3.5:latest", "qwen3:latest"},
|
||||||
|
},
|
||||||
Log: LogConfig{Level: "info"},
|
Log: LogConfig{Level: "info"},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -110,6 +110,84 @@ bucket = "custom-bucket"
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDefaultConfig_ModelsTier(t *testing.T) {
|
||||||
|
cfg := DefaultConfig()
|
||||||
|
// Locks tier defaults so a refactor that drops a tier or renames
|
||||||
|
// a default silently can't ship without a test edit.
|
||||||
|
checks := []struct {
|
||||||
|
tier, want string
|
||||||
|
}{
|
||||||
|
{"local_fast", "qwen3.5:latest"},
|
||||||
|
{"local_embed", "nomic-embed-text"},
|
||||||
|
{"local_judge", "qwen3.5:latest"},
|
||||||
|
{"cloud_judge", "kimi-k2.6:cloud"},
|
||||||
|
{"cloud_review", "qwen3-coder:480b"},
|
||||||
|
{"frontier_review", "openrouter/anthropic/claude-opus-4-7"},
|
||||||
|
{"frontier_free", "opencode/claude-opus-4-7"},
|
||||||
|
}
|
||||||
|
for _, c := range checks {
|
||||||
|
if got := cfg.Models.Resolve(c.tier); got != c.want {
|
||||||
|
t.Errorf("Models.Resolve(%q) = %q, want %q", c.tier, got, c.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Unknown tier returns "" — caller's responsibility to fall back.
|
||||||
|
if got := cfg.Models.Resolve("nonexistent"); got != "" {
|
||||||
|
t.Errorf("Models.Resolve(nonexistent) = %q, want empty string", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelsConfig_IsWeak(t *testing.T) {
|
||||||
|
cfg := DefaultConfig()
|
||||||
|
// Default WeakModels set is the matrix.downgrade bypass list.
|
||||||
|
if !cfg.Models.IsWeak("qwen3.5:latest") {
|
||||||
|
t.Errorf("qwen3.5:latest should be weak (in default WeakModels)")
|
||||||
|
}
|
||||||
|
if !cfg.Models.IsWeak("qwen3:latest") {
|
||||||
|
t.Errorf("qwen3:latest should be weak")
|
||||||
|
}
|
||||||
|
// Strong / cloud / frontier models are NOT weak.
|
||||||
|
for _, m := range []string{
|
||||||
|
"opencode/claude-opus-4-7",
|
||||||
|
"openrouter/openai/gpt-5",
|
||||||
|
"qwen3-coder:480b",
|
||||||
|
"deepseek-v3.2",
|
||||||
|
} {
|
||||||
|
if cfg.Models.IsWeak(m) {
|
||||||
|
t.Errorf("%s should NOT be weak", m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadConfig_ModelsTOMLRoundTrip(t *testing.T) {
|
||||||
|
// Override one tier name; verify it loads, rest stay at defaults.
|
||||||
|
dir := t.TempDir()
|
||||||
|
cfgPath := filepath.Join(dir, "lakehouse.toml")
|
||||||
|
body := `[models]
|
||||||
|
local_judge = "custom-judge:latest"
|
||||||
|
weak_models = ["custom-judge:latest", "qwen3:latest"]
|
||||||
|
`
|
||||||
|
if err := os.WriteFile(cfgPath, []byte(body), 0o644); err != nil {
|
||||||
|
t.Fatalf("write config: %v", err)
|
||||||
|
}
|
||||||
|
cfg, err := LoadConfig(cfgPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig: %v", err)
|
||||||
|
}
|
||||||
|
if cfg.Models.LocalJudge != "custom-judge:latest" {
|
||||||
|
t.Errorf("local_judge = %q, want custom-judge:latest", cfg.Models.LocalJudge)
|
||||||
|
}
|
||||||
|
if !cfg.Models.IsWeak("custom-judge:latest") {
|
||||||
|
t.Errorf("custom-judge:latest should be weak after override")
|
||||||
|
}
|
||||||
|
// Unspecified tiers keep defaults — TOML decoder doesn't zero
|
||||||
|
// fields it didn't see, BUT slices are replaced wholesale. The
|
||||||
|
// weak_models override above is intentional so callers know
|
||||||
|
// they're trading the full default list when they set this.
|
||||||
|
if cfg.Models.LocalFast != "qwen3.5:latest" {
|
||||||
|
t.Errorf("local_fast drifted to %q, want default qwen3.5:latest", cfg.Models.LocalFast)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestLoadConfig_InvalidTOML_ReturnsError(t *testing.T) {
|
func TestLoadConfig_InvalidTOML_ReturnsError(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
cfgPath := filepath.Join(dir, "bad.toml")
|
cfgPath := filepath.Join(dir, "bad.toml")
|
||||||
|
|||||||
@ -80,3 +80,39 @@ use_path_style = true
|
|||||||
|
|
||||||
[log]
|
[log]
|
||||||
level = "info"
|
level = "info"
|
||||||
|
|
||||||
|
# Model tier registry — names map to actual model IDs per the small-
|
||||||
|
# model pipeline architecture (project_small_model_pipeline_vision.md).
|
||||||
|
# Bumping a model means editing one line here, not hunting through code.
|
||||||
|
#
|
||||||
|
# Tier philosophy:
|
||||||
|
# - local_* : on-box Ollama. Inner-loop hot path. Repeated calls.
|
||||||
|
# - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier.
|
||||||
|
# - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
|
||||||
|
# Reserved for blockers and full-scope reviews.
|
||||||
|
#
|
||||||
|
# weak_models is the codified "local-hot-path eligible" list that the
|
||||||
|
# matrix.downgrade gate reads. A model in this list bypasses the
|
||||||
|
# strong-model downgrade rule (it's already weak — no need to downgrade
|
||||||
|
# corpora further).
|
||||||
|
[models]
|
||||||
|
# Tier 1 — local hot path
|
||||||
|
local_fast = "qwen3.5:latest"
|
||||||
|
local_embed = "nomic-embed-text"
|
||||||
|
local_judge = "qwen3.5:latest"
|
||||||
|
local_review = "qwen3.5:latest"
|
||||||
|
|
||||||
|
# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
|
||||||
|
# deepseek/kimi-k2.6/qwen3-coder are the working primaries.
|
||||||
|
cloud_judge = "kimi-k2.6:cloud"
|
||||||
|
cloud_review = "qwen3-coder:480b"
|
||||||
|
cloud_strong = "deepseek-v3.2"
|
||||||
|
|
||||||
|
# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
|
||||||
|
frontier_review = "openrouter/anthropic/claude-opus-4-7"
|
||||||
|
frontier_arch = "openrouter/moonshotai/kimi-k2-0905"
|
||||||
|
frontier_strong = "openrouter/openai/gpt-5"
|
||||||
|
frontier_free = "opencode/claude-opus-4-7"
|
||||||
|
|
||||||
|
# Local-hot-path eligible — matrix.downgrade bypass list.
|
||||||
|
weak_models = ["qwen3.5:latest", "qwen3:latest"]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user