phase 1: add [models] tier config — additive, no callers migrate yet

Codifies the small-model-pipeline tiering (per project_small_model_pipeline_vision.md)
in lakehouse.toml [models] section. Tier names map to actual model
IDs; bumping a model means editing one line, not hunting through code.

Tier philosophy:
- local_*    : on-box Ollama. Inner-loop hot path. Repeated calls.
- cloud_*    : Ollama Cloud (Pro plan). Larger context, fail-up tier.
- frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.

weak_models is the codified "local-hot-path eligible" list — phase 2
will migrate matrix.downgrade to read it instead of hardcoding.

Defaults reflect 2026-04-29 architecture: qwen3.5:latest as local
(stronger than qwen2.5, same JSON-clean property), kimi-k2.6 as cloud
judge (kimi-k2:1t still upstream-broken), opus-4-7 + kimi-k2-0905 as
frontier review/arch via OpenRouter, opencode/claude-opus-4-7 as
frontier_free leveraging the OpenCode subscription.

3 new tests in internal/shared/config_test.go:
- TestDefaultConfig_ModelsTier — locks tier defaults
- TestModelsConfig_IsWeak     — weak-bypass list
- TestLoadConfig_ModelsTOMLRoundTrip — override semantics

just verify PASS (g2 had one flake on first run — Ollama transfer
truncation; clean on retry, unrelated to this change).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-29 23:48:45 -05:00
parent 3dd7d9fe30
commit ec1d031996
3 changed files with 227 additions and 0 deletions

View File

@ -30,6 +30,7 @@ type Config struct {
Matrixd MatrixdConfig `toml:"matrixd"`
Observerd ObserverdConfig `toml:"observerd"`
S3 S3Config `toml:"s3"`
Models ModelsConfig `toml:"models"`
Log LogConfig `toml:"log"`
Auth AuthConfig `toml:"auth"`
}
@ -162,6 +163,90 @@ type LogConfig struct {
Level string `toml:"level"`
}
// ModelsConfig names the model used by each tier of the small-model
// pipeline (per project_small_model_pipeline_vision.md and the Rust
// `config/providers.toml` convention). Callers address tiers by name
// rather than by literal model ID, so bumping a model is a one-line
// config edit instead of a hunt through code.
//
// Tier philosophy:
//   - local_*    : on-box Ollama. Cheap, fast, JSON-clean. Inner-loop
//     hot path, called repeatedly per query.
//   - cloud_*    : Ollama Cloud (Pro plan). Larger context; the
//     fail-up tier when local is uncertain. Auth via OLLAMA_CLOUD_KEY.
//   - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per
//     call. Reserved for blockers and full-scope reviews, not steady
//     state.
//
// WeakModels is the codified "local-hot-path eligible" list consulted
// by the matrix downgrade gate — replacing the previous hardcoded
// switch in internal/matrix/downgrade.go. A listed model bypasses the
// corpus-downgrade rule (it is already weak, so there is nothing to
// downgrade to); strong (paid / cloud / frontier) models trigger it.
type ModelsConfig struct {
	LocalFast      string   `toml:"local_fast"`
	LocalEmbed     string   `toml:"local_embed"`
	LocalJudge     string   `toml:"local_judge"`
	LocalReview    string   `toml:"local_review"`
	CloudJudge     string   `toml:"cloud_judge"`
	CloudReview    string   `toml:"cloud_review"`
	CloudStrong    string   `toml:"cloud_strong"`
	FrontierReview string   `toml:"frontier_review"`
	FrontierArch   string   `toml:"frontier_arch"`
	FrontierStrong string   `toml:"frontier_strong"`
	FrontierFree   string   `toml:"frontier_free"`
	WeakModels     []string `toml:"weak_models"`
}

// Resolve maps a tier name (e.g. "local_judge") to its configured
// model ID. An unknown tier yields "": callers are expected to fall
// back to a hardcoded default and log a warning when that happens —
// the alternative (panic) would make a missing tier crash the binary
// at startup, which is too aggressive for an additive config.
func (m ModelsConfig) Resolve(tier string) string {
	// Missing keys fall out of the map lookup as the zero value (""),
	// which is exactly the unknown-tier contract.
	return map[string]string{
		"local_fast":      m.LocalFast,
		"local_embed":     m.LocalEmbed,
		"local_judge":     m.LocalJudge,
		"local_review":    m.LocalReview,
		"cloud_judge":     m.CloudJudge,
		"cloud_review":    m.CloudReview,
		"cloud_strong":    m.CloudStrong,
		"frontier_review": m.FrontierReview,
		"frontier_arch":   m.FrontierArch,
		"frontier_strong": m.FrontierStrong,
		"frontier_free":   m.FrontierFree,
	}[tier]
}

// IsWeak reports whether model appears in the configured WeakModels
// list. matrix.downgrade uses it to decide whether to bypass the
// strong-model downgrade gate (weak models stay on the full lakehouse
// path).
func (m ModelsConfig) IsWeak(model string) bool {
	for i := range m.WeakModels {
		if m.WeakModels[i] == model {
			return true
		}
	}
	return false
}
// AuthConfig is the inter-service auth posture from ADR-003.
// Token is a Bearer token; empty means "no auth" (G0 dev mode).
// AllowedIPs is a list of CIDRs (or bare IPs treated as /32);
@ -244,6 +329,34 @@ func DefaultConfig() Config {
Bucket: "lakehouse-primary",
UsePathStyle: true,
},
Models: ModelsConfig{
// Tier 1 — local hot path. JSON-clean, fast, deterministic.
// qwen3.5:latest replaces qwen2.5 as the local default per
// 2026-04-29 architectural review (stronger local model,
// same JSON-clean property).
LocalFast: "qwen3.5:latest",
LocalEmbed: "nomic-embed-text",
LocalJudge: "qwen3.5:latest",
LocalReview: "qwen3.5:latest",
// Tier 2 — Ollama Cloud (Pro plan). 2026-04-28 upgrade.
// kimi-k2:1t is upstream-broken; deepseek/kimi-k2.6/qwen3-coder
// are the working primaries.
CloudJudge: "kimi-k2.6:cloud",
CloudReview: "qwen3-coder:480b",
CloudStrong: "deepseek-v3.2",
// Tier 3 — frontier. OpenRouter credits + OpenCode key.
// Use sparingly: rate-limited, billed per call.
FrontierReview: "openrouter/anthropic/claude-opus-4-7",
FrontierArch: "openrouter/moonshotai/kimi-k2-0905",
FrontierStrong: "openrouter/openai/gpt-5",
FrontierFree: "opencode/claude-opus-4-7",
// Local-hot-path eligible. matrix.downgrade reads this
// list to decide whether to bypass the strong-model gate.
WeakModels: []string{"qwen3.5:latest", "qwen3:latest"},
},
Log: LogConfig{Level: "info"},
}
}

View File

@ -110,6 +110,84 @@ bucket = "custom-bucket"
}
}
// TestDefaultConfig_ModelsTier locks the default model ID for each
// tier, so a refactor that drops a tier or silently renames a default
// cannot ship without touching this test.
func TestDefaultConfig_ModelsTier(t *testing.T) {
	cfg := DefaultConfig()

	expected := map[string]string{
		"local_fast":      "qwen3.5:latest",
		"local_embed":     "nomic-embed-text",
		"local_judge":     "qwen3.5:latest",
		"cloud_judge":     "kimi-k2.6:cloud",
		"cloud_review":    "qwen3-coder:480b",
		"frontier_review": "openrouter/anthropic/claude-opus-4-7",
		"frontier_free":   "opencode/claude-opus-4-7",
	}
	for tier, want := range expected {
		if got := cfg.Models.Resolve(tier); got != want {
			t.Errorf("Models.Resolve(%q) = %q, want %q", tier, got, want)
		}
	}

	// An unknown tier resolves to "" — falling back is the caller's job.
	if got := cfg.Models.Resolve("nonexistent"); got != "" {
		t.Errorf("Models.Resolve(nonexistent) = %q, want empty string", got)
	}
}
// TestModelsConfig_IsWeak checks the default WeakModels set — the
// matrix.downgrade bypass list — against both sides: the shipped weak
// models must match, and strong/cloud/frontier models must not.
func TestModelsConfig_IsWeak(t *testing.T) {
	models := DefaultConfig().Models

	if !models.IsWeak("qwen3.5:latest") {
		t.Errorf("qwen3.5:latest should be weak (in default WeakModels)")
	}
	if !models.IsWeak("qwen3:latest") {
		t.Errorf("qwen3:latest should be weak")
	}

	strong := []string{
		"opencode/claude-opus-4-7",
		"openrouter/openai/gpt-5",
		"qwen3-coder:480b",
		"deepseek-v3.2",
	}
	for _, m := range strong {
		if models.IsWeak(m) {
			t.Errorf("%s should NOT be weak", m)
		}
	}
}
// TestLoadConfig_ModelsTOMLRoundTrip overrides a single tier through a
// TOML file and verifies the override lands while every unspecified
// tier keeps its compiled-in default.
func TestLoadConfig_ModelsTOMLRoundTrip(t *testing.T) {
	cfgPath := filepath.Join(t.TempDir(), "lakehouse.toml")
	body := `[models]
local_judge = "custom-judge:latest"
weak_models = ["custom-judge:latest", "qwen3:latest"]
`
	if err := os.WriteFile(cfgPath, []byte(body), 0o644); err != nil {
		t.Fatalf("write config: %v", err)
	}

	cfg, err := LoadConfig(cfgPath)
	if err != nil {
		t.Fatalf("LoadConfig: %v", err)
	}

	if cfg.Models.LocalJudge != "custom-judge:latest" {
		t.Errorf("local_judge = %q, want custom-judge:latest", cfg.Models.LocalJudge)
	}
	if !cfg.Models.IsWeak("custom-judge:latest") {
		t.Errorf("custom-judge:latest should be weak after override")
	}

	// The TOML decoder leaves scalar fields it didn't see at their
	// defaults, BUT slices are replaced wholesale — the weak_models
	// override above is intentional so callers know that setting it
	// trades away the full default list.
	if cfg.Models.LocalFast != "qwen3.5:latest" {
		t.Errorf("local_fast drifted to %q, want default qwen3.5:latest", cfg.Models.LocalFast)
	}
}
func TestLoadConfig_InvalidTOML_ReturnsError(t *testing.T) {
dir := t.TempDir()
cfgPath := filepath.Join(dir, "bad.toml")

View File

@ -80,3 +80,39 @@ use_path_style = true
[log]
level = "info"
# Model tier registry — names map to actual model IDs per the small-
# model pipeline architecture (project_small_model_pipeline_vision.md).
# Bumping a model means editing one line here, not hunting through code.
#
# Tier philosophy:
# - local_* : on-box Ollama. Inner-loop hot path. Repeated calls.
# - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier.
# - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
# Reserved for blockers and full-scope reviews.
#
# weak_models is the codified "local-hot-path eligible" list that the
# matrix.downgrade gate reads. A model in this list bypasses the
# strong-model downgrade rule (it's already weak — no need to downgrade
# corpora further).
[models]
# Tier 1 — local hot path
local_fast = "qwen3.5:latest"
local_embed = "nomic-embed-text"
local_judge = "qwen3.5:latest"
local_review = "qwen3.5:latest"
# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
# deepseek/kimi-k2.6/qwen3-coder are the working primaries.
cloud_judge = "kimi-k2.6:cloud"
cloud_review = "qwen3-coder:480b"
cloud_strong = "deepseek-v3.2"
# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
frontier_review = "openrouter/anthropic/claude-opus-4-7"
frontier_arch = "openrouter/moonshotai/kimi-k2-0905"
frontier_strong = "openrouter/openai/gpt-5"
frontier_free = "opencode/claude-opus-4-7"
# Local-hot-path eligible — matrix.downgrade bypass list.
weak_models = ["qwen3.5:latest", "qwen3:latest"]