root 22c0b42e96 config: mark unwired ModelsConfig tier fields as scaffolding
Surfaced during today's local-only audit. The ModelsConfig struct
parses 11 tier fields from lakehouse.toml (LocalFast/LocalEmbed/
LocalJudge/LocalReview/CloudJudge/CloudReview/CloudStrong/
FrontierReview/FrontierArch/FrontierStrong/FrontierFree) and
exposes Resolve(tier) → model. As of 2026-05-03, NO code calls
Resolve(). Operators setting these in lakehouse.toml see no runtime
effect — silent config drift.

Not removing the fields (would silently swallow operator's existing
TOML and make it harder to wire later). Adding a clear UNWIRED
warning comment instead.

WeakModels IS live (consumed by internal/workflow/modes.go +
internal/matrix/downgrade.go) — split out with its own comment so
it's not lumped with the dead fields.

If a future commit wires up Resolve() consumers, replace the
UNWIRED comment with the consumer reference.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 02:54:10 -05:00

520 lines
20 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package shared also provides the TOML config loader. Per ADR
// equivalent of Rust ADR-006 (TOML config over env vars), every
// service reads `lakehouse.toml` with sane defaults and env
// overrides. Config is hot-reload-unaware in G0; reload-on-SIGHUP
// is a G1+ concern.
package shared
import (
	"errors"
	"fmt"
	"io/fs"
	"log/slog"
	"os"
	"slices"

	"github.com/pelletier/go-toml/v2"
)
// Config is the unified Lakehouse config. Each service reads only
// the section it cares about, but they all share the same file so
// operators have one place to look.
type Config struct {
	// One section per service binary (cmd/<name>d).
	Gateway    GatewayConfig    `toml:"gateway"`
	Storaged   ServiceConfig    `toml:"storaged"`
	Catalogd   CatalogConfig    `toml:"catalogd"`
	Ingestd    IngestConfig     `toml:"ingestd"`
	Queryd     QuerydConfig     `toml:"queryd"`
	Vectord    VectordConfig    `toml:"vectord"`
	Embedd     EmbeddConfig     `toml:"embedd"`
	Pathwayd   PathwaydConfig   `toml:"pathwayd"`
	Matrixd    MatrixdConfig    `toml:"matrixd"`
	Observerd  ObserverdConfig  `toml:"observerd"`
	Chatd      ChatdConfig      `toml:"chatd"`
	Validatord ValidatordConfig `toml:"validatord"`
	// Cross-cutting sections shared by all services.
	S3     S3Config     `toml:"s3"`
	Models ModelsConfig `toml:"models"`
	Log    LogConfig    `toml:"log"`
	Auth   AuthConfig   `toml:"auth"`
}
// IngestConfig adds ingestd-specific knobs. ingestd needs to PUT
// parquet to storaged AND register manifests with catalogd, so it
// holds two upstream URLs in addition to its own bind.
//
// MaxIngestBytes caps the multipart body size. CSVs are typically
// 4-6× larger than the resulting Snappy-compressed Parquet, so 256
// MiB CSV → ~50 MiB Parquet — well under storaged's 256 MiB PUT
// cap. Real-scale validation (2026-04-29) showed 500K workers ×
// 18 cols = 344 MiB CSV → 71 MiB Parquet; bumping this knob to
// 512 MiB is the documented path for that workload.
type IngestConfig struct {
	Bind           string `toml:"bind"`             // ingestd's own listen address
	StoragedURL    string `toml:"storaged_url"`     // upstream for parquet PUTs
	CatalogdURL    string `toml:"catalogd_url"`     // upstream for manifest registration
	MaxIngestBytes int64  `toml:"max_ingest_bytes"` // multipart body cap (bytes); see sizing note above
}
// GatewayConfig adds the upstream URLs the reverse proxy fronts.
// Each route family (/v1/storage, /v1/catalog, /v1/ingest, /v1/sql,
// /v1/vectors, /v1/embed, /v1/pathway, /v1/matrix, /v1/observer)
// has its own upstream so we can scale services independently or
// move them to different boxes without touching gateway code.
type GatewayConfig struct {
	// Bind is the gateway's own listen address; every *URL field
	// below is the base URL of one upstream service.
	Bind          string `toml:"bind"`
	StoragedURL   string `toml:"storaged_url"`
	CatalogdURL   string `toml:"catalogd_url"`
	IngestdURL    string `toml:"ingestd_url"`
	QuerydURL     string `toml:"queryd_url"`
	VectordURL    string `toml:"vectord_url"`
	EmbeddURL     string `toml:"embedd_url"`
	PathwaydURL   string `toml:"pathwayd_url"`
	MatrixdURL    string `toml:"matrixd_url"`
	ObserverdURL  string `toml:"observerd_url"`
	ChatdURL      string `toml:"chatd_url"`
	ValidatordURL string `toml:"validatord_url"`
}
// EmbeddConfig drives the embed service. ProviderURL points at the
// embedding backend (Ollama in G2, possibly OpenAI/Voyage in G3+).
// DefaultModel is what gets used when callers don't specify a
// model in their request body. CacheSize is the LRU cache cap on
// (model, sha256(text)) → vector lookups; 0 disables caching.
// Default 10000 entries ≈ 30 MiB at d=768.
type EmbeddConfig struct {
	Bind         string `toml:"bind"`          // embedd's own listen address
	ProviderURL  string `toml:"provider_url"`  // embedding backend base URL
	DefaultModel string `toml:"default_model"` // used when the request omits a model
	CacheSize    int    `toml:"cache_size"`    // LRU entry cap; 0 disables caching
}
// VectordConfig adds vectord-specific knobs. StoragedURL is
// optional — empty string disables persistence, useful for ephemeral
// dev or test runs. When set, indexes Save after every state change
// and Load on startup.
type VectordConfig struct {
	Bind        string `toml:"bind"`         // vectord's own listen address
	StoragedURL string `toml:"storaged_url"` // empty = no persistence (ephemeral)
}
// PathwaydConfig drives the pathway-memory service (cmd/pathwayd).
// PersistPath: file path to the JSONL log; empty = in-memory only
// (test/dev). Production sets a stable path under /var/lib/lakehouse
// or similar so traces survive restart.
type PathwaydConfig struct {
	Bind        string `toml:"bind"`         // pathwayd's own listen address
	PersistPath string `toml:"persist_path"` // JSONL log path; empty = in-memory only
}
// MatrixdConfig drives the matrix-indexer service (cmd/matrixd).
// Per docs/SPEC.md §3.4: multi-corpus retrieve+merge over vectord
// with embed-via-embedd for query text. Both upstream URLs are
// required — matrixd has no in-process fallback.
type MatrixdConfig struct {
	Bind       string `toml:"bind"`        // matrixd's own listen address
	EmbeddURL  string `toml:"embedd_url"`  // required upstream for query-text embedding
	VectordURL string `toml:"vectord_url"` // required upstream for retrieve+merge
}
// ChatdConfig drives the chat dispatcher service (cmd/chatd) — Phase 4.
// Routes /v1/chat to the right provider based on model-name prefix
// or :cloud suffix. Per-provider API keys come from env vars (or
// /etc/lakehouse/<provider>.env files); empty keys leave the provider
// unregistered so requests for that provider 404 cleanly.
//
// OllamaURL is the local Ollama upstream (no auth). Empty disables
// the local Ollama provider — useful in deployments that don't run
// Ollama on the box (cloud-only operation).
type ChatdConfig struct {
	Bind      string `toml:"bind"`       // chatd's own listen address
	OllamaURL string `toml:"ollama_url"` // local Ollama upstream; empty disables it
	// API-key env var names. Default to the conventional names.
	// Operators can rename these for environments using different env
	// var conventions, but the defaults match /etc/lakehouse/*.env
	// files that systemd already loads.
	OllamaCloudKeyEnv string `toml:"ollama_cloud_key_env"`
	OpenRouterKeyEnv  string `toml:"openrouter_key_env"`
	OpenCodeKeyEnv    string `toml:"opencode_key_env"`
	KimiKeyEnv        string `toml:"kimi_key_env"`
	// Optional .env file paths — a fallback when the env var isn't set
	// in the process environment. Same keys, "KEY=value" lines.
	OllamaCloudKeyFile string `toml:"ollama_cloud_key_file"`
	OpenRouterKeyFile  string `toml:"openrouter_key_file"`
	OpenCodeKeyFile    string `toml:"opencode_key_file"`
	KimiKeyFile        string `toml:"kimi_key_file"`
	// Per-call timeout in seconds. 0 = 180s default.
	TimeoutSecs int `toml:"timeout_secs"`
}
// ValidatordConfig drives the validator service (cmd/validatord).
// Hosts /validate (FillValidator + EmailValidator + PlaybookValidator)
// and /iterate (generate→validate→correct loop). Routes to chatd via
// ChatdURL for the iteration loop's LLM hops.
//
// RosterPath points at a JSONL roster (one WorkerRecord per line) that
// FillValidator and EmailValidator use for worker-existence checks.
// Empty disables the roster — worker-existence checks all fail
// Consistency, which is the correct behavior when the roster isn't
// configured. Production sets a stable path under /var/lib/lakehouse/.
type ValidatordConfig struct {
	Bind       string `toml:"bind"`        // validatord's own listen address
	ChatdURL   string `toml:"chatd_url"`   // chatd upstream for /iterate LLM hops
	RosterPath string `toml:"roster_path"` // JSONL WorkerRecord roster; empty = disabled
	// Per-call cap on the iteration loop. 0 = 3 (Phase 43 default).
	DefaultMaxIterations int `toml:"default_max_iterations"`
	// Per-call cap on chat hop max_tokens. 0 = 4096.
	DefaultMaxTokens int `toml:"default_max_tokens"`
	// Per-call timeout for the chat hop in seconds. 0 = 240s.
	ChatTimeoutSecs int `toml:"chat_timeout_secs"`
	// SessionLogPath: where to append SessionRecord JSONL rows for
	// offline analysis (DuckDB queries, scrum review tooling). Empty
	// = disabled. Production sets a stable path under
	// /var/lib/lakehouse/validator/sessions.jsonl. Append-only,
	// best-effort; see internal/validator/session_log.go.
	SessionLogPath string `toml:"session_log_path"`
}
// ObserverdConfig drives the observer service (cmd/observerd).
// PersistPath: file path to the JSONL ops log; empty = in-memory
// only (test/dev). Production sets a stable path under
// /var/lib/lakehouse/observer/ops.jsonl so ops survive restart.
// Mirrors the PathwaydConfig pattern.
type ObserverdConfig struct {
	Bind        string `toml:"bind"`         // observerd's own listen address
	PersistPath string `toml:"persist_path"` // JSONL ops log path; empty = in-memory only
}
// QuerydConfig adds queryd-specific knobs. queryd talks DuckDB
// directly to MinIO via DuckDB's httpfs extension (so no storaged
// URL needed), and reads the catalog over HTTP for view registration.
// SecretsPath defaults to /etc/lakehouse/secrets-go.toml — the same
// file storaged uses, since both services need the S3 credentials.
type QuerydConfig struct {
	Bind         string `toml:"bind"`          // queryd's own listen address
	CatalogdURL  string `toml:"catalogd_url"`  // catalog read for view registration
	SecretsPath  string `toml:"secrets_path"`  // S3 credentials file shared with storaged
	RefreshEvery string `toml:"refresh_every"` // duration string, e.g. "30s"
}
// CatalogConfig adds catalogd-specific knobs on top of the standard
// bind. StoragedURL points at the storaged service for manifest
// persistence; G0 defaults to the localhost bind.
type CatalogConfig struct {
	Bind        string `toml:"bind"`         // catalogd's own listen address
	StoragedURL string `toml:"storaged_url"` // upstream for manifest persistence
}
// ServiceConfig is the per-binary bind config. Default Bind ""
// means "use the service's hardcoded G0 default" — see DefaultConfig.
type ServiceConfig struct {
	Bind string `toml:"bind"` // listen address; "" = service's G0 default
}
// S3Config holds S3-compatible storage settings. Endpoint blank →
// AWS default. Bucket "" → "lakehouse-primary".
type S3Config struct {
	Endpoint        string `toml:"endpoint"` // blank = AWS default endpoint
	Region          string `toml:"region"`
	Bucket          string `toml:"bucket"` // "" = "lakehouse-primary"
	AccessKeyID     string `toml:"access_key_id"`
	SecretAccessKey string `toml:"secret_access_key"`
	// UsePathStyle toggles path-style addressing — presumably needed
	// for MinIO-style endpoints (the G0 default sets it true alongside
	// a localhost:9000 endpoint); confirm against the S3 client setup.
	UsePathStyle bool `toml:"use_path_style"`
}
// LogConfig — slog level for now; structured fields land G1+.
type LogConfig struct {
	Level string `toml:"level"` // slog level name, e.g. "info"
}
// ModelsConfig names the models used by each tier of the small-model
// pipeline (per project_small_model_pipeline_vision.md and the Rust
// `config/providers.toml` convention). Callers reference tier names,
// not literal model IDs — bumping a tier means editing this file, not
// hunting through code.
//
// Tier philosophy:
//   - local_*    : on-box Ollama. Cheap, fast, JSON-clean. Inner-loop
//     hot path. Repeated calls per query.
//   - cloud_*    : Ollama Cloud (Pro plan). Larger context, called when
//     local is uncertain. Auth via OLLAMA_CLOUD_KEY.
//   - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
//     Reserved for blockers and full-scope reviews, not steady state.
//
// WeakModels is the codified "local-hot-path eligible" list that the
// matrix downgrade gate consults — replacing the previous hardcoded
// switch in internal/matrix/downgrade.go. A model in this list bypasses
// the corpus-downgrade rule: it's already weak, no need to downgrade
// further. Strong (paid / cloud / frontier) models trigger the gate.
type ModelsConfig struct {
	// ⚠ UNWIRED scaffolding — these tier fields parse from TOML and
	// Resolve() reads them, but as of 2026-05-03 NO OTHER CODE calls
	// Resolve(). Setting these in lakehouse.toml has no runtime effect
	// on routing. Customer hot path uses local Ollama via direct model
	// name per PRD line 70 (everything runs locally). If you wire up
	// Resolve() consumers, document them here so the next person knows
	// the field is live, not dead scaffolding.
	LocalFast      string `toml:"local_fast"`
	LocalEmbed     string `toml:"local_embed"`
	LocalJudge     string `toml:"local_judge"`
	LocalReview    string `toml:"local_review"`
	CloudJudge     string `toml:"cloud_judge"`
	CloudReview    string `toml:"cloud_review"`
	CloudStrong    string `toml:"cloud_strong"`
	FrontierReview string `toml:"frontier_review"`
	FrontierArch   string `toml:"frontier_arch"`
	FrontierStrong string `toml:"frontier_strong"`
	FrontierFree   string `toml:"frontier_free"`
	// WeakModels IS live — internal/workflow/modes.go reads it at
	// startup, internal/matrix/downgrade.go uses the list for the
	// strong-model auto-downgrade gate. Don't lump with unwired fields.
	WeakModels []string `toml:"weak_models"`
}

// Resolve maps a tier name (e.g. "local_judge") to the configured
// model ID. Unknown tier returns "". Callers should fall back to a
// hardcoded default and log a warning when this returns empty — the
// alternative (panic) would make a missing tier crash the binary at
// startup, which is too aggressive for an additive config.
func (m ModelsConfig) Resolve(tier string) string {
	switch tier {
	case "local_fast":
		return m.LocalFast
	case "local_embed":
		return m.LocalEmbed
	case "local_judge":
		return m.LocalJudge
	case "local_review":
		return m.LocalReview
	case "cloud_judge":
		return m.CloudJudge
	case "cloud_review":
		return m.CloudReview
	case "cloud_strong":
		return m.CloudStrong
	case "frontier_review":
		return m.FrontierReview
	case "frontier_arch":
		return m.FrontierArch
	case "frontier_strong":
		return m.FrontierStrong
	case "frontier_free":
		return m.FrontierFree
	default:
		return ""
	}
}

// IsWeak reports whether model is in the configured WeakModels list.
// Used by matrix.downgrade to decide whether to bypass the strong-model
// downgrade gate (weak models stay on the full lakehouse path).
func (m ModelsConfig) IsWeak(model string) bool {
	// slices.Contains (Go 1.21+) replaces the previous hand-rolled
	// loop; a nil WeakModels list simply contains nothing.
	return slices.Contains(m.WeakModels, model)
}
// AuthConfig is the inter-service auth posture from ADR-003.
// Token is a Bearer token; empty means "no auth" (G0 dev mode).
// AllowedIPs is a list of CIDRs (or bare IPs treated as /32);
// empty means "any source IP."
//
// Both layers operate independently when set:
//   - Token + AllowedIPs both empty → middleware is a no-op
//   - Token only → 401 unless Bearer matches
//   - AllowedIPs only → 403 unless r.RemoteAddr in CIDR
//   - Both → both gates apply
//
// The startup gate in shared.Run refuses to start with non-loopback
// bind AND empty Token — that's the audit's R-001 + R-007 worst
// case (no auth, world-reachable). LH_<SVC>_ALLOW_NONLOOPBACK=1 still
// bypasses the bind gate for explicit dev cases; the auth gate is
// independent of that bypass and is the real production guard.
type AuthConfig struct {
	Token      string   `toml:"token"`       // Bearer token; empty = no auth (G0 dev)
	AllowedIPs []string `toml:"allowed_ips"` // CIDRs / bare IPs (/32); empty = any source
	// TokenEnv names an environment variable; LoadConfig populates
	// Token from os.Getenv(TokenEnv) when Token is empty. Per ADR-006
	// 6.2: production deploys put the secret in /etc/lakehouse/auth.env
	// (mode 0600) loaded by systemd EnvironmentFile=, NOT in the
	// committed TOML. TokenEnv defaults to "AUTH_TOKEN".
	TokenEnv string `toml:"token_env"`
	// SecondaryTokens lets operators stage a rotation: both primary
	// and any secondary token pass auth during the rotation window.
	// After every caller updates, operators promote secondary →
	// primary and clear secondary. Per ADR-006 Decision 6.5.
	SecondaryTokens []string `toml:"secondary_tokens"`
}
// DefaultConfig returns the G0 dev defaults. Ports are shifted to
// 3110+ to coexist with the live Rust lakehouse on 3100/3201-3204
// during the migration. G5 cutover flips gateway back to 3100.
//
// All binds are loopback-only; the per-service ports below (3211-3221)
// are mirrored in the Gateway upstream URLs so the two stay in sync.
func DefaultConfig() Config {
	return Config{
		// Gateway fronts every service; each *URL matches the
		// corresponding service's Bind below.
		Gateway: GatewayConfig{
			Bind:          "127.0.0.1:3110",
			StoragedURL:   "http://127.0.0.1:3211",
			CatalogdURL:   "http://127.0.0.1:3212",
			IngestdURL:    "http://127.0.0.1:3213",
			QuerydURL:     "http://127.0.0.1:3214",
			VectordURL:    "http://127.0.0.1:3215",
			EmbeddURL:     "http://127.0.0.1:3216",
			PathwaydURL:   "http://127.0.0.1:3217",
			MatrixdURL:    "http://127.0.0.1:3218",
			ObserverdURL:  "http://127.0.0.1:3219",
			ChatdURL:      "http://127.0.0.1:3220",
			ValidatordURL: "http://127.0.0.1:3221",
		},
		Storaged: ServiceConfig{Bind: "127.0.0.1:3211"},
		Catalogd: CatalogConfig{Bind: "127.0.0.1:3212", StoragedURL: "http://127.0.0.1:3211"},
		Ingestd: IngestConfig{
			Bind:           "127.0.0.1:3213",
			StoragedURL:    "http://127.0.0.1:3211",
			CatalogdURL:    "http://127.0.0.1:3212",
			MaxIngestBytes: 256 << 20, // 256 MiB; bump per deployment via lakehouse.toml
		},
		Vectord: VectordConfig{
			Bind:        "127.0.0.1:3215",
			StoragedURL: "http://127.0.0.1:3211",
		},
		Embedd: EmbeddConfig{
			Bind:         "127.0.0.1:3216",
			ProviderURL:  "http://localhost:11434", // local Ollama
			DefaultModel: "nomic-embed-text",
			CacheSize:    10_000, // ~30 MiB at d=768; set to 0 to disable
		},
		Pathwayd: PathwaydConfig{
			Bind: "127.0.0.1:3217",
			// PersistPath empty by default = in-memory only. Production
			// sets to e.g. /var/lib/lakehouse/pathway/state.jsonl.
		},
		Matrixd: MatrixdConfig{
			Bind:       "127.0.0.1:3218",
			EmbeddURL:  "http://127.0.0.1:3216",
			VectordURL: "http://127.0.0.1:3215",
		},
		Observerd: ObserverdConfig{
			Bind: "127.0.0.1:3219",
			// PersistPath empty by default = in-memory only.
		},
		Validatord: ValidatordConfig{
			Bind:                 "127.0.0.1:3221",
			ChatdURL:             "http://127.0.0.1:3220",
			RosterPath:           "", // empty = no roster; worker-existence checks fail Consistency
			DefaultMaxIterations: 3,
			DefaultMaxTokens:     4096,
			ChatTimeoutSecs:      240,
			SessionLogPath:       "", // empty = no session JSONL. Operators set under /var/lib/lakehouse/validator/sessions.jsonl.
		},
		Chatd: ChatdConfig{
			Bind:      "127.0.0.1:3220",
			OllamaURL: "http://localhost:11434",
			// Conventional env-var names; matched by the
			// /etc/lakehouse/*.env fallback files below.
			OllamaCloudKeyEnv:  "OLLAMA_CLOUD_KEY",
			OpenRouterKeyEnv:   "OPENROUTER_API_KEY",
			OpenCodeKeyEnv:     "OPENCODE_API_KEY",
			KimiKeyEnv:         "KIMI_API_KEY",
			OllamaCloudKeyFile: "/etc/lakehouse/ollama_cloud.env",
			OpenRouterKeyFile:  "/etc/lakehouse/openrouter.env",
			OpenCodeKeyFile:    "/etc/lakehouse/opencode.env",
			KimiKeyFile:        "/etc/lakehouse/kimi.env",
			TimeoutSecs:        180,
		},
		Queryd: QuerydConfig{
			Bind:         "127.0.0.1:3214",
			CatalogdURL:  "http://127.0.0.1:3212",
			SecretsPath:  "/etc/lakehouse/secrets-go.toml",
			RefreshEvery: "30s",
		},
		S3: S3Config{
			Endpoint:     "http://localhost:9000", // local MinIO-style default
			Region:       "us-east-1",
			Bucket:       "lakehouse-primary",
			UsePathStyle: true,
		},
		Models: ModelsConfig{
			// Tier 1 — local hot path. JSON-clean, fast, deterministic.
			// qwen3.5:latest replaces qwen2.5 as the local default per
			// 2026-04-29 architectural review (stronger local model,
			// same JSON-clean property).
			LocalFast:   "qwen3.5:latest",
			LocalEmbed:  "nomic-embed-text",
			LocalJudge:  "qwen3.5:latest",
			LocalReview: "qwen3.5:latest",
			// Tier 2 — Ollama Cloud (Pro plan). 2026-04-28 upgrade.
			// kimi-k2:1t is upstream-broken; deepseek/kimi-k2.6/qwen3-coder
			// are the working primaries.
			CloudJudge:  "kimi-k2.6:cloud",
			CloudReview: "qwen3-coder:480b",
			CloudStrong: "deepseek-v3.2",
			// Tier 3 — frontier. OpenRouter credits + OpenCode key.
			// Use sparingly: rate-limited, billed per call.
			FrontierReview: "openrouter/anthropic/claude-opus-4-7",
			FrontierArch:   "openrouter/moonshotai/kimi-k2-0905",
			FrontierStrong: "openrouter/openai/gpt-5",
			FrontierFree:   "opencode/claude-opus-4-7",
			// Local-hot-path eligible. matrix.downgrade reads this
			// list to decide whether to bypass the strong-model gate.
			WeakModels: []string{"qwen3.5:latest", "qwen3:latest"},
		},
		Log: LogConfig{Level: "info"},
		// Auth is intentionally left at its zero value: empty Token +
		// empty AllowedIPs = auth middleware no-op (G0 dev mode).
	}
}
// LoadConfig layers the TOML file at path on top of DefaultConfig.
// An empty path means the caller didn't ask for a file, so the
// defaults come back untouched. A non-empty path pointing at a
// missing file logs a warning before falling back to defaults —
// silent fallback would hide misconfiguration (per Opus + Qwen
// WARN #3). Any other read or decode failure is returned as an
// error: a misconfigured service must never quietly run on
// defaults (that's the kind of bug you find at 2am).
func LoadConfig(path string) (Config, error) {
	cfg := DefaultConfig()
	if path == "" {
		return cfg, nil
	}
	raw, err := os.ReadFile(path)
	switch {
	case errors.Is(err, fs.ErrNotExist):
		// The operator named a file that isn't there — suspicious
		// enough to surface before returning defaults.
		slog.Warn("config file not found, using defaults",
			"path", path,
			"hint", "create the file or pass -config /path/to/lakehouse.toml")
		return cfg, nil
	case err != nil:
		return cfg, fmt.Errorf("read config: %w", err)
	}
	if err := toml.Unmarshal(raw, &cfg); err != nil {
		return cfg, fmt.Errorf("parse config: %w", err)
	}
	// Fill Auth.Token from the environment when the TOML left it
	// empty (ADR-006 6.2: secrets live in env files, not TOML).
	resolveAuthFromEnv(&cfg.Auth)
	return cfg, nil
}
// resolveAuthFromEnv fills auth.Token from the environment when the
// TOML left it empty. The variable name comes from auth.TokenEnv and
// defaults to "AUTH_TOKEN" so operators don't have to configure both —
// setting AUTH_TOKEN env is enough. Per ADR-006 Decision 6.2:
// production deploys keep the secret in /etc/lakehouse/auth.env
// (mode 0600), loaded by systemd EnvironmentFile=, never in the
// committed TOML.
func resolveAuthFromEnv(auth *AuthConfig) {
	if auth.Token != "" {
		return // an explicit token in config wins; env is only a fallback
	}
	name := auth.TokenEnv
	if name == "" {
		name = "AUTH_TOKEN"
	}
	if v := os.Getenv(name); v != "" {
		auth.Token = v
	}
}