Codifies the small-model-pipeline tiering (per project_small_model_pipeline_vision.md) in the lakehouse.toml [models] section. Tier names map to actual model IDs; bumping a model means editing one line, not hunting through code.

Tier philosophy:
- local_*    : on-box Ollama. Inner-loop hot path. Repeated calls.
- cloud_*    : Ollama Cloud (Pro plan). Larger context, fail-up tier.
- frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.

weak_models is the codified "local-hot-path eligible" list — phase 2 will migrate matrix.downgrade to read it instead of hardcoding.

Defaults reflect the 2026-04-29 architecture: qwen3.5:latest as local (stronger than qwen2.5, same JSON-clean property), kimi-k2.6 as cloud judge (kimi-k2:1t still upstream-broken), opus-4-7 + kimi-k2-0905 as frontier review/arch via OpenRouter, and opencode/claude-opus-4-7 as frontier_free, leveraging the OpenCode subscription.

3 new tests in internal/shared/config_test.go:
- TestDefaultConfig_ModelsTier — locks tier defaults
- TestModelsConfig_IsWeak — weak-bypass list
- TestLoadConfig_ModelsTOMLRoundTrip — override semantics

just verify: PASS (g2 had one flake on first run — Ollama transfer truncation; clean on retry, unrelated to this change).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
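
For orientation before the config below, a minimal sketch of the Go shape the [models] table might decode into; the struct name, field names, TOML tags, and the IsWeak signature here are assumptions for illustration, not the actual internal/shared definitions:

    // Sketch only: names and tags are illustrative; the real type and the
    // IsWeak helper live in internal/shared (exercised by config_test.go).
    package shared

    // ModelsConfig mirrors the [models] table in lakehouse.toml.
    type ModelsConfig struct {
        LocalFast   string `toml:"local_fast"`
        LocalEmbed  string `toml:"local_embed"`
        LocalJudge  string `toml:"local_judge"`
        LocalReview string `toml:"local_review"`

        CloudJudge  string `toml:"cloud_judge"`
        CloudReview string `toml:"cloud_review"`
        CloudStrong string `toml:"cloud_strong"`

        FrontierReview string `toml:"frontier_review"`
        FrontierArch   string `toml:"frontier_arch"`
        FrontierStrong string `toml:"frontier_strong"`
        FrontierFree   string `toml:"frontier_free"`

        // Local-hot-path eligible list; a hit here bypasses the
        // matrix.downgrade strong-model rule.
        WeakModels []string `toml:"weak_models"`
    }

    // IsWeak reports whether model is on the weak (local-hot-path) list.
    func (m ModelsConfig) IsWeak(model string) bool {
        for _, w := range m.WeakModels {
            if w == model {
                return true
            }
        }
        return false
    }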
# Lakehouse-Go config — G0 dev defaults. Overrides via env are a
# G1+ concern; for G0 edit this file and restart the affected service.

# G0 dev ports — shifted to 3110+ so the Go services run alongside
# the live Rust lakehouse on 3100/3201-3204 without colliding. G5
# (demo cutover) flips gateway back to 3100 when Rust retires.
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
observerd_url = "http://127.0.0.1:3219"

[storaged]
bind = "127.0.0.1:3211"

[catalogd]
bind = "127.0.0.1:3212"
storaged_url = "http://127.0.0.1:3211"

[ingestd]
bind = "127.0.0.1:3213"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
# CSV uploads are ~4-6× the resulting Parquet. 256 MiB cap keeps the in-memory
# parse + Arrow + Parquet output footprint bounded. Bump for known large
# datasets (e.g. workers_500k → 344 MiB CSV needs 512 MiB).
max_ingest_bytes = 268435456
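# Example override for a workers_500k-scale upload (512 MiB = 512 * 1024 * 1024):
# max_ingest_bytes = 536870912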

[vectord]
bind = "127.0.0.1:3215"
# Optional — set to empty string to disable persistence (dev/test).
storaged_url = "http://127.0.0.1:3211"

[embedd]
bind = "127.0.0.1:3216"
# G2: Ollama local. G3+ may swap in OpenAI/Voyage by changing
# this URL + the wire format inside the provider.
provider_url = "http://localhost:11434"
default_model = "nomic-embed-text"
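# Hypothetical G3+ swap (illustrative values only; also needs the provider
# wire-format change noted above):
# provider_url = "https://api.openai.com/v1"
# default_model = "text-embedding-3-small"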

[queryd]
bind = "127.0.0.1:3214"
catalogd_url = "http://127.0.0.1:3212"
secrets_path = "/etc/lakehouse/secrets-go.toml"
refresh_every = "30s"

[pathwayd]
bind = "127.0.0.1:3217"
# Empty = in-memory only (dev/test). Production sets a path under
# /var/lib/lakehouse/pathway/state.jsonl so traces survive restart.
persist_path = ""
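# Production example (per the note above):
# persist_path = "/var/lib/lakehouse/pathway/state.jsonl"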

[matrixd]
bind = "127.0.0.1:3218"
# matrixd calls embedd (query-text → vector) and vectord (per-corpus
# search) directly. Localhost defaults; in distributed deployments
# these point at the gateway's upstream addresses.
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"

[observerd]
bind = "127.0.0.1:3219"
# Empty = in-memory only (dev/test). Production sets a path under
# /var/lib/lakehouse/observer/ops.jsonl so ops survive restart.
persist_path = ""

[s3]
endpoint = "http://localhost:9000"
region = "us-east-1"
bucket = "lakehouse-go-primary" # G0 dedicated bucket so Rust + Go coexist
access_key_id = "" # populated by SecretsProvider from /etc/lakehouse/secrets-go.toml
secret_access_key = "" # ditto
use_path_style = true
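# Sketch of what /etc/lakehouse/secrets-go.toml might carry (key names are an
# assumption; the SecretsProvider defines the real schema):
#   access_key_id = "..."
#   secret_access_key = "..."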

[log]
level = "info"

# Model tier registry — names map to actual model IDs per the small-
# model pipeline architecture (project_small_model_pipeline_vision.md).
# Bumping a model means editing one line here, not hunting through code.
#
# Tier philosophy:
# - local_* : on-box Ollama. Inner-loop hot path. Repeated calls.
# - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier.
# - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
#   Reserved for blockers and full-scope reviews.
#
# weak_models is the codified "local-hot-path eligible" list that the
# matrix.downgrade gate reads. A model in this list bypasses the
# strong-model downgrade rule (it's already weak — no need to downgrade
# corpora further).
[models]
# Tier 1 — local hot path
local_fast = "qwen3.5:latest"
local_embed = "nomic-embed-text"
local_judge = "qwen3.5:latest"
local_review = "qwen3.5:latest"

# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
# deepseek/kimi-k2.6/qwen3-coder are the working primaries.
cloud_judge = "kimi-k2.6:cloud"
cloud_review = "qwen3-coder:480b"
cloud_strong = "deepseek-v3.2"

# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
frontier_review = "openrouter/anthropic/claude-opus-4-7"
frontier_arch = "openrouter/moonshotai/kimi-k2-0905"
frontier_strong = "openrouter/openai/gpt-5"
frontier_free = "opencode/claude-opus-4-7"

# Local-hot-path eligible — matrix.downgrade bypass list.
weak_models = ["qwen3.5:latest", "qwen3:latest"]