# Lakehouse-Go config — G0 dev defaults. Overrides via env are a
# G1+ concern; for G0 edit this file and restart the affected service.

# G0 dev ports — shifted to 3110+ so the Go services run alongside
# the live Rust lakehouse on 3100/3201-3204 without colliding. G5
# (demo cutover) flips gateway back to 3100 when Rust retires.

[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
observerd_url = "http://127.0.0.1:3219"
chatd_url = "http://127.0.0.1:3220"

[storaged]
bind = "127.0.0.1:3211"

[catalogd]
bind = "127.0.0.1:3212"
storaged_url = "http://127.0.0.1:3211"

[ingestd]
bind = "127.0.0.1:3213"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
# CSV uploads are ~4-6× the resulting Parquet. A 256 MiB cap keeps the
# in-memory parse + Arrow + Parquet output footprint bounded. Bump for
# known large datasets (e.g. workers_500k → 344 MiB CSV needs 512 MiB).
max_ingest_bytes = 268435456

[vectord]
bind = "127.0.0.1:3215"
# Optional — set to empty string to disable persistence (dev/test).
storaged_url = "http://127.0.0.1:3211"

[embedd]
bind = "127.0.0.1:3216"
# G2: Ollama local. G3+ may swap in OpenAI/Voyage by changing
# this URL + the wire format inside the provider.
provider_url = "http://localhost:11434"
default_model = "nomic-embed-text-v2-moe"

[queryd]
bind = "127.0.0.1:3214"
catalogd_url = "http://127.0.0.1:3212"
secrets_path = "/etc/lakehouse/secrets-go.toml"
refresh_every = "30s"

[pathwayd]
bind = "127.0.0.1:3217"
# Empty = in-memory only (dev/test). Production sets a path under
# /var/lib/lakehouse/pathway/state.jsonl so traces survive restart.
persist_path = ""

[matrixd]
bind = "127.0.0.1:3218"
# matrixd calls embedd (query-text → vector) and vectord (per-corpus
# search) directly. Localhost defaults; in distributed deployments
# these point at the gateway's upstream addresses.
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"

[observerd]
bind = "127.0.0.1:3219"
# Empty = in-memory only (dev/test). Production sets a path under
# /var/lib/lakehouse/observer/ops.jsonl so ops survive restart.
persist_path = ""

[chatd]
# LLM chat dispatcher (Phase 4). Routes /v1/chat to the right provider
# based on model name prefix or :cloud suffix (sketched after this
# section):
#   ollama/        → local Ollama (no auth)
#   ollama_cloud/  → Ollama Cloud
#   :cloud         → Ollama Cloud (suffix variant)
#   openrouter/    → OpenRouter
#   opencode/      → OpenCode (Zen+Go unified)
#   kimi/          → Kimi For Coding
#   bare names (e.g. qwen3.5:latest) → local Ollama (default)
bind = "127.0.0.1:3220"
ollama_url = "http://localhost:11434"
# Per-provider key resolution: env var first, then .env file fallback.
# An empty file path skips the file lookup. systemd EnvironmentFile=
# loads these natively, so the service runtime sees the env vars.
ollama_cloud_key_env = "OLLAMA_CLOUD_KEY"
openrouter_key_env = "OPENROUTER_API_KEY"
opencode_key_env = "OPENCODE_API_KEY"
kimi_key_env = "KIMI_API_KEY"
ollama_cloud_key_file = "/etc/lakehouse/ollama_cloud.env"
openrouter_key_file = "/etc/lakehouse/openrouter.env"
opencode_key_file = "/etc/lakehouse/opencode.env"
kimi_key_file = "/etc/lakehouse/kimi.env"
# Per-call timeout (seconds). Cloud reasoning models can take >60s
# for long prompts, so 180 is the default.
timeout_secs = 180
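
# Illustrative sketch, not shipped code: one way chatd could implement
# the routing and key-resolution rules above, kept in comments so this
# file stays valid TOML. resolveProvider and resolveKey are hypothetical
# names; only the rules themselves come from the [chatd] comments.
#
#   package chatd
#
#   import (
#       "bufio"
#       "os"
#       "strings"
#   )
#
#   // resolveProvider: prefix match first, then the :cloud suffix
#   // variant; bare names fall through to local Ollama.
#   func resolveProvider(model string) string {
#       switch {
#       case strings.HasPrefix(model, "ollama/"):
#           return "ollama" // local, no auth
#       case strings.HasPrefix(model, "ollama_cloud/"),
#           strings.HasSuffix(model, ":cloud"):
#           return "ollama_cloud"
#       case strings.HasPrefix(model, "openrouter/"):
#           return "openrouter"
#       case strings.HasPrefix(model, "opencode/"):
#           return "opencode"
#       case strings.HasPrefix(model, "kimi/"):
#           return "kimi"
#       default:
#           return "ollama" // bare names, e.g. qwen3.5:latest
#       }
#   }
#
#   // resolveKey: the env var wins; otherwise fall back to a KEY=value
#   // .env file. An empty file path skips the file lookup.
#   func resolveKey(envName, filePath string) string {
#       if v := os.Getenv(envName); v != "" {
#           return v
#       }
#       if filePath == "" {
#           return ""
#       }
#       f, err := os.Open(filePath)
#       if err != nil {
#           return ""
#       }
#       defer f.Close()
#       sc := bufio.NewScanner(f)
#       for sc.Scan() {
#           k, v, ok := strings.Cut(sc.Text(), "=")
#           if ok && strings.TrimSpace(k) == envName {
#               return strings.Trim(strings.TrimSpace(v), "\"")
#           }
#       }
#       return ""
#   }
#
# E.g. resolveProvider("kimi-k2.6:cloud") → "ollama_cloud", and
# resolveKey("KIMI_API_KEY", kimi_key_file) reads /etc/lakehouse/kimi.env
# only when the env var is unset.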
[s3]
endpoint = "http://localhost:9000"
region = "us-east-1"
bucket = "lakehouse-go-primary"   # G0 dedicated bucket so Rust + Go coexist
access_key_id = ""                # populated by SecretsProvider from /etc/lakehouse/secrets-go.toml
secret_access_key = ""            # ditto
use_path_style = true

[log]
level = "info"

# Model tier registry — names map to actual model IDs per the
# small-model pipeline architecture (project_small_model_pipeline_vision.md).
# Bumping a model means editing one line here, not hunting through code.
#
# Tier philosophy:
#   - local_*    : on-box Ollama. Inner-loop hot path. Repeated calls.
#   - cloud_*    : Ollama Cloud (Pro plan). Larger context, fail-up tier.
#   - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
#                  Reserved for blockers and full-scope reviews.
#
# weak_models is the codified "local-hot-path eligible" list that the
# matrix.downgrade gate reads. A model in this list bypasses the
# strong-model downgrade rule (it's already weak — no need to downgrade
# corpora further).
[models]
# Tier 1 — local hot path
local_fast = "qwen3.5:latest"
# 475M MoE, drop-in upgrade from 137M v1 — verified 2026-04-30, same 768-dim.
local_embed = "nomic-embed-text-v2-moe"
# local_judge stays on qwen2.5:latest — qwen3.5:latest is a vision-SSM
# build with 256K context that runs ~30s per judge call against the
# playbook_lift loop (verified 2026-04-30). qwen2.5:latest at ~1s/call
# is 30× faster and held lift theory across the 21-query reality test
# (7/8 lift, 87.5%). The 8de94eb "bump qwen2.5 → qwen3.5" was a casual
# version-up; this revert is workload-specific.
local_judge = "qwen2.5:latest"
local_review = "qwen3.5:latest"

# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
# deepseek/kimi-k2.6/qwen3-coder are the working primaries.
cloud_judge = "kimi-k2.6:cloud"
cloud_review = "qwen3-coder:480b"
cloud_strong = "deepseek-v3.2"

# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
frontier_review = "openrouter/anthropic/claude-opus-4-7"
frontier_arch = "openrouter/moonshotai/kimi-k2-0905"
frontier_strong = "openrouter/openai/gpt-5"
frontier_free = "opencode/claude-opus-4-7"

# Local-hot-path eligible — matrix.downgrade bypass list.
weak_models = ["qwen3.5:latest", "qwen3:latest"]
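
# Illustrative sketch, not shipped code: the shape the matrix.downgrade
# bypass described above could take. shouldDowngrade is a hypothetical
# name; the rule itself (weak models skip the strong-model downgrade)
# comes from the weak_models comment.
#
#   package matrix
#
#   import "slices"
#
#   // Models on the weak_models list are already local-hot-path
#   // eligible, so the strong-model downgrade rule never fires
#   // for them.
#   func shouldDowngrade(model string, weakModels []string) bool {
#       return !slices.Contains(weakModels, model)
#   }
#
# With the list above, shouldDowngrade("qwen3.5:latest", weakModels) is
# false (bypass), while shouldDowngrade("deepseek-v3.2", weakModels) is
# true.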