# golangLAKEHOUSE/lakehouse.toml

# Lakehouse-Go config — G0 dev defaults. Overrides via env are a
# G1+ concern; for G0 edit this file and restart the affected service.

# G0 dev ports — shifted to 3110+ so the Go services run alongside
# the live Rust lakehouse on 3100/3201-3204 without colliding. G5
# (demo cutover) flips gateway back to 3100 when Rust retires.
[gateway]
# Loopback-only listen address for the gateway.
bind = "127.0.0.1:3110"

# Upstream base URLs, one per backend service. Each port is the same as
# the `bind` port in that service's own table in this file, so a port
# change has to be made in both places.
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
observerd_url = "http://127.0.0.1:3219"
chatd_url = "http://127.0.0.1:3220"
validatord_url = "http://127.0.0.1:3221"

[storaged]
# Loopback listen address. Other tables in this file reach storaged via
# storaged_url = "http://127.0.0.1:3211"; keep the port in sync with them.
bind = "127.0.0.1:3211"

[catalogd]
# Loopback listen address; matches catalogd_url used by other tables.
bind = "127.0.0.1:3212"
# Base URL of storaged — same port as [storaged].bind.
storaged_url = "http://127.0.0.1:3211"

[ingestd]
bind = "127.0.0.1:3213"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"

# Upload size cap in bytes (256 MiB). A CSV upload is ~4-6× the size of
# the Parquet it produces, so the cap bounds the in-memory parse + Arrow
# + Parquet output footprint. Raise it for known large datasets — e.g.
# workers_500k is a 344 MiB CSV and needs 512 MiB.
max_ingest_bytes = 268_435_456

[vectord]
# Loopback listen address; matches vectord_url used by other tables.
bind = "127.0.0.1:3215"
# Optional storaged base URL used for persistence. Set it to the empty
# string to disable persistence (dev/test).
storaged_url = "http://127.0.0.1:3211"

[embedd]
bind = "127.0.0.1:3216"

# Embedding backend. G2 uses a local Ollama instance; G3+ may move to
# OpenAI/Voyage, which means changing this URL and the wire format
# inside the provider.
provider_url = "http://localhost:11434"
# Same model ID as [models].local_embed in this file.
default_model = "nomic-embed-text-v2-moe"

[queryd]
# Loopback listen address; matches queryd_url in [gateway].
bind = "127.0.0.1:3214"
# Base URL of catalogd — same port as [catalogd].bind.
catalogd_url = "http://127.0.0.1:3212"
# Secrets file; the same path is named in the [s3] credential comments.
secrets_path = "/etc/lakehouse/secrets-go.toml"
# Duration string. NOTE(review): what is refreshed on this interval is
# not visible from this file — confirm against queryd.
refresh_every = "30s"

[pathwayd]
# Loopback listen address; matches pathwayd_url in [gateway].
bind = "127.0.0.1:3217"
# Empty = in-memory only (dev/test). Production sets a path under
# /var/lib/lakehouse/pathway/state.jsonl so traces survive restart.
persist_path = ""

[matrixd]
bind = "127.0.0.1:3218"

# matrixd talks to two services directly: embedd to turn query text into
# a vector, and vectord for the per-corpus search. These are localhost
# defaults; a distributed deployment points them at the gateway's
# upstream addresses instead.
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"

[observerd]
# Loopback listen address; matches observerd_url in [gateway].
bind = "127.0.0.1:3219"
# Empty = in-memory only (dev/test). Production sets a path under
# /var/lib/lakehouse/observer/ops.jsonl so ops survive restart.
persist_path = ""

[chatd]
# LLM chat dispatcher (Phase 4). /v1/chat picks the provider from the
# model name — either a prefix or the :cloud suffix:
#   ollama/<model>            → local Ollama (no auth)
#   ollama_cloud/<model>      → Ollama Cloud
#   <model>:cloud             → Ollama Cloud (suffix variant)
#   openrouter/<vendor>/<m>   → OpenRouter
#   opencode/<model>          → OpenCode (Zen+Go unified)
#   kimi/<model>              → Kimi For Coding
#   bare names (e.g. qwen3.5:latest) → local Ollama (default)
bind = "127.0.0.1:3220"
ollama_url = "http://localhost:11434"

# Per-call timeout in seconds. 180 is the default because cloud
# reasoning models can take >60s on long prompts.
timeout_secs = 180

# API keys, one env/file pair per provider. Resolution order: the env
# var named by *_key_env first, then the .env file at *_key_file as a
# fallback. An empty file path skips the file lookup. systemd
# EnvironmentFile= loads these files natively, so the service runtime
# sees the env vars.
ollama_cloud_key_env = "OLLAMA_CLOUD_KEY"
ollama_cloud_key_file = "/etc/lakehouse/ollama_cloud.env"

openrouter_key_env = "OPENROUTER_API_KEY"
openrouter_key_file = "/etc/lakehouse/openrouter.env"

opencode_key_env = "OPENCODE_API_KEY"
opencode_key_file = "/etc/lakehouse/opencode.env"

kimi_key_env = "KIMI_API_KEY"
kimi_key_file = "/etc/lakehouse/kimi.env"

[validatord]
# Production-validator network surface (Phase 43 PRD parity). Hosts:
#   /validate — FillValidator + EmailValidator + PlaybookValidator
#   /iterate  — generate→validate→correct loop
# NOTE(review): the session-log comment below calls the second route
# /v1/iterate — confirm which path form is the served one.
bind = "127.0.0.1:3221"
chatd_url = "http://127.0.0.1:3220"

# Roster of valid workers. Empty = no roster, in which case every
# worker-existence check fails Consistency (the correct fail-closed
# posture). Production points at a path regenerated from
# workers_500k.parquet on a schedule:
#   roster_path = "/var/lib/lakehouse/validator/roster.jsonl"
roster_path = ""

# Per-call cap on the iteration loop (Phase 43 default: 3).
default_max_iterations = 3
# Per-call cap on chat hop max_tokens.
default_max_tokens = 4096
# Chat hop timeout in seconds. 240s tolerates frontier reasoning models.
chat_timeout_secs = 240

# Session log: file that receives one appended JSONL row per
# /v1/iterate session, for offline DuckDB analysis. Empty = disabled.
# Production:
#   session_log_path = "/var/lib/lakehouse/validator/sessions.jsonl"
# Row fields: schema=session.iterate.v1, session_id (= Langfuse
# trace_id), kind, model, iterations, attempts[], final_verdict,
# grounded_in_roster, duration_ms.
# See internal/validator/session_log.go.
session_log_path = ""

[s3]
endpoint = "http://localhost:9000"
region = "us-east-1"
# G0 dedicated bucket so Rust + Go coexist.
bucket = "lakehouse-go-primary"
# Both credentials stay empty in this file; SecretsProvider populates
# them from /etc/lakehouse/secrets-go.toml.
access_key_id = ""
secret_access_key = ""
use_path_style = true

[log]
# Log level. NOTE(review): the accepted values and which services read
# this table are not visible from this file — confirm against the loader.
level = "info"

# Model tier registry — names map to actual model IDs per the small-
# model pipeline architecture (project_small_model_pipeline_vision.md).
# Bumping a model means editing one line here, not hunting through code.
#
# Tier philosophy:
#   - local_*    : on-box Ollama. Inner-loop hot path. Repeated calls.
#   - cloud_*    : Ollama Cloud (Pro plan). Larger context, fail-up tier.
#   - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
#                  Reserved for blockers and full-scope reviews.
#
# weak_models is the codified "local-hot-path eligible" list that the
# matrix.downgrade gate reads. A model in this list bypasses the
# strong-model downgrade rule (it's already weak — no need to downgrade
# corpora further).
[models]
# Tier 1 — local hot path
local_fast    = "qwen3.5:latest"
# 475M MoE, drop-in upgrade from the 137M v1 — verified 2026-04-30 to
# keep the same 768-dim output.
local_embed   = "nomic-embed-text-v2-moe"
# local_judge stays on qwen2.5:latest — qwen3.5:latest is a vision-SSM
# build with 256K context that runs ~30s per judge call against the
# playbook_lift loop (verified 2026-04-30). qwen2.5:latest at ~1s/call
# is 30× faster and held lift theory across the 21-query reality test
# (7/8 lift, 87.5%). The 8de94eb "bump qwen2.5 → qwen3.5" was a casual
# version-up; this revert is workload-specific.
local_judge   = "qwen2.5:latest"
local_review  = "qwen3.5:latest"

# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
# deepseek/kimi-k2.6/qwen3-coder are the working primaries.
# Every value here carries an explicit Ollama Cloud marker (":cloud"
# suffix or "ollama_cloud/" prefix): per the [chatd] routing table a
# bare model name is dispatched to local Ollama, so an unmarked ID
# would never reach the cloud tier.
cloud_judge   = "kimi-k2.6:cloud"
cloud_review  = "ollama_cloud/qwen3-coder:480b"
cloud_strong  = "ollama_cloud/deepseek-v3.2"

# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
frontier_review = "openrouter/anthropic/claude-opus-4-7"
frontier_arch   = "openrouter/moonshotai/kimi-k2-0905"
frontier_strong = "openrouter/openai/gpt-5"
frontier_free   = "opencode/claude-opus-4-7"

# Local-hot-path eligible — matrix.downgrade bypass list.
# NOTE(review): local_judge ("qwen2.5:latest") is a Tier 1 model but is
# not listed here — confirm whether that omission is intentional.
weak_models = ["qwen3.5:latest", "qwen3:latest"]