golangLAKEHOUSE/lakehouse.toml
root f9e72412c1 validatord: /v1/validate + /v1/iterate HTTP surface (port 3221)
Closes the last "Go primary" backlog item in
docs/ARCHITECTURE_COMPARISON.md. Go now owns the entire validator
path end-to-end — no Rust dep for staffing safety net.

Architecture: cmd/validatord on :3221 hosts both endpoints. Calls
chatd directly for the iterate loop's LLM hop (no gateway
self-loopback like the Rust shape). Gateway proxies /v1/validate +
/v1/iterate to validatord.

What's in:
- internal/validator/playbook.go — 3rd validator kind (PRD checks:
  fill: prefix, endorsed_names ≤ target_count×2, fingerprint required)
- internal/validator/lookup_jsonl.go — JSONL roster loader (Parquet
  deferred; producer one-liner documented in package comment)
- internal/validator/iterate.go — ExtractJSON helper + Iterate
  orchestrator with ChatCaller seam for unit tests
- cmd/validatord/main.go — HTTP routes, roster load, chat client
- internal/shared/config.go — ValidatordConfig + gateway URL field
- lakehouse.toml — [validatord] section
- cmd/gateway/main.go — proxy routes for /v1/validate + /v1/iterate

Smoke: 5/5 PASS through gateway :3110:
  ✓ playbook happy path
  ✓ playbook missing fingerprint → 422 schema/fingerprint
  ✓ phantom candidate W-PHANTOM → 422 consistency
  ✓ unknown kind → 400
  ✓ roster loaded with 3 records

go test ./... green across 33 packages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 03:53:20 -05:00

175 lines
6.8 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Lakehouse-Go config — G0 dev defaults. Overrides via env are a
# G1+ concern; for G0 edit this file and restart the affected service.
# G0 dev ports — each shifted up by 10 (gateway 3110, services 3211+)
# so the Go services run alongside the live Rust lakehouse on
# 3100/3201-3204 without colliding. G5 (demo cutover) flips gateway
# back to 3100 when Rust retires.
[gateway]
# Listen address for the gateway. 3110 keeps it clear of the Rust
# gateway on 3100 until cutover.
bind = "127.0.0.1:3110"
# Upstream base URLs, one per backend service. In this single-host dev
# layout each one matches the `bind` in that service's own section of
# this file; change both together when moving a port.
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
observerd_url = "http://127.0.0.1:3219"
chatd_url = "http://127.0.0.1:3220"
validatord_url = "http://127.0.0.1:3221"
[storaged]
# Address that the storaged_url keys in [gateway], [catalogd], [ingestd]
# and [vectord] point at.
bind = "127.0.0.1:3211"
[catalogd]
# Address that the catalogd_url keys in [gateway], [ingestd] and
# [queryd] point at.
bind = "127.0.0.1:3212"
# Base URL of storaged; matches [storaged].bind.
storaged_url = "http://127.0.0.1:3211"
[ingestd]
bind = "127.0.0.1:3213"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
# Upload size cap in bytes: 256 MiB (256 × 1024 × 1024). CSV uploads are
# ~4-6× the size of the resulting Parquet, so the cap keeps the
# in-memory parse + Arrow + Parquet output footprint bounded. Raise it
# for known large datasets (e.g. workers_500k → 344 MiB CSV needs
# 512 MiB = 536_870_912).
max_ingest_bytes = 268_435_456
[vectord]
bind = "127.0.0.1:3215"
# Optional storaged base URL used for persistence. Set it to the empty
# string ("") to disable persistence (dev/test).
storaged_url = "http://127.0.0.1:3211"
[embedd]
bind = "127.0.0.1:3216"
# G2: Ollama local. G3+ may swap in OpenAI/Voyage by changing
# this URL + the wire format inside the provider.
provider_url = "http://localhost:11434"
# NOTE(review): same model ID as [models].local_embed. Presumably both
# need editing on a model bump — confirm which one embedd reads.
default_model = "nomic-embed-text-v2-moe"
[queryd]
bind = "127.0.0.1:3214"
# Base URL of catalogd; matches [catalogd].bind.
catalogd_url = "http://127.0.0.1:3212"
# Secrets file. The [s3] section names the same path as the source of
# the S3 credentials.
secrets_path = "/etc/lakehouse/secrets-go.toml"
# Duration string. NOTE(review): what gets refreshed on this interval is
# not stated in this file — confirm against queryd.
refresh_every = "30s"
[pathwayd]
bind = "127.0.0.1:3217"
# Trace persistence file. Empty = in-memory only (dev/test), so traces
# do not survive a restart. Production sets a path such as
# /var/lib/lakehouse/pathway/state.jsonl so they do.
persist_path = ""
[matrixd]
bind = "127.0.0.1:3218"
# matrixd calls embedd (query-text → vector) and vectord (per-corpus
# search) directly. The localhost defaults match [embedd].bind and
# [vectord].bind; in distributed deployments these point at the
# gateway's upstream addresses.
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
[observerd]
bind = "127.0.0.1:3219"
# Ops persistence file. Empty = in-memory only (dev/test), so ops do not
# survive a restart. Production sets a path such as
# /var/lib/lakehouse/observer/ops.jsonl so they do.
persist_path = ""
[chatd]
# LLM chat dispatcher (Phase 4). Routes /v1/chat to the right provider
# based on model name prefix or :cloud suffix:
#   ollama/<model>                    → local Ollama (no auth)
#   ollama_cloud/<model>              → Ollama Cloud
#   <model>:cloud                     → Ollama Cloud (suffix variant)
#   openrouter/<vendor>/<m>           → OpenRouter
#   opencode/<model>                  → OpenCode (Zen+Go unified)
#   kimi/<model>                      → Kimi For Coding
#   bare names (e.g. qwen3.5:latest)  → local Ollama (default)
bind = "127.0.0.1:3220"
# Base URL of the local Ollama server (same host:port as
# [embedd].provider_url).
ollama_url = "http://localhost:11434"
# Per-provider key resolution: env var first, then .env file fallback.
# Each *_key_env names the environment variable to read; the matching
# *_key_file names the fallback file. Empty file path skips the file
# lookup. systemd EnvironmentFile= loads these natively, so service
# runtime sees the env vars.
ollama_cloud_key_env = "OLLAMA_CLOUD_KEY"
openrouter_key_env = "OPENROUTER_API_KEY"
opencode_key_env = "OPENCODE_API_KEY"
kimi_key_env = "KIMI_API_KEY"
ollama_cloud_key_file = "/etc/lakehouse/ollama_cloud.env"
openrouter_key_file = "/etc/lakehouse/openrouter.env"
opencode_key_file = "/etc/lakehouse/opencode.env"
kimi_key_file = "/etc/lakehouse/kimi.env"
# Per-call timeout (seconds). Cloud reasoning models can take >60s
# for long prompts, so 180 is the default.
timeout_secs = 180
[validatord]
# Production-validator network surface (Phase 43 PRD parity).
# Hosts /validate (FillValidator + EmailValidator + PlaybookValidator)
# and /iterate (generate→validate→correct loop).
bind = "127.0.0.1:3221"
# Base URL of chatd, used for the chat hop; matches [chatd].bind.
chatd_url = "http://127.0.0.1:3220"
# Roster of valid workers. Empty = no roster — worker-existence checks
# all fail Consistency (correct fail-closed posture). Production points
# at a path regenerated from workers_500k.parquet on a schedule:
# roster_path = "/var/lib/lakehouse/validator/roster.jsonl"
roster_path = ""
# Per-call cap on the iteration loop (Phase 43 default: 3).
default_max_iterations = 3
# Per-call cap on chat hop max_tokens.
default_max_tokens = 4096
# Chat hop timeout (seconds). 240s tolerates frontier reasoning models.
# NOTE(review): [chatd].timeout_secs is 180, so a single provider call is
# presumably cut off by chatd before this limit is reached — confirm the
# extra 60s is intentional headroom.
chat_timeout_secs = 240
[s3]
endpoint = "http://localhost:9000"
region = "us-east-1"
# G0 dedicated bucket so Rust + Go coexist.
bucket = "lakehouse-go-primary"
# Both credentials stay empty in this file; they are populated by
# SecretsProvider from /etc/lakehouse/secrets-go.toml.
access_key_id = ""
secret_access_key = ""
use_path_style = true
[log]
# Log level. NOTE(review): presumably shared by every service that loads
# this file; the accepted level names are not shown here — confirm
# against the logger setup.
level = "info"
# Model tier registry — names map to actual model IDs per the small-
# model pipeline architecture (project_small_model_pipeline_vision.md).
# Bumping a model means editing one line here, not hunting through code.
#
# Tier philosophy:
# - local_* : on-box Ollama. Inner-loop hot path. Repeated calls.
# - cloud_* : Ollama Cloud (Pro plan). Larger context, fail-up tier.
# - frontier_* : OpenRouter / OpenCode. Rate-limited, billed per call.
# Reserved for blockers and full-scope reviews.
#
# weak_models is the codified "local-hot-path eligible" list that the
# matrix.downgrade gate reads. A model in this list bypasses the
# strong-model downgrade rule (it's already weak — no need to downgrade
# corpora further).
[models]
# Tier 1 — local hot path
local_fast = "qwen3.5:latest"
# 475M MoE, drop-in upgrade from 137M v1 — verified 2026-04-30 same
# 768-dim.
local_embed = "nomic-embed-text-v2-moe"
# local_judge stays on qwen2.5:latest — qwen3.5:latest is a vision-SSM
# build with 256K context that runs ~30s per judge call against the
# playbook_lift loop (verified 2026-04-30). qwen2.5:latest at ~1s/call
# is 30× faster and held lift theory across the 21-query reality test
# (7/8 lift, 87.5%). The 8de94eb "bump qwen2.5 → qwen3.5" was a casual
# version-up; this revert is workload-specific.
local_judge = "qwen2.5:latest"
local_review = "qwen3.5:latest"
# Tier 2 — Ollama Cloud (Pro). kimi-k2:1t still upstream-broken;
# deepseek/kimi-k2.6/qwen3-coder are the working primaries.
# NOTE(review): cloud_review and cloud_strong carry neither the
# ollama_cloud/ prefix nor the :cloud suffix that the [chatd] routing
# comment lists for Ollama Cloud; by that comment, bare names go to
# local Ollama. Confirm the caller adds the provider before these IDs
# reach chatd.
cloud_judge = "kimi-k2.6:cloud"
cloud_review = "qwen3-coder:480b"
cloud_strong = "deepseek-v3.2"
# Tier 3 — frontier. Use sparingly; rate-limited + per-call billing.
frontier_review = "openrouter/anthropic/claude-opus-4-7"
frontier_arch = "openrouter/moonshotai/kimi-k2-0905"
frontier_strong = "openrouter/openai/gpt-5"
frontier_free = "opencode/claude-opus-4-7"
# Local-hot-path eligible — matrix.downgrade bypass list.
# NOTE(review): local_judge is "qwen2.5:latest", which is not listed
# here, while "qwen3:latest" is listed but assigned to no tier above.
# Confirm the list is still current after the local_judge revert.
weak_models = ["qwen3.5:latest", "qwen3:latest"]