# matrix-agent-validated/lakehouse.toml

# Lakehouse Configuration

[gateway]
# NOTE(review): "0.0.0.0" is the IPv4 wildcard address. If `host` is the
# gateway's bind address, the service listens on every network interface.
# [auth] is disabled in this file, so confirm the port is not reachable from
# untrusted networks.
host = "0.0.0.0"
port = 3100

[storage]
# Storage locations. All local paths in this section are relative;
# presumably they resolve against the process working directory — TODO confirm
# against the config loader.
root = "./data"
profile_root = "./data/_profiles"
# Matches the `name` of the second [[storage.buckets]] entry below.
# NOTE(review): presumably looked up by bucket name — keep the two in sync.
rescue_bucket = "rescue"

# Bucket list: one [[storage.buckets]] entry per bucket, in array order.
# The "primary" root is the same directory as storage.root above; the
# "rescue" and "testing" roots (and profile_root) are subdirectories of it.
[[storage.buckets]]
name = "primary"
backend = "local"
root = "./data"

[[storage.buckets]]
name = "rescue"
backend = "local"
root = "./data/_rescue"

[[storage.buckets]]
name = "testing"
backend = "local"
root = "./data/_testing"

# S3 bucket via MinIO. The name "s3:lakehouse" is the convention that
# lance_backend.rs uses to emit s3:// URIs for Lance datasets.
# Credentials are resolved via the environment (AWS_ACCESS_KEY_ID, etc.) or
# the secrets provider.
# NOTE(review): the endpoint below is plain HTTP on localhost, and
# `secret_ref` is presumably the entry name looked up in the secrets
# provider — confirm both before pointing this at a non-local MinIO/S3.
[[storage.buckets]]
name = "s3:lakehouse"
backend = "s3"
bucket = "lakehouse"
endpoint = "http://localhost:9000"
region = "us-east-1"
secret_ref = "minio-lakehouse"

[catalog]
# Catalog manifests are persisted to object storage under this key prefix.
# NOTE(review): this table does not say which bucket holds them — confirm
# against the catalog code.
manifest_prefix = "_catalog/manifests"

[query]
# No keys are set in this table. The line below is an optional setting left
# commented out; remove the leading "#" to set it. The value that applies
# while it is unset is not visible in this file.
# max_rows_per_query = 10000

[sidecar]
# Address of the sidecar: plain HTTP on localhost, port 3200.
# NOTE(review): the sidecar's role is not described in this file — confirm
# what depends on it before changing this.
url = "http://localhost:3200"

[ai]
# Model names by role: embedding, generation, and reranking. Generation and
# reranking are set to the same model here. Sizes and context windows for
# these models are listed in the "Model roster" notes at the end of this file.
embed_model = "nomic-embed-text"
gen_model = "qwen2.5"
rerank_model = "qwen2.5"

[auth]
# Authentication is switched off in this file.
# NOTE(review): [gateway] host is "0.0.0.0"; confirm the service is only
# reachable from a trusted network while this stays false.
enabled = false
# Placeholder key, left commented out. Presumably required when
# enabled = true — confirm. Replace "changeme" with a real secret rather
# than uncommenting the line as-is.
# api_key = "changeme"

[observability]
# Trace exporter. "stdout" writes traces to standard output; set this to
# "otlp" to send them to an OpenTelemetry collector instead.
exporter = "stdout"
# NOTE(review): presumably the service name attached to exported traces —
# confirm against the tracing setup.
service_name = "lakehouse"

[agent]
# Phase 16.2 — background autotune agent. Opt-in: set enabled = true to
# let the agent continuously propose + trial HNSW configs and auto-promote
# winners. Defaults are conservative so it stays out of the way of live
# search traffic on shared Ollama.
# NOTE: this file opts in — the agent is switched on below. Set
# enabled = false to turn it off.
enabled = true
cycle_interval_secs = 120                 # periodic wake if no triggers
cooldown_between_trials_secs = 10         # min gap between trials
min_recall = 0.9                          # never promote below this
max_trials_per_hour = 20                  # hard budget cap

# Model roster — models available for profile hot-swap.
# (Reference notes only: these lines are comments and configure nothing.)
# qwen3: 8.2B, 40K context, thinking+tools, best for reasoning tasks
# qwen2.5: 7B, 8K context, fast, good for SQL generation
# mistral: 7B, 8K context, good for general generation
# nomic-embed-text: 137M, embedding-only, used by all profiles