diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..314f758
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,37 @@
+# Match what `go build` actually needs; everything else bloats the
+# build context and slows COPY . . to a crawl.
+
+bin/
+out/
+.git/
+.github/
+.idea/
+.vscode/
+**/*.swp
+**/*.swo
+
+# Reality test outputs — regenerable, not needed in the image.
+reports/
+data/
+
+# Per-run smoke logs in /tmp obviously don't end up here, but defensive.
+**/*.log
+
+# Local config + secrets — must NEVER ship in an image (`**/` = any depth).
+lakehouse.toml
+secrets-go.toml
+auth.env
+**/*.env
+
+# OS metadata files
+**/.DS_Store
+**/Thumbs.db
+
+# Existing systemd unit files don't belong in the container — the
+# image deploys via docker-compose, not systemd-inside-container.
+deploy/systemd/
+
+# Reality-test JSON corpora are large + the multi_coord_stress
+# binary will reference them as files, but they get mounted at
+# runtime, not baked in.
+tests/reality/contracts/*.json
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..7b6f71c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,89 @@
+# syntax=docker/dockerfile:1.6
+#
+# Multi-stage Dockerfile for Lakehouse-Go.
+#
+# Single image carries all 11 daemon binaries; docker-compose runs
+# one container per daemon (matches the systemd unit topology in
+# deploy/systemd/). Operators can also `docker run lakehouse-go
+# /usr/local/bin/lakehouse/<daemon>` to invoke any one daemon
+# directly.
+#
+# Builder uses golang:1.25-bookworm (DuckDB cgo needs gcc + glibc;
+# alpine's musl doesn't link the official duckdb-go bindings cleanly).
+# Runtime is debian:bookworm-slim — same libc, much smaller surface.
+#
+# Build:
+#   docker build -t lakehouse-go:latest .
+# Or with a tag:
+#   docker build -t lakehouse-go:$(git rev-parse --short HEAD) .
+
+# ── Stage 1: builder ────────────────────────────────────────────
+FROM golang:1.25-bookworm AS builder
+
+# build-essential pulls gcc + make + libc-dev — DuckDB cgo needs all three.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /src
+
+# Copy go.mod + go.sum first so module download is cacheable across
+# source-only changes.
+COPY go.mod go.sum ./
+RUN go mod download
+
+# Source.
+COPY . .
+
+# Build all 11 daemon binaries plus the reality-test helpers
+# (staffing_workers, playbook_lift, multi_coord_stress), shipped in the
+# same image so operators can test against a deployed stack.
+RUN go build -trimpath -o /out/ \
+    ./cmd/storaged ./cmd/catalogd ./cmd/ingestd ./cmd/queryd \
+    ./cmd/embedd ./cmd/vectord ./cmd/pathwayd ./cmd/observerd \
+    ./cmd/matrixd ./cmd/gateway ./cmd/chatd \
+    ./scripts/staffing_workers ./scripts/playbook_lift ./scripts/multi_coord_stress
+
+# ── Stage 2: runtime ────────────────────────────────────────────
+FROM debian:bookworm-slim
+
+# CA certs for outbound HTTPS (Ollama Cloud, OpenRouter, OpenCode,
+# Kimi). curl + jq for in-container health checks + smoke probes.
+# tini handles PID 1 signal forwarding so docker stop sends SIGTERM
+# to the actual daemon, not just to a wrapper.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    jq \
+    tini \
+    && rm -rf /var/lib/apt/lists/*
+
+# Non-root runtime user — same name as the systemd User= directive
+# in deploy/systemd/, so file ownership stays consistent across
+# deployment modes (docker-compose vs systemd).
+RUN groupadd --system --gid 999 lakehouse \
+    && useradd --system --uid 999 --gid 999 \
+       --no-create-home --shell /usr/sbin/nologin lakehouse
+
+# Layout matches /usr/local/bin/lakehouse/ from REPLICATION.md
+# so docs apply equally to systemd + docker deployments.
+COPY --from=builder /out/* /usr/local/bin/lakehouse/
+
+# /var/lib/lakehouse for pathway/observer JSONLs; /var/log/lakehouse
+# in case operators want file logs in addition to docker logs.
+RUN mkdir -p /var/lib/lakehouse/pathway /var/lib/lakehouse/observer /var/log/lakehouse \
+    && chown -R lakehouse:lakehouse /var/lib/lakehouse /var/log/lakehouse
+
+USER lakehouse
+WORKDIR /var/lib/lakehouse
+
+# No default CMD — operators (or docker-compose) MUST specify which
+# daemon. Forces explicit topology rather than implicit "run
+# everything in one container."
+ENTRYPOINT ["/usr/bin/tini", "--"]
+
+# Default healthcheck targets gateway's port. Per-service compose
+# overrides land per their own port.
+HEALTHCHECK --interval=10s --timeout=2s --start-period=5s --retries=3 \
+    CMD curl -sSf http://127.0.0.1:3110/health || exit 1
diff --git a/REPLICATION.md b/REPLICATION.md
index 2b80d71..5432190 100644
--- a/REPLICATION.md
+++ b/REPLICATION.md
@@ -149,6 +149,60 @@ sudo sed -i "s|^AUTH_SECONDARY_TOKEN=.*|AUTH_SECONDARY_TOKEN=|" /etc/lakehouse/a
 sudo systemctl restart lakehouse-go.target
 ```
 
+## Docker / docker-compose deploy (alternative to systemd)
+
+The single-image `Dockerfile` carries all 11 daemons; `docker-compose.yml`
+runs one container per daemon with the same dependency graph as the
+systemd units. Useful when the host doesn't have systemd (Mac dev
+boxes, remote VMs without root) or when you want all of Lakehouse-Go
+isolated to a private docker network.
+
+```bash
+# Build the image (multi-stage; ~3 min on first build, ~30s with
+# cached go module download).
+docker build -t lakehouse-go:latest .
+
+# Place config + secrets next to docker-compose.yml. The compose file
+# bind-mounts the TOML files under /etc/lakehouse/ and loads *.env via env_file.
+# lakehouse.toml is already in the repo root; edit it in place if needed.
+cp deploy/etc-lakehouse/secrets-go.toml.example secrets-go.toml
+chmod 0600 secrets-go.toml
+cp deploy/etc-lakehouse/auth.env.example auth.env
+chmod 0600 auth.env
+# Per-provider chatd keys (each its own file so missing == provider
+# unregistered, NOT chatd startup failure):
+for p in ollama_cloud openrouter opencode kimi; do
+  echo "$(printf '%s' "$p" | tr '[:lower:]' '[:upper:]')_API_KEY=" > "$p.env"
+  chmod 0600 "$p.env"
+done
+
+# $EDITOR each file to fill in real values...
+
+# Bring up the stack.
+docker compose up -d
+docker compose ps # all 11 services Healthy
+docker compose logs -f gateway
+
+# Validate via the gateway like the systemd path.
+curl -sS http://127.0.0.1:3110/v1/chat/providers | jq
+
+# Tear down.
+docker compose down
+# State volume (pathway/observer JSONLs) survives `down`. To wipe:
+docker compose down -v
+```
+
+### Key docker-vs-systemd differences
+
+| Concern | systemd | docker-compose |
+|---|---|---|
+| Process supervision | systemd | tini + docker daemon |
+| Logs | journald | `docker logs` (or routed to a sink via logging driver) |
+| Restarts on failure | `Restart=on-failure` | `restart: unless-stopped` |
+| File ownership | `User=lakehouse` (uid varies) | `user: 999:999` (uid is fixed in the image) |
+| Reaches MinIO/Ollama | host network | host's address from inside the bridge network — typically `host.docker.internal` (Mac/Win) or `172.17.0.1` (Linux). Set `[s3].endpoint` + `[embedd].provider_url` accordingly. |
+| Backup target | `/var/lib/lakehouse/` on host | the `lakehouse-state` named volume; bind to a host path via the commented-out `driver_opts` in compose if needed |
+
 ## Logs
 
 systemd routes everything to journald with per-daemon SyslogIdentifier:
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..b65deb9
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,183 @@
+# Lakehouse-Go — 11-daemon docker-compose deployment.
+#
+# Same dependency graph as deploy/systemd/, mapped to Compose's
+# `depends_on` + healthchecks. Operators MUST provide:
+#   - lakehouse.toml bind-mounted at /etc/lakehouse/lakehouse.toml
+#   - secrets-go.toml bind-mounted at /etc/lakehouse/secrets-go.toml
+#     (mode 0600 on host; the container user, uid 999, must be able to read it)
+#   - auth.env next to this file, injected via env_file (per ADR-006 —
+#     required if any service binds non-loopback)
+#   - MinIO + Ollama reachable from the lakehouse network
+#
+# Bring up: docker compose up -d
+# Tear down: docker compose down
+# Logs: docker compose logs -f gateway
+# Status: docker compose ps
+
+x-lakehouse-base: &lakehouse-base
+  image: lakehouse-go:latest
+  restart: unless-stopped
+  networks: [lakehouse]
+  user: "999:999" # matches the lakehouse user baked into the image
+  volumes:
+    - ./lakehouse.toml:/etc/lakehouse/lakehouse.toml:ro
+    - lakehouse-state:/var/lib/lakehouse
+  env_file:
+    - ./auth.env
+
+x-healthcheck: &lakehouse-healthcheck
+  # CMD-SHELL, not CMD: the exec form runs curl without a shell, so
+  # $PORT would reach curl unexpanded. `$$` escapes Compose's own
+  # interpolation; the container's shell expands $PORT (set per service).
+  test: ["CMD-SHELL", "curl -sSf http://127.0.0.1:$$PORT/health"]
+  interval: 10s
+  timeout: 2s
+  start_period: 5s
+  retries: 3
+
+services:
+
+  storaged:
+    <<: *lakehouse-base
+    container_name: lakehouse-storaged
+    command: ["/usr/local/bin/lakehouse/storaged", "-config", "/etc/lakehouse/lakehouse.toml", "-secrets", "/etc/lakehouse/secrets-go.toml"]
+    ports: ["3211:3211"]
+    volumes:
+      - ./lakehouse.toml:/etc/lakehouse/lakehouse.toml:ro
+      - ./secrets-go.toml:/etc/lakehouse/secrets-go.toml:ro
+      - lakehouse-state:/var/lib/lakehouse
+    environment: [PORT=3211]
+    healthcheck: *lakehouse-healthcheck
+
+  catalogd:
+    <<: *lakehouse-base
+    container_name: lakehouse-catalogd
+    command: ["/usr/local/bin/lakehouse/catalogd", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3212:3212"]
+    environment: [PORT=3212]
+    healthcheck: *lakehouse-healthcheck
+    depends_on:
+      storaged: { condition: service_healthy }
+
+  ingestd:
+    <<: *lakehouse-base
+    container_name: lakehouse-ingestd
+    command: ["/usr/local/bin/lakehouse/ingestd", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3213:3213"]
+    environment: [PORT=3213]
+    healthcheck: *lakehouse-healthcheck
+    depends_on:
+      storaged: { condition: service_healthy }
+      catalogd: { condition: service_healthy }
+
+  queryd:
+    <<: *lakehouse-base
+    container_name: lakehouse-queryd
+    command: ["/usr/local/bin/lakehouse/queryd", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3214:3214"]
+    volumes:
+      - ./lakehouse.toml:/etc/lakehouse/lakehouse.toml:ro
+      - ./secrets-go.toml:/etc/lakehouse/secrets-go.toml:ro
+      - lakehouse-state:/var/lib/lakehouse
+    environment: [PORT=3214]
+    healthcheck: *lakehouse-healthcheck
+    depends_on:
+      catalogd: { condition: service_healthy }
+
+  vectord:
+    <<: *lakehouse-base
+    container_name: lakehouse-vectord
+    command: ["/usr/local/bin/lakehouse/vectord", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3215:3215"]
+    environment: [PORT=3215]
+    healthcheck: *lakehouse-healthcheck
+
+  embedd:
+    <<: *lakehouse-base
+    container_name: lakehouse-embedd
+    command: ["/usr/local/bin/lakehouse/embedd", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3216:3216"]
+    environment: [PORT=3216]
+    healthcheck: *lakehouse-healthcheck
+    # No depends_on — Ollama is operator infra, not a compose service.
+    # embedd surfaces unreachable Ollama as 502 at request time.
+
+  pathwayd:
+    <<: *lakehouse-base
+    container_name: lakehouse-pathwayd
+    command: ["/usr/local/bin/lakehouse/pathwayd", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3217:3217"]
+    environment: [PORT=3217]
+    healthcheck: *lakehouse-healthcheck
+
+  observerd:
+    <<: *lakehouse-base
+    container_name: lakehouse-observerd
+    command: ["/usr/local/bin/lakehouse/observerd", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3219:3219"]
+    environment: [PORT=3219]
+    healthcheck: *lakehouse-healthcheck
+
+  matrixd:
+    <<: *lakehouse-base
+    container_name: lakehouse-matrixd
+    command: ["/usr/local/bin/lakehouse/matrixd", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3218:3218"]
+    environment: [PORT=3218]
+    healthcheck: *lakehouse-healthcheck
+    depends_on:
+      embedd: { condition: service_healthy }
+      vectord: { condition: service_healthy }
+
+  chatd:
+    <<: *lakehouse-base
+    container_name: lakehouse-chatd
+    command: ["/usr/local/bin/lakehouse/chatd", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3220:3220"]
+    environment: [PORT=3220]
+    # chatd's per-provider key files — `required: false` makes a
+    # missing file mean "this provider unregistered", not a startup
+    # failure (needs Compose >= 2.24). Mirrors systemd's EnvironmentFile=-.
+    env_file:
+      - ./auth.env
+      - { path: ./ollama_cloud.env, required: false }
+      - { path: ./openrouter.env, required: false }
+      - { path: ./opencode.env, required: false }
+      - { path: ./kimi.env, required: false }
+    healthcheck: *lakehouse-healthcheck
+
+  gateway:
+    <<: *lakehouse-base
+    container_name: lakehouse-gateway
+    command: ["/usr/local/bin/lakehouse/gateway", "-config", "/etc/lakehouse/lakehouse.toml"]
+    ports: ["3110:3110"]
+    environment: [PORT=3110]
+    healthcheck: *lakehouse-healthcheck
+    # Wants= equivalent in compose: depends_on without
+    # condition: service_healthy, so a single upstream restart
+    # doesn't cascade-restart the gateway.
+    depends_on:
+      - storaged
+      - catalogd
+      - ingestd
+      - queryd
+      - vectord
+      - embedd
+      - pathwayd
+      - observerd
+      - matrixd
+      - chatd
+
+networks:
+  lakehouse:
+    driver: bridge
+
+volumes:
+  lakehouse-state:
+    # /var/lib/lakehouse persisted across container restarts. Bind
+    # to a host path if backups need to live outside the volume:
+    # driver: local
+    # driver_opts:
+    #   type: none
+    #   o: bind
+    #   device: /opt/lakehouse-state