# syntax=docker/dockerfile:1.6
#
# Multi-stage Dockerfile for Lakehouse-Go.
#
# Single image carries all 11 daemon binaries; docker-compose runs
# one container per daemon (matches the systemd unit topology in
# deploy/systemd/). Operators can also
# `docker run lakehouse-go /usr/local/bin/lakehouse/<daemon>` to
# invoke any one daemon directly.
#
# Builder uses golang:1.25-bookworm (DuckDB cgo needs gcc + glibc;
# alpine's musl doesn't link the official duckdb-go bindings cleanly).
# Runtime is debian:bookworm-slim — same libc, much smaller surface.
#
# Build:
#   docker build -t lakehouse-go:latest .
# Or with a tag:
#   docker build -t lakehouse-go:$(git rev-parse --short HEAD) .

# ── Stage 1: builder ────────────────────────────────────────────
FROM golang:1.25-bookworm AS builder

# build-essential pulls gcc + make + libc-dev — DuckDB cgo needs all three.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /src

# Copy go.mod + go.sum first so module resolution is cacheable across
# source-only changes. The module cache lives in a BuildKit cache mount
# (not in the layer), so the build step below mounts the same path.
COPY go.mod go.sum ./
RUN --mount=type=cache,target=/go/pkg/mod \
    go mod download

# Source.
COPY . .

# Build all 11 daemon binaries plus the three tools under ./scripts:
# staffing_workers (used by the multi_coord_stress harness),
# playbook_lift, and multi_coord_stress itself. They ship in the same
# image so operators can run reality tests against a deployed stack.
#
# The cache mounts keep the Go module cache and the compiler's build
# cache on the build host between runs, so a source-only change does
# not recompile every dependency from scratch. Neither cache ends up
# in an image layer.
RUN --mount=type=cache,target=/go/pkg/mod \
    --mount=type=cache,target=/root/.cache/go-build \
    go build -trimpath -o /out/ \
        ./cmd/storaged ./cmd/catalogd ./cmd/ingestd ./cmd/queryd \
        ./cmd/embedd ./cmd/vectord ./cmd/pathwayd ./cmd/observerd \
        ./cmd/matrixd ./cmd/gateway ./cmd/chatd \
        ./scripts/staffing_workers ./scripts/playbook_lift ./scripts/multi_coord_stress

# ── Stage 2: runtime ────────────────────────────────────────────
FROM debian:bookworm-slim AS runtime

# CA certs for outbound HTTPS (Ollama Cloud, OpenRouter, OpenCode,
# Kimi). curl + jq for in-container health checks + smoke probes.
# tini handles PID 1 signal forwarding so docker stop sends SIGTERM
# to the actual daemon, not just to a wrapper.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        ca-certificates \
        curl \
        jq \
        tini \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime account with fixed uid/gid 999. The name matches
# the systemd User= directive in deploy/systemd/, which keeps file
# ownership consistent between docker-compose and systemd deployments.
RUN groupadd --system --gid 999 lakehouse \
    && useradd --system \
        --uid 999 \
        --gid 999 \
        --no-create-home \
        --shell /usr/sbin/nologin \
        lakehouse

# Binaries go under /usr/local/bin/lakehouse/, the same layout
# REPLICATION.md describes, so the docs cover both systemd and docker.
COPY --from=builder /out/* /usr/local/bin/lakehouse/

# State and log directories, handed over to the runtime user:
#   /var/lib/lakehouse/{pathway,observer}  — pathway/observer JSONLs
#   /var/log/lakehouse                     — optional file logs, for
#                                            operators who want them
#                                            alongside docker logs
RUN mkdir -p \
        /var/lib/lakehouse/pathway \
        /var/lib/lakehouse/observer \
        /var/log/lakehouse \
    && chown -R lakehouse:lakehouse \
        /var/lib/lakehouse \
        /var/log/lakehouse

# Everything from here on runs as the unprivileged user.
USER lakehouse
WORKDIR /var/lib/lakehouse

# Deliberately no default CMD: the operator (or docker-compose) has to
# name the daemon to run. That keeps the topology explicit instead of
# drifting into "run everything in one container". tini sits at PID 1
# and launches whatever command is supplied.
ENTRYPOINT ["/usr/bin/tini", "--"]

# The image-level health check probes the gateway's port. Services on
# other ports override it in their compose definition.
HEALTHCHECK --interval=10s --timeout=2s --start-period=5s --retries=3 \
    CMD curl --silent --show-error --fail http://127.0.0.1:3110/health || exit 1