#!/usr/bin/env bash
# scripts/cutover/start_go_stack.sh
#
# Bring up the full Go stack persistently — alongside the live Rust
# gateway on :3100 + alongside the harness-transient stacks the
# smokes spin up. Persistent Go daemons land on their own port range
# :4110 + :4211-:4219 (no collision with Rust on :3100 or with the
# smoke harness on :3110 + :3211-:3219); only chatd on :3220 is
# shared. Persistent daemons run under DIFFERENT BINARY NAMES
# (bin/persistent-*) and write to a SEPARATE MinIO BUCKET
# (lakehouse-go-persistent) so the pre-push smoke chain — which uses
# anchored `pkill -f "bin/(name)$"` teardown + reads from
# `lakehouse-go-primary` — can run without tearing down or polluting
# our long-running state.
#
# Three isolation layers (the third was added 2026-05-01 after the
# first push test exposed a port-collision bug — smoke catalogd
# failed to bind :3212 because persistent catalogd already had it,
# but smoke's poll_health 3212 succeeded responding to the
# persistent daemon, and the smoke happily proceeded talking to
# the persistent stack with the wrong bucket expectations):
#
#   1. BINARY NAMES — persistent stack runs via symlinks
#      bin/persistent-NAME → bin/NAME. Smoke pkill pattern
#      `bin/(storaged|...|gateway)$` matches `bin/NAME$` substrings;
#      `bin/persistent-NAME` doesn't match because `bin/` is
#      followed by `persistent-`, not directly by a daemon name.
#   2. MINIO BUCKETS — persistent stack uses lakehouse-go-persistent;
#      smoke harnesses use lakehouse-go-primary. Different buckets
#      mean rehydrate paths can't see each other's `_vectors/*`
#      persistence files.
#   3. PORTS — persistent stack uses :4110 + :4211-:4219 (gateway +
#      upstreams). Smoke harness uses :3110 + :3211-:3219. Both
#      reach for the SAME chatd at :3220 because chatd is
#      read-mostly (LLM dispatch, no persistent state to clobber)
#      and operators don't want to maintain two LLM provider key
#      sets. The temp toml at /tmp/lakehouse-persistent.toml
#      overrides bucket + bind ports + upstream URLs (except chatd).
#
# Logs land in /tmp/gostack-logs/persistent-NAME.log (one per
# daemon); chatd, when this script starts it, logs to
# /tmp/gostack-logs/chatd.log.
#
# Used to bring up the persistent stack 2026-05-01 — the first time
# the Go side has run as long-running daemons rather than per-harness
# transient processes. The first two isolation layers (binary names +
# separate bucket) were added the same day after the pre-push gate
# caught a smoke-vs-persistent collision (g1p_smoke saw count=2 when
# expecting count=1 because vectord's MinIO bucket had both the
# smoke's persist_demo AND the persistent stack's workers index).

set -euo pipefail

# Run from the repo root (two levels above this script's directory) so
# the relative bin/ and lakehouse.toml paths used below resolve.
cd "$(dirname "$0")/../.."

if [ ! -d bin ]; then
  echo "[gostack] bin/ missing — run 'just build' first" >&2
  exit 1
fi

# ── Layer 1: symlink-based binary names ─────────────────────────────
# Create bin/persistent-* symlinks to bin/* so the persistent stack
# has distinct cmdline strings that smoke pkill won't match. Idempotent
# (existing symlinks are left alone).
DAEMONS=(storaged catalogd ingestd queryd embedd vectord pathwayd observerd matrixd gateway)
for d in "${DAEMONS[@]}"; do
  target="bin/persistent-$d"
  if [ ! -L "$target" ] && [ ! -e "$target" ]; then
    # Link target is relative to bin/, so it is just the daemon name.
    ln -s "$d" "$target"
  fi
  # Fail fast on a dangling symlink or non-executable binary instead of
  # discovering it later as a health-check timeout. -x follows symlinks.
  if [ ! -x "$target" ]; then
    echo "[gostack] $target is not an executable (is bin/$d built?) — run 'just build' first" >&2
    exit 1
  fi
done

# ── Layer 2: separate MinIO bucket via temp config ──────────────────
# Generate /tmp/lakehouse-persistent.toml from the canonical
# lakehouse.toml with the bucket name and the bind/upstream ports
# overridden. Caller can override the bucket name via the
# LH_PERSISTENT_BUCKET env var.
PERSISTENT_BUCKET="${LH_PERSISTENT_BUCKET:-lakehouse-go-persistent}"
TEMP_TOML=/tmp/lakehouse-persistent.toml

# The bucket name comes from the environment and is later written into
# the generated config, so reject anything that is not a plain
# S3-style bucket name (3-63 chars: lowercase letters, digits, '.',
# '-'; alphanumeric at both ends) before it goes any further.
bucket_name_re='^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$'
if [[ ! "$PERSISTENT_BUCKET" =~ $bucket_name_re ]]; then
  echo "[gostack] invalid bucket name '$PERSISTENT_BUCKET' (LH_PERSISTENT_BUCKET)" >&2
  exit 1
fi

# Create the bucket if missing. mc is idempotent with --ignore-existing.
# Bucket creation stays best-effort (the bucket may already exist, or
# mc may not be configured on this host), but failures are now reported
# rather than silently discarded.
if command -v mc >/dev/null 2>&1; then
  if ! mc mb --ignore-existing "local/$PERSISTENT_BUCKET" >/dev/null 2>&1; then
    echo "[gostack] warning: 'mc mb local/$PERSISTENT_BUCKET' failed — continuing, assuming the bucket exists" >&2
  fi
else
  echo "[gostack] warning: mc not found — assuming bucket '$PERSISTENT_BUCKET' already exists" >&2
fi

# sed-replace the bucket line + port range. Anchored to specific
# substrings so accidental matches don't fire. chatd's :3220 stays
# unchanged (read-mostly LLM dispatch, no persistent state).
# The bucket name is spliced into a sed replacement, where '\', '/' and
# '&' are special; escape them so an unusual name cannot corrupt the
# substitution.
bucket_repl=$(printf '%s' "$PERSISTENT_BUCKET" | sed -e 's|[\\/&]|\\&|g')

# One substitution for the bucket plus one per port: each 3xxx
# listen/upstream address on 127.0.0.1 is moved to the matching 4xxx
# port (3110 -> 4110, 3211 -> 4211, ...). :3220 (chatd) is not listed
# and therefore left alone.
sed_args=(-e "s/lakehouse-go-primary/${bucket_repl}/g")
for src_port in 3110 {3211..3219}; do
  sed_args+=(-e "s|127\\.0\\.0\\.1:${src_port}|127.0.0.1:4${src_port:1}|g")
done
sed "${sed_args[@]}" lakehouse.toml > "$TEMP_TOML"

echo "[gostack] config: $TEMP_TOML (bucket=$PERSISTENT_BUCKET, ports=4110+4211-4219)"

# ── Cleanup any prior persistent daemons ────────────────────────────
# Match by the persistent- prefix so smoke processes are untouched.
# pkill -f matches against the FULL command line, and these daemons are
# launched below as "./bin/persistent-NAME -config ...", so the daemon
# name is followed by a space, not end-of-string. A bare trailing '$'
# would never match and stale daemons would survive; accept either.
persistent_pattern='bin/persistent-(storaged|catalogd|ingestd|queryd|embedd|vectord|pathwayd|observerd|matrixd|gateway)( |$)'
echo "[gostack] killing any stale persistent Go daemons (anchored on persistent-)"
pkill -f "$persistent_pattern" 2>/dev/null || true

# Wait (up to ~5s) until the old daemons are really gone, so the new
# ones can bind their ports. If a stale daemon kept its port, the
# health poll in start() would be answered by the old process.
for ((i = 0; i < 50; i++)); do
  pgrep -f "$persistent_pattern" >/dev/null 2>&1 || break
  sleep 0.1
done
if pgrep -f "$persistent_pattern" >/dev/null 2>&1; then
  echo "[gostack] stale persistent daemons still running after SIGTERM — stop them manually and retry" >&2
  exit 1
fi

mkdir -p /tmp/gostack-logs

#######################################
# Launch one persistent daemon in the background and wait for it to
# answer its /health endpoint.
# Globals:   TEMP_TOML (read)
# Arguments: $1 - daemon name (runs ./bin/persistent-$1)
#            $2 - port polled at http://127.0.0.1:$2/health
# Outputs:   status line on stdout; on failure, a message and the last
#            20 log lines on stderr
# Returns:   0 once the health check passes; 1 if the process exits or
#            no healthy response arrives within 50 polls (~5s of sleeps)
#######################################
start() {
  local bin="$1"
  local port="$2"
  local log="/tmp/gostack-logs/persistent-$bin.log"
  local pid attempt

  nohup ./bin/persistent-"$bin" -config "$TEMP_TOML" > "$log" 2>&1 &
  pid=$!
  disown

  for ((attempt = 0; attempt < 50; attempt++)); do
    # If our child already exited (bad config, port in use, ...), stop
    # polling: any healthy answer on this port would come from some
    # other process. Assumes the daemons stay in the foreground and do
    # not fork themselves into the background.
    if ! kill -0 "$pid" 2>/dev/null; then
      break
    fi
    if curl -sSf -m 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
      echo "  persistent-$bin :$port up (log: $log)"
      return 0
    fi
    sleep 0.1
  done

  echo "  persistent-$bin :$port FAILED — log tail:" >&2
  tail -n 20 "$log" >&2
  return 1
}

# Under `set -e` a failing start() aborts the script, so later daemons
# are never launched on top of a broken dependency.
echo "[gostack] starting in dependency order (port range :4xxx)"
start storaged   4211
start catalogd   4212
start ingestd    4213
start queryd     4214
start embedd     4216
start vectord    4215
start pathwayd   4217
start observerd  4219
start matrixd    4218
start gateway    4110

# chatd is started independently — its provider key files come from
# /etc/lakehouse/{ollama_cloud,openrouter,opencode,kimi}.env; if
# chatd is already up (long-running from a prior session) we don't
# touch it. chatd uses no S3, so no temp-toml override needed.
if ! curl -sSf -m 1 http://127.0.0.1:3220/health >/dev/null 2>&1; then
  echo "[gostack] chatd :3220 not up; starting"
  nohup ./bin/chatd -config lakehouse.toml > /tmp/gostack-logs/chatd.log 2>&1 &
  disown
  chatd_up=0
  for ((i = 0; i < 50; i++)); do
    if curl -sSf -m 1 "http://127.0.0.1:3220/health" >/dev/null 2>&1; then
      echo "  chatd :3220 up"
      chatd_up=1
      break
    fi
    sleep 0.1
  done
  # Report a timeout instead of falling through silently. Kept
  # non-fatal: chatd is shared and the persistent daemons are already up.
  if [ "$chatd_up" -eq 0 ]; then
    echo "  chatd :3220 FAILED to come up — log tail:" >&2
    tail -n 20 /tmp/gostack-logs/chatd.log >&2 || true
  fi
else
  echo "  chatd :3220 already up (skipping)"
fi

echo
echo "[gostack] ready · sweep:"
for p in 4110 4211 4212 4213 4214 4215 4216 4217 4218 4219 3220; do
  # Capture the response rather than piping curl into head: with
  # `set -e -o pipefail` a failing curl inside a bare pipeline would
  # abort the script mid-sweep and skip the summary below.
  if health_body=$(curl -sSf -m 1 "http://127.0.0.1:$p/health" 2>/dev/null); then
    printf '%s\n' "${health_body:0:80}"
  else
    echo "  :$p health check FAILED"
  fi
done

echo
echo "[gostack] persistent stack: ports :4110+:4211-:4219 · bucket=$PERSISTENT_BUCKET"
echo "[gostack] smoke harnesses:  ports :3110+:3211-:3219 · bucket=lakehouse-go-primary"
echo "[gostack] shared:           chatd at :3220 (read-mostly LLM dispatch)"
echo "[gostack] tear down via: pkill -f 'bin/persistent-'"