Compare commits
No commits in common. "4fd560cad6176549f70f9a73e28a2a1bc9d77316" and "77a3dcf266173a9cba642e915c3028b19b84d6ae" have entirely different histories.
4fd560cad6
...
77a3dcf266
@ -2,48 +2,31 @@
|
|||||||
# scripts/cutover/start_go_stack.sh
|
# scripts/cutover/start_go_stack.sh
|
||||||
#
|
#
|
||||||
# Bring up the full Go stack persistently — alongside the live Rust
|
# Bring up the full Go stack persistently — alongside the live Rust
|
||||||
# gateway on :3100 + alongside the harness-transient stacks the
|
# gateway on :3100. All Go daemons land on the parallel port range
|
||||||
# smokes spin up. The persistent Go daemons land on the port range
|
# :3110 + :3211-:3220 so there's no port collision.
|
||||||
# :4110 + :4211-:4219, with chatd shared on :3220 (no collision with Rust on :3100 or the smokes on :3110 + :3211-:3219). Persistent
|
|
||||||
# daemons run under DIFFERENT BINARY NAMES (bin/persistent-*) and
|
|
||||||
# write to a SEPARATE MinIO BUCKET (lakehouse-go-persistent) so the
|
|
||||||
# pre-push smoke chain — which uses anchored `pkill -f "bin/(name)$"`
|
|
||||||
# teardown + reads from `lakehouse-go-primary` — can run without
|
|
||||||
# tearing down or polluting our long-running state.
|
|
||||||
#
|
#
|
||||||
# Three isolation layers (the third was added 2026-05-01 after the
|
# Unlike playbook_lift.sh's transient harness boot (which kills the
|
||||||
# first push test exposed a port-collision bug — smoke catalogd
|
# stack on exit), this script starts every daemon detached via nohup
|
||||||
# failed to bind :3212 because persistent catalogd already had it,
|
# + disown. Operators run it once at boot or after a restart; the
|
||||||
# but smoke's poll_health 3212 succeeded responding to the
|
# stack stays up until a `pkill -f "bin/(name)"` or reboot.
|
||||||
# persistent daemon, and the smoke happily proceeded talking to
|
|
||||||
# the persistent stack with the wrong bucket expectations):
|
|
||||||
#
|
|
||||||
# 1. BINARY NAMES — persistent stack runs via symlinks
|
|
||||||
# bin/persistent-<name> → bin/<name>. Smoke pkill pattern
|
|
||||||
# `bin/(storaged|...|gateway)$` matches `bin/<name>$` substrings;
|
|
||||||
# `bin/persistent-<name>` doesn't match because `bin/` is followed
|
|
||||||
# by `persistent-`, which is not one of the listed daemon names.
|
|
||||||
# 2. MINIO BUCKETS — persistent stack uses lakehouse-go-persistent;
|
|
||||||
# smoke harnesses use lakehouse-go-primary. Different buckets
|
|
||||||
# mean rehydrate paths can't see each other's `_vectors/*`
|
|
||||||
# persistence files.
|
|
||||||
# 3. PORTS — persistent stack uses :4110 + :4211-:4219 (gateway +
|
|
||||||
# upstreams). Smoke harness uses :3110 + :3211-:3219. Both
|
|
||||||
# reach for the SAME chatd at :3220 because chatd is
|
|
||||||
# read-mostly (LLM dispatch, no persistent state to clobber)
|
|
||||||
# and operators don't want to maintain two LLM provider key
|
|
||||||
# sets. The temp toml at /tmp/lakehouse-persistent.toml
|
|
||||||
# overrides bucket + bind ports + upstream URLs (except chatd).
|
|
||||||
#
|
#
|
||||||
# Logs land in /tmp/gostack-logs/<bin>.log (one per daemon).
|
# Logs land in /tmp/gostack-logs/<bin>.log (one per daemon).
|
||||||
#
|
#
|
||||||
# Used to bring up the persistent stack 2026-05-01 — the first time
|
# Used to bring up the persistent stack 2026-05-01 — the first time
|
||||||
# the Go side has run as long-running daemons rather than per-harness
|
# the Go side has run as long-running daemons rather than per-harness
|
||||||
# transient processes. The 2-isolation-layer split was added the
|
# transient processes.
|
||||||
# same day after the pre-push gate caught a smoke-vs-persistent
|
#
|
||||||
# collision (g1p_smoke saw count=2 when expecting count=1 because
|
# KNOWN CONSTRAINT: the pre-push smoke chain (`just verify` →
|
||||||
# vectord's MinIO bucket had both the smoke's persist_demo AND the
|
# scripts/{d,g}*_smoke.sh) uses the SAME anchored `pkill -f
|
||||||
# persistent stack's workers index).
|
# "bin/(name)$"` pattern this script does, and ALSO matches our
|
||||||
|
# persistent daemons by name. Pushing while the persistent stack
|
||||||
|
# is up will kill 7 of 11 daemons (gateway, storaged, catalogd,
|
||||||
|
# ingestd, queryd, embedd, vectord; the smokes don't reach for
|
||||||
|
# pathwayd/observerd/matrixd/chatd). Workaround: re-run this
|
||||||
|
# script after every push. A proper fix is to give the persistent
|
||||||
|
# stack a different binary name (e.g. via build tags or a
|
||||||
|
# wrapper symlink) so smoke-side pkill doesn't see it; deferred
|
||||||
|
# until the trigger fires (i.e. when an operator gets bitten).
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
@ -54,54 +37,10 @@ if [ ! -d bin ]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Layer 1: symlink-based binary names ─────────────────────────────
|
# Ensure no leftover from a transient harness run. Anchored pattern
|
||||||
# Create bin/persistent-* symlinks to bin/* so the persistent stack
|
# per feedback_pkill_scope; never bare `bin/`.
|
||||||
# has distinct cmdline strings that smoke pkill won't match. Idempotent
|
echo "[gostack] killing any stale Go daemons (anchored pkill)"
|
||||||
# (existing symlinks are left alone).
|
pkill -f "bin/(storaged|catalogd|ingestd|queryd|embedd|vectord|pathwayd|observerd|matrixd|gateway)$" 2>/dev/null || true
|
||||||
|
|
||||||
DAEMONS=(storaged catalogd ingestd queryd embedd vectord pathwayd observerd matrixd gateway)
|
|
||||||
|
|
||||||
for d in "${DAEMONS[@]}"; do
|
|
||||||
target="bin/persistent-$d"
|
|
||||||
if [ ! -L "$target" ] && [ ! -e "$target" ]; then
|
|
||||||
ln -s "$d" "$target"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# ── Layer 2: separate MinIO bucket via temp config ──────────────────
|
|
||||||
# Generate /tmp/lakehouse-persistent.toml from the canonical
|
|
||||||
# lakehouse.toml with [s3].bucket overridden. Caller can override the
|
|
||||||
# bucket name via LH_PERSISTENT_BUCKET env var.
|
|
||||||
|
|
||||||
PERSISTENT_BUCKET="${LH_PERSISTENT_BUCKET:-lakehouse-go-persistent}"
|
|
||||||
TEMP_TOML=/tmp/lakehouse-persistent.toml
|
|
||||||
|
|
||||||
# Create the bucket if missing. mc is idempotent with --ignore-existing.
|
|
||||||
if command -v mc >/dev/null 2>&1; then
|
|
||||||
mc mb --ignore-existing "local/$PERSISTENT_BUCKET" >/dev/null 2>&1 || true
|
|
||||||
fi
|
|
||||||
|
|
||||||
# sed-replace the bucket line + port range. Anchored to specific
|
|
||||||
# substrings so accidental matches don't fire. chatd's :3220 stays
|
|
||||||
# unchanged (read-mostly LLM dispatch, no persistent state).
|
|
||||||
sed -e "s/lakehouse-go-primary/$PERSISTENT_BUCKET/g" \
|
|
||||||
-e 's|127\.0\.0\.1:3110|127.0.0.1:4110|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3211|127.0.0.1:4211|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3212|127.0.0.1:4212|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3213|127.0.0.1:4213|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3214|127.0.0.1:4214|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3215|127.0.0.1:4215|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3216|127.0.0.1:4216|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3217|127.0.0.1:4217|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3218|127.0.0.1:4218|g' \
|
|
||||||
-e 's|127\.0\.0\.1:3219|127.0.0.1:4219|g' \
|
|
||||||
lakehouse.toml > "$TEMP_TOML"
|
|
||||||
echo "[gostack] config: $TEMP_TOML (bucket=$PERSISTENT_BUCKET, ports=4110+4211-4219)"
|
|
||||||
|
|
||||||
# ── Cleanup any prior persistent daemons ────────────────────────────
|
|
||||||
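# Match by the persistent- prefix so smoke processes are untouched. NOTE(review): `pkill -f` matches the full command line and the pattern ends in `$`, so daemons launched with `-config …` arguments may not match — confirm.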
|
|
||||||
echo "[gostack] killing any stale persistent Go daemons (anchored on persistent-)"
|
|
||||||
pkill -f "bin/persistent-(storaged|catalogd|ingestd|queryd|embedd|vectord|pathwayd|observerd|matrixd|gateway)$" 2>/dev/null || true
|
|
||||||
sleep 0.5
|
sleep 0.5
|
||||||
|
|
||||||
mkdir -p /tmp/gostack-logs
|
mkdir -p /tmp/gostack-logs
|
||||||
@ -109,58 +48,46 @@ mkdir -p /tmp/gostack-logs
|
|||||||
start() {
|
start() {
|
||||||
local bin="$1"
|
local bin="$1"
|
||||||
local port="$2"
|
local port="$2"
|
||||||
local log="/tmp/gostack-logs/persistent-$bin.log"
|
local log="/tmp/gostack-logs/$bin.log"
|
||||||
nohup ./bin/persistent-"$bin" -config "$TEMP_TOML" > "$log" 2>&1 & disown
|
nohup ./bin/"$bin" -config lakehouse.toml > "$log" 2>&1 & disown
|
||||||
for _ in $(seq 1 50); do
|
for _ in $(seq 1 50); do
|
||||||
if curl -sSf -m 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
|
if curl -sSf -m 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
|
||||||
echo " persistent-$bin :$port up (log: $log)"
|
echo " $bin :$port up (log: $log)"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
done
|
done
|
||||||
echo " persistent-$bin :$port FAILED — log tail:"
|
echo " $bin :$port FAILED — log tail:"
|
||||||
tail -20 "$log"
|
tail -20 "$log"
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "[gostack] starting in dependency order (port range :4xxx)"
|
echo "[gostack] starting in dependency order"
|
||||||
start storaged 4211
|
start storaged 3211
|
||||||
start catalogd 4212
|
start catalogd 3212
|
||||||
start ingestd 4213
|
start ingestd 3213
|
||||||
start queryd 4214
|
start queryd 3214
|
||||||
start embedd 4216
|
start embedd 3216
|
||||||
start vectord 4215
|
start vectord 3215
|
||||||
start pathwayd 4217
|
start pathwayd 3217
|
||||||
start observerd 4219
|
start observerd 3219
|
||||||
start matrixd 4218
|
start matrixd 3218
|
||||||
start gateway 4110
|
start gateway 3110
|
||||||
|
|
||||||
# chatd is started independently — its provider key files come from
|
# chatd is started independently — its provider key files come from
|
||||||
# /etc/lakehouse/{ollama_cloud,openrouter,opencode,kimi}.env; if
|
# /etc/lakehouse/{ollama_cloud,openrouter,opencode,kimi}.env; if
|
||||||
# chatd is already up (long-running from a prior session) we don't
|
# chatd is already up (long-running from a prior session) we don't
|
||||||
# touch it. chatd uses no S3, so no temp-toml override needed.
|
# touch it.
|
||||||
if ! curl -sSf -m 1 http://127.0.0.1:3220/health >/dev/null 2>&1; then
|
if ! curl -sSf -m 1 http://127.0.0.1:3220/health >/dev/null 2>&1; then
|
||||||
echo "[gostack] chatd :3220 not up; starting"
|
echo "[gostack] chatd :3220 not up; starting"
|
||||||
nohup ./bin/chatd -config lakehouse.toml > /tmp/gostack-logs/chatd.log 2>&1 & disown
|
start chatd 3220
|
||||||
for _ in $(seq 1 50); do
|
|
||||||
if curl -sSf -m 1 "http://127.0.0.1:3220/health" >/dev/null 2>&1; then
|
|
||||||
echo " chatd :3220 up"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
sleep 0.1
|
|
||||||
done
|
|
||||||
else
|
else
|
||||||
echo " chatd :3220 already up (skipping)"
|
echo " chatd :3220 already up (skipping)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo
|
echo
|
||||||
echo "[gostack] ready · sweep:"
|
echo "[gostack] ready · sweep:"
|
||||||
for p in 4110 4211 4212 4213 4214 4215 4216 4217 4218 4219 3220; do
|
for p in 3110 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220; do
|
||||||
curl -sSf -m 1 "http://127.0.0.1:$p/health" 2>/dev/null | head -c 80
|
curl -sSf -m 1 "http://127.0.0.1:$p/health" 2>/dev/null | head -c 80
|
||||||
echo
|
echo
|
||||||
done
|
done
|
||||||
echo
|
|
||||||
echo "[gostack] persistent stack: ports :4110+:4211-:4219 · bucket=$PERSISTENT_BUCKET"
|
|
||||||
echo "[gostack] smoke harnesses: ports :3110+:3211-:3219 · bucket=lakehouse-go-primary"
|
|
||||||
echo "[gostack] shared: chatd at :3220 (read-mostly LLM dispatch)"
|
|
||||||
echo "[gostack] tear down via: pkill -f 'bin/persistent-'"
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user