From ad1670d36a7dca80165930bc9cf61379e2930d0c Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Apr 2026 07:14:57 -0500 Subject: [PATCH] =?UTF-8?q?storaged=20cap=20smoke=20=E2=80=94=20verifies?= =?UTF-8?q?=20ADR-002=20at=20300=20MiB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the "needs heavy integration smoke" follow-up from the ADR-002 commit (423a381). Until now the per-prefix PUT cap was verified only by unit tests and the reasoning in the commit messages; this smoke runs the actual cap path with real bytes. Three assertions, ~2s wall: 1. PUT 300 MiB to _vectors/ → 200 (cap raised to 4 GiB for the vectord persistence prefix). 2. PUT same 300 MiB to datasets/ → 413 (default 256 MiB cap still protects routine traffic). 3. GET _vectors/ → sha256 round-trips (no truncation between cap-raise and S3 multipart streaming). scripts/storaged_cap_smoke.sh Builds storaged + gateway, boots them, generates 300 MiB deterministic /dev/zero payload (sha stable across runs), runs the 3 assertions, cleans up the keys + processes via trap. /dev/zero generation chosen over a yes | head pipe — under pipefail, the SIGPIPE that yes receives when head closes early would fail the pipeline. just smoke-storaged-cap Wrapper recipe. Outside the main `just verify` chain because 300 MiB payload generation + transfer is MB-heavy. Run after meaningful storaged or vectord-persistence changes. Verified: bash scripts/storaged_cap_smoke.sh — 3/3 PASS · 2s wall just verify — vet + test + 9 smokes still 33s Co-Authored-By: Claude Opus 4.7 (1M context) --- justfile | 6 ++ scripts/storaged_cap_smoke.sh | 127 ++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100755 scripts/storaged_cap_smoke.sh diff --git a/justfile b/justfile index fcb399d..5be6c0c 100644 --- a/justfile +++ b/justfile @@ -59,6 +59,12 @@ smoke day: smoke-g2-fixtures: @bash scripts/g2_smoke_fixtures.sh +# Storaged cap smoke — verifies ADR-002's per-prefix PUT cap at 300 MiB. 
+# Outside the main chain (MB-heavy, ~5-10s wall); run after meaningful
+# storaged or vectord-persistence changes.
+smoke-storaged-cap:
+    @bash scripts/storaged_cap_smoke.sh
+
 # All 9 smokes in dependency order. Halts on first failure.
 smoke-all:
     #!/usr/bin/env bash
diff --git a/scripts/storaged_cap_smoke.sh b/scripts/storaged_cap_smoke.sh
new file mode 100755
index 0000000..94adae5
--- /dev/null
+++ b/scripts/storaged_cap_smoke.sh
@@ -0,0 +1,153 @@
+#!/usr/bin/env bash
+# Storaged cap smoke — verifies ADR-002's per-prefix PUT cap.
+#
+# What it asserts:
+#   1. 300 MiB PUT to _vectors/ → 200 (cap raised to 4 GiB)
+#   2. 300 MiB PUT to datasets/ → 413 (default 256 MiB cap)
+#   3. Round-trip read: 300 MiB GET from _vectors/
+#      sha-matches what we PUT (no truncation)
+#
+# Why this lives outside the main chain (just verify): it generates
+# a 300 MiB payload, transfers it twice, takes ~5-10s on this box.
+# Routine pre-push gating doesn't need that overhead — this smoke
+# is opt-in via `just smoke-storaged-cap` after meaningful changes
+# to storaged or vectord persistence.
+#
+# Usage: ./scripts/storaged_cap_smoke.sh
+
+set -euo pipefail
+cd "$(dirname "$0")/.."
+
+export PATH="$PATH:/usr/local/go/bin"
+
+PAYLOAD_MIB=300
+VECTORS_KEY="_vectors/cap_smoke_$$.bin"
+NONVECTORS_KEY="datasets/cap_smoke_$$.bin"
+
+echo "[cap-smoke] building storaged + gateway..."
+go build -o bin/ ./cmd/storaged ./cmd/gateway
+
+# Kill any already-running storaged/gateway (matched by command line)
+# before launching fresh ones.
+pkill -f "bin/(storaged|gateway)" 2>/dev/null || true
+sleep 0.3
+
+PIDS=()
+TMP="$(mktemp -d)"
+PAYLOAD="$TMP/payload.bin"
+ROUND_TRIP="$TMP/roundtrip.bin"
+
+# cleanup — best-effort teardown: delete both test keys through the
+# gateway, signal every PID recorded in PIDS, remove the temp dir.
+# Installed on EXIT only, so it runs exactly once per exit path.
+cleanup() {
+  echo "[cap-smoke] cleanup"
+  # Best-effort delete of test keys (404 is fine).
+  curl -sS -m 5 -X DELETE "http://127.0.0.1:3110/v1/storage/delete/${VECTORS_KEY}" >/dev/null 2>&1 || true
+  curl -sS -m 5 -X DELETE "http://127.0.0.1:3110/v1/storage/delete/${NONVECTORS_KEY}" >/dev/null 2>&1 || true
+  local p
+  # ${PIDS[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
+  for p in ${PIDS[@]+"${PIDS[@]}"}; do
+    kill "$p" 2>/dev/null || true
+  done
+  rm -rf -- "$TMP"
+}
+trap cleanup EXIT
+# A signal handler that merely returns lets the script carry on, so INT/TERM
+# exit explicitly (128 + signal number); the EXIT trap then does the cleanup.
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# poll_health PORT — hit http://127.0.0.1:PORT/health until curl succeeds.
+# curl runs without -f, so any HTTP response counts as healthy.
+# Returns 0 on the first success, 1 once the ~5 s deadline has passed.
+poll_health() {
+  local port="$1" deadline
+  deadline=$(($(date +%s) + 5))
+  while [ "$(date +%s)" -lt "$deadline" ]; do
+    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
+      return 0
+    fi
+    sleep 0.05
+  done
+  return 1
+}
+
+echo "[cap-smoke] launching storaged → gateway..."
+./bin/storaged > "$TMP/storaged.log" 2>&1 &
+PIDS+=("$!")
+poll_health 3211 || { echo "storaged failed"; tail -n 10 "$TMP/storaged.log"; exit 1; }
+
+./bin/gateway > "$TMP/gateway.log" 2>&1 &
+PIDS+=("$!")
+poll_health 3110 || { echo "gateway failed"; tail -n 10 "$TMP/gateway.log"; exit 1; }
+
+echo "[cap-smoke] generating ${PAYLOAD_MIB} MiB deterministic payload..."
+# /dev/zero gives all-zero bytes — deterministic across runs and
+# machines, sha is stable, no pipe (avoids SIGPIPE under pipefail).
+dd if=/dev/zero of="$PAYLOAD" bs=1M count="$PAYLOAD_MIB" status=none
+EXPECTED_SIZE=$(stat -c%s "$PAYLOAD")
+EXPECTED_SHA=$(sha256sum "$PAYLOAD" | awk '{print $1}')
+echo " size=$EXPECTED_SIZE sha=${EXPECTED_SHA:0:12}..."
+
+FAILED=0
+
+# ── Test 1: PUT 300 MiB to _vectors/ → 200 (was 413 before ADR-002) ──
+echo "[cap-smoke] Test 1: PUT ${PAYLOAD_MIB} MiB to _vectors/ (should pass)"
+# `|| true`: a curl transport error must surface as a failed assertion
+# (status 000) instead of aborting silently under `set -e`.
+HTTP=$(curl -sS -X PUT \
+  --upload-file "$PAYLOAD" \
+  -H "Content-Type: application/octet-stream" \
+  -o "$TMP/put_vectors.body" \
+  -w "%{http_code}" \
+  "http://127.0.0.1:3110/v1/storage/put/${VECTORS_KEY}") || true
+if [ "$HTTP" = "200" ]; then
+  echo " ✓ PUT _vectors/ → 200"
+else
+  echo " ✗ PUT _vectors/ → $HTTP (expected 200; ADR-002 cap fix may have regressed)"
+  cat "$TMP/put_vectors.body" 2>/dev/null || true; echo
+  FAILED=1
+fi
+
+# ── Test 2: PUT same payload to datasets/ → 413 (default cap) ──
+echo "[cap-smoke] Test 2: PUT ${PAYLOAD_MIB} MiB to datasets/ (should reject)"
+# The server may answer 413 and close before the whole body is sent; curl
+# can report that as a send error even though the status was received.
+HTTP=$(curl -sS -X PUT \
+  --upload-file "$PAYLOAD" \
+  -H "Content-Type: application/octet-stream" \
+  -o /dev/null \
+  -w "%{http_code}" \
+  "http://127.0.0.1:3110/v1/storage/put/${NONVECTORS_KEY}") || true
+if [ "$HTTP" = "413" ]; then
+  echo " ✓ PUT datasets/ → 413 (default cap protects routine prefixes)"
+else
+  echo " ✗ PUT datasets/ → $HTTP (expected 413; default cap may be too permissive)"
+  FAILED=1
+fi
+
+# ── Test 3: GET round-trip from _vectors/ — sha must match ──
+echo "[cap-smoke] Test 3: GET _vectors/ — sha matches input"
+# Start from an empty file so a failed GET is reported as a mismatch
+# rather than sha256sum/stat aborting the script on a missing file.
+: > "$ROUND_TRIP"
+curl -sS -o "$ROUND_TRIP" "http://127.0.0.1:3110/v1/storage/get/${VECTORS_KEY}" || true
+ACTUAL_SHA=$(sha256sum "$ROUND_TRIP" | awk '{print $1}')
+ACTUAL_SIZE=$(stat -c%s "$ROUND_TRIP")
+if [ "$ACTUAL_SHA" = "$EXPECTED_SHA" ] && [ "$ACTUAL_SIZE" = "$EXPECTED_SIZE" ]; then
+  echo " ✓ GET round-trip preserves bytes (size=$ACTUAL_SIZE sha=${ACTUAL_SHA:0:12})"
+else
+  echo " ✗ GET round-trip mismatch"
+  echo " expected size=$EXPECTED_SIZE sha=${EXPECTED_SHA:0:12}"
+  echo " actual size=$ACTUAL_SIZE sha=${ACTUAL_SHA:0:12}"
+  FAILED=1
+fi
+
+if [ "$FAILED" = "0" ]; then
+  echo "[cap-smoke] ✓ Storaged cap smoke: PASSED"
+  exit 0
+else
+  echo "[cap-smoke] ✗ Storaged cap smoke: FAILED"
+  exit 1
+fi