#!/usr/bin/env bash
# Production substrate smoke — single command that verifies every
# production-critical surface end-to-end. Exits non-zero on the first
# failure so an operator can run this before:
#   - Swapping workers_500k.parquet → real Chicago contractor data
#   - Spinning up the Asterisk voice agent against /v1/chat
#   - Running staffing inference loops via /v1/iterate
#   - Wiring the assistant against the gateway
#
# Usage:
#   ./scripts/production_smoke.sh
#
# Tunable via env:
#   GATEWAY=http://localhost:3100   # gateway base URL
#   FAIL_FAST=1                     # exit on first failure (default 1)
#   VERBOSE=1                       # print full responses on success too
#   HEALTH_PATH=/health             # path of the health endpoint (see NOTE below)
#
# Requires: bash, curl, awk, sed, head, python3 (used only to read JSON fields).

# Only -e is enabled. pipefail is deliberately NOT set: the
# `echo ... | head -3` pipelines below may close early on large bodies and
# must not abort the run.
set -e

GATEWAY="${GATEWAY:-http://localhost:3100}"
FAIL_FAST="${FAIL_FAST:-1}"
VERBOSE="${VERBOSE:-0}"
# NOTE(review): the original health request was lost when this file was
# flattened; "/health" is an assumption inferred from the HEALTH_RESP
# variable name — confirm against version control.
HEALTH_PATH="${HEALTH_PATH:-/health}"

PASS=0
FAIL=0
FAILURES=()

#######################################
# Print the pass/fail totals and every recorded failure message.
# NOTE(review): the original print_summary body was lost in extraction;
# this is a reconstruction — the exact output format should be confirmed.
# Globals:  PASS, FAIL, FAILURES (read)
# Outputs:  summary on stdout
#######################################
print_summary() {
  local failure
  echo ""
  echo "──────────────── summary ────────────────"
  echo "  passed: $PASS"
  echo "  failed: $FAIL"
  if [ "${#FAILURES[@]}" -gt 0 ]; then
    echo "  failures:"
    for failure in "${FAILURES[@]}"; do
      echo "    - $failure"
    done
  fi
}

#######################################
# Record one failure for the summary.
# Globals:   FAIL, FAILURES (written)
# Arguments: $1 - message describing the failure
#######################################
record_failure() {
  FAIL=$((FAIL + 1))
  FAILURES+=("$1")
}

#######################################
# Print the summary and exit 1 when FAIL_FAST=1; otherwise do nothing.
# Written as an explicit `if` so that the FAIL_FAST=0 path returns 0 —
# a trailing `[ ... ] && { ...; }` would make the caller return 1 and
# trip `set -e`.
# Globals: FAIL_FAST (read)
#######################################
bail_if_fail_fast() {
  if [ "$FAIL_FAST" = "1" ]; then
    print_summary
    exit 1
  fi
}

#######################################
# Normalise a value to a non-negative integer so it is safe to use in
# `[ ... -lt ... ]`. Anything empty or containing a non-digit becomes 0.
# Arguments: $1 - candidate value
# Outputs:   the integer on stdout
#######################################
to_uint() {
  case "$1" in
    '' | *[!0-9]*) echo 0 ;;
    *) echo "$1" ;;
  esac
}

#######################################
# Run one HTTP check and compare its status against the expected one.
# Globals:   PASS, FAIL, FAILURES (written); VERBOSE, FAIL_FAST (read)
# Arguments: $1 - human-readable check name
#            $2 - expected HTTP status (string compare)
#            $3 - shell command string that prints "status|||body"
# Outputs:   one progress line, plus up to three body lines on failure
#            (or on success when VERBOSE=1)
# Returns:   always 0; exits 1 on failure when FAIL_FAST=1
#######################################
check() {
  local name="$1"
  local expected_status="$2"
  local cmd="$3"
  echo -n "  [$((PASS + FAIL + 1))] $name ... "

  local resp
  # eval is intentional: callers pass a quoted command line so "$GATEWAY"
  # and the JSON payload are expanded here. Only pass trusted, in-file
  # strings — never external input. Failures of the command itself are
  # tolerated; they surface as a status mismatch below.
  resp=$(eval "$cmd" 2>&1) || true

  # Split on the first "|||": left side is the status, right side the body.
  local status="${resp%%|||*}"
  local body="${resp#*|||}"

  if [ "$status" = "$expected_status" ]; then
    PASS=$((PASS + 1))
    echo "✓ ($status)"
    if [ "$VERBOSE" = "1" ]; then
      echo "    $body" | head -3 | sed 's/^/      /'
    fi
  else
    record_failure "$name: expected $expected_status, got $status"
    echo "✗ (got $status, expected $expected_status)"
    echo "    $body" | head -3 | sed 's/^/      /'
    bail_if_fail_fast
  fi
  return 0
}

#######################################
# Run curl and print the result as "status|||body".
# curl appends the HTTP code on its own final line (-w); awk takes that
# last line as the status and everything before it as the body.
# NOTE(review): the tail of the original awk program was lost in
# extraction; it is reconstructed from the surviving prefix and the
# "status|||body" contract that check() parses.
# Arguments: any curl arguments (options and URL)
# Outputs:   "status|||body" on stdout (curl's stderr is merged in)
#######################################
curl_with_status() {
  curl -sS -w '\n%{http_code}' "$@" 2>&1 | awk '
    { lines[NR] = $0 }
    END {
      status = lines[NR]
      body = ""
      for (i = 1; i < NR; i++) {
        body = body lines[i] "\n"
      }
      printf "%s|||%s", status, body
    }'
}

# NOTE(review): section 1 and the opening of section 2 were lost when this
# file was flattened. No checks have been invented for section 1; restore
# them from version control if they existed.

# ─── 2. Health ───────────────────────────────────────────────────────
echo "▶ Health"
HEALTH_RESP=$(curl -sS -m 10 "${GATEWAY}${HEALTH_PATH}" 2>&1) || HEALTH_RESP="{}"
WORKERS_COUNT=$(echo "$HEALTH_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('workers_count',0))" 2>/dev/null || echo 0)
PROVIDERS_OK=$(echo "$HEALTH_RESP" | python3 -c "import sys,json; d=json.load(sys.stdin).get('providers_configured',{}); print(sum(1 for v in d.values() if v))" 2>/dev/null || echo 0)
echo "  workers_count: $WORKERS_COUNT"
echo "  providers_configured (count): $PROVIDERS_OK"
# A non-integer value (e.g. "None") would make `[ -lt ]` error out and be
# mistaken for "loaded"; coerce to an integer first.
WORKERS_COUNT=$(to_uint "$WORKERS_COUNT")
if [ "$WORKERS_COUNT" -lt 1 ]; then
  record_failure "workers_count=0 — parquet load failed or empty"
  echo "  ✗ workers not loaded"
  bail_if_fail_fast
else
  PASS=$((PASS + 1))
  echo "  ✓ workers loaded"
fi

# ─── 3. Truth Layer ──────────────────────────────────────────────────
echo "▶ Truth Layer"
check "/v1/context returns rules" "200" \
  'curl_with_status -m 10 "$GATEWAY/v1/context"'

# ─── 4. /v1/chat (provider=ollama) ──────────────────────────────────
echo "▶ /v1/chat (provider=ollama, fast model)"
check "/v1/chat ping" "200" \
  'curl_with_status -m 60 -X POST "$GATEWAY/v1/chat" \
    -H "content-type: application/json" \
    -d "{\"provider\":\"ollama\",\"model\":\"qwen3.5:latest\",\"messages\":[{\"role\":\"user\",\"content\":\"reply: PONG\"}],\"max_tokens\":30,\"temperature\":0,\"think\":false}"'

# ─── 5. /v1/validate (negative + positive) ──────────────────────────
echo "▶ /v1/validate"
check "phantom candidate_id → 422 Consistency" "422" \
  'curl_with_status -m 10 -X POST "$GATEWAY/v1/validate" \
    -H "content-type: application/json" \
    -d "{\"kind\":\"fill\",\"artifact\":{\"fills\":[{\"candidate_id\":\"W-FAKE-0\",\"name\":\"Fake\"}]},\"context\":{\"target_count\":1}}"'

check "real worker (W-1) → 200 OK" "200" \
  'curl_with_status -m 10 -X POST "$GATEWAY/v1/validate" \
    -H "content-type: application/json" \
    -d "{\"kind\":\"fill\",\"artifact\":{\"fills\":[{\"candidate_id\":\"W-1\",\"name\":\"Anyone\"}]},\"context\":{\"target_count\":1}}"'

check "SSN in body → 422 Policy" "422" \
  'curl_with_status -m 10 -X POST "$GATEWAY/v1/validate" \
    -H "content-type: application/json" \
    -d "{\"kind\":\"email\",\"artifact\":{\"to\":\"a@b.com\",\"body\":\"Your SSN 123-45-6789 is on file.\"}}"'

# ─── 6. /v1/iterate (bounded retry loop) ────────────────────────────
# Phantom worker → expect 422 IterateFailure with history (not 200)
echo "▶ /v1/iterate (bounded retry)"
check "/v1/iterate phantom → bounded fail" "422" \
  'curl_with_status -m 240 -X POST "$GATEWAY/v1/iterate" \
    -H "content-type: application/json" \
    -d "{\"kind\":\"fill\",\"provider\":\"ollama\",\"model\":\"qwen3.5:latest\",\"system\":\"Reply with ONLY: {\\\"fills\\\":[{\\\"candidate_id\\\":\\\"W-99999999\\\",\\\"name\\\":\\\"X\\\"}]}\",\"prompt\":\"emit it\",\"context\":{\"target_count\":1},\"max_iterations\":1,\"max_tokens\":200,\"temperature\":0}"'

# ─── 7. Doc-drift batch ─────────────────────────────────────────────
echo "▶ Doc-drift scan"
check "/vectors/playbook_memory/doc_drift/scan" "200" \
  'curl_with_status -m 60 -X POST "$GATEWAY/vectors/playbook_memory/doc_drift/scan"'

# ─── 8. Usage tracking ──────────────────────────────────────────────
echo "▶ Usage tracking"
# Fall back to an empty object so a curl failure is reported as a failed
# check instead of `set -e` aborting before the summary is printed.
USAGE=$(curl -sS -m 10 "$GATEWAY/v1/usage" 2>&1) || USAGE="{}"
USAGE_REQS=$(echo "$USAGE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('requests',0))" 2>/dev/null || echo 0)
echo "  usage.requests: $USAGE_REQS (should be > 0 if /v1/chat fired)"
USAGE_REQS=$(to_uint "$USAGE_REQS")
if [ "$USAGE_REQS" -ge 1 ]; then
  PASS=$((PASS + 1))
  echo "  ✓ /v1/usage tracking"
else
  record_failure "/v1/usage didn't increment after /v1/chat call"
  echo "  ✗ /v1/usage didn't increment"
fi

print_summary
if [ "$FAIL" -eq 0 ]; then
  exit 0
fi
exit 1