lakehouse/scripts/production_smoke.sh
root 41b0a99ed2 chore: add real content that was sitting untracked
Surfaced by today's untracked-files audit. None of these are accidents —
multiple are referenced by name in CLAUDE.md and memory files but were
never added.

Categories:
- docs/PHASE_AUDIT_GUIDE.md (106 LOC) — Claude Code phase audit guidance
- ops/systemd/lakehouse-langfuse-bridge.service — Langfuse bridge unit
- package.json — top-level npm manifest
- scripts/e2e_pipeline_check.sh + production_smoke.sh — real test scripts
- reports/kimi/audit-last-week*.md — the "Two reports live" CLAUDE.md cites
- tests/multi-agent/scenarios/ — 44 staffing scenarios (cutover decision A)
- tests/multi-agent/playbooks/ — 102 playbook records
- tests/battery/, tests/agent_test/PRD.md, tests/real-world/* — real tests
- sidecar/sidecar/{lab_ui,pipeline_lab}.py — 888 LOC dev-only UIs that
  remain in service post-sidecar-drop (commit ba928b1 explicitly kept them)

Sensitivity check: scenarios use synthetic company names ("Heritage Foods",
"Cornerstone Fabrication"); audit reports describe code findings only;
no PII or secrets surfaced.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 22:22:10 -05:00

158 lines
7.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# Production substrate smoke — single command that verifies every
# production-critical surface end-to-end. Exits non-zero on the first
# failure so an operator can run this before:
# - Swapping workers_500k.parquet → real Chicago contractor data
# - Spinning up the Asterisk voice agent against /v1/chat
# - Running staffing inference loops via /v1/iterate
# - Wiring the assistant against the gateway
#
# Usage:
# ./scripts/production_smoke.sh
#
# Tunable via env:
# GATEWAY=http://localhost:3100 # gateway base URL
# FAIL_FAST=1 # exit on first failure (default 1)
# VERBOSE=1 # print full responses on success too
# Abort on any unhandled command failure.
set -e

# Environment-tunable settings. The := form only fills in the default when
# the variable is unset or empty, so caller-provided values win.
: "${GATEWAY:=http://localhost:3100}"
: "${FAIL_FAST:=1}"
: "${VERBOSE:=0}"

# Running tallies shared by every check in this script.
PASS=0 FAIL=0
FAILURES=()
#######################################
# Run one smoke check and record whether it passed.
# Globals:
#   PASS, FAIL, FAILURES - updated with the outcome
#   VERBOSE              - when "1", body preview is printed on success too
#   FAIL_FAST            - when "1", a failure prints the summary and exits 1
# Arguments:
#   $1 - human-readable check name
#   $2 - expected status, compared as a string
#   $3 - command string, run through eval; expected to print "status|||body"
# Outputs:
#   One progress line on stdout, plus up to 3 body lines on failure
#   (or on success when VERBOSE=1).
# Returns:
#   0 whenever it returns. Returning non-zero on a failed check would make
#   `set -e` kill the script at the call site, which defeats FAIL_FAST=0.
#######################################
check() {
  local name="$1"
  local expected_status="$2"
  local cmd="$3"
  local resp status body

  echo -n " [$((PASS + FAIL + 1))] $name ... "

  # eval is intentional: callers pass script-authored command strings so that
  # variables such as $GATEWAY are expanded here. Never feed it external input.
  resp=$(eval "$cmd" 2>&1) || true

  # Split on the first "|||": everything before is the status, the rest is body.
  status="${resp%%|||*}"
  body="${resp#*|||}"

  if [ "$status" = "$expected_status" ]; then
    PASS=$((PASS + 1))
    echo "✓ ($status)"
    if [ "$VERBOSE" = "1" ]; then
      echo " $body" | head -3 | sed 's/^/ /'
    fi
    return 0
  fi

  FAIL=$((FAIL + 1))
  FAILURES+=("$name: expected $expected_status, got $status")
  echo "✗ (got $status, expected $expected_status)"
  echo " $body" | head -3 | sed 's/^/ /'

  # Explicit if (not `[ ... ] && { ...; }`): with FAIL_FAST=0 the && form left
  # the function's exit status at 1, which aborted the script under `set -e`.
  if [ "$FAIL_FAST" = "1" ]; then
    print_summary
    exit 1
  fi
  return 0
}
# Invoke curl with the caller's arguments and print a single record of the
# form "status|||body" on stdout. curl's stderr is merged into the stream.
curl_with_status() {
  # -w appends a newline and the HTTP status code after the response body, so
  # the status is always the final line. awk holds back one line at a time:
  # every line except the last is joined (newline-separated) into the body,
  # and the last line is emitted as the status.
  curl -sS -w "\n%{http_code}" "$@" 2>&1 | awk '
    NR > 1 { payload = payload glue held; glue = "\n" }
    { held = $0 }
    END { print held "|||" payload }
  '
}
# Print the final tally: a blank line, a horizontal rule, the pass/fail
# counts, one bullet per recorded failure (if any), and a closing rule.
# Reads the PASS, FAIL and FAILURES globals; writes only to stdout.
print_summary() {
  local rule="═══════════════════════════════════════════════════════════════"
  local entry

  printf '\n%s\n' "$rule"
  printf ' %s passed · %s failed\n' "$PASS" "$FAIL"
  if (( ${#FAILURES[@]} > 0 )); then
    printf '%s\n' " failures:"
    for entry in "${FAILURES[@]}"; do
      printf ' - %s\n' "$entry"
    done
  fi
  printf '%s\n' "$rule"
}
echo "Production substrate smoke test against $GATEWAY"
echo ""

# ─── 1. Liveness ─────────────────────────────────────────────────────
# The command is single-quoted on purpose: check() evals it, so $GATEWAY is
# expanded at that point rather than here.
echo "▶ Liveness"
check "gateway /health" "200" \
  'curl_with_status -m 5 "$GATEWAY/health"'

# ─── 2. Operational health ──────────────────────────────────────────
echo "▶ Operational state"
# A failed request falls back to an empty JSON object so the lookups below
# yield 0 instead of the script aborting under `set -e`.
HEALTH_RESP=$(curl -sS -m 10 "$GATEWAY/v1/health" 2>&1) || HEALTH_RESP="{}"
WORKERS_COUNT=$(echo "$HEALTH_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('workers_count',0))" 2>/dev/null || echo 0)
PROVIDERS_OK=$(echo "$HEALTH_RESP" | python3 -c "import sys,json; d=json.load(sys.stdin).get('providers_configured',{}); print(sum(1 for v in d.values() if v))" 2>/dev/null || echo 0)
echo " workers_count: $WORKERS_COUNT"
echo " providers_configured (count): $PROVIDERS_OK"

# Fail closed: anything that is not a plain non-negative integer (e.g. "None"
# from a JSON null) counts as 0. Without this, `[ ... -lt 1 ]` errors out with
# status 2 and the else branch would record a false PASS.
case "$WORKERS_COUNT" in
  '' | *[!0-9]*) WORKERS_COUNT=0 ;;
esac

if [ "$WORKERS_COUNT" -lt 1 ]; then
  FAIL=$((FAIL + 1))
  FAILURES+=("workers_count=0 — parquet load failed or empty")
  echo " ✗ workers not loaded"
  if [ "$FAIL_FAST" = "1" ]; then
    print_summary
    exit 1
  fi
else
  PASS=$((PASS + 1))
  echo " ✓ workers loaded"
fi
# ─── 3. Truth Layer ──────────────────────────────────────────────────
# Every check in sections 3-7 hands check() a single-quoted command string.
# check() runs it through eval, so "$GATEWAY" and the backslash-escaped JSON
# payloads are only expanded/unescaped at that point. check() compares the
# HTTP status only; response bodies are not inspected.
echo "▶ Truth Layer"
# GET /v1/context must answer 200 within 10 s (curl -m 10).
check "/v1/context returns rules" "200" \
'curl_with_status -m 10 "$GATEWAY/v1/context"'
# ─── 4. /v1/chat (provider=ollama) ──────────────────────────────────
echo "▶ /v1/chat (provider=ollama, fast model)"
# Minimal chat request (30 tokens max, temperature 0); allowed up to 60 s.
check "/v1/chat ping" "200" \
'curl_with_status -m 60 -X POST "$GATEWAY/v1/chat" \
-H "content-type: application/json" \
-d "{\"provider\":\"ollama\",\"model\":\"qwen3.5:latest\",\"messages\":[{\"role\":\"user\",\"content\":\"reply: PONG\"}],\"max_tokens\":30,\"temperature\":0,\"think\":false}"'
# ─── 5. /v1/validate (negative + positive) ──────────────────────────
echo "▶ /v1/validate"
# Negative case: a fill naming candidate_id W-FAKE-0 is expected to be
# rejected with 422.
check "phantom candidate_id → 422 Consistency" "422" \
'curl_with_status -m 10 -X POST "$GATEWAY/v1/validate" \
-H "content-type: application/json" \
-d "{\"kind\":\"fill\",\"artifact\":{\"fills\":[{\"candidate_id\":\"W-FAKE-0\",\"name\":\"Fake\"}]},\"context\":{\"target_count\":1}}"'
# Positive case: same payload shape with candidate_id W-1 is expected to be
# accepted with 200. NOTE(review): presumably W-1 must exist in the loaded
# worker data for this to pass — confirm against the dataset in use.
check "real worker (W-1) → 200 OK" "200" \
'curl_with_status -m 10 -X POST "$GATEWAY/v1/validate" \
-H "content-type: application/json" \
-d "{\"kind\":\"fill\",\"artifact\":{\"fills\":[{\"candidate_id\":\"W-1\",\"name\":\"Anyone\"}]},\"context\":{\"target_count\":1}}"'
# Negative case: an email artifact whose body contains an SSN-shaped string
# (123-45-6789) is expected to be rejected with 422.
check "SSN in body → 422 Policy" "422" \
'curl_with_status -m 10 -X POST "$GATEWAY/v1/validate" \
-H "content-type: application/json" \
-d "{\"kind\":\"email\",\"artifact\":{\"to\":\"a@b.com\",\"body\":\"Your SSN 123-45-6789 is on file.\"}}"'
# ─── 6. /v1/iterate (bounded retry loop) ───────────────────────────
# Phantom worker → expect 422 IterateFailure with history (not 200)
# The system prompt instructs the model to emit a fill for W-99999999 and
# max_iterations is 1; the request is allowed up to 240 s. The triple
# backslashes are needed because the inner JSON is nested inside a JSON
# string that is itself inside a double-quoted shell argument.
echo "▶ /v1/iterate (bounded retry)"
check "/v1/iterate phantom → bounded fail" "422" \
'curl_with_status -m 240 -X POST "$GATEWAY/v1/iterate" \
-H "content-type: application/json" \
-d "{\"kind\":\"fill\",\"provider\":\"ollama\",\"model\":\"qwen3.5:latest\",\"system\":\"Reply with ONLY: {\\\"fills\\\":[{\\\"candidate_id\\\":\\\"W-99999999\\\",\\\"name\\\":\\\"X\\\"}]}\",\"prompt\":\"emit it\",\"context\":{\"target_count\":1},\"max_iterations\":1,\"max_tokens\":200,\"temperature\":0}"'
# ─── 7. Doc-drift batch ─────────────────────────────────────────────
echo "▶ Doc-drift scan"
# POST with no request body; must answer 200 within 60 s.
check "/vectors/playbook_memory/doc_drift/scan" "200" \
'curl_with_status -m 60 -X POST "$GATEWAY/vectors/playbook_memory/doc_drift/scan"'
# ─── 8. Usage tracking ──────────────────────────────────────────────
echo "▶ Usage tracking"
# Fall back to an empty JSON object on a transport failure. Without the
# fallback, `set -e` would abort here and the summary would never be printed.
USAGE=$(curl -sS -m 10 "$GATEWAY/v1/usage" 2>&1) || USAGE="{}"
USAGE_REQS=$(echo "$USAGE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('requests',0))" 2>/dev/null || echo 0)
echo " usage.requests: $USAGE_REQS (should be > 0 if /v1/chat fired)"

# Treat anything that is not a plain non-negative integer as 0 so the numeric
# comparison below cannot error out on an unexpected value.
case "$USAGE_REQS" in
  '' | *[!0-9]*) USAGE_REQS=0 ;;
esac

if [ "$USAGE_REQS" -ge 1 ]; then
  PASS=$((PASS + 1))
  echo " ✓ /v1/usage tracking"
else
  FAIL=$((FAIL + 1))
  FAILURES+=("/v1/usage didn't increment after /v1/chat call")
  echo " ✗ /v1/usage didn't increment"
fi

# Last check: always print the summary, then exit 0 only if nothing failed.
print_summary
if [ "$FAIL" -eq 0 ]; then
  exit 0
fi
exit 1