new cmd/chatd on :3220 routes /v1/chat to the right provider based
on model-name prefix or :cloud suffix. closes the architectural gap
named in lakehouse.toml [models]: tiers map to model IDs, but until
phase 4 there was no service that could actually CALL those models
from go.
routing rules (registry.Resolve):
ollama/<m> → local Ollama (prefix stripped)
ollama_cloud/<m> → Ollama Cloud
<m>:cloud → Ollama Cloud (suffix variant — kimi-k2.6:cloud)
openrouter/<v>/<m> → OpenRouter (prefix stripped, OpenAI-compat)
opencode/<m> → OpenCode unified Zen+Go
kimi/<m> → Kimi For Coding (api.kimi.com/coding/v1)
bare names → local Ollama (default)
provider implementations:
- internal/chat/types.go Provider interface, Request/Response, errors
- internal/chat/registry.go prefix + :cloud suffix dispatch
- internal/chat/ollama.go local Ollama via /api/chat (think=false default)
- internal/chat/ollama_cloud.go Ollama Cloud via /api/generate (Bearer auth)
- internal/chat/openai_compat.go shared OpenAI Chat Completions for the
OpenRouter/OpenCode/Kimi family
- internal/chat/builder.go BuildRegistry from BuilderInput;
ResolveKey reads env then .env file fallback
config:
- ChatdConfig in internal/shared/config.go with bind, ollama_url,
per-provider key env names + .env fallback paths, timeout
- Gateway gains chatd_url + /v1/chat + /v1/chat/* routes
- lakehouse.toml [chatd] block with /etc/lakehouse/<provider>.env defaults
tests (19 in internal/chat):
- registry: prefix + :cloud + errors + telemetry + provider listing
- ollama: happy path + prefix strip + format=json + 500 mapping +
flatten_messages
- openai_compat: happy path + format=json + 429 mapping + zero-choices
think=false is the default in ollama + ollama_cloud: the local hot path skips
reasoning, so low-budget callers (the playbook_lift judge at max_tokens=10)
get direct answers instead of empty content + done_reason=length.
This is proven via the chatd_smoke acceptance gate.
acceptance gate: scripts/chatd_smoke.sh — 6/6 PASS:
1. /v1/chat/providers lists exactly registered providers (1 in dev mode)
2. bare model → ollama default with content + token counts + latency
3. explicit ollama/<m> → prefix stripped at upstream
4. <m>:cloud without ollama_cloud registered → 404 (no silent fall-through)
5. unknown/<m> → falls through to default → upstream 502 (no prefix rewrite)
6. missing model field → 400
just verify: PASS (vet + 30 packages × short tests + 9 smokes).
chatd_smoke is a domain smoke (not in just verify, mirrors matrix /
observer / pathway pattern).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
176 lines
7.0 KiB
Bash
Executable File
176 lines
7.0 KiB
Bash
Executable File
#!/usr/bin/env bash
# chatd smoke — Phase 4 acceptance gate.
#
# Validates:
#   - chatd boots and reports providers via GET /v1/chat/providers
#   - bare model name routes to local Ollama (default provider)
#   - explicit "ollama/<m>" prefix also routes to local Ollama
#   - :cloud suffix without ollama_cloud key registered → 404
#   - unknown/foo prefix falls through to Ollama default (upstream then 502s)
#   - POST /v1/chat returns provider-stamped response with token counts
#   - POST /v1/chat without a model field → 400
#
# Requires: Ollama on :11434 with qwen3.5:latest (or any chat-capable
# model — override via SMOKE_MODEL env). Skips (exit 0) if Ollama is
# absent so this can run on CI boxes without local Ollama.
# Also uses curl, jq and the Go toolchain.
#
# Usage: ./scripts/chatd_smoke.sh

# Strict mode: abort on command errors, unset variables, and failures in any
# pipeline stage.
set -euo pipefail
# Work from the parent of the directory that holds this script.
cd "$(dirname "$0")/.."

# Append the default Go install location so `go build` below can be found.
export PATH="$PATH:/usr/local/go/bin"

# Model used for the live calls; override via the SMOKE_MODEL env var.
SMOKE_MODEL="${SMOKE_MODEL:-qwen3.5:latest}"

# Skip (exit 0) instead of failing when the local Ollama endpoint is absent.
if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
  echo "[chatd-smoke] Ollama not reachable on :11434 — skipping"
  exit 0
fi

# Skip as well when the requested model is missing from Ollama's tag list.
# --max-time bounds this second request too, so a hung Ollama cannot stall
# the smoke indefinitely.
if ! curl -sS --max-time 3 http://localhost:11434/api/tags \
  | jq -e --arg m "$SMOKE_MODEL" '.models[] | select(.name == $m)' >/dev/null 2>&1; then
  echo "[chatd-smoke] $SMOKE_MODEL not loaded in Ollama — skipping"
  exit 0
fi

echo "[chatd-smoke] building chatd + gateway..."
go build -o bin/ ./cmd/chatd ./cmd/gateway

# Best-effort: stop any chatd/gateway binaries that are already running
# before fresh ones are launched; "no match" is not an error.
pkill -f "bin/(chatd|gateway)" 2>/dev/null || true
sleep 0.3

PIDS=()
|
|
TMP="$(mktemp -d)"
|
|
CFG="$TMP/chatd.toml"
|
|
|
|
cleanup() {
|
|
echo "[chatd-smoke] cleanup"
|
|
for p in "${PIDS[@]:-}"; do [ -n "${p:-}" ] && kill "$p" 2>/dev/null || true; done
|
|
rm -rf "$TMP"
|
|
}
|
|
trap cleanup EXIT INT TERM
|
|
|
|
# Test config: chatd talks only to local Ollama (no cloud keys).
# This proves the bare-name + prefix-routing + :cloud-without-cloud
# behaviors without needing API keys in CI.
# The heredoc delimiter is quoted so the TOML is written out literally,
# with no shell expansion of its contents.
cat > "$CFG" <<'EOF'
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
observerd_url = "http://127.0.0.1:3219"
chatd_url = "http://127.0.0.1:3220"

[chatd]
bind = "127.0.0.1:3220"
ollama_url = "http://localhost:11434"
# Empty *_key_env values disable each provider — only ollama registers.
ollama_cloud_key_env = "_DISABLED_NEVER_SET_OLLAMA_CLOUD"
openrouter_key_env = "_DISABLED_NEVER_SET_OPENROUTER"
opencode_key_env = "_DISABLED_NEVER_SET_OPENCODE"
kimi_key_env = "_DISABLED_NEVER_SET_KIMI"
ollama_cloud_key_file = ""
openrouter_key_file = ""
opencode_key_file = ""
kimi_key_file = ""
timeout_secs = 60
EOF

#######################################
# Poll http://127.0.0.1:<port>/health until it answers or a deadline passes.
# Arguments: $1 - TCP port to probe
#            $2 - optional timeout in whole seconds (default: 5)
# Returns:   0 as soon as one probe succeeds, 1 once the deadline is reached
#######################################
poll_health() {
  local port="$1"
  local timeout_secs="${2:-5}"
  local deadline
  # Assigned separately from `local` so a failing `date` is not masked by
  # local's own exit status.
  deadline=$(($(date +%s) + timeout_secs))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    # Each probe is capped at one second; output and errors are discarded
    # because only the exit status matters here.
    if curl -sS --max-time 1 "http://127.0.0.1:${port}/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.05
  done
  return 1
}

echo "[chatd-smoke] launching chatd → gateway..."
# Start chatd in the background and wait for its /health endpoint before
# starting the gateway. Each PID is recorded so cleanup() can kill it.
# NOTE(review): logs go to fixed /tmp paths, so they outlive cleanup() but
# concurrent runs would share them — confirm that is intended.
./bin/chatd -config "$CFG" > /tmp/chatd.log 2>&1 &
PIDS+=("$!")
poll_health 3220 || { echo "chatd failed"; tail /tmp/chatd.log; exit 1; }

./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
PIDS+=("$!")
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }

# 1. providers listing — only ollama registered when other keys absent
echo "[chatd-smoke] /v1/chat/providers — only ollama registered:"
RESP="$(curl -sS http://127.0.0.1:3110/v1/chat/providers)"
# Here-strings feed the body to jq without going through echo, which could
# misread a body that begins with a dash.
PROVIDERS_COUNT="$(jq -r '.providers | length' <<<"$RESP")"
OLLAMA_AVAIL="$(jq -r '.providers.ollama' <<<"$RESP")"
if [[ "$PROVIDERS_COUNT" != "1" || "$OLLAMA_AVAIL" != "true" ]]; then
  echo " ✗ wanted only ollama=true; got $RESP"
  exit 1
fi
echo " ✓ exactly 1 provider (ollama, available=true)"

# 2. bare model name → ollama default
echo "[chatd-smoke] POST /v1/chat with bare model name:"
# Build the payload with jq so SMOKE_MODEL is JSON-escaped instead of being
# spliced into a hand-written string.
PAYLOAD="$(jq -cn --arg model "$SMOKE_MODEL" \
  '{model: $model, messages: [{role: "user", content: "reply with the word ok and nothing else"}], max_tokens: 10}')"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/chat \
  -H 'Content-Type: application/json' \
  -d "$PAYLOAD")"
# `// empty` turns a missing/null field into an empty string. Plain
# `jq -r .content` would print the literal text "null", which the -z test
# below would accept as content.
PROV="$(jq -r '.provider // empty' <<<"$RESP")"
CONTENT="$(jq -r '.content // empty' <<<"$RESP")"
LATENCY="$(jq -r '.latency_ms // empty' <<<"$RESP")"
# Latency must be a non-negative number. The previous integer comparison
# errored on non-numeric values with stderr discarded, and that error was
# treated as "check passed".
NUM_RE='^[0-9]+(\.[0-9]+)?$'
if [[ "$PROV" != "ollama" || -z "$CONTENT" || ! "$LATENCY" =~ $NUM_RE ]]; then
  echo " ✗ expected provider=ollama, non-empty content, positive latency; got $RESP"
  exit 1
fi
echo " ✓ provider=ollama, latency=${LATENCY}ms, content=$(echo "$CONTENT" | head -c 60 | tr -d '\n')…"

# 3. explicit ollama/ prefix (prefix stripped before upstream call)
echo "[chatd-smoke] POST /v1/chat with explicit ollama/ prefix:"
# jq builds the payload so the model name is JSON-escaped.
PAYLOAD="$(jq -cn --arg model "ollama/$SMOKE_MODEL" \
  '{model: $model, messages: [{role: "user", content: "reply ok"}], max_tokens: 5}')"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/chat \
  -H 'Content-Type: application/json' \
  -d "$PAYLOAD")"
PROV="$(jq -r '.provider' <<<"$RESP")"
MODEL="$(jq -r '.model' <<<"$RESP")"
# The response must name the ollama provider and echo the model without
# the "ollama/" prefix.
if [[ "$PROV" != "ollama" || "$MODEL" != "$SMOKE_MODEL" ]]; then
  echo " ✗ expected provider=ollama, model=$SMOKE_MODEL (prefix stripped); got prov=$PROV model=$MODEL"
  exit 1
fi
echo " ✓ ollama/$SMOKE_MODEL → provider=ollama, model=$SMOKE_MODEL (prefix stripped)"

# 4. :cloud suffix without ollama_cloud registered → 404
echo "[chatd-smoke] POST /v1/chat with :cloud suffix (no cloud provider):"
# Keep the response body inside the mktemp scratch dir instead of a fixed,
# predictable /tmp path; cleanup() removes it with the rest of $TMP.
BODY_FILE="$TMP/cloud404.json"
# -w prints only the HTTP status code; the body goes to BODY_FILE so it can
# be shown if the status is wrong.
STATUS="$(curl -sS -o "$BODY_FILE" -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/chat \
  -H 'Content-Type: application/json' \
  -d '{"model":"kimi-k2.6:cloud","messages":[{"role":"user","content":"hi"}]}')"
if [[ "$STATUS" != "404" ]]; then
  echo " ✗ expected 404; got $STATUS body=$(cat "$BODY_FILE")"
  exit 1
fi
echo " ✓ kimi-k2.6:cloud → 404 (ollama_cloud not registered, no silent fall-through to local)"

# 5. unknown prefix falls through to ollama default; upstream 502s
# because the ollama provider doesn't strip unknown prefixes
# (it would silently rewrite operator-supplied names). Expected
# behavior: route to default provider, let upstream reject the
# literal model name.
echo "[chatd-smoke] POST /v1/chat with unknown/ prefix (falls through, upstream 502s):"
# jq builds the payload so the model name is JSON-escaped.
PAYLOAD="$(jq -cn --arg model "unknown/$SMOKE_MODEL" \
  '{model: $model, messages: [{role: "user", content: "hi"}], max_tokens: 5}')"
# Only the HTTP status matters here; the body is discarded.
STATUS="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/chat \
  -H 'Content-Type: application/json' \
  -d "$PAYLOAD")"
if [[ "$STATUS" != "502" ]]; then
  echo " ✗ expected 502 (upstream rejects literal 'unknown/...'); got $STATUS"
  exit 1
fi
echo " ✓ unknown/<model> → ollama default → upstream 502 (no silent prefix-strip)"

# 6. missing model field → 400
echo "[chatd-smoke] POST /v1/chat with missing model field:"
# The payload deliberately omits "model"; only the HTTP status is inspected.
STATUS="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/chat \
  -H 'Content-Type: application/json' \
  -d '{"messages":[{"role":"user","content":"hi"}]}')"
if [[ "$STATUS" != "400" ]]; then
  echo " ✗ expected 400 for missing model; got $STATUS"
  exit 1
fi
echo " ✓ missing model → 400"

# Reached only when every check above passed (each failure exits 1).
echo "[chatd-smoke] chatd acceptance gate: PASSED (6/6)"