new cmd/chatd on :3220 routes /v1/chat to the right provider based
on model-name prefix or :cloud suffix. closes the architectural gap
named in lakehouse.toml [models]: tiers map to model IDs, but until
phase 4 there was no service that could actually CALL those models
from go.
routing rules (registry.Resolve):
ollama/<m> → local Ollama (prefix stripped)
ollama_cloud/<m> → Ollama Cloud
<m>:cloud → Ollama Cloud (suffix variant — kimi-k2.6:cloud)
openrouter/<v>/<m> → OpenRouter (prefix stripped, OpenAI-compat)
opencode/<m> → OpenCode unified Zen+Go
kimi/<m> → Kimi For Coding (api.kimi.com/coding/v1)
bare names → local Ollama (default)
provider implementations:
- internal/chat/types.go Provider interface, Request/Response, errors
- internal/chat/registry.go prefix + :cloud suffix dispatch
- internal/chat/ollama.go local Ollama via /api/chat (think=false default)
- internal/chat/ollama_cloud.go Ollama Cloud via /api/generate (Bearer auth)
- internal/chat/openai_compat.go shared OpenAI Chat Completions for the
OpenRouter/OpenCode/Kimi family
- internal/chat/builder.go BuildRegistry from BuilderInput;
ResolveKey reads env then .env file fallback
config:
- ChatdConfig in internal/shared/config.go with bind, ollama_url,
per-provider key env names + .env fallback paths, timeout
- Gateway gains chatd_url + /v1/chat + /v1/chat/* routes
- lakehouse.toml [chatd] block with /etc/lakehouse/<provider>.env defaults
tests (19 in internal/chat):
- registry: prefix + :cloud + errors + telemetry + provider listing
- ollama: happy path + prefix strip + format=json + 500 mapping +
flatten_messages
- openai_compat: happy path + format=json + 429 mapping + zero-choices
think=false default in ollama + ollama_cloud — local hot path skips
reasoning, low-budget callers (the playbook_lift judge at max_tokens=10)
get direct answers instead of empty content + done_reason=length.
proven via chatd_smoke acceptance.
acceptance gate: scripts/chatd_smoke.sh — 6/6 PASS:
1. /v1/chat/providers lists exactly registered providers (1 in dev mode)
2. bare model → ollama default with content + token counts + latency
3. explicit ollama/<m> → prefix stripped at upstream
4. <m>:cloud without ollama_cloud registered → 404 (no silent fall-through)
5. unknown/<m> → falls through to default → upstream 502 (no prefix rewrite)
6. missing model field → 400
just verify: PASS (vet + 30 packages × short tests + 9 smokes).
chatd_smoke is a domain smoke (not in just verify, mirrors matrix /
observer / pathway pattern).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
132 lines · 4.5 KiB · Go
package chat
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// openaiServer returns an httptest server that mocks an OpenAI Chat
|
|
// Completions endpoint. Handler captures the last request body for
|
|
// assertion.
|
|
func openaiServer(t *testing.T, status int, respBody string) (*httptest.Server, *string) {
|
|
t.Helper()
|
|
captured := ""
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
bs, _ := io.ReadAll(r.Body)
|
|
captured = string(bs)
|
|
// Ensure Authorization header was set (Bearer prefix).
|
|
if auth := r.Header.Get("Authorization"); !strings.HasPrefix(auth, "Bearer ") {
|
|
t.Errorf("missing Bearer auth header; got %q", auth)
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(status)
|
|
_, _ = w.Write([]byte(respBody))
|
|
}))
|
|
t.Cleanup(srv.Close)
|
|
return srv, &captured
|
|
}
|
|
|
|
func TestOpenAICompat_HappyPath(t *testing.T) {
|
|
resp := `{
|
|
"model": "anthropic/claude-opus-4-7",
|
|
"choices": [{"message":{"content":"hello world"}, "finish_reason":"stop"}],
|
|
"usage": {"prompt_tokens": 10, "completion_tokens": 5}
|
|
}`
|
|
srv, captured := openaiServer(t, 200, resp)
|
|
|
|
p := newOpenAICompat("openrouter", srv.URL+"/v1", "test-key", "openrouter", 5*time.Second)
|
|
out, err := p.Chat(context.Background(), Request{
|
|
Model: "openrouter/anthropic/claude-opus-4-7",
|
|
Messages: []Message{{Role: "user", Content: "hi"}},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("Chat: %v", err)
|
|
}
|
|
if out.Content != "hello world" {
|
|
t.Errorf("Content = %q, want hello world", out.Content)
|
|
}
|
|
// Provider strips its prefix before sending to upstream.
|
|
var sent map[string]any
|
|
if err := json.Unmarshal([]byte(*captured), &sent); err != nil {
|
|
t.Fatalf("parse captured: %v", err)
|
|
}
|
|
if sent["model"] != "anthropic/claude-opus-4-7" {
|
|
t.Errorf("upstream got model = %v, want anthropic/claude-opus-4-7 (prefix stripped)", sent["model"])
|
|
}
|
|
// Token accounting carried through.
|
|
if out.InputTokens != 10 || out.OutputTokens != 5 {
|
|
t.Errorf("tokens = (%d, %d), want (10, 5)", out.InputTokens, out.OutputTokens)
|
|
}
|
|
if out.FinishReason != "stop" {
|
|
t.Errorf("FinishReason = %q, want stop", out.FinishReason)
|
|
}
|
|
}
|
|
|
|
func TestOpenAICompat_FormatJSON(t *testing.T) {
|
|
resp := `{"choices":[{"message":{"content":"{\"a\":1}"},"finish_reason":"stop"}],"usage":{}}`
|
|
srv, captured := openaiServer(t, 200, resp)
|
|
p := newOpenAICompat("opencode", srv.URL+"/zen/v1", "test-key", "opencode", 5*time.Second)
|
|
|
|
_, err := p.Chat(context.Background(), Request{
|
|
Model: "opencode/claude-opus-4-7",
|
|
Messages: []Message{{Role: "user", Content: "give me JSON"}},
|
|
Format: "json",
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("Chat: %v", err)
|
|
}
|
|
// Format=json should set response_format on the upstream call.
|
|
if !strings.Contains(*captured, `"response_format"`) || !strings.Contains(*captured, `"json_object"`) {
|
|
t.Errorf("Format=json should set response_format json_object; captured=%s", *captured)
|
|
}
|
|
}
|
|
|
|
func TestOpenAICompat_UpstreamError(t *testing.T) {
|
|
srv, _ := openaiServer(t, 429, `{"error":"rate limited"}`)
|
|
p := newOpenAICompat("openrouter", srv.URL+"/v1", "test-key", "openrouter", 5*time.Second)
|
|
_, err := p.Chat(context.Background(), Request{Model: "openrouter/x"})
|
|
if !errors.Is(err, ErrUpstream) {
|
|
t.Errorf("429 should be ErrUpstream; got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOpenAICompat_ZeroChoices(t *testing.T) {
|
|
srv, _ := openaiServer(t, 200, `{"choices":[],"usage":{}}`)
|
|
p := newOpenAICompat("openrouter", srv.URL+"/v1", "test-key", "openrouter", 5*time.Second)
|
|
_, err := p.Chat(context.Background(), Request{Model: "openrouter/x"})
|
|
if !errors.Is(err, ErrUpstream) {
|
|
t.Errorf("zero-choices should ErrUpstream; got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOpenAICompat_EmptyKeyUnavailable(t *testing.T) {
|
|
p := newOpenAICompat("openrouter", "https://example.com", "", "openrouter", 5*time.Second)
|
|
if p.Available() {
|
|
t.Errorf("empty key should make provider unavailable")
|
|
}
|
|
}
|
|
|
|
func TestNewProviderConstructors(t *testing.T) {
|
|
// Smoke: each public constructor produces a working provider with
|
|
// the right name/prefix and respects empty-key=unavailable.
|
|
or := NewOpenRouter("test-key", 0)
|
|
if or.Name() != "openrouter" || !or.Available() {
|
|
t.Errorf("openrouter constructor wrong: %s avail=%v", or.Name(), or.Available())
|
|
}
|
|
oc := NewOpenCode("test-key", 0)
|
|
if oc.Name() != "opencode" || !oc.Available() {
|
|
t.Errorf("opencode constructor wrong: %s avail=%v", oc.Name(), oc.Available())
|
|
}
|
|
km := NewKimi("", 0) // empty key → unavailable
|
|
if km.Available() {
|
|
t.Errorf("kimi with empty key should be unavailable")
|
|
}
|
|
}
|