New cmd/chatd on :3220 routes /v1/chat to the right provider based
on the model-name prefix or a :cloud suffix. This closes the architectural
gap named in lakehouse.toml [models]: tiers map to model IDs, but until
phase 4 there was no service that could actually CALL those models
from Go.
routing rules (registry.Resolve):
ollama/<m> → local Ollama (prefix stripped)
ollama_cloud/<m> → Ollama Cloud
<m>:cloud → Ollama Cloud (suffix variant — kimi-k2.6:cloud)
openrouter/<v>/<m> → OpenRouter (prefix stripped, OpenAI-compat)
opencode/<m> → OpenCode unified Zen+Go
kimi/<m> → Kimi For Coding (api.kimi.com/coding/v1)
bare names → local Ollama (default)
provider implementations:
- internal/chat/types.go Provider interface, Request/Response, errors
- internal/chat/registry.go prefix + :cloud suffix dispatch
- internal/chat/ollama.go local Ollama via /api/chat (think=false default)
- internal/chat/ollama_cloud.go Ollama Cloud via /api/generate (Bearer auth)
- internal/chat/openai_compat.go shared OpenAI Chat Completions for the
OpenRouter/OpenCode/Kimi family
- internal/chat/builder.go BuildRegistry from BuilderInput;
ResolveKey reads env then .env file fallback
config:
- ChatdConfig in internal/shared/config.go with bind, ollama_url,
per-provider key env names + .env fallback paths, timeout
- Gateway gains chatd_url + /v1/chat + /v1/chat/* routes
- lakehouse.toml [chatd] block with /etc/lakehouse/<provider>.env defaults
tests (19 in internal/chat):
- registry: prefix + :cloud + errors + telemetry + provider listing
- ollama: happy path + prefix strip + format=json + 500 mapping +
flatten_messages
- openai_compat: happy path + format=json + 429 mapping + zero-choices
think=false is the default in ollama + ollama_cloud — the local hot path
skips reasoning, so low-budget callers (the playbook_lift judge at
max_tokens=10) get direct answers instead of empty content +
done_reason=length. Proven via the chatd_smoke acceptance gate.
acceptance gate: scripts/chatd_smoke.sh — 6/6 PASS:
1. /v1/chat/providers lists exactly registered providers (1 in dev mode)
2. bare model → ollama default with content + token counts + latency
3. explicit ollama/<m> → prefix stripped at upstream
4. <m>:cloud without ollama_cloud registered → 404 (no silent fall-through)
5. unknown/<m> → falls through to default → upstream 502 (no prefix rewrite)
6. missing model field → 400
just verify: PASS (vet + 30 packages × short tests + 9 smokes).
chatd_smoke is a domain smoke (not in just verify, mirrors matrix /
observer / pathway pattern).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
169 lines
5.1 KiB
Go
169 lines
5.1 KiB
Go
package chat
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// fakeProvider is a minimal Provider for registry testing — no HTTP,
|
|
// just records what model name reached it.
|
|
type fakeProvider struct {
|
|
name string
|
|
available bool
|
|
got Request // last request
|
|
}
|
|
|
|
// Name returns the fake's name field.
func (f *fakeProvider) Name() string {
	return f.name
}
|
|
// Available returns the fake's available field.
func (f *fakeProvider) Available() bool {
	return f.available
}
|
|
func (f *fakeProvider) Chat(_ context.Context, req Request) (*Response, error) {
|
|
f.got = req
|
|
return &Response{Model: req.Model, Content: "ok from " + f.name}, nil
|
|
}
|
|
|
|
func newFake(name string, available bool) *fakeProvider {
|
|
return &fakeProvider{name: name, available: available}
|
|
}
|
|
|
|
func TestRegistry_ResolveByPrefix(t *testing.T) {
|
|
ollama := newFake("ollama", true)
|
|
openrouter := newFake("openrouter", true)
|
|
opencode := newFake("opencode", true)
|
|
kimi := newFake("kimi", true)
|
|
|
|
r := NewRegistry(ollama, openrouter, opencode, kimi)
|
|
|
|
cases := []struct {
|
|
model string
|
|
want string
|
|
}{
|
|
{"openrouter/anthropic/claude-opus-4-7", "openrouter"},
|
|
{"opencode/claude-opus-4-7", "opencode"},
|
|
{"kimi/kimi-for-coding", "kimi"},
|
|
{"ollama/qwen3.5:latest", "ollama"}, // explicit prefix
|
|
{"qwen3.5:latest", "ollama"}, // bare → default
|
|
{"unknown/foo/bar", "ollama"}, // unknown prefix → default
|
|
}
|
|
for _, c := range cases {
|
|
p, err := r.Resolve(c.model)
|
|
if err != nil {
|
|
t.Errorf("Resolve(%q): unexpected error: %v", c.model, err)
|
|
continue
|
|
}
|
|
if p.Name() != c.want {
|
|
t.Errorf("Resolve(%q) = %s, want %s", c.model, p.Name(), c.want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestRegistry_ResolveCloudSuffix(t *testing.T) {
|
|
ollama := newFake("ollama", true)
|
|
cloud := newFake("ollama_cloud", true)
|
|
r := NewRegistry(ollama, cloud)
|
|
|
|
// :cloud suffix routes to ollama_cloud regardless of any prefix.
|
|
p, err := r.Resolve("kimi-k2.6:cloud")
|
|
if err != nil {
|
|
t.Fatalf("Resolve kimi-k2.6:cloud: %v", err)
|
|
}
|
|
if p.Name() != "ollama_cloud" {
|
|
t.Errorf("kimi-k2.6:cloud should route to ollama_cloud, got %s", p.Name())
|
|
}
|
|
|
|
// Without ollama_cloud registered, :cloud → ErrProviderNotFound
|
|
// (don't silently fall through to local).
|
|
rNoCloud := NewRegistry(ollama)
|
|
if _, err := rNoCloud.Resolve("kimi-k2.6:cloud"); !errors.Is(err, ErrProviderNotFound) {
|
|
t.Errorf("missing ollama_cloud should ErrProviderNotFound; got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestRegistry_ResolveErrors(t *testing.T) {
|
|
r := NewRegistry()
|
|
|
|
// Empty model
|
|
if _, err := r.Resolve(""); !errors.Is(err, ErrProviderNotFound) {
|
|
t.Errorf("empty model should ErrProviderNotFound; got %v", err)
|
|
}
|
|
|
|
// No providers registered, any model → 404
|
|
if _, err := r.Resolve("openrouter/foo"); !errors.Is(err, ErrProviderNotFound) {
|
|
t.Errorf("unregistered openrouter should 404; got %v", err)
|
|
}
|
|
if _, err := r.Resolve("bare-model"); !errors.Is(err, ErrProviderNotFound) {
|
|
t.Errorf("bare with no default should 404; got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestRegistry_ChatStampsTelemetry(t *testing.T) {
|
|
ollama := newFake("ollama", true)
|
|
r := NewRegistry(ollama)
|
|
|
|
resp, err := r.Chat(context.Background(), Request{Model: "qwen3.5:latest", Messages: []Message{{Role: "user", Content: "hi"}}})
|
|
if err != nil {
|
|
t.Fatalf("Chat: %v", err)
|
|
}
|
|
if resp.Provider != "ollama" {
|
|
t.Errorf("Provider should be stamped to %q, got %q", "ollama", resp.Provider)
|
|
}
|
|
if resp.LatencyMs < 0 {
|
|
t.Errorf("LatencyMs negative: %d", resp.LatencyMs)
|
|
}
|
|
}
|
|
|
|
func TestRegistry_ChatProviderUnavailable(t *testing.T) {
|
|
openrouter := newFake("openrouter", false) // no key
|
|
r := NewRegistry(openrouter)
|
|
|
|
_, err := r.Chat(context.Background(), Request{Model: "openrouter/foo"})
|
|
if !errors.Is(err, ErrProviderDisabled) {
|
|
t.Errorf("unavailable provider should ErrProviderDisabled; got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestStripPrefix(t *testing.T) {
|
|
cases := []struct {
|
|
model, prefix, want string
|
|
}{
|
|
{"openrouter/anthropic/claude", "openrouter", "anthropic/claude"},
|
|
{"opencode/claude-opus-4-7", "opencode", "claude-opus-4-7"},
|
|
{"qwen3.5:latest", "ollama", "qwen3.5:latest"}, // no prefix to strip
|
|
{"ollama/qwen3.5:latest", "ollama", "qwen3.5:latest"}, // explicit ollama prefix
|
|
{"kimi-k2.6:cloud", "cloud", "kimi-k2.6:cloud"}, // suffix doesn't trigger strip
|
|
}
|
|
for _, c := range cases {
|
|
if got := StripPrefix(c.model, c.prefix); got != c.want {
|
|
t.Errorf("StripPrefix(%q, %q) = %q, want %q", c.model, c.prefix, got, c.want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestRegistry_Names(t *testing.T) {
|
|
r := NewRegistry(
|
|
newFake("zz", true),
|
|
newFake("aa", true),
|
|
newFake("mm", true),
|
|
)
|
|
names := r.Names()
|
|
if len(names) != 3 || names[0] != "aa" || names[1] != "mm" || names[2] != "zz" {
|
|
t.Errorf("Names() = %v, want sorted [aa mm zz]", names)
|
|
}
|
|
}
|
|
|
|
// Time-stamp sanity — the dispatcher should never produce LatencyMs
|
|
// in the past.
|
|
func TestRegistry_LatencyMonotonic(t *testing.T) {
|
|
ollama := newFake("ollama", true)
|
|
r := NewRegistry(ollama)
|
|
t0 := time.Now()
|
|
resp, err := r.Chat(context.Background(), Request{Model: "qwen3.5:latest"})
|
|
if err != nil {
|
|
t.Fatalf("Chat: %v", err)
|
|
}
|
|
elapsed := time.Since(t0).Milliseconds()
|
|
if resp.LatencyMs > elapsed+1 {
|
|
t.Errorf("LatencyMs %d > elapsed %d (impossible)", resp.LatencyMs, elapsed)
|
|
}
|
|
}
|