golangLAKEHOUSE/internal/chat/registry_test.go
root 05273ac06b phase 4: chatd — multi-provider LLM dispatcher (ollama / cloud / openrouter / opencode / kimi)
new cmd/chatd on :3220 routes /v1/chat to the right provider based
on model-name prefix or :cloud suffix. closes the architectural gap
named in lakehouse.toml [models]: tiers map to model IDs, but until
phase 4 there was no service that could actually CALL those models
from go.

routing rules (registry.Resolve):
  ollama/<m>          → local Ollama (prefix stripped)
  ollama_cloud/<m>    → Ollama Cloud
  <m>:cloud           → Ollama Cloud (suffix variant — kimi-k2.6:cloud)
  openrouter/<v>/<m>  → OpenRouter (prefix stripped, OpenAI-compat)
  opencode/<m>        → OpenCode unified Zen+Go
  kimi/<m>            → Kimi For Coding (api.kimi.com/coding/v1)
  bare names          → local Ollama (default)

provider implementations:
- internal/chat/types.go      Provider interface, Request/Response, errors
- internal/chat/registry.go   prefix + :cloud suffix dispatch
- internal/chat/ollama.go     local Ollama via /api/chat (think=false default)
- internal/chat/ollama_cloud.go  Ollama Cloud via /api/generate (Bearer auth)
- internal/chat/openai_compat.go shared OpenAI Chat Completions for the
                                 OpenRouter/OpenCode/Kimi family
- internal/chat/builder.go    BuildRegistry from BuilderInput;
                              ResolveKey reads env then .env file fallback

config:
- ChatdConfig in internal/shared/config.go with bind, ollama_url,
  per-provider key env names + .env fallback paths, timeout
- Gateway gains chatd_url + /v1/chat + /v1/chat/* routes
- lakehouse.toml [chatd] block with /etc/lakehouse/<provider>.env defaults

tests (19 in internal/chat):
- registry: prefix + :cloud + errors + telemetry + provider listing
- ollama: happy path + prefix strip + format=json + 500 mapping +
  flatten_messages
- openai_compat: happy path + format=json + 429 mapping + zero-choices

think=false is the default in ollama + ollama_cloud — the local hot path
skips reasoning, so low-budget callers (the playbook_lift judge at
max_tokens=10) get direct answers instead of empty content +
done_reason=length. proven via the chatd_smoke acceptance gate.

acceptance gate: scripts/chatd_smoke.sh — 6/6 PASS:
1. /v1/chat/providers lists exactly registered providers (1 in dev mode)
2. bare model → ollama default with content + token counts + latency
3. explicit ollama/<m> → prefix stripped at upstream
4. <m>:cloud without ollama_cloud registered → 404 (no silent fall-through)
5. unknown/<m> → falls through to default → upstream 502 (no prefix rewrite)
6. missing model field → 400

just verify: PASS (vet + 30 packages × short tests + 9 smokes).
chatd_smoke is a domain smoke (not in just verify, mirrors matrix /
observer / pathway pattern).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 00:08:29 -05:00

169 lines
5.1 KiB
Go

package chat
import (
"context"
"errors"
"testing"
"time"
)
// fakeProvider is an in-memory Provider stand-in used by the registry
// tests. It performs no HTTP; Chat simply stores the request it was handed
// so a test can inspect what reached the provider.
type fakeProvider struct {
	name      string  // value reported by Name
	available bool    // value reported by Available
	got       Request // most recent request passed to Chat
}
// Name reports the provider name the fake was constructed with.
func (f *fakeProvider) Name() string {
	return f.name
}
// Available reports the availability flag the fake was constructed with.
func (f *fakeProvider) Available() bool {
	return f.available
}
// Chat stores req as the last request seen and returns a canned Response
// that echoes req.Model and names this provider in Content. The context
// is ignored and the returned error is always nil.
func (f *fakeProvider) Chat(_ context.Context, req Request) (*Response, error) {
	f.got = req
	reply := &Response{
		Model:   req.Model,
		Content: "ok from " + f.name,
	}
	return reply, nil
}
// newFake builds a fakeProvider with the given name and availability; its
// recorded request starts out as the zero value.
func newFake(name string, available bool) *fakeProvider {
	fp := new(fakeProvider)
	fp.name = name
	fp.available = available
	return fp
}
func TestRegistry_ResolveByPrefix(t *testing.T) {
ollama := newFake("ollama", true)
openrouter := newFake("openrouter", true)
opencode := newFake("opencode", true)
kimi := newFake("kimi", true)
r := NewRegistry(ollama, openrouter, opencode, kimi)
cases := []struct {
model string
want string
}{
{"openrouter/anthropic/claude-opus-4-7", "openrouter"},
{"opencode/claude-opus-4-7", "opencode"},
{"kimi/kimi-for-coding", "kimi"},
{"ollama/qwen3.5:latest", "ollama"}, // explicit prefix
{"qwen3.5:latest", "ollama"}, // bare → default
{"unknown/foo/bar", "ollama"}, // unknown prefix → default
}
for _, c := range cases {
p, err := r.Resolve(c.model)
if err != nil {
t.Errorf("Resolve(%q): unexpected error: %v", c.model, err)
continue
}
if p.Name() != c.want {
t.Errorf("Resolve(%q) = %s, want %s", c.model, p.Name(), c.want)
}
}
}
// TestRegistry_ResolveCloudSuffix checks the ":cloud" suffix rule: such a
// model resolves to the "ollama_cloud" provider when one is registered and
// yields ErrProviderNotFound when it is not.
func TestRegistry_ResolveCloudSuffix(t *testing.T) {
	const model = "kimi-k2.6:cloud"
	local := newFake("ollama", true)

	// With ollama_cloud registered, the :cloud suffix selects it even
	// though the model name carries no provider prefix.
	withCloud := NewRegistry(local, newFake("ollama_cloud", true))
	resolved, err := withCloud.Resolve(model)
	if err != nil {
		t.Fatalf("Resolve kimi-k2.6:cloud: %v", err)
	}
	if name := resolved.Name(); name != "ollama_cloud" {
		t.Errorf("kimi-k2.6:cloud should route to ollama_cloud, got %s", name)
	}

	// With only the local provider registered, the lookup must fail
	// rather than silently fall through to local.
	localOnly := NewRegistry(local)
	_, err = localOnly.Resolve(model)
	if !errors.Is(err, ErrProviderNotFound) {
		t.Errorf("missing ollama_cloud should ErrProviderNotFound; got %v", err)
	}
}
// TestRegistry_ResolveErrors checks that a registry with no providers
// returns ErrProviderNotFound for an empty model, a prefixed model, and a
// bare model alike.
func TestRegistry_ResolveErrors(t *testing.T) {
	reg := NewRegistry()

	checks := []struct {
		model  string // input to Resolve
		format string // failure message; takes the returned error
	}{
		// Empty model.
		{model: "", format: "empty model should ErrProviderNotFound; got %v"},
		// No providers registered, so any model is a 404.
		{model: "openrouter/foo", format: "unregistered openrouter should 404; got %v"},
		{model: "bare-model", format: "bare with no default should 404; got %v"},
	}

	for _, c := range checks {
		_, err := reg.Resolve(c.model)
		if errors.Is(err, ErrProviderNotFound) {
			continue
		}
		t.Errorf(c.format, err)
	}
}
func TestRegistry_ChatStampsTelemetry(t *testing.T) {
ollama := newFake("ollama", true)
r := NewRegistry(ollama)
resp, err := r.Chat(context.Background(), Request{Model: "qwen3.5:latest", Messages: []Message{{Role: "user", Content: "hi"}}})
if err != nil {
t.Fatalf("Chat: %v", err)
}
if resp.Provider != "ollama" {
t.Errorf("Provider should be stamped to %q, got %q", "ollama", resp.Provider)
}
if resp.LatencyMs < 0 {
t.Errorf("LatencyMs negative: %d", resp.LatencyMs)
}
}
// TestRegistry_ChatProviderUnavailable checks that Chat returns
// ErrProviderDisabled when the resolved provider reports Available() ==
// false.
func TestRegistry_ChatProviderUnavailable(t *testing.T) {
	// available=false stands in for a registered provider with no key.
	disabled := newFake("openrouter", false)
	reg := NewRegistry(disabled)

	req := Request{Model: "openrouter/foo"}
	if _, err := reg.Chat(context.Background(), req); !errors.Is(err, ErrProviderDisabled) {
		t.Errorf("unavailable provider should ErrProviderDisabled; got %v", err)
	}
}
// TestStripPrefix is a table test for StripPrefix: a leading "<prefix>/"
// is expected to be removed, and a model without that leading prefix is
// expected to come back unchanged.
func TestStripPrefix(t *testing.T) {
	type stripCase struct {
		model, prefix, want string
	}
	table := []stripCase{
		{model: "openrouter/anthropic/claude", prefix: "openrouter", want: "anthropic/claude"},
		{model: "opencode/claude-opus-4-7", prefix: "opencode", want: "claude-opus-4-7"},
		{model: "qwen3.5:latest", prefix: "ollama", want: "qwen3.5:latest"},        // no prefix to strip
		{model: "ollama/qwen3.5:latest", prefix: "ollama", want: "qwen3.5:latest"}, // explicit ollama prefix
		{model: "kimi-k2.6:cloud", prefix: "cloud", want: "kimi-k2.6:cloud"},       // suffix doesn't trigger strip
	}

	for _, tc := range table {
		got := StripPrefix(tc.model, tc.prefix)
		if got == tc.want {
			continue
		}
		t.Errorf("StripPrefix(%q, %q) = %q, want %q", tc.model, tc.prefix, got, tc.want)
	}
}
// TestRegistry_Names checks that Names returns every registered provider
// name in sorted order, regardless of registration order.
func TestRegistry_Names(t *testing.T) {
	// Registered deliberately out of order.
	reg := NewRegistry(
		newFake("zz", true),
		newFake("aa", true),
		newFake("mm", true),
	)

	got := reg.Names()
	// The length check comes first so the index expressions are only
	// evaluated on a three-element result.
	sorted := len(got) == 3 && got[0] == "aa" && got[1] == "mm" && got[2] == "zz"
	if !sorted {
		t.Errorf("Names() = %v, want sorted [aa mm zz]", got)
	}
}
// Time-stamp sanity — the dispatcher should never produce LatencyMs
// in the past.
func TestRegistry_LatencyMonotonic(t *testing.T) {
ollama := newFake("ollama", true)
r := NewRegistry(ollama)
t0 := time.Now()
resp, err := r.Chat(context.Background(), Request{Model: "qwen3.5:latest"})
if err != nil {
t.Fatalf("Chat: %v", err)
}
elapsed := time.Since(t0).Milliseconds()
if resp.LatencyMs > elapsed+1 {
t.Errorf("LatencyMs %d > elapsed %d (impossible)", resp.LatencyMs, elapsed)
}
}