root 05273ac06b phase 4: chatd — multi-provider LLM dispatcher (ollama / cloud / openrouter / opencode / kimi)
new cmd/chatd on :3220 routes /v1/chat to the right provider based
on model-name prefix or :cloud suffix. closes the architectural gap
named in lakehouse.toml [models]: tiers map to model IDs, but until
phase 4 there was no service that could actually CALL those models
from Go.

routing rules (registry.Resolve):
  ollama/<m>          → local Ollama (prefix stripped)
  ollama_cloud/<m>    → Ollama Cloud
  <m>:cloud           → Ollama Cloud (suffix variant — kimi-k2.6:cloud)
  openrouter/<v>/<m>  → OpenRouter (prefix stripped, OpenAI-compat)
  opencode/<m>        → OpenCode unified Zen+Go
  kimi/<m>            → Kimi For Coding (api.kimi.com/coding/v1)
  bare names          → local Ollama (default)

provider implementations:
- internal/chat/types.go      Provider interface, Request/Response, errors
- internal/chat/registry.go   prefix + :cloud suffix dispatch
- internal/chat/ollama.go     local Ollama via /api/chat (think=false default)
- internal/chat/ollama_cloud.go  Ollama Cloud via /api/generate (Bearer auth)
- internal/chat/openai_compat.go shared OpenAI Chat Completions for the
                                 OpenRouter/OpenCode/Kimi family
- internal/chat/builder.go    BuildRegistry from BuilderInput;
                              ResolveKey reads env then .env file fallback

config:
- ChatdConfig in internal/shared/config.go with bind, ollama_url,
  per-provider key env names + .env fallback paths, timeout
- Gateway gains chatd_url + /v1/chat + /v1/chat/* routes
- lakehouse.toml [chatd] block with /etc/lakehouse/<provider>.env defaults

tests (19 in internal/chat):
- registry: prefix + :cloud + errors + telemetry + provider listing
- ollama: happy path + prefix strip + format=json + 500 mapping +
  flatten_messages
- openai_compat: happy path + format=json + 429 mapping + zero-choices

think=false default in ollama + ollama_cloud — local hot path skips
reasoning, low-budget callers (the playbook_lift judge at max_tokens=10)
get direct answers instead of empty content + done_reason=length.
proven via chatd_smoke acceptance.

acceptance gate: scripts/chatd_smoke.sh — 6/6 PASS:
1. /v1/chat/providers lists exactly registered providers (1 in dev mode)
2. bare model → ollama default with content + token counts + latency
3. explicit ollama/<m> → prefix stripped at upstream
4. <m>:cloud without ollama_cloud registered → 404 (no silent fall-through)
5. unknown/<m> → falls through to default → upstream 502 (no prefix rewrite)
6. missing model field → 400

just verify: PASS (vet + 30 packages × short tests + 9 smokes).
chatd_smoke is a domain smoke (not in just verify, mirrors matrix /
observer / pathway pattern).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 00:08:29 -05:00

129 lines
3.7 KiB
Go

package chat
import (
"context"
"encoding/json"
"errors"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
// ollamaServer spins up a fake Ollama endpoint serving /api/tags and
// /api/chat. It returns the server plus a pointer to the most recently
// captured /api/chat request body (read it only after the call returns).
func ollamaServer(t *testing.T, status int, respBody string) (*httptest.Server, *string) {
	t.Helper()
	lastBody := ""
	handler := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/api/tags" {
			w.WriteHeader(200)
			_, _ = w.Write([]byte(`{"models":[{"name":"qwen3.5:latest"}]}`))
			return
		}
		if r.URL.Path != "/api/chat" {
			w.WriteHeader(404)
			return
		}
		body, _ := io.ReadAll(r.Body)
		lastBody = string(body)
		w.WriteHeader(status)
		_, _ = w.Write([]byte(respBody))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	t.Cleanup(srv.Close)
	return srv, &lastBody
}
// TestOllama_ChatHappyPath checks that a successful upstream reply is
// mapped onto Response (content + token counts) and that the outgoing
// request carries stream=false.
func TestOllama_ChatHappyPath(t *testing.T) {
	upstream := `{
"model": "qwen3.5:latest",
"message": {"content": "ok"},
"done": true,
"prompt_eval_count": 7,
"eval_count": 3
}`
	srv, captured := ollamaServer(t, 200, upstream)
	provider := NewOllama(srv.URL, 5*time.Second)
	req := Request{
		Model:    "qwen3.5:latest",
		Messages: []Message{{Role: "user", Content: "hi"}},
	}
	out, err := provider.Chat(context.Background(), req)
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if got := out.Content; got != "ok" {
		t.Errorf("Content = %q, want ok", got)
	}
	if out.InputTokens != 7 || out.OutputTokens != 3 {
		t.Errorf("tokens = (%d, %d), want (7, 3)", out.InputTokens, out.OutputTokens)
	}
	// Inspect the request body the provider actually sent: streaming must
	// be disabled so the handler gets one complete JSON reply.
	var sent map[string]any
	if err := json.Unmarshal([]byte(*captured), &sent); err != nil {
		t.Fatalf("parse captured: %v", err)
	}
	if v := sent["stream"]; v != false {
		t.Errorf("stream should be false, got %v", v)
	}
}
// TestOllama_StripsExplicitPrefix verifies that an explicit "ollama/"
// model prefix is removed before the request reaches the upstream
// /api/chat endpoint.
func TestOllama_StripsExplicitPrefix(t *testing.T) {
	srv, captured := ollamaServer(t, 200, `{"message":{"content":""},"done":true}`)
	o := NewOllama(srv.URL, 5*time.Second)
	_, err := o.Chat(context.Background(), Request{Model: "ollama/qwen3.5:latest"})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	var sent map[string]any
	// Fix: the unmarshal error was previously discarded; a malformed
	// captured body would have surfaced as a confusing nil-map mismatch
	// instead of a parse failure.
	if err := json.Unmarshal([]byte(*captured), &sent); err != nil {
		t.Fatalf("parse captured: %v", err)
	}
	if sent["model"] != "qwen3.5:latest" {
		t.Errorf("upstream model = %v, want qwen3.5:latest (prefix stripped)", sent["model"])
	}
}
// TestOllama_FormatJSON verifies that Request.Format="json" is forwarded
// as the top-level "format" field of the upstream /api/chat body.
func TestOllama_FormatJSON(t *testing.T) {
	srv, captured := ollamaServer(t, 200, `{"message":{"content":"{}"},"done":true}`)
	o := NewOllama(srv.URL, 5*time.Second)
	// Fix: the Chat error was previously discarded; a transport failure
	// would have left captured empty and produced a misleading assertion.
	if _, err := o.Chat(context.Background(), Request{Model: "qwen3.5:latest", Format: "json"}); err != nil {
		t.Fatalf("Chat: %v", err)
	}
	// Fix: parse the captured body instead of matching the raw substring
	// `"format":"json"`, which was brittle against encoder whitespace and
	// field ordering.
	var sent map[string]any
	if err := json.Unmarshal([]byte(*captured), &sent); err != nil {
		t.Fatalf("parse captured: %v", err)
	}
	if sent["format"] != "json" {
		t.Errorf("Format=json should set top-level format=json; captured=%s", *captured)
	}
}
// TestOllama_Available reports true while the mock /api/tags endpoint
// answers with 200.
func TestOllama_Available(t *testing.T) {
	srv, _ := ollamaServer(t, 200, "{}")
	provider := NewOllama(srv.URL, 5*time.Second)
	if up := provider.Available(); !up {
		t.Errorf("server is up; Available should be true")
	}
}
// TestOllama_UpstreamError maps an upstream HTTP 500 onto ErrUpstream.
func TestOllama_UpstreamError(t *testing.T) {
	server, _ := ollamaServer(t, 500, `{"error":"out of memory"}`)
	provider := NewOllama(server.URL, 5*time.Second)
	if _, err := provider.Chat(context.Background(), Request{Model: "qwen3.5:latest"}); !errors.Is(err, ErrUpstream) {
		t.Errorf("500 should ErrUpstream; got %v", err)
	}
}
func TestOllamaCloud_FlattenMessages(t *testing.T) {
system, prompt := flattenMessages([]Message{
{Role: "system", Content: "You are helpful."},
{Role: "user", Content: "hi"},
{Role: "assistant", Content: "hello!"},
{Role: "user", Content: "how are you?"},
})
if system != "You are helpful." {
t.Errorf("system = %q, want 'You are helpful.'", system)
}
if !strings.Contains(prompt, "User: hi") || !strings.Contains(prompt, "Assistant: hello!") || !strings.Contains(prompt, "User: how are you?") {
t.Errorf("prompt missing role tags: %q", prompt)
}
}