new cmd/chatd on :3220 routes /v1/chat to the right provider based
on model-name prefix or :cloud suffix. closes the architectural gap
named in lakehouse.toml [models]: tiers map to model IDs, but until
phase 4 there was no service that could actually CALL those models
from Go.
routing rules (registry.Resolve):
ollama/<m> → local Ollama (prefix stripped)
ollama_cloud/<m> → Ollama Cloud
<m>:cloud → Ollama Cloud (suffix variant — kimi-k2.6:cloud)
openrouter/<v>/<m> → OpenRouter (prefix stripped, OpenAI-compat)
opencode/<m> → OpenCode unified Zen+Go
kimi/<m> → Kimi For Coding (api.kimi.com/coding/v1)
bare names → local Ollama (default)
provider implementations:
- internal/chat/types.go Provider interface, Request/Response, errors
- internal/chat/registry.go prefix + :cloud suffix dispatch
- internal/chat/ollama.go local Ollama via /api/chat (think=false default)
- internal/chat/ollama_cloud.go Ollama Cloud via /api/generate (Bearer auth)
- internal/chat/openai_compat.go shared OpenAI Chat Completions for the
OpenRouter/OpenCode/Kimi family
- internal/chat/builder.go BuildRegistry from BuilderInput;
ResolveKey reads env then .env file fallback
config:
- ChatdConfig in internal/shared/config.go with bind, ollama_url,
per-provider key env names + .env fallback paths, timeout
- Gateway gains chatd_url + /v1/chat + /v1/chat/* routes
- lakehouse.toml [chatd] block with /etc/lakehouse/<provider>.env defaults
tests (19 in internal/chat):
- registry: prefix + :cloud + errors + telemetry + provider listing
- ollama: happy path + prefix strip + format=json + 500 mapping +
flatten_messages
- openai_compat: happy path + format=json + 429 mapping + zero-choices
think=false is the default in ollama + ollama_cloud: the local hot path
skips reasoning, so low-budget callers (the playbook_lift judge at
max_tokens=10) get direct answers instead of empty content +
done_reason=length. Proven via the chatd_smoke acceptance gate.
acceptance gate: scripts/chatd_smoke.sh — 6/6 PASS:
1. /v1/chat/providers lists exactly registered providers (1 in dev mode)
2. bare model → ollama default with content + token counts + latency
3. explicit ollama/<m> → prefix stripped at upstream
4. <m>:cloud without ollama_cloud registered → 404 (no silent fall-through)
5. unknown/<m> → falls through to default → upstream 502 (no prefix rewrite)
6. missing model field → 400
just verify: PASS (vet + 30 packages × short tests + 9 smokes).
chatd_smoke is a domain smoke (not in just verify, mirrors matrix /
observer / pathway pattern).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
147 lines
4.9 KiB
Go
147 lines
4.9 KiB
Go
package chat
|
|
|
|
import (
	"context"
	"fmt"
	"sort"
	"strings"
	"time"
)
|
|
|
|
// Registry resolves a model name to its Provider. Lookup is by the
|
|
// first slash-delimited prefix; bare names (no slash) fall through to
|
|
// the configured default provider — typically `ollama` so local
|
|
// model names like `qwen3.5:latest` work without a prefix.
|
|
//
|
|
// Mirrors the Rust gateway's resolve_provider() pattern from
|
|
// crates/gateway/src/v1/mod.rs.
|
|
type Registry struct {
|
|
providers map[string]Provider // name → provider
|
|
defaultName string // resolved when no prefix matches
|
|
}
|
|
|
|
// NewRegistry builds a registry from a list of providers. The first
|
|
// "ollama" provider becomes the bare-name default; callers can
|
|
// override via SetDefault.
|
|
func NewRegistry(providers ...Provider) *Registry {
|
|
r := &Registry{providers: make(map[string]Provider, len(providers))}
|
|
for _, p := range providers {
|
|
r.providers[p.Name()] = p
|
|
if r.defaultName == "" && p.Name() == "ollama" {
|
|
r.defaultName = "ollama"
|
|
}
|
|
}
|
|
return r
|
|
}
|
|
|
|
// Register adds or replaces a provider. Used after construction (e.g.
|
|
// for tests injecting fakes).
|
|
func (r *Registry) Register(p Provider) {
|
|
r.providers[p.Name()] = p
|
|
}
|
|
|
|
// SetDefault sets the provider used when no prefix matches. Empty
|
|
// model names always 404 — the default only kicks in for unprefixed
|
|
// non-empty names.
|
|
func (r *Registry) SetDefault(name string) {
|
|
r.defaultName = name
|
|
}
|
|
|
|
// Names returns the registered provider names, sorted (deterministic
|
|
// output for /v1/chat/providers listing).
|
|
func (r *Registry) Names() []string {
|
|
out := make([]string, 0, len(r.providers))
|
|
for n := range r.providers {
|
|
out = append(out, n)
|
|
}
|
|
// Sorted manually to avoid pulling sort import for one call site;
|
|
// O(n²) is fine for n≤10.
|
|
for i := 1; i < len(out); i++ {
|
|
for j := i; j > 0 && out[j] < out[j-1]; j-- {
|
|
out[j], out[j-1] = out[j-1], out[j]
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// Resolve returns the Provider for a model name. Resolution rules:
|
|
//
|
|
// 1. Empty model → ErrProviderNotFound
|
|
// 2. Suffix ":cloud" → ollama_cloud (e.g. "kimi-k2.6:cloud")
|
|
// 3. Prefix match (e.g. "openrouter/...") → that provider
|
|
// 4. No prefix or unknown prefix → default provider (typically ollama)
|
|
// 5. No default registered → ErrProviderNotFound
|
|
//
|
|
// The suffix rule mirrors the Rust gateway and the Ollama Cloud
|
|
// upstream's own naming convention — kimi-k2.6:cloud, qwen3-coder:480b
|
|
// (when on cloud) etc. Without it, every cloud model would need a
|
|
// "cloud/" prefix in lakehouse.toml, which clashes with the Ollama
|
|
// upstream that wants the bare suffix-named model.
|
|
func (r *Registry) Resolve(model string) (Provider, error) {
|
|
if model == "" {
|
|
return nil, fmt.Errorf("%w: empty model name", ErrProviderNotFound)
|
|
}
|
|
// Suffix detection — `:cloud` always means Ollama Cloud.
|
|
if strings.HasSuffix(model, ":cloud") {
|
|
if p, ok := r.providers["ollama_cloud"]; ok {
|
|
return p, nil
|
|
}
|
|
// :cloud suffix with no ollama_cloud provider → 404. Don't
|
|
// silently fall through to local Ollama; that would burn the
|
|
// model name on a provider that doesn't have it.
|
|
return nil, fmt.Errorf("%w: %q has :cloud suffix but ollama_cloud provider is not registered", ErrProviderNotFound, model)
|
|
}
|
|
// Prefix match: "openrouter/anthropic/claude-opus-4-7" splits on
|
|
// first "/". Multi-segment provider names not supported (none
|
|
// shipped use them).
|
|
if idx := strings.Index(model, "/"); idx > 0 {
|
|
prefix := model[:idx]
|
|
if p, ok := r.providers[prefix]; ok {
|
|
return p, nil
|
|
}
|
|
// Unknown prefix — falls through to default. Lets bare model
|
|
// names with slashes (e.g. "anthropic/claude-3.5") still hit
|
|
// ollama if that's how the operator named local models.
|
|
}
|
|
if r.defaultName == "" {
|
|
return nil, fmt.Errorf("%w: %q", ErrProviderNotFound, model)
|
|
}
|
|
p, ok := r.providers[r.defaultName]
|
|
if !ok {
|
|
return nil, fmt.Errorf("%w: default provider %q not registered", ErrProviderNotFound, r.defaultName)
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
// Chat is the dispatcher entry point: resolve provider, dispatch,
|
|
// stamp telemetry on the response. Returns ErrProviderDisabled when
|
|
// the resolved provider isn't Available() (caller should map to 503).
|
|
func (r *Registry) Chat(ctx context.Context, req Request) (*Response, error) {
|
|
p, err := r.Resolve(req.Model)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !p.Available() {
|
|
return nil, fmt.Errorf("%w: %s", ErrProviderDisabled, p.Name())
|
|
}
|
|
t0 := time.Now()
|
|
resp, err := p.Chat(ctx, req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
resp.LatencyMs = time.Since(t0).Milliseconds()
|
|
resp.Provider = p.Name()
|
|
return resp, nil
|
|
}
|
|
|
|
// StripPrefix removes one leading "<prefix>/" from model and returns
// the remainder; model is returned unchanged when it does not start
// with exactly that string. Only the first occurrence is removed, so
// "ollama/ollama/m" with prefix "ollama" yields "ollama/m".
//
// Helper for upstream calls — providers that need the bare model name
// (e.g. OpenRouter sees "anthropic/claude-opus-4-7", not
// "openrouter/anthropic/claude-opus-4-7") use this.
func StripPrefix(model, prefix string) string {
	return strings.TrimPrefix(model, prefix+"/")
}
|