new cmd/chatd on :3220 routes /v1/chat to the right provider based
on model-name prefix or :cloud suffix. closes the architectural gap
named in lakehouse.toml [models]: tiers map to model IDs, but until
phase 4 there was no service that could actually CALL those models
from go.
routing rules (registry.Resolve):
ollama/<m> → local Ollama (prefix stripped)
ollama_cloud/<m> → Ollama Cloud
<m>:cloud → Ollama Cloud (suffix variant — kimi-k2.6:cloud)
openrouter/<v>/<m> → OpenRouter (prefix stripped, OpenAI-compat)
opencode/<m> → OpenCode unified Zen+Go
kimi/<m> → Kimi For Coding (api.kimi.com/coding/v1)
bare names → local Ollama (default)
provider implementations:
- internal/chat/types.go Provider interface, Request/Response, errors
- internal/chat/registry.go prefix + :cloud suffix dispatch
- internal/chat/ollama.go local Ollama via /api/chat (think=false default)
- internal/chat/ollama_cloud.go Ollama Cloud via /api/generate (Bearer auth)
- internal/chat/openai_compat.go shared OpenAI Chat Completions for the
OpenRouter/OpenCode/Kimi family
- internal/chat/builder.go BuildRegistry from BuilderInput;
ResolveKey reads env then .env file fallback
config:
- ChatdConfig in internal/shared/config.go with bind, ollama_url,
per-provider key env names + .env fallback paths, timeout
- Gateway gains chatd_url + /v1/chat + /v1/chat/* routes
- lakehouse.toml [chatd] block with /etc/lakehouse/<provider>.env defaults
tests (19 in internal/chat):
- registry: prefix + :cloud + errors + telemetry + provider listing
- ollama: happy path + prefix strip + format=json + 500 mapping +
flatten_messages
- openai_compat: happy path + format=json + 429 mapping + zero-choices
think=false default in ollama + ollama_cloud — local hot path skips
reasoning, low-budget callers (the playbook_lift judge at max_tokens=10)
get direct answers instead of empty content + done_reason=length.
proven via chatd_smoke acceptance.
acceptance gate: scripts/chatd_smoke.sh — 6/6 PASS:
1. /v1/chat/providers lists exactly registered providers (1 in dev mode)
2. bare model → ollama default with content + token counts + latency
3. explicit ollama/<m> → prefix stripped at upstream
4. <m>:cloud without ollama_cloud registered → 404 (no silent fall-through)
5. unknown/<m> → falls through to default → upstream 502 (no prefix rewrite)
6. missing model field → 400
just verify: PASS (vet + 30 packages × short tests + 9 smokes).
chatd_smoke is a domain smoke (not in just verify, mirrors matrix /
observer / pathway pattern).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
154 lines
4.9 KiB
Go
// chatd is the LLM chat dispatcher service (Phase 4 — small-model
// pipeline tier abstraction). Routes POST /chat to the right
// provider based on the model-name prefix or :cloud suffix:
//
//	ollama/<m>          → local Ollama (no auth)
//	ollama_cloud/<m>    → Ollama Cloud (Bearer auth)
//	<m>:cloud           → Ollama Cloud (suffix variant)
//	openrouter/<v>/<m>  → OpenRouter (Bearer auth)
//	opencode/<m>        → OpenCode unified Zen+Go (Bearer auth)
//	kimi/<m>            → Kimi For Coding (Bearer auth)
//	bare names          → local Ollama (default)
//
// Provider keys come from env vars (or /etc/lakehouse/<provider>.env
// fallback files). Providers with empty keys stay unregistered, so
// requests for them 404 cleanly instead of 503-ing at call time.
//
// Routes:
//
//	POST /chat           — dispatch a chat request to the resolved provider
//	GET  /chat/providers — list registered providers (telemetry / health)
//	GET  /health         — readiness (always 200 — sub-providers are
//	                       independently checked via /providers)
package main
import (
	"encoding/json"
	"errors"
	"flag"
	"log/slog"
	"net/http"
	"os"
	"time"

	"github.com/go-chi/chi/v5"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/chat"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
)
|
|
|
|
// maxRequestBytes caps the size of a POST /chat request body at 4 MiB.
const maxRequestBytes = 4 * 1024 * 1024
|
|
|
|
func main() {
|
|
configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
|
|
flag.Parse()
|
|
|
|
cfg, err := shared.LoadConfig(*configPath)
|
|
if err != nil {
|
|
slog.Error("config", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
timeout := time.Duration(cfg.Chatd.TimeoutSecs) * time.Second
|
|
registry := chat.BuildRegistry(chat.BuilderInput{
|
|
OllamaURL: cfg.Chatd.OllamaURL,
|
|
OllamaCloudKey: chat.ResolveKey(cfg.Chatd.OllamaCloudKeyEnv, cfg.Chatd.OllamaCloudKeyEnv, cfg.Chatd.OllamaCloudKeyFile),
|
|
OpenRouterKey: chat.ResolveKey(cfg.Chatd.OpenRouterKeyEnv, cfg.Chatd.OpenRouterKeyEnv, cfg.Chatd.OpenRouterKeyFile),
|
|
OpenCodeKey: chat.ResolveKey(cfg.Chatd.OpenCodeKeyEnv, cfg.Chatd.OpenCodeKeyEnv, cfg.Chatd.OpenCodeKeyFile),
|
|
KimiKey: chat.ResolveKey(cfg.Chatd.KimiKeyEnv, cfg.Chatd.KimiKeyEnv, cfg.Chatd.KimiKeyFile),
|
|
Timeout: timeout,
|
|
})
|
|
|
|
h := &handlers{registry: registry}
|
|
if err := shared.Run("chatd", cfg.Chatd.Bind, h.register, cfg.Auth); err != nil {
|
|
slog.Error("server", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// handlers carries the provider registry shared by all chatd HTTP
// endpoints; methods on it are registered as route handlers.
type handlers struct {
	registry *chat.Registry
}
|
|
|
|
func (h *handlers) register(r chi.Router) {
|
|
// Routes mirror what the gateway proxies: /v1/chat → /chat (POST)
|
|
// and /v1/chat/providers → /chat/providers (GET). Keeping providers
|
|
// under /chat/ avoids a separate /providers root route that would
|
|
// need its own gateway proxy entry.
|
|
r.Post("/chat", h.handleChat)
|
|
r.Get("/chat/providers", h.handleProviders)
|
|
}
|
|
|
|
func (h *handlers) handleChat(w http.ResponseWriter, r *http.Request) {
|
|
r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)
|
|
defer r.Body.Close()
|
|
|
|
var req chat.Request
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
http.Error(w, "invalid JSON: "+err.Error(), http.StatusBadRequest)
|
|
return
|
|
}
|
|
if req.Model == "" || len(req.Messages) == 0 {
|
|
http.Error(w, "model and messages are required", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
resp, err := h.registry.Chat(r.Context(), req)
|
|
if err != nil {
|
|
writeChatError(w, err)
|
|
return
|
|
}
|
|
writeJSON(w, http.StatusOK, resp)
|
|
}
|
|
|
|
func (h *handlers) handleProviders(w http.ResponseWriter, _ *http.Request) {
|
|
names := h.registry.Names()
|
|
statuses := make(map[string]bool, len(names))
|
|
for _, n := range names {
|
|
// Look up the provider via Resolve (uses registry's prefix
|
|
// rules). Fake a request for the bare name to skip prefix
|
|
// stripping in Resolve — feed it the prefixed form.
|
|
var probe string
|
|
if n == "ollama" {
|
|
probe = "qwen3.5:latest" // bare-name default route
|
|
} else {
|
|
probe = n + "/probe"
|
|
}
|
|
p, err := h.registry.Resolve(probe)
|
|
if err != nil {
|
|
statuses[n] = false
|
|
continue
|
|
}
|
|
statuses[n] = p.Available()
|
|
}
|
|
writeJSON(w, http.StatusOK, map[string]any{
|
|
"providers": statuses,
|
|
})
|
|
}
|
|
|
|
// writeChatError maps chat sentinel errors to HTTP status codes.
|
|
// Unknown errors map to 500.
|
|
func writeChatError(w http.ResponseWriter, err error) {
|
|
switch {
|
|
case errors.Is(err, chat.ErrProviderNotFound):
|
|
http.Error(w, err.Error(), http.StatusNotFound)
|
|
case errors.Is(err, chat.ErrProviderDisabled):
|
|
http.Error(w, err.Error(), http.StatusServiceUnavailable)
|
|
case errors.Is(err, chat.ErrUpstream):
|
|
http.Error(w, err.Error(), http.StatusBadGateway)
|
|
case errors.Is(err, chat.ErrTimeout):
|
|
http.Error(w, err.Error(), http.StatusGatewayTimeout)
|
|
default:
|
|
slog.Error("chat", "err", err)
|
|
http.Error(w, "internal", http.StatusInternalServerError)
|
|
}
|
|
}
|
|
|
|
func writeJSON(w http.ResponseWriter, status int, body any) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(status)
|
|
if err := json.NewEncoder(w).Encode(body); err != nil {
|
|
slog.Error("encode", "err", err)
|
|
}
|
|
}
|