new cmd/chatd on :3220 routes /v1/chat to the right provider based
on model-name prefix or :cloud suffix. closes the architectural gap
named in lakehouse.toml [models]: tiers map to model IDs, but until
phase 4 there was no service that could actually CALL those models
from Go.
routing rules (registry.Resolve; dispatch order sketched after the list):
ollama/<m> → local Ollama (prefix stripped)
ollama_cloud/<m> → Ollama Cloud
<m>:cloud → Ollama Cloud (suffix variant, e.g. kimi-k2.6:cloud)
openrouter/<v>/<m> → OpenRouter (prefix stripped, OpenAI-compat)
opencode/<m> → OpenCode unified Zen+Go
kimi/<m> → Kimi For Coding (api.kimi.com/coding/v1)
bare names → local Ollama (default)
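
to make the dispatch order concrete, a self-contained sketch. the names here (resolveSketch, the registered map) are hypothetical, not the real registry.Resolve, which also records telemetry and returns typed errors:

package main

import (
    "errors"
    "fmt"
    "strings"
)

// resolveSketch returns the provider name and the model string sent
// upstream, mirroring the routing rules above.
func resolveSketch(model string, registered map[string]bool, def string) (string, string, error) {
    // 1. known provider prefix wins; prefix is stripped for upstream.
    if i := strings.IndexByte(model, '/'); i > 0 && registered[model[:i]] {
        return model[:i], model[i+1:], nil
    }
    // 2. ":cloud" suffix routes to Ollama Cloud, or errors if it is
    //    not registered (no silent fall-through; surfaces as 404).
    if strings.HasSuffix(model, ":cloud") {
        if !registered["ollama_cloud"] {
            return "", "", errors.New("ollama_cloud not registered")
        }
        return "ollama_cloud", model, nil // suffix kept: part of the tag
    }
    // 3. bare names and unknown prefixes go to the default, untouched.
    return def, model, nil
}

func main() {
    reg := map[string]bool{"ollama": true, "openrouter": true}
    p, m, _ := resolveSketch("openrouter/moonshotai/kimi-k2", reg, "ollama")
    fmt.Println(p, m) // openrouter moonshotai/kimi-k2
}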
provider implementations:
- internal/chat/types.go Provider interface, Request/Response, errors
- internal/chat/registry.go prefix + :cloud suffix dispatch
- internal/chat/ollama.go local Ollama via /api/chat (think=false default)
- internal/chat/ollama_cloud.go Ollama Cloud via /api/generate (Bearer auth)
- internal/chat/openai_compat.go shared OpenAI Chat Completions for the
OpenRouter/OpenCode/Kimi family
- internal/chat/builder.go BuildRegistry from BuilderInput;
ResolveKey reads env then .env file fallback (sketched below)
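
ResolveKey's contract, sketched (hypothetical name resolveKeySketch; the real implementation is in internal/chat/builder.go):

package main

import (
    "fmt"
    "os"
    "strings"
)

// resolveKeySketch: env var wins; otherwise scan a KEY=value .env file.
func resolveKeySketch(envVar, envFile string) string {
    if v := os.Getenv(envVar); v != "" {
        return v
    }
    bs, err := os.ReadFile(envFile)
    if err != nil {
        return "" // missing file is not fatal: provider just reports unavailable
    }
    for _, line := range strings.Split(string(bs), "\n") {
        if k, v, ok := strings.Cut(strings.TrimSpace(line), "="); ok && k == envVar {
            return strings.Trim(v, `"`)
        }
    }
    return ""
}

func main() {
    fmt.Println(resolveKeySketch("OPENROUTER_API_KEY", "/etc/lakehouse/openrouter.env"))
}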
config (shape sketched after the list):
- ChatdConfig in internal/shared/config.go with bind, ollama_url,
per-provider key env names + .env fallback paths, timeout
- Gateway gains chatd_url + /v1/chat + /v1/chat/* routes
- lakehouse.toml [chatd] block with /etc/lakehouse/<provider>.env defaults
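
the shape, hypothetically; field names here are guesses, only the fields listed above (bind, ollama_url, per-provider key env + .env fallback, timeout) come from the actual config:

package shared

import "time"

// ChatdConfig: illustrative field names; real struct lives in
// internal/shared/config.go.
type ChatdConfig struct {
    Bind      string               // chatd listen address, e.g. ":3220"
    OllamaURL string               // local Ollama base URL
    Timeout   time.Duration        // upstream request timeout
    Keys      map[string]KeySource // openrouter, opencode, kimi, ollama_cloud
}

// KeySource is a hypothetical nested type: env var first, .env file fallback.
type KeySource struct {
    Env  string // e.g. "OPENROUTER_API_KEY"
    File string // e.g. "/etc/lakehouse/openrouter.env"
}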
tests (19 in internal/chat; one case sketched after the list):
- registry: prefix + :cloud + errors + telemetry + provider listing
- ollama: happy path + prefix strip + format=json + 500 mapping +
flatten_messages
- openai_compat: happy path + format=json + 429 mapping + zero-choices
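
one case, sketched in a _test.go beside resolveSketch above; the real 19 target registry.Resolve and the providers directly:

package main

import "testing"

// the ":cloud without ollama_cloud" case, unit-level twin of smoke 4.
func TestCloudSuffixWithoutProvider(t *testing.T) {
    reg := map[string]bool{"ollama": true} // dev mode: local only
    if _, _, err := resolveSketch("kimi-k2.6:cloud", reg, "ollama"); err == nil {
        t.Fatal("want error: :cloud must not silently fall through to local")
    }
}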
think=false default in ollama + ollama_cloud — local hot path skips
reasoning, low-budget callers (the playbook_lift judge at max_tokens=10)
get direct answers instead of empty content + done_reason=length.
proven via chatd_smoke acceptance.
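
what that looks like on the wire, roughly; think and options.num_predict are real Ollama /api/chat fields, everything else here is illustrative:

package main

import (
    "encoding/json"
    "fmt"
)

func main() {
    // approximate local /api/chat payload built by internal/chat/ollama.go.
    body := map[string]any{
        "model":    "qwen3:8b", // illustrative model tag
        "messages": []map[string]string{{"role": "user", "content": "PASS or FAIL?"}},
        "stream":   false,
        "think":    false,                             // default: skip the reasoning pass
        "options":  map[string]any{"num_predict": 10}, // e.g. the playbook_lift judge
    }
    bs, _ := json.MarshalIndent(body, "", "  ")
    fmt.Println(string(bs))
}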
acceptance gate: scripts/chatd_smoke.sh — 6/6 PASS (smoke 2 shown by hand after the list):
1. /v1/chat/providers lists exactly registered providers (1 in dev mode)
2. bare model → ollama default with content + token counts + latency
3. explicit ollama/<m> → prefix stripped at upstream
4. <m>:cloud without ollama_cloud registered → 404 (no silent fall-through)
5. unknown/<m> → falls through to default → upstream 502 (no prefix rewrite)
6. missing model field → 400
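
smoke 2 by hand, for illustration; the real checks live in scripts/chatd_smoke.sh, port and path per the description above, model tag illustrative:

package main

import (
    "fmt"
    "io"
    "net/http"
    "strings"
)

func main() {
    // bare model name routes to the local ollama default.
    body := `{"model":"qwen3:8b","messages":[{"role":"user","content":"ping"}]}`
    resp, err := http.Post("http://127.0.0.1:3220/v1/chat", "application/json", strings.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    out, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(out)) // expect content + token counts + latency
}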
just verify: PASS (vet + 30 packages × short tests + 9 smokes).
chatd_smoke is a domain smoke (not in just verify; it mirrors the
matrix / observer / pathway pattern).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
internal/chat/openai_compat.go (142 lines, 4.5 KiB, Go):
package chat

import (
    "bytes"
    "context"
    "encoding/json"
    "errors"
    "fmt"
    "io"
    "net/http"
    "strings"
    "time"
)

// openaiCompat implements the OpenAI Chat Completions wire format,
// which OpenRouter / OpenCode / Kimi all speak. Differences between
// those three providers are limited to:
// - base URL (e.g. /v1/chat/completions vs /zen/v1/chat/completions)
// - prefix stripped from req.Model
// - bearer token source (different env vars)
//
// Each concrete provider wraps this with its own constructor pinning
// those values.
type openaiCompat struct {
    name       string
    baseURL    string
    apiKey     string
    prefix     string
    httpClient *http.Client
}

func newOpenAICompat(name, baseURL, apiKey, prefix string, timeout time.Duration) *openaiCompat {
    if timeout == 0 {
        timeout = 180 * time.Second
    }
    return &openaiCompat{
        name:       name,
        baseURL:    strings.TrimRight(baseURL, "/"),
        apiKey:     apiKey,
        prefix:     prefix,
        httpClient: &http.Client{Timeout: timeout},
    }
}

func (c *openaiCompat) Name() string    { return c.name }
func (c *openaiCompat) Available() bool { return c.apiKey != "" }

func (c *openaiCompat) Chat(ctx context.Context, req Request) (*Response, error) {
    model := StripPrefix(req.Model, c.prefix)

    body := map[string]any{
        "model":       model,
        "messages":    req.Messages,
        "stream":      false,
        "temperature": req.Temperature,
    }
    if req.MaxTokens > 0 {
        body["max_tokens"] = req.MaxTokens
    }
    if req.Format == "json" {
        // OpenAI-compat response_format. OpenRouter passes through;
        // OpenCode + Kimi accept it. Worst case the upstream ignores it.
        body["response_format"] = map[string]any{"type": "json_object"}
    }

    bs, _ := json.Marshal(body)
    url := c.baseURL + "/chat/completions"
    httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(bs))
    if err != nil {
        return nil, err
    }
    httpReq.Header.Set("Content-Type", "application/json")
    httpReq.Header.Set("Authorization", "Bearer "+c.apiKey)
    // HTTP-Referer + X-Title are OpenRouter-specific but harmless on
    // other providers — they pass them through to upstream attribution.
    httpReq.Header.Set("HTTP-Referer", "https://golanglakehouse.local")
    httpReq.Header.Set("X-Title", "Lakehouse-Go")

    resp, err := c.httpClient.Do(httpReq)
    if err != nil {
        if errors.Is(ctx.Err(), context.DeadlineExceeded) {
            return nil, fmt.Errorf("%w: %s", ErrTimeout, c.name)
        }
        return nil, fmt.Errorf("%s: %w", c.name, err)
    }
    defer resp.Body.Close()

    rb, _ := io.ReadAll(resp.Body)
    if resp.StatusCode/100 != 2 {
        return nil, fmt.Errorf("%w: %s %d: %s", ErrUpstream, c.name, resp.StatusCode, abbrev(string(rb), 300))
    }

    var oaResp struct {
        Model   string `json:"model"`
        Choices []struct {
            Message struct {
                Content string `json:"content"`
            } `json:"message"`
            FinishReason string `json:"finish_reason"`
        } `json:"choices"`
        Usage struct {
            PromptTokens     int `json:"prompt_tokens"`
            CompletionTokens int `json:"completion_tokens"`
        } `json:"usage"`
    }
    if err := json.Unmarshal(rb, &oaResp); err != nil {
        return nil, fmt.Errorf("%s decode: %w (body=%s)", c.name, err, abbrev(string(rb), 200))
    }
    if len(oaResp.Choices) == 0 {
        return nil, fmt.Errorf("%w: %s returned 0 choices: %s", ErrUpstream, c.name, abbrev(string(rb), 200))
    }

    return &Response{
        Model:        model,
        Content:      oaResp.Choices[0].Message.Content,
        InputTokens:  oaResp.Usage.PromptTokens,
        OutputTokens: oaResp.Usage.CompletionTokens,
        FinishReason: oaResp.Choices[0].FinishReason,
    }, nil
}

// NewOpenRouter returns a provider for openrouter.ai. apiKey is
// resolved at construction (typically from OPENROUTER_API_KEY env or
// /etc/lakehouse/openrouter.env).
func NewOpenRouter(apiKey string, timeout time.Duration) *openaiCompat {
    return newOpenAICompat("openrouter", "https://openrouter.ai/api/v1", apiKey, "openrouter", timeout)
}

// NewOpenCode returns a provider for OpenCode's unified Zen+Go endpoint.
// One key reaches Anthropic Opus, GPT-5, Gemini 3.1, Kimi K2.6,
// DeepSeek, GLM, Qwen, plus 4 free-tier models.
func NewOpenCode(apiKey string, timeout time.Duration) *openaiCompat {
    return newOpenAICompat("opencode", "https://opencode.ai/zen/v1", apiKey, "opencode", timeout)
}

// NewKimi returns a provider for the direct Kimi For Coding endpoint.
// api.kimi.com is a separate account system from api.moonshot.ai —
// keys are NOT interchangeable.
func NewKimi(apiKey string, timeout time.Duration) *openaiCompat {
    return newOpenAICompat("kimi", "https://api.kimi.com/coding/v1", apiKey, "kimi", timeout)
}
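
usage sketch; this would live alongside the file above in package chat and assumes imports context, fmt, os, time. the Message role/content shape is an assumption, it is not defined in this file:

// exampleOpenRouterCall constructs the OpenRouter provider and issues
// one non-streaming call, returning the assistant content.
func exampleOpenRouterCall(ctx context.Context) (string, error) {
    p := NewOpenRouter(os.Getenv("OPENROUTER_API_KEY"), 60*time.Second)
    if !p.Available() {
        return "", fmt.Errorf("OPENROUTER_API_KEY not configured")
    }
    resp, err := p.Chat(ctx, Request{
        Model:    "openrouter/moonshotai/kimi-k2",              // prefix stripped before upstream
        Messages: []Message{{Role: "user", Content: "hello"}}, // assumed shape
    })
    if err != nil {
        return "", err
    }
    return resp.Content, nil
}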