diff --git a/cmd/chatd/main.go b/cmd/chatd/main.go
index 85fe620..4311126 100644
--- a/cmd/chatd/main.go
+++ b/cmd/chatd/main.go
@@ -52,10 +52,10 @@ func main() {
 	timeout := time.Duration(cfg.Chatd.TimeoutSecs) * time.Second
 
 	registry := chat.BuildRegistry(chat.BuilderInput{
 		OllamaURL:      cfg.Chatd.OllamaURL,
-		OllamaCloudKey: chat.ResolveKey(cfg.Chatd.OllamaCloudKeyEnv, cfg.Chatd.OllamaCloudKeyEnv, cfg.Chatd.OllamaCloudKeyFile),
-		OpenRouterKey:  chat.ResolveKey(cfg.Chatd.OpenRouterKeyEnv, cfg.Chatd.OpenRouterKeyEnv, cfg.Chatd.OpenRouterKeyFile),
-		OpenCodeKey:    chat.ResolveKey(cfg.Chatd.OpenCodeKeyEnv, cfg.Chatd.OpenCodeKeyEnv, cfg.Chatd.OpenCodeKeyFile),
-		KimiKey:        chat.ResolveKey(cfg.Chatd.KimiKeyEnv, cfg.Chatd.KimiKeyEnv, cfg.Chatd.KimiKeyFile),
+		OllamaCloudKey: chat.ResolveKey(cfg.Chatd.OllamaCloudKeyEnv, cfg.Chatd.OllamaCloudKeyFile),
+		OpenRouterKey:  chat.ResolveKey(cfg.Chatd.OpenRouterKeyEnv, cfg.Chatd.OpenRouterKeyFile),
+		OpenCodeKey:    chat.ResolveKey(cfg.Chatd.OpenCodeKeyEnv, cfg.Chatd.OpenCodeKeyFile),
+		KimiKey:        chat.ResolveKey(cfg.Chatd.KimiKeyEnv, cfg.Chatd.KimiKeyFile),
 		Timeout:        timeout,
 	})
@@ -101,25 +101,16 @@ func (h *handlers) handleChat(w http.ResponseWriter, r *http.Request) {
 	writeJSON(w, http.StatusOK, resp)
 }
 
+// handleProviders lists registered providers + per-provider Available()
+// status. Phase 4 scrum fix B-2: looks up by name directly, skipping
+// the prior synthetic-model-name route through Resolve. The endpoint
+// reports registry membership, not routing rules — the latter is
+// covered by /v1/chat itself.
 func (h *handlers) handleProviders(w http.ResponseWriter, _ *http.Request) {
 	names := h.registry.Names()
 	statuses := make(map[string]bool, len(names))
 	for _, n := range names {
-		// Look up the provider via Resolve (uses registry's prefix
-		// rules). Fake a request for the bare name to skip prefix
-		// stripping in Resolve — feed it the prefixed form.
-		var probe string
-		if n == "ollama" {
-			probe = "qwen3.5:latest" // bare-name default route
-		} else {
-			probe = n + "/probe"
-		}
-		p, err := h.registry.Resolve(probe)
-		if err != nil {
-			statuses[n] = false
-			continue
-		}
-		statuses[n] = p.Available()
+		statuses[n] = h.registry.Available(n)
 	}
 	writeJSON(w, http.StatusOK, map[string]any{
 		"providers": statuses,
diff --git a/internal/chat/builder.go b/internal/chat/builder.go
index 52e1259..fc6da53 100644
--- a/internal/chat/builder.go
+++ b/internal/chat/builder.go
@@ -59,21 +59,27 @@ func BuildRegistry(in BuilderInput) *Registry {
 
 // ResolveKey reads an API key with the priority chain:
 //  1. Explicit env var (named by envVar)
-//  2. .env file at filePath (e.g. /etc/lakehouse/openrouter.env)
-//     with KEY=value lines; the first matching line wins.
+//  2. .env file at envFilePath, looking for a "<envVar>=<value>" line
 //  3. "" if neither set
 //
 // Mirrors the Rust adapter's resolve_*_key() pattern. Empty key
 // means the provider stays unregistered — operators see one fewer
 // entry in the boot log instead of a 503 at first request.
-func ResolveKey(envVar, envFileName, envFilePath string) string {
+//
+// Phase 4 scrum fix B-1 (Opus + Kimi convergent): collapsed from
+// 3-arg API to 2-arg. Previous version forced callers to pass the
+// env var name twice (once for env-lookup, once for file-key-lookup),
+// which let operator renames silently miss the file fallback. Now
+// both lookups use the same canonical name. Operators who want a
+// different file-key-name can use a different env var altogether.
+func ResolveKey(envVar, envFilePath string) string {
 	if envVar != "" {
 		if v := strings.TrimSpace(os.Getenv(envVar)); v != "" {
 			return v
 		}
 	}
-	if envFilePath != "" {
-		if v := readEnvFileVar(envFilePath, envFileName); v != "" {
+	if envFilePath != "" && envVar != "" {
+		if v := readEnvFileVar(envFilePath, envVar); v != "" {
 			return v
 		}
 	}
diff --git a/internal/chat/ollama.go b/internal/chat/ollama.go
index d3899bc..d9b41a7 100644
--- a/internal/chat/ollama.go
+++ b/internal/chat/ollama.go
@@ -60,6 +60,15 @@ func (o *Ollama) Available() bool {
 
 func (o *Ollama) Chat(ctx context.Context, req Request) (*Response, error) {
 	model := StripPrefix(req.Model, "ollama")
+	options := map[string]any{}
+	// Pointer-valued temperature so "not set" (nil) doesn't overwrite
+	// Ollama's default. Only forward when caller set it explicitly.
+	if req.Temperature != nil {
+		options["temperature"] = *req.Temperature
+	}
+	if req.MaxTokens > 0 {
+		options["num_predict"] = req.MaxTokens
+	}
 	body := map[string]any{
 		"model":    model,
 		"messages": req.Messages,
@@ -70,13 +79,8 @@ func (o *Ollama) Chat(ctx context.Context, req Request) (*Response, error) {
 		// budgets get consumed by thinking before any content is
 		// produced. Cloud tier (Ollama Cloud) inherits the same default
 		// — see ollama_cloud.go.
-		"think": false,
-		"options": map[string]any{
-			"temperature": req.Temperature,
-		},
-	}
-	if req.MaxTokens > 0 {
-		body["options"].(map[string]any)["num_predict"] = req.MaxTokens
+		"think":   false,
+		"options": options,
 	}
 	if req.Format == "json" {
 		body["format"] = "json"
 	}
@@ -108,9 +112,10 @@ func (o *Ollama) Chat(ctx context.Context, req Request) (*Response, error) {
 		Message struct {
 			Content string `json:"content"`
 		} `json:"message"`
-		Done            bool `json:"done"`
-		PromptEvalCount int  `json:"prompt_eval_count"`
-		EvalCount       int  `json:"eval_count"`
+		Done            bool   `json:"done"`
+		DoneReason      string `json:"done_reason"` // "stop", "length", ...
+		PromptEvalCount int    `json:"prompt_eval_count"`
+		EvalCount       int    `json:"eval_count"`
 	}
 	if err := json.Unmarshal(rb, &ollamaResp); err != nil {
 		return nil, fmt.Errorf("ollama decode: %w (body=%s)", err, abbrev(string(rb), 200))
 	}
@@ -121,11 +126,21 @@ func (o *Ollama) Chat(ctx context.Context, req Request) (*Response, error) {
 		Content:      ollamaResp.Message.Content,
 		InputTokens:  ollamaResp.PromptEvalCount,
 		OutputTokens: ollamaResp.EvalCount,
-		FinishReason: finishReasonFromDone(ollamaResp.Done),
+		FinishReason: finishReasonFromOllama(ollamaResp.Done, ollamaResp.DoneReason),
 	}, nil
 }
 
-func finishReasonFromDone(done bool) string {
+// finishReasonFromOllama prefers Ollama's done_reason when present
+// (newer Ollama 0.4+ exposes this on /api/chat). Falls back to the
+// done bool for older versions. Phase 4 scrum fix B-4 (Opus WARN):
+// previous logic mapped `done==true` → "stop" unconditionally, which
+// hid truncations that Ollama reports as `done=true, done_reason="length"`.
+// The playbook_lift judge needs this signal to detect when max_tokens
+// budget ran out before the answer completed.
+func finishReasonFromOllama(done bool, doneReason string) string {
+	if doneReason != "" {
+		return doneReason
+	}
 	if done {
 		return "stop"
 	}
diff --git a/internal/chat/ollama_cloud.go b/internal/chat/ollama_cloud.go
index 082a75b..ba655db 100644
--- a/internal/chat/ollama_cloud.go
+++ b/internal/chat/ollama_cloud.go
@@ -17,11 +17,13 @@ import (
 // (crates/gateway/src/v1/ollama_cloud.rs) — system/prompt split
 // against /api/generate with cloud-friendly num_predict floor.
 //
-// Model routing: both "cloud/" (prefix) and ":cloud"
-// (suffix) route here. The registry handles the suffix case; this
-// provider only sees the prefix form. Suffix models pass through
-// the StripPrefix call unchanged (no leading "cloud/" to strip),
-// which is what the upstream wants.
+// Model routing: "ollama_cloud/" (prefix) and ":cloud"
+// (suffix) both route here. Registry.Resolve handles the suffix case;
+// the prefix case strips "ollama_cloud/" before the upstream call so
+// ollama.com sees the bare model name. Suffix-form models pass through
+// StripPrefix unchanged (no leading prefix to strip — exactly what the
+// upstream wants since "kimi-k2.6:cloud" is the canonical name on
+// ollama.com).
 type OllamaCloud struct {
 	apiKey     string
 	baseURL    string
@@ -43,9 +45,11 @@ func (o *OllamaCloud) Name() string { return "ollama_cloud" }
 
 func (o *OllamaCloud) Available() bool { return o.apiKey != "" }
 
 func (o *OllamaCloud) Chat(ctx context.Context, req Request) (*Response, error) {
-	model := StripPrefix(req.Model, "cloud")
-	// ":cloud" suffix passes through unchanged — that's the
-	// canonical name on ollama.com.
+	// Strip "ollama_cloud/" prefix (Phase 4 scrum fix B-3 — Opus BLOCK).
+	// Pre-fix used "cloud" which never matched, so upstream got the
+	// prefixed model name and 400'd. ":cloud" suffix models pass
+	// through unchanged — that's the canonical name on ollama.com.
+	model := StripPrefix(req.Model, "ollama_cloud")
 
 	system, prompt := flattenMessages(req.Messages)
 
@@ -62,7 +66,11 @@ func (o *OllamaCloud) Chat(ctx context.Context, req Request) (*Response, error)
 			// Cloud reasoning models need headroom — 400 floor matches
 			// the Rust adapter's policy for kimi-k2.6 / gpt-oss:120b.
 			"num_predict": maxInt(req.MaxTokens, 400),
-			"temperature": defaultFloat(req.Temperature, 0.3),
+			// Cloud-tier default 0.3 when caller didn't set; honor an
+			// explicit value otherwise. Anthropic 4.7 deprecation
+			// doesn't apply here (Ollama Cloud is its own server),
+			// so always send.
+ "temperature": tempPtrOr(req.Temperature, 0.3), }, } if system != "" { @@ -98,6 +106,7 @@ func (o *OllamaCloud) Chat(ctx context.Context, req Request) (*Response, error) Model string `json:"model"` Response string `json:"response"` Done bool `json:"done"` + DoneReason string `json:"done_reason"` PromptEvalCount int `json:"prompt_eval_count"` EvalCount int `json:"eval_count"` } @@ -110,7 +119,7 @@ func (o *OllamaCloud) Chat(ctx context.Context, req Request) (*Response, error) Content: cloudResp.Response, InputTokens: cloudResp.PromptEvalCount, OutputTokens: cloudResp.EvalCount, - FinishReason: finishReasonFromDone(cloudResp.Done), + FinishReason: finishReasonFromOllama(cloudResp.Done, cloudResp.DoneReason), }, nil } @@ -146,3 +155,10 @@ func defaultFloat(v, fallback float64) float64 { } return v } + +func tempPtrOr(v *float64, fallback float64) float64 { + if v == nil { + return fallback + } + return *v +} diff --git a/internal/chat/openai_compat.go b/internal/chat/openai_compat.go index 2bfbfe7..281fceb 100644 --- a/internal/chat/openai_compat.go +++ b/internal/chat/openai_compat.go @@ -49,10 +49,18 @@ func (c *openaiCompat) Chat(ctx context.Context, req Request) (*Response, error) model := StripPrefix(req.Model, c.prefix) body := map[string]any{ - "model": model, - "messages": req.Messages, - "stream": false, - "temperature": req.Temperature, + "model": model, + "messages": req.Messages, + "stream": false, + } + // Anthropic 4.7 (via OpenCode → Anthropic) rejects the temperature + // field entirely with "temperature is deprecated for this model". + // Send only when caller explicitly set it. Nil = use upstream + // default. Keep this true for ALL openai-compat providers — being + // permissive about field omission is better than maintaining a + // per-model deprecation table. + if req.Temperature != nil { + body["temperature"] = *req.Temperature } if req.MaxTokens > 0 { body["max_tokens"] = req.MaxTokens diff --git a/internal/chat/openai_compat_test.go b/internal/chat/openai_compat_test.go index a0e4ac8..d6ecafc 100644 --- a/internal/chat/openai_compat_test.go +++ b/internal/chat/openai_compat_test.go @@ -113,6 +113,102 @@ func TestOpenAICompat_EmptyKeyUnavailable(t *testing.T) { } } +// TestOllamaCloud_StripsCorrectPrefix locks in scrum fix B-3. +// Pre-fix: StripPrefix(req.Model, "cloud") never matched "ollama_cloud/" +// because the prefix string was "cloud/" not "ollama_cloud/". Result was +// the upstream got the prefixed model name and 400'd. This test would +// have caught it had it existed. +func TestOllamaCloud_StripsCorrectPrefix(t *testing.T) { + captured := "" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + bs, _ := io.ReadAll(r.Body) + captured = string(bs) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + _, _ = w.Write([]byte(`{"model":"deepseek-v3.2","response":"ok","done":true,"done_reason":"stop"}`)) + })) + t.Cleanup(srv.Close) + + // Inject the test server URL via reflection-free constructor — + // just build the struct directly with our overrides since + // NewOllamaCloud hardcodes ollama.com. 
+	o := &OllamaCloud{
+		apiKey:     "test-key",
+		baseURL:    srv.URL,
+		httpClient: srv.Client(),
+	}
+
+	cases := []struct {
+		input    string
+		expected string // model the upstream should see
+	}{
+		// Prefix form (Phase 4 scrum fix B-3): strip "ollama_cloud/"
+		{"ollama_cloud/deepseek-v3.2", "deepseek-v3.2"},
+		{"ollama_cloud/kimi-k2.6:cloud", "kimi-k2.6:cloud"},
+		// Suffix form: nothing to strip (already canonical)
+		{"kimi-k2.6:cloud", "kimi-k2.6:cloud"},
+		{"qwen3-coder:480b", "qwen3-coder:480b"},
+	}
+	for _, c := range cases {
+		captured = ""
+		_, err := o.Chat(context.Background(), Request{
+			Model:    c.input,
+			Messages: []Message{{Role: "user", Content: "hi"}},
+		})
+		if err != nil {
+			t.Errorf("Chat(%q): %v", c.input, err)
+			continue
+		}
+		var sent map[string]any
+		if err := json.Unmarshal([]byte(captured), &sent); err != nil {
+			t.Fatalf("parse captured: %v", err)
+		}
+		got, _ := sent["model"].(string)
+		if got != c.expected {
+			t.Errorf("Chat(%q): upstream got model=%q, want %q", c.input, got, c.expected)
+		}
+	}
+}
+
+func TestFinishReasonFromOllama_PrefersDoneReason(t *testing.T) {
+	// Phase 4 scrum fix B-4: done_reason takes precedence when present.
+	cases := []struct {
+		done       bool
+		doneReason string
+		want       string
+	}{
+		{true, "stop", "stop"},
+		{true, "length", "length"}, // truncation surfaced — pre-fix lost this
+		{true, "", "stop"},         // legacy fallback
+		{false, "", "length"},
+		{false, "stop", "stop"}, // done_reason wins regardless of done
+	}
+	for _, c := range cases {
+		got := finishReasonFromOllama(c.done, c.doneReason)
+		if got != c.want {
+			t.Errorf("finishReasonFromOllama(%v, %q) = %q, want %q", c.done, c.doneReason, got, c.want)
+		}
+	}
+}
+
+func TestRegistry_AvailableLooksUpDirectly(t *testing.T) {
+	// Phase 4 scrum fix B-2: Available() reports registry membership,
+	// not routing. Unregistered name → false; registered + Available()
+	// → true; registered but empty key → false.
+	registered := newFake("openrouter", true)
+	disabled := newFake("opencode", false) // "registered" but no key
+	r := NewRegistry(registered, disabled)
+	if !r.Available("openrouter") {
+		t.Errorf("openrouter should be Available")
+	}
+	if r.Available("opencode") {
+		t.Errorf("opencode has no key — Available should be false")
+	}
+	if r.Available("nonexistent") {
+		t.Errorf("nonexistent should be false")
+	}
+}
+
 func TestNewProviderConstructors(t *testing.T) {
 	// Smoke: each public constructor produces a working provider with
 	// the right name/prefix and respects empty-key=unavailable.
diff --git a/internal/chat/registry.go b/internal/chat/registry.go
index 481581f..34b1e77 100644
--- a/internal/chat/registry.go
+++ b/internal/chat/registry.go
@@ -3,6 +3,7 @@ package chat
 import (
 	"context"
 	"fmt"
+	"sort"
 	"strings"
 	"time"
 )
@@ -53,16 +54,22 @@ func (r *Registry) Names() []string {
 	for n := range r.providers {
 		out = append(out, n)
 	}
-	// Sorted manually to avoid pulling sort import for one call site;
-	// O(n²) is fine for n≤10.
-	for i := 1; i < len(out); i++ {
-		for j := i; j > 0 && out[j] < out[j-1]; j-- {
-			out[j], out[j-1] = out[j-1], out[j]
-		}
-	}
+	sort.Strings(out)
 	return out
 }
 
+// Available reports whether the named provider is registered AND
+// its Available() method returns true. Returns false for unregistered
+// names — used by /v1/chat/providers (Phase 4 scrum fix B-2: replaces
+// the prior synthetic-route probe that mis-routed via Resolve).
+func (r *Registry) Available(name string) bool {
+	p, ok := r.providers[name]
+	if !ok {
+		return false
+	}
+	return p.Available()
+}
+
 // Resolve returns the Provider for a model name. Resolution rules:
 //
 //  1. Empty model → ErrProviderNotFound
diff --git a/internal/chat/types.go b/internal/chat/types.go
index bbe73af..d3dc56f 100644
--- a/internal/chat/types.go
+++ b/internal/chat/types.go
@@ -60,15 +60,25 @@ type Message struct {
 // to their wire format (OpenAI Chat Completions, Ollama /api/chat,
 // Ollama Cloud /api/generate, etc.). Format="json" asks the provider
 // to return JSON-only output (constrained decoding when supported).
+//
+// Temperature is *float64 (pointer) so callers can distinguish "not
+// set" (nil) from "set to 0" (deterministic). Anthropic 4.7 rejects
+// the temperature field entirely, so providers omit it from the
+// upstream call when this is nil.
 type Request struct {
 	Model       string    `json:"model"`
 	Messages    []Message `json:"messages"`
-	Temperature float64   `json:"temperature"`
+	Temperature *float64  `json:"temperature,omitempty"`
 	MaxTokens   int       `json:"max_tokens,omitempty"`
 	Format      string    `json:"format,omitempty"` // "" or "json"
 	Stream      bool      `json:"stream,omitempty"` // ignored in G0 — chatd is synchronous
 }
 
+// Temp is a convenience for callers building a Request literal:
+//
+//	chat.Request{Model: "...", Temperature: chat.Temp(0.0), ...}
+func Temp(v float64) *float64 { return &v }
+
 // Response is the provider-neutral chat response. LatencyMs is filled
 // by the dispatcher (Provider implementations don't track it).
 // Provider names the resolved provider for telemetry / debug.
diff --git a/scripts/playbook_lift.sh b/scripts/playbook_lift.sh
index 170a73a..aad26a8 100755
--- a/scripts/playbook_lift.sh
+++ b/scripts/playbook_lift.sh
@@ -134,9 +134,11 @@ echo "[lift] ingest candidates..."
 
 echo
 echo "[lift] running driver — judge=$EFFECTIVE_JUDGE · queries=$QUERIES_FILE · k=$K"
-# Pass empty -judge so the Go driver runs its config resolution chain
-# unless the operator explicitly set JUDGE_MODEL (in which case we
-# pass it through, taking priority over config).
+# -judge "$JUDGE_MODEL" passes either the explicit env override or
+# the empty string. The Go driver treats empty -judge as "not set"
+# and runs its own resolution chain (env → config → fallback). When
+# JUDGE_MODEL IS set, the explicit -judge wins inside the Go driver
+# regardless of what its env-lookup would find — flag wins by design.
 ./bin/playbook_lift \
 	-config "$CONFIG_PATH" \
 	-gateway "http://127.0.0.1:3110" \
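Reviewer note: the sketch below is illustrative only and not part of the diff. It shows how a caller sees the two public-surface changes above, the two-arg chat.ResolveKey and the pointer-valued Temperature via the chat.Temp helper. The module path, env-var name, and model names are placeholders; only chat.ResolveKey, chat.Request, chat.Message, and chat.Temp come from this change set.

// Hypothetical call site, assumed to live in the same module as
// internal/chat (the import path below is a placeholder).
package main

import (
	"fmt"

	"example.invalid/lakehouse/internal/chat" // placeholder module path
)

func main() {
	// B-1: one canonical name drives both lookups: the env var itself,
	// then the same "OPENROUTER_API_KEY=..." key inside the .env file.
	key := chat.ResolveKey("OPENROUTER_API_KEY", "/etc/lakehouse/openrouter.env")
	fmt.Println("openrouter key present:", key != "")

	// Pointer temperature: chat.Temp(0) is an explicit, deterministic 0;
	// leaving the field nil lets each provider omit it on the upstream call.
	explicit := chat.Request{
		Model:       "openrouter/some-model", // placeholder model name
		Messages:    []chat.Message{{Role: "user", Content: "hi"}},
		Temperature: chat.Temp(0),
	}
	inherit := chat.Request{
		Model:    "openrouter/some-model",
		Messages: []chat.Message{{Role: "user", Content: "hi"}},
		// Temperature omitted (nil): the provider default applies.
	}
	_, _ = explicit, inherit
}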