3-lineage scrum (Opus 4.7 / Kimi K2.6 / Qwen3-coder) on today's wave landed 4 real findings (2 BLOCK + 2 WARN) and 2 INFO touch-ups. Verbatim verdicts + disposition table at: reports/scrum/_evidence/2026-04-30/ B-1 (BLOCK Opus + INFO Kimi convergent) — ResolveKey API: collapse from 3-arg (envVar, envFileName, envFilePath) to 2-arg (envVar, envFilePath). Pre-fix every chatd caller passed the env var name twice; if operator renamed *_key_env in lakehouse.toml while keeping the canonical KEY= line in the .env file, fallback silently missed. B-2 (WARN Opus + WARN Kimi convergent) — handleProviders probe: drop the synthesize-then-Resolve probe; look up by name directly via Registry.Available(name). Prior probe synthesized "<name>/probe" model strings and routed through Resolve, fragile to any future routing rule (e.g. cloud-suffix special case). B-3 (BLOCK Opus single — verified by trace + end-to-end probe) — OllamaCloud.Chat StripPrefix used "cloud" but registry routes "ollama_cloud/<m>". Result: upstream got the prefixed model name and 400'd. Smoke missed it because chatd_smoke runs without ollama_cloud registered. Now strips the right prefix; new TestOllamaCloud_StripsCorrectPrefix locks both prefix + suffix cases. Verified live: ollama_cloud/deepseek-v3.2 round-trips cleanly through the real ollama.com endpoint. B-4 (WARN Opus single) — Ollama finishReason: read done_reason field instead of inferring from done bool alone. Newer Ollama reports done=true with done_reason="length" on truncation; the prior code mapped that to "stop" and lost the truncation signal the playbook_lift judge needs to retry. New TestFinishReasonFromOllama_PrefersDoneReason covers the fallback ladder. INFOs: - B-5: replace hand-rolled insertion sort in Registry.Names with sort.Strings (Opus called the "avoid sort import" comment a false economy — correct). - A-1: clarify the playbook_lift.sh comment around -judge "" arg passing (Opus noted the comment said "env priority" but didn't reflect that the empty arg also passes through the Go driver's resolution chain). False positives dismissed (3, documented in disposition.md): - Kimi: TestMaybeDowngrade_WithConfigList wrong assertion (test IS correct per design — model excluded from weak list = strong = downgrade) - Qwen: nil-deref claim (defensive code already handles nil) - Opus: qwen3.5:latest doesn't exist on Ollama hub (true on the public hub but local install has it) just verify: PASS. chatd_smoke 6/6 PASS. New regression tests: 3 (B-2, B-3, B-4 each get a focused test). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
228 lines
7.7 KiB
Go
228 lines
7.7 KiB
Go
package chat
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// openaiServer returns an httptest server that mocks an OpenAI Chat
|
|
// Completions endpoint. Handler captures the last request body for
|
|
// assertion.
|
|
func openaiServer(t *testing.T, status int, respBody string) (*httptest.Server, *string) {
|
|
t.Helper()
|
|
captured := ""
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
bs, _ := io.ReadAll(r.Body)
|
|
captured = string(bs)
|
|
// Ensure Authorization header was set (Bearer prefix).
|
|
if auth := r.Header.Get("Authorization"); !strings.HasPrefix(auth, "Bearer ") {
|
|
t.Errorf("missing Bearer auth header; got %q", auth)
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(status)
|
|
_, _ = w.Write([]byte(respBody))
|
|
}))
|
|
t.Cleanup(srv.Close)
|
|
return srv, &captured
|
|
}
|
|
|
|
func TestOpenAICompat_HappyPath(t *testing.T) {
|
|
resp := `{
|
|
"model": "anthropic/claude-opus-4-7",
|
|
"choices": [{"message":{"content":"hello world"}, "finish_reason":"stop"}],
|
|
"usage": {"prompt_tokens": 10, "completion_tokens": 5}
|
|
}`
|
|
srv, captured := openaiServer(t, 200, resp)
|
|
|
|
p := newOpenAICompat("openrouter", srv.URL+"/v1", "test-key", "openrouter", 5*time.Second)
|
|
out, err := p.Chat(context.Background(), Request{
|
|
Model: "openrouter/anthropic/claude-opus-4-7",
|
|
Messages: []Message{{Role: "user", Content: "hi"}},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("Chat: %v", err)
|
|
}
|
|
if out.Content != "hello world" {
|
|
t.Errorf("Content = %q, want hello world", out.Content)
|
|
}
|
|
// Provider strips its prefix before sending to upstream.
|
|
var sent map[string]any
|
|
if err := json.Unmarshal([]byte(*captured), &sent); err != nil {
|
|
t.Fatalf("parse captured: %v", err)
|
|
}
|
|
if sent["model"] != "anthropic/claude-opus-4-7" {
|
|
t.Errorf("upstream got model = %v, want anthropic/claude-opus-4-7 (prefix stripped)", sent["model"])
|
|
}
|
|
// Token accounting carried through.
|
|
if out.InputTokens != 10 || out.OutputTokens != 5 {
|
|
t.Errorf("tokens = (%d, %d), want (10, 5)", out.InputTokens, out.OutputTokens)
|
|
}
|
|
if out.FinishReason != "stop" {
|
|
t.Errorf("FinishReason = %q, want stop", out.FinishReason)
|
|
}
|
|
}
|
|
|
|
func TestOpenAICompat_FormatJSON(t *testing.T) {
|
|
resp := `{"choices":[{"message":{"content":"{\"a\":1}"},"finish_reason":"stop"}],"usage":{}}`
|
|
srv, captured := openaiServer(t, 200, resp)
|
|
p := newOpenAICompat("opencode", srv.URL+"/zen/v1", "test-key", "opencode", 5*time.Second)
|
|
|
|
_, err := p.Chat(context.Background(), Request{
|
|
Model: "opencode/claude-opus-4-7",
|
|
Messages: []Message{{Role: "user", Content: "give me JSON"}},
|
|
Format: "json",
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("Chat: %v", err)
|
|
}
|
|
// Format=json should set response_format on the upstream call.
|
|
if !strings.Contains(*captured, `"response_format"`) || !strings.Contains(*captured, `"json_object"`) {
|
|
t.Errorf("Format=json should set response_format json_object; captured=%s", *captured)
|
|
}
|
|
}
|
|
|
|
func TestOpenAICompat_UpstreamError(t *testing.T) {
|
|
srv, _ := openaiServer(t, 429, `{"error":"rate limited"}`)
|
|
p := newOpenAICompat("openrouter", srv.URL+"/v1", "test-key", "openrouter", 5*time.Second)
|
|
_, err := p.Chat(context.Background(), Request{Model: "openrouter/x"})
|
|
if !errors.Is(err, ErrUpstream) {
|
|
t.Errorf("429 should be ErrUpstream; got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOpenAICompat_ZeroChoices(t *testing.T) {
|
|
srv, _ := openaiServer(t, 200, `{"choices":[],"usage":{}}`)
|
|
p := newOpenAICompat("openrouter", srv.URL+"/v1", "test-key", "openrouter", 5*time.Second)
|
|
_, err := p.Chat(context.Background(), Request{Model: "openrouter/x"})
|
|
if !errors.Is(err, ErrUpstream) {
|
|
t.Errorf("zero-choices should ErrUpstream; got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestOpenAICompat_EmptyKeyUnavailable(t *testing.T) {
|
|
p := newOpenAICompat("openrouter", "https://example.com", "", "openrouter", 5*time.Second)
|
|
if p.Available() {
|
|
t.Errorf("empty key should make provider unavailable")
|
|
}
|
|
}
|
|
|
|
// TestOllamaCloud_StripsCorrectPrefix locks in scrum fix B-3.
|
|
// Pre-fix: StripPrefix(req.Model, "cloud") never matched "ollama_cloud/<m>"
|
|
// because the prefix string was "cloud/" not "ollama_cloud/". Result was
|
|
// the upstream got the prefixed model name and 400'd. This test would
|
|
// have caught it had it existed.
|
|
func TestOllamaCloud_StripsCorrectPrefix(t *testing.T) {
|
|
captured := ""
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
bs, _ := io.ReadAll(r.Body)
|
|
captured = string(bs)
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(200)
|
|
_, _ = w.Write([]byte(`{"model":"deepseek-v3.2","response":"ok","done":true,"done_reason":"stop"}`))
|
|
}))
|
|
t.Cleanup(srv.Close)
|
|
|
|
// Inject the test server URL via reflection-free constructor —
|
|
// just build the struct directly with our overrides since
|
|
// NewOllamaCloud hardcodes ollama.com.
|
|
o := &OllamaCloud{
|
|
apiKey: "test-key",
|
|
baseURL: srv.URL,
|
|
httpClient: srv.Client(),
|
|
}
|
|
|
|
cases := []struct {
|
|
input string
|
|
expected string // model the upstream should see
|
|
}{
|
|
// Prefix form (Phase 4 scrum fix B-3): strip "ollama_cloud/"
|
|
{"ollama_cloud/deepseek-v3.2", "deepseek-v3.2"},
|
|
{"ollama_cloud/kimi-k2.6:cloud", "kimi-k2.6:cloud"},
|
|
// Suffix form: nothing to strip (already canonical)
|
|
{"kimi-k2.6:cloud", "kimi-k2.6:cloud"},
|
|
{"qwen3-coder:480b", "qwen3-coder:480b"},
|
|
}
|
|
for _, c := range cases {
|
|
captured = ""
|
|
_, err := o.Chat(context.Background(), Request{
|
|
Model: c.input,
|
|
Messages: []Message{{Role: "user", Content: "hi"}},
|
|
})
|
|
if err != nil {
|
|
t.Errorf("Chat(%q): %v", c.input, err)
|
|
continue
|
|
}
|
|
var sent map[string]any
|
|
if err := json.Unmarshal([]byte(captured), &sent); err != nil {
|
|
t.Fatalf("parse captured: %v", err)
|
|
}
|
|
got, _ := sent["model"].(string)
|
|
if got != c.expected {
|
|
t.Errorf("Chat(%q): upstream got model=%q, want %q", c.input, got, c.expected)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestFinishReasonFromOllama_PrefersDoneReason(t *testing.T) {
|
|
// Phase 4 scrum fix B-4: done_reason takes precedence when present.
|
|
cases := []struct {
|
|
done bool
|
|
doneReason string
|
|
want string
|
|
}{
|
|
{true, "stop", "stop"},
|
|
{true, "length", "length"}, // truncation surfaced — pre-fix lost this
|
|
{true, "", "stop"}, // legacy fallback
|
|
{false, "", "length"},
|
|
{false, "stop", "stop"}, // done_reason wins regardless of done
|
|
}
|
|
for _, c := range cases {
|
|
got := finishReasonFromOllama(c.done, c.doneReason)
|
|
if got != c.want {
|
|
t.Errorf("finishReasonFromOllama(%v, %q) = %q, want %q", c.done, c.doneReason, got, c.want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestRegistry_AvailableLooksUpDirectly(t *testing.T) {
|
|
// Phase 4 scrum fix B-2: Available() reports registry membership,
|
|
// not routing. Unregistered name → false; registered + Available()
|
|
// → true; registered but empty key → false.
|
|
registered := newFake("openrouter", true)
|
|
disabled := newFake("opencode", false) // "registered" but no key
|
|
r := NewRegistry(registered, disabled)
|
|
if !r.Available("openrouter") {
|
|
t.Errorf("openrouter should be Available")
|
|
}
|
|
if r.Available("opencode") {
|
|
t.Errorf("opencode has no key — Available should be false")
|
|
}
|
|
if r.Available("nonexistent") {
|
|
t.Errorf("nonexistent should be false")
|
|
}
|
|
}
|
|
|
|
func TestNewProviderConstructors(t *testing.T) {
|
|
// Smoke: each public constructor produces a working provider with
|
|
// the right name/prefix and respects empty-key=unavailable.
|
|
or := NewOpenRouter("test-key", 0)
|
|
if or.Name() != "openrouter" || !or.Available() {
|
|
t.Errorf("openrouter constructor wrong: %s avail=%v", or.Name(), or.Available())
|
|
}
|
|
oc := NewOpenCode("test-key", 0)
|
|
if oc.Name() != "opencode" || !oc.Available() {
|
|
t.Errorf("opencode constructor wrong: %s avail=%v", oc.Name(), oc.Available())
|
|
}
|
|
km := NewKimi("", 0) // empty key → unavailable
|
|
if km.Available() {
|
|
t.Errorf("kimi with empty key should be unavailable")
|
|
}
|
|
}
|