3-lineage scrum (Opus 4.7 / Kimi K2.6 / Qwen3-coder) on today's wave landed 4 real findings (2 BLOCK + 2 WARN) and 2 INFO touch-ups. Verbatim verdicts + disposition table at: reports/scrum/_evidence/2026-04-30/ B-1 (BLOCK Opus + INFO Kimi convergent) — ResolveKey API: collapse from 3-arg (envVar, envFileName, envFilePath) to 2-arg (envVar, envFilePath). Pre-fix every chatd caller passed the env var name twice; if operator renamed *_key_env in lakehouse.toml while keeping the canonical KEY= line in the .env file, fallback silently missed. B-2 (WARN Opus + WARN Kimi convergent) — handleProviders probe: drop the synthesize-then-Resolve probe; look up by name directly via Registry.Available(name). Prior probe synthesized "<name>/probe" model strings and routed through Resolve, fragile to any future routing rule (e.g. cloud-suffix special case). B-3 (BLOCK Opus single — verified by trace + end-to-end probe) — OllamaCloud.Chat StripPrefix used "cloud" but registry routes "ollama_cloud/<m>". Result: upstream got the prefixed model name and 400'd. Smoke missed it because chatd_smoke runs without ollama_cloud registered. Now strips the right prefix; new TestOllamaCloud_StripsCorrectPrefix locks both prefix + suffix cases. Verified live: ollama_cloud/deepseek-v3.2 round-trips cleanly through the real ollama.com endpoint. B-4 (WARN Opus single) — Ollama finishReason: read done_reason field instead of inferring from done bool alone. Newer Ollama reports done=true with done_reason="length" on truncation; the prior code mapped that to "stop" and lost the truncation signal the playbook_lift judge needs to retry. New TestFinishReasonFromOllama_PrefersDoneReason covers the fallback ladder. INFOs: - B-5: replace hand-rolled insertion sort in Registry.Names with sort.Strings (Opus called the "avoid sort import" comment a false economy — correct). 
- A-1: clarify the playbook_lift.sh comment around -judge "" arg passing (Opus noted the comment said "env priority" but didn't reflect that the empty arg also passes through the Go driver's resolution chain). False positives dismissed (3, documented in disposition.md): - Kimi: TestMaybeDowngrade_WithConfigList wrong assertion (test IS correct per design — model excluded from weak list = strong = downgrade) - Qwen: nil-deref claim (defensive code already handles nil) - Opus: qwen3.5:latest doesn't exist on Ollama hub (true on the public hub but local install has it) just verify: PASS. chatd_smoke 6/6 PASS. New regression tests: 3 (B-2, B-3, B-4 each get a focused test). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
150 lines
4.9 KiB
Go
150 lines
4.9 KiB
Go
package chat
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// openaiCompat implements the OpenAI Chat Completions wire format,
// which OpenRouter / OpenCode / Kimi all speak. Differences between
// those three providers are limited to:
//   - base URL (e.g. /v1/chat/completions vs /zen/v1/chat/completions)
//   - prefix stripped from req.Model
//   - bearer token source (different env vars)
//
// Each concrete provider wraps this with its own constructor pinning
// those values.
type openaiCompat struct {
	name       string       // provider identifier, returned by Name() and used in error messages
	baseURL    string       // API root; stored with trailing slashes trimmed by the constructor
	apiKey     string       // bearer token; empty string means Available() reports false
	prefix     string       // model-name prefix stripped from req.Model before the upstream call
	httpClient *http.Client // per-provider client carrying the request timeout
}
|
|
|
|
func newOpenAICompat(name, baseURL, apiKey, prefix string, timeout time.Duration) *openaiCompat {
|
|
if timeout == 0 {
|
|
timeout = 180 * time.Second
|
|
}
|
|
return &openaiCompat{
|
|
name: name,
|
|
baseURL: strings.TrimRight(baseURL, "/"),
|
|
apiKey: apiKey,
|
|
prefix: prefix,
|
|
httpClient: &http.Client{Timeout: timeout},
|
|
}
|
|
}
|
|
|
|
// Name reports the provider's registry identifier.
func (c *openaiCompat) Name() string {
	return c.name
}
|
|
// Available reports whether an API key was resolved for this provider.
func (c *openaiCompat) Available() bool {
	return len(c.apiKey) > 0
}
|
|
|
|
func (c *openaiCompat) Chat(ctx context.Context, req Request) (*Response, error) {
|
|
model := StripPrefix(req.Model, c.prefix)
|
|
|
|
body := map[string]any{
|
|
"model": model,
|
|
"messages": req.Messages,
|
|
"stream": false,
|
|
}
|
|
// Anthropic 4.7 (via OpenCode → Anthropic) rejects the temperature
|
|
// field entirely with "temperature is deprecated for this model".
|
|
// Send only when caller explicitly set it. Nil = use upstream
|
|
// default. Keep this true for ALL openai-compat providers — being
|
|
// permissive about field omission is better than maintaining a
|
|
// per-model deprecation table.
|
|
if req.Temperature != nil {
|
|
body["temperature"] = *req.Temperature
|
|
}
|
|
if req.MaxTokens > 0 {
|
|
body["max_tokens"] = req.MaxTokens
|
|
}
|
|
if req.Format == "json" {
|
|
// OpenAI-compat response_format. OpenRouter passes through;
|
|
// OpenCode + Kimi accept it. Worst case the upstream ignores it.
|
|
body["response_format"] = map[string]any{"type": "json_object"}
|
|
}
|
|
|
|
bs, _ := json.Marshal(body)
|
|
url := c.baseURL + "/chat/completions"
|
|
httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(bs))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
httpReq.Header.Set("Content-Type", "application/json")
|
|
httpReq.Header.Set("Authorization", "Bearer "+c.apiKey)
|
|
// HTTP-Referer + X-Title are OpenRouter-specific but harmless on
|
|
// other providers — they pass them through to upstream attribution.
|
|
httpReq.Header.Set("HTTP-Referer", "https://golanglakehouse.local")
|
|
httpReq.Header.Set("X-Title", "Lakehouse-Go")
|
|
|
|
resp, err := c.httpClient.Do(httpReq)
|
|
if err != nil {
|
|
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
|
|
return nil, fmt.Errorf("%w: %s", ErrTimeout, c.name)
|
|
}
|
|
return nil, fmt.Errorf("%s: %w", c.name, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
rb, _ := io.ReadAll(resp.Body)
|
|
if resp.StatusCode/100 != 2 {
|
|
return nil, fmt.Errorf("%w: %s %d: %s", ErrUpstream, c.name, resp.StatusCode, abbrev(string(rb), 300))
|
|
}
|
|
|
|
var oaResp struct {
|
|
Model string `json:"model"`
|
|
Choices []struct {
|
|
Message struct {
|
|
Content string `json:"content"`
|
|
} `json:"message"`
|
|
FinishReason string `json:"finish_reason"`
|
|
} `json:"choices"`
|
|
Usage struct {
|
|
PromptTokens int `json:"prompt_tokens"`
|
|
CompletionTokens int `json:"completion_tokens"`
|
|
} `json:"usage"`
|
|
}
|
|
if err := json.Unmarshal(rb, &oaResp); err != nil {
|
|
return nil, fmt.Errorf("%s decode: %w (body=%s)", c.name, err, abbrev(string(rb), 200))
|
|
}
|
|
if len(oaResp.Choices) == 0 {
|
|
return nil, fmt.Errorf("%w: %s returned 0 choices: %s", ErrUpstream, c.name, abbrev(string(rb), 200))
|
|
}
|
|
|
|
return &Response{
|
|
Model: model,
|
|
Content: oaResp.Choices[0].Message.Content,
|
|
InputTokens: oaResp.Usage.PromptTokens,
|
|
OutputTokens: oaResp.Usage.CompletionTokens,
|
|
FinishReason: oaResp.Choices[0].FinishReason,
|
|
}, nil
|
|
}
|
|
|
|
// NewOpenRouter returns a provider for openrouter.ai. apiKey
|
|
// resolved at construction (typically from OPENROUTER_API_KEY env or
|
|
// /etc/lakehouse/openrouter.env).
|
|
func NewOpenRouter(apiKey string, timeout time.Duration) *openaiCompat {
|
|
return newOpenAICompat("openrouter", "https://openrouter.ai/api/v1", apiKey, "openrouter", timeout)
|
|
}
|
|
|
|
// NewOpenCode returns a provider for OpenCode's unified Zen+Go endpoint.
|
|
// One key reaches Anthropic Opus, GPT-5, Gemini 3.1, Kimi K2.6,
|
|
// DeepSeek, GLM, Qwen, plus 4 free-tier models.
|
|
func NewOpenCode(apiKey string, timeout time.Duration) *openaiCompat {
|
|
return newOpenAICompat("opencode", "https://opencode.ai/zen/v1", apiKey, "opencode", timeout)
|
|
}
|
|
|
|
// NewKimi returns a provider for the direct Kimi For Coding endpoint.
|
|
// `api.kimi.com` is a separate account system from api.moonshot.ai —
|
|
// keys are NOT interchangeable.
|
|
func NewKimi(apiKey string, timeout time.Duration) *openaiCompat {
|
|
return newOpenAICompat("kimi", "https://api.kimi.com/coding/v1", apiKey, "kimi", timeout)
|
|
}
|