package chat

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

// OllamaCloud is the provider adapter for Ollama Cloud (Pro plan as of
// 2026-04-28). Bearer auth via OLLAMA_CLOUD_KEY. The wire format
// mirrors the Rust adapter (crates/gateway/src/v1/ollama_cloud.rs): a
// system/prompt split against /api/generate with a cloud-friendly
// num_predict floor.
//
// Model routing: "ollama_cloud/" (prefix) and ":cloud" (suffix) both
// route here. Registry.Resolve handles the suffix case; the prefix
// case strips "ollama_cloud/" before the upstream call so ollama.com
// sees the bare model name. Suffix-form models pass through
// StripPrefix unchanged (there is no leading prefix to strip, which is
// exactly what the upstream wants, since "kimi-k2.6:cloud" is the
// canonical name on ollama.com).
type OllamaCloud struct {
	apiKey     string
	baseURL    string
	httpClient *http.Client
}

func NewOllamaCloud(apiKey string, timeout time.Duration) *OllamaCloud {
	if timeout == 0 {
		timeout = 180 * time.Second
	}
	return &OllamaCloud{
		apiKey:     apiKey,
		baseURL:    "https://ollama.com",
		httpClient: &http.Client{Timeout: timeout},
	}
}

func (o *OllamaCloud) Name() string { return "ollama_cloud" }

func (o *OllamaCloud) Available() bool { return o.apiKey != "" }

func (o *OllamaCloud) Chat(ctx context.Context, req Request) (*Response, error) {
	// Strip the "ollama_cloud/" prefix (Phase 4 scrum fix B-3, Opus
	// BLOCK). Before the fix this passed "cloud", which never matched,
	// so the upstream got the prefixed model name and returned 400.
	// ":cloud" suffix models pass through unchanged; that is the
	// canonical name on ollama.com.
	model := StripPrefix(req.Model, "ollama_cloud")

	system, prompt := flattenMessages(req.Messages)

	body := map[string]any{
		"model":  model,
		"prompt": prompt,
		"stream": false,
		// Mirror local Ollama: skip reasoning by default. Callers that
		// need thinking explicitly (e.g. for Kimi K2 long reasoning)
		// will get a future Request.Think field; the v0 default keeps
		// outputs short and predictable.
		"think": false,
		"options": map[string]any{
			// Cloud reasoning models need headroom; the 400 floor
			// matches the Rust adapter's policy for kimi-k2.6 /
			// gpt-oss:120b.
			"num_predict": maxInt(req.MaxTokens, 400),
			// Cloud-tier default of 0.3 when the caller didn't set a
			// temperature; honor an explicit value otherwise. The
			// Anthropic 4.7 deprecation doesn't apply here (Ollama
			// Cloud is its own server), so always send it.
"temperature": tempPtrOr(req.Temperature, 0.3), }, } if system != "" { body["system"] = system } if req.Format == "json" { body["format"] = "json" } bs, _ := json.Marshal(body) httpReq, err := http.NewRequestWithContext(ctx, "POST", o.baseURL+"/api/generate", bytes.NewReader(bs)) if err != nil { return nil, err } httpReq.Header.Set("Content-Type", "application/json") httpReq.Header.Set("Authorization", "Bearer "+o.apiKey) resp, err := o.httpClient.Do(httpReq) if err != nil { if errors.Is(ctx.Err(), context.DeadlineExceeded) { return nil, fmt.Errorf("%w: ollama_cloud", ErrTimeout) } return nil, fmt.Errorf("ollama_cloud: %w", err) } defer resp.Body.Close() rb, _ := io.ReadAll(resp.Body) if resp.StatusCode/100 != 2 { return nil, fmt.Errorf("%w: ollama_cloud %d: %s", ErrUpstream, resp.StatusCode, abbrev(string(rb), 300)) } var cloudResp struct { Model string `json:"model"` Response string `json:"response"` Done bool `json:"done"` DoneReason string `json:"done_reason"` PromptEvalCount int `json:"prompt_eval_count"` EvalCount int `json:"eval_count"` } if err := json.Unmarshal(rb, &cloudResp); err != nil { return nil, fmt.Errorf("ollama_cloud decode: %w (body=%s)", err, abbrev(string(rb), 200)) } return &Response{ Model: model, Content: cloudResp.Response, InputTokens: cloudResp.PromptEvalCount, OutputTokens: cloudResp.EvalCount, FinishReason: finishReasonFromOllama(cloudResp.Done, cloudResp.DoneReason), }, nil } // flattenMessages splits a Message list into Ollama Cloud's // /api/generate shape (single system + concatenated prompt). // Mirrors the Rust adapter's flatten_messages_public. func flattenMessages(messages []Message) (system, prompt string) { var sysParts []string var promptParts []string for _, m := range messages { switch m.Role { case "system": sysParts = append(sysParts, m.Content) case "assistant": promptParts = append(promptParts, "Assistant: "+m.Content) default: // "user" or anything else promptParts = append(promptParts, "User: "+m.Content) } } return strings.Join(sysParts, "\n\n"), strings.Join(promptParts, "\n\n") } func maxInt(a, b int) int { if a > b { return a } return b } func defaultFloat(v, fallback float64) float64 { if v == 0 { return fallback } return v } func tempPtrOr(v *float64, fallback float64) float64 { if v == nil { return fallback } return *v }