// Ollama provider — local-first per PROMPT.md.
//
// HealthCheck: probes /api/tags (server up + model list) + a tiny
// completion + a strict-JSON probe. Used by `model doctor`.
//
// Complete + CompleteJSON: POST /api/chat with stream=false. JSON
// mode uses Ollama's native `format: "json"` — newer Ollama versions
// also accept a JSON Schema there, but format=json is the lowest-
// common-denominator that works back to 0.4.
//
// `think: false` is set for ALL completions per the Lakehouse-Go
// 2026-04-30 finding: qwen3.5:latest and qwen3:latest are reasoning-
// capable, but the inner-loop hot path wants direct answers, not
// `<think>` traces consuming the token budget. Callers that NEED
// reasoning override via opts (Phase F+, not yet wired).
package llm

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

// OllamaProvider is the concrete impl. Stateless; safe for concurrent
// use (the http.Client handles connection pooling).
type OllamaProvider struct {
	baseURL    string
	httpClient *http.Client
}

// NewOllama returns a provider pointed at baseURL. Empty baseURL
// defaults to http://localhost:11434. timeout 0 → 120s (matches
// model-profile default).
func NewOllama(baseURL string, timeout time.Duration) *OllamaProvider {
	if baseURL == "" {
		baseURL = "http://localhost:11434"
	}
	if timeout == 0 {
		timeout = 120 * time.Second
	}
	return &OllamaProvider{
		baseURL:    strings.TrimRight(baseURL, "/"),
		httpClient: &http.Client{Timeout: timeout},
	}
}

func (o *OllamaProvider) Name() string { return "ollama" }

// HealthCheck runs the 5 probes documented in REPORT_SCHEMA.md
// model-doctor.json shape:
// - server_available: GET /api/tags returns 2xx
// - primary_model_available: name appears in tag list
// - fallback_model_available: name appears in tag list
// - basic_prompt_ok: a short "reply OK" completion round-trips
// - json_mode_ok: a JSON probe parses cleanly
//
// Errors surface in HealthStatus.Errors as human-readable strings
// (no stack-trace shape — operators run this from a shell).
func (o *OllamaProvider) HealthCheck(ctx context.Context, primary, fallback string) HealthStatus {
	st := HealthStatus{Errors: []string{}}

	// 1. Server availability + model list
	tags, err := o.listTags(ctx)
	if err != nil {
		st.Errors = append(st.Errors, "list models: "+err.Error())
		return st
	}
	st.ServerAvailable = true
	loaded := map[string]bool{}
	for _, t := range tags {
		loaded[t] = true
	}
	st.PrimaryModelAvailable = primary != "" && loaded[primary]
	st.FallbackModelAvailable = fallback != "" && loaded[fallback]

	// Pick the model we'll use for the live probes — primary if
	// loaded, else fallback, else the first model Ollama has.
	probeModel := ""
	switch {
	case st.PrimaryModelAvailable:
		probeModel = primary
	case st.FallbackModelAvailable:
		probeModel = fallback
	case len(tags) > 0:
		probeModel = tags[0]
		st.Errors = append(st.Errors, fmt.Sprintf("neither primary %q nor fallback %q loaded; using %q for liveness probe", primary, fallback, probeModel))
	default:
		st.Errors = append(st.Errors, "no models loaded; can't run liveness probe")
		return st
	}

	// 2. Basic completion. Scrum fix B3 (Kimi BLOCK + Opus BLOCK,
	// 2026-04-30): checks that the response actually contains "OK"
	// (case-insensitive, substring) — pre-fix accepted any non-empty
	// string, so a thinking-model's `<think>…</think>` trace or an
	// apology passed silently. Substring rather than equality because
	// some models surround the answer with whitespace or a trailing
	// period.
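	// Under that rule, responses like "OK", "OK.", or " ok\n" all
	// pass; an empty string, or a refusal with no "ok" substring,
	// lands in st.Errors (illustrative examples, not captured output).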
	if got, err := o.Complete(ctx, probeModel, "Reply with the single word: OK", CompleteOptions{Temperature: 0, MaxTokens: 8, TimeoutSeconds: 30}); err != nil {
		st.Errors = append(st.Errors, "basic prompt: "+err.Error())
	} else if strings.Contains(strings.ToUpper(got), "OK") {
		st.BasicPromptOK = true
	} else {
		st.Errors = append(st.Errors, fmt.Sprintf("basic prompt: expected 'OK', got %q", abbrev(got, 80)))
	}

	// 3. JSON-mode completion
	jsonGot, err := o.CompleteJSON(ctx, probeModel, `Output exactly this JSON and nothing else: {"ok": true}`, CompleteOptions{Temperature: 0, MaxTokens: 32, TimeoutSeconds: 30})
	if err != nil {
		st.Errors = append(st.Errors, "json mode: "+err.Error())
	} else {
		var probe struct{ Ok bool }
		if json.Unmarshal([]byte(jsonGot), &probe) == nil {
			st.JSONModeOK = true
		} else {
			st.Errors = append(st.Errors, "json mode: parse failed; raw="+abbrev(jsonGot, 200))
		}
	}
	return st
}

// Complete posts to /api/chat with stream=false. Returns just the
// assistant content; token counts are not surfaced (callers that need
// them go via the chat-shape API directly, which we'll expose later).
func (o *OllamaProvider) Complete(ctx context.Context, model, prompt string, opts CompleteOptions) (string, error) {
	body := o.chatBody(model, prompt, opts, false)
	return o.postChat(ctx, body, opts)
}

// CompleteJSON requests Ollama's native JSON-mode constrained output.
// The `format: "json"` field forces grammar-constrained generation —
// the model can only emit valid JSON. Some models still emit garbage
// in the content field (e.g. preamble text); validation is the
// caller's job (PROMPT.md "AI may suggest. Code validates.").
func (o *OllamaProvider) CompleteJSON(ctx context.Context, model, prompt string, opts CompleteOptions) (string, error) {
	body := o.chatBody(model, prompt, opts, true)
	return o.postChat(ctx, body, opts)
}

func (o *OllamaProvider) chatBody(model, prompt string, opts CompleteOptions, jsonMode bool) map[string]any {
	options := map[string]any{}
	// Scrum fix B4 (Opus BLOCK, 2026-04-30): always forward the
	// caller-supplied Temperature, including 0. Pre-fix `if != 0`
	// silently dropped the field for callers wanting deterministic
	// generation, so Ollama's ~0.8 default applied to the JSON
	// probe + every reviewer call. CompleteOptions.Temperature is
	// still float64 (not *float64) — the harness's two callers
	// (HealthCheck, Reviewer) always set it explicitly, so "0 means
	// 0" is the right semantic. If a future caller wants "use
	// Ollama's default", that's a code change here: drop the
	// temperature entry, or switch to *float64 like chatd did.
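	// With the fix, CompleteOptions{Temperature: 0} marshals as
	// `"options":{"temperature":0}` rather than `"options":{}`
	// (illustrative wire shape; encoding/json emits map keys in
	// sorted order).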
options["temperature"] = opts.Temperature if opts.MaxTokens > 0 { options["num_predict"] = opts.MaxTokens } body := map[string]any{ "model": model, "messages": []map[string]any{ {"role": "user", "content": prompt}, }, "stream": false, "think": false, // local hot path skips reasoning by default "options": options, } if jsonMode { body["format"] = "json" } return body } func (o *OllamaProvider) postChat(ctx context.Context, body map[string]any, opts CompleteOptions) (string, error) { bs, _ := json.Marshal(body) req, err := http.NewRequestWithContext(ctx, "POST", o.baseURL+"/api/chat", bytes.NewReader(bs)) if err != nil { return "", err } req.Header.Set("Content-Type", "application/json") cli := o.httpClient if opts.TimeoutSeconds > 0 { cli = &http.Client{Timeout: time.Duration(opts.TimeoutSeconds) * time.Second} } resp, err := cli.Do(req) if err != nil { if errors.Is(ctx.Err(), context.DeadlineExceeded) { return "", fmt.Errorf("ollama timeout") } return "", fmt.Errorf("ollama request: %w", err) } defer resp.Body.Close() rb, _ := io.ReadAll(resp.Body) if resp.StatusCode/100 != 2 { return "", fmt.Errorf("ollama %d: %s", resp.StatusCode, abbrev(string(rb), 200)) } var out struct { Message struct { Content string `json:"content"` } `json:"message"` Done bool `json:"done"` DoneReason string `json:"done_reason"` } if err := json.Unmarshal(rb, &out); err != nil { return "", fmt.Errorf("ollama decode: %w (body=%s)", err, abbrev(string(rb), 200)) } return out.Message.Content, nil } // listTags hits /api/tags and returns the loaded-model name list. func (o *OllamaProvider) listTags(ctx context.Context) ([]string, error) { cctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() req, _ := http.NewRequestWithContext(cctx, "GET", o.baseURL+"/api/tags", nil) resp, err := o.httpClient.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode/100 != 2 { return nil, fmt.Errorf("status %d", resp.StatusCode) } rb, _ := io.ReadAll(resp.Body) var out struct { Models []struct { Name string `json:"name"` } `json:"models"` } if err := json.Unmarshal(rb, &out); err != nil { return nil, err } names := make([]string, 0, len(out.Models)) for _, m := range out.Models { names = append(names, m.Name) } return names, nil } func abbrev(s string, n int) string { if len(s) <= n { return s } return s[:n] + "…" }