Implements PROMPT.md / docs/REVIEW_PIPELINE.md Phase 2:
- internal/llm/ollama.go — real Ollama provider:
- HealthCheck probes /api/tags + a 1-token completion + a JSON-mode
probe ({"ok": true} round-trip), populating the model-doctor.json
schema documented in docs/LOCAL_MODEL_SETUP.md
- Complete + CompleteJSON via /api/chat with stream=false
- think=false set for ALL completions (qwen3.5:latest is reasoning-
capable but the inner-loop hot path wants direct answers, not
reasoning traces consuming the token budget — same finding as
the Lakehouse-Go chatd 2026-04-30 wave)
- internal/llm/review.go — Reviewer wrapper (sketched after this list):
- 2-attempt flow: prompt → parse → repair-prompt → parse
- Strict JSON shape enforced; markdown fences stripped before parse
- Severity normalized to enum; out-of-range confidence clamped
- Per-file chunking (file-level for v0; function-level Phase D+)
- Bounded by review-profile max_file_bytes + max_llm_chunk_chars
- pipeline.go — Phase 2 wired between static scan + report gen (second sketch below):
- --enable-llm flag opts in (off by default — static-only is
cheaper and faster)
- Raw output ALWAYS saved to llm-findings.raw.json (forensics)
- Normalized findings → llm-findings.normalized.json
- LLM findings merged into the report findings list (sourced
"llm" so consumers can filter)
- Receipts honestly mark phase status: "ok" | "degraded" | "skipped"
- cli model doctor — real probes replace the Phase A stub.
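
A minimal sketch of the Reviewer's two-attempt flow follows (internal/llm/review.go itself is not reproduced in this commit body). It reuses the OllamaProvider and CompleteOptions shown further down; the finding fields, the severity enum values, and the repair-prompt wording are illustrative assumptions, not the real identifiers:

// review_sketch.go — illustrative only, not the real internal/llm/review.go.
package llm

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
)

// sketchFinding approximates the normalized finding shape (field names assumed).
type sketchFinding struct {
	Severity   string  `json:"severity"`
	Confidence float64 `json:"confidence"`
	Message    string  `json:"message"`
	Evidence   string  `json:"evidence"`
}

var validSeverity = map[string]bool{"info": true, "low": true, "medium": true, "high": true, "critical": true}

// reviewChunk runs the 2-attempt flow: prompt → parse → repair-prompt → parse.
func reviewChunk(ctx context.Context, p *OllamaProvider, model, prompt string) ([]sketchFinding, error) {
	ask := prompt
	for attempt := 0; attempt < 2; attempt++ {
		raw, err := p.CompleteJSON(ctx, model, ask, CompleteOptions{Temperature: 0, MaxTokens: 2048})
		if err != nil {
			return nil, err
		}
		// Strip markdown fences some models still wrap around "JSON-mode" output.
		cleaned := strings.TrimSpace(raw)
		cleaned = strings.TrimPrefix(cleaned, "```json")
		cleaned = strings.TrimPrefix(cleaned, "```")
		cleaned = strings.TrimSpace(strings.TrimSuffix(cleaned, "```"))

		var fs []sketchFinding
		if err := json.Unmarshal([]byte(cleaned), &fs); err != nil {
			// Attempt 2: hand the broken output back and ask for valid JSON only.
			ask = fmt.Sprintf("Your previous reply was not valid JSON (%v).\nReturn ONLY the corrected JSON array:\n%s", err, cleaned)
			continue
		}
		for i := range fs {
			if !validSeverity[fs[i].Severity] {
				fs[i].Severity = "info" // normalize unknown severities into the enum
			}
			if fs[i].Confidence < 0 {
				fs[i].Confidence = 0 // clamp out-of-range confidence
			}
			if fs[i].Confidence > 1 {
				fs[i].Confidence = 1
			}
		}
		return fs, nil
	}
	return nil, fmt.Errorf("chunk did not parse after repair attempt")
}

The real Reviewer also enforces the strict JSON shape and honors the review-profile chunk bounds (max_file_bytes, max_llm_chunk_chars) before prompting; that plumbing is omitted here.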
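
The pipeline.go wiring, reduced to its control flow, is sketched next. The receipt statuses, output file names, and the "llm" source tag come from the bullets above; the helper names and types are invented for illustration:

// pipeline_sketch.go — illustrative wiring only; the real pipeline.go differs in detail.
package pipeline

import (
	"encoding/json"
	"os"
	"path/filepath"
)

// reportFinding is a stand-in for the report's finding type (fields assumed).
type reportFinding struct {
	Source   string `json:"source"` // "static" or "llm" so consumers can filter
	Severity string `json:"severity"`
	Message  string `json:"message"`
}

func writeJSON(path string, v any) error {
	b, err := json.MarshalIndent(v, "", "  ")
	if err != nil {
		return err
	}
	return os.WriteFile(path, b, 0o644)
}

// llmPhase merges normalized LLM findings into the report and returns the
// receipt status for the phase: "ok", "degraded", or "skipped".
func llmPhase(enableLLM bool, outDir string, rawOutputs []string, normalized []reportFinding, reviewErr error, report *[]reportFinding) string {
	if !enableLLM {
		return "skipped" // --enable-llm is opt-in; static-only stays the default
	}
	// Raw model output is persisted even when parsing failed (forensics).
	_ = writeJSON(filepath.Join(outDir, "llm-findings.raw.json"), rawOutputs)
	if reviewErr != nil {
		return "degraded"
	}
	_ = writeJSON(filepath.Join(outDir, "llm-findings.normalized.json"), normalized)
	for _, f := range normalized {
		f.Source = "llm"
		*report = append(*report, f)
	}
	return "ok"
}

Writing the raw output before checking the error is the point: a degraded run still leaves forensics on disk.
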
Verified:
- model doctor: status="ok" with qwen3.5:latest + qwen3:latest both
loaded, basic_prompt_ok=true, json_mode_ok=true
- insecure-repo with --enable-llm: 9 LLM findings; qwen3.5 correctly
flagged SQLi, RCE, hardcoded credentials as critical with verbatim
evidence; 27s wall for 3 chunks
- clean-repo with --enable-llm: 0 LLM findings, 4 parsed chunks, 2.8s
- self-review with --enable-llm: 77 LLM findings + 83 static; 3 of
~30 chunks needed retry (PROMPT.md, REPORT_SCHEMA.md,
SCRUM_TEST_TEMPLATE.md — all eventually parsed); 5min wall
go vet + go test -short are clean. Fixture stray.go is now `package fixture`
so Go tooling doesn't choke on the orphan.
Phase D (validator cross-check) + Phase E (memory + diff/rules
subcommands) remain.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
internal/llm/ollama.go (236 lines, 7.5 KiB, Go):
// Ollama provider — local-first per PROMPT.md.
//
// HealthCheck: probes /api/tags (server up + model list) + a 1-token
// completion + a strict-JSON probe. Used by `model doctor`.
//
// Complete + CompleteJSON: POST /api/chat with stream=false. JSON
// mode uses Ollama's native `format: "json"` — newer Ollama versions
// also accept a JSON Schema there but format=json is the lowest-
// common-denominator that works back to 0.4.
//
// `think: false` is set for ALL completions per the Lakehouse-Go
// 2026-04-30 finding: qwen3.5:latest and qwen3:latest are reasoning-
// capable but the inner-loop hot path wants direct answers, not
// `<think>` traces consuming the token budget. Callers that NEED
// reasoning override via opts (Phase F+, not yet wired).
package llm

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

// OllamaProvider is the concrete impl. Stateless; safe for concurrent
// use (the http.Client handles connection pooling).
type OllamaProvider struct {
	baseURL    string
	httpClient *http.Client
}

// NewOllama returns a provider pointed at baseURL. Empty baseURL
// defaults to http://localhost:11434. timeout 0 → 120s (matches
// model-profile default).
func NewOllama(baseURL string, timeout time.Duration) *OllamaProvider {
	if baseURL == "" {
		baseURL = "http://localhost:11434"
	}
	if timeout == 0 {
		timeout = 120 * time.Second
	}
	return &OllamaProvider{
		baseURL:    strings.TrimRight(baseURL, "/"),
		httpClient: &http.Client{Timeout: timeout},
	}
}

func (o *OllamaProvider) Name() string { return "ollama" }

// HealthCheck runs the 5 probes documented in REPORT_SCHEMA.md
// model-doctor.json shape:
// - server_available: GET /api/tags returns 2xx
// - primary_model_available: name appears in tag list
// - fallback_model_available: name appears in tag list
// - basic_prompt_ok: a 5-token "reply OK" round-trips
// - json_mode_ok: a JSON probe parses cleanly
//
// Errors surface in HealthStatus.Errors as human-readable strings
// (no stack trace shape — operators run this from a shell).
func (o *OllamaProvider) HealthCheck(ctx context.Context, primary, fallback string) HealthStatus {
	st := HealthStatus{Errors: []string{}}

	// 1. Server availability + model list
	tags, err := o.listTags(ctx)
	if err != nil {
		st.Errors = append(st.Errors, "list models: "+err.Error())
		return st
	}
	st.ServerAvailable = true

	loaded := map[string]bool{}
	for _, t := range tags {
		loaded[t] = true
	}
	st.PrimaryModelAvailable = primary != "" && loaded[primary]
	st.FallbackModelAvailable = fallback != "" && loaded[fallback]

	// Pick the model we'll use for the live probes — primary if
	// loaded, else fallback, else the first model Ollama has.
	probeModel := ""
	switch {
	case st.PrimaryModelAvailable:
		probeModel = primary
	case st.FallbackModelAvailable:
		probeModel = fallback
	case len(tags) > 0:
		probeModel = tags[0]
		st.Errors = append(st.Errors,
			fmt.Sprintf("neither primary %q nor fallback %q loaded; using %q for liveness probe", primary, fallback, probeModel))
	default:
		st.Errors = append(st.Errors, "no models loaded; can't run liveness probe")
		return st
	}

	// 2. Basic completion
	if got, err := o.Complete(ctx, probeModel, "Reply with the single word: OK", CompleteOptions{Temperature: 0, MaxTokens: 8, TimeoutSeconds: 30}); err != nil {
		st.Errors = append(st.Errors, "basic prompt: "+err.Error())
	} else if strings.TrimSpace(got) != "" {
		st.BasicPromptOK = true
	}

	// 3. JSON-mode completion
	jsonGot, err := o.CompleteJSON(ctx, probeModel, `Output exactly this JSON and nothing else: {"ok": true}`, CompleteOptions{Temperature: 0, MaxTokens: 32, TimeoutSeconds: 30})
	if err != nil {
		st.Errors = append(st.Errors, "json mode: "+err.Error())
	} else {
		var probe struct{ Ok bool }
		if json.Unmarshal([]byte(jsonGot), &probe) == nil {
			st.JSONModeOK = true
		} else {
			st.Errors = append(st.Errors, "json mode: parse failed; raw="+abbrev(jsonGot, 200))
		}
	}

	return st
}

// Complete posts to /api/chat with stream=false. Returns just the
// assistant content; token counts not surfaced (callers that need
// them go via the chat-shape API directly, which we'll expose later).
func (o *OllamaProvider) Complete(ctx context.Context, model, prompt string, opts CompleteOptions) (string, error) {
	body := o.chatBody(model, prompt, opts, false)
	return o.postChat(ctx, body, opts)
}

// CompleteJSON requests Ollama's native JSON-mode constrained output.
// The `format: "json"` field forces grammar-constrained generation —
// the model can only emit valid JSON. Some models still emit garbage
// in the content field (e.g. preamble text); validation is the
// caller's job (PROMPT.md "AI may suggest. Code validates.").
func (o *OllamaProvider) CompleteJSON(ctx context.Context, model, prompt string, opts CompleteOptions) (string, error) {
	body := o.chatBody(model, prompt, opts, true)
	return o.postChat(ctx, body, opts)
}

func (o *OllamaProvider) chatBody(model, prompt string, opts CompleteOptions, jsonMode bool) map[string]any {
	options := map[string]any{}
	if opts.Temperature != 0 {
		options["temperature"] = opts.Temperature
	}
	if opts.MaxTokens > 0 {
		options["num_predict"] = opts.MaxTokens
	}
	body := map[string]any{
		"model": model,
		"messages": []map[string]any{
			{"role": "user", "content": prompt},
		},
		"stream":  false,
		"think":   false, // local hot path skips reasoning by default
		"options": options,
	}
	if jsonMode {
		body["format"] = "json"
	}
	return body
}

func (o *OllamaProvider) postChat(ctx context.Context, body map[string]any, opts CompleteOptions) (string, error) {
	bs, _ := json.Marshal(body)
	req, err := http.NewRequestWithContext(ctx, "POST", o.baseURL+"/api/chat", bytes.NewReader(bs))
	if err != nil {
		return "", err
	}
	req.Header.Set("Content-Type", "application/json")

	cli := o.httpClient
	if opts.TimeoutSeconds > 0 {
		cli = &http.Client{Timeout: time.Duration(opts.TimeoutSeconds) * time.Second}
	}
	resp, err := cli.Do(req)
	if err != nil {
		if errors.Is(ctx.Err(), context.DeadlineExceeded) {
			return "", fmt.Errorf("ollama timeout")
		}
		return "", fmt.Errorf("ollama request: %w", err)
	}
	defer resp.Body.Close()
	rb, _ := io.ReadAll(resp.Body)
	if resp.StatusCode/100 != 2 {
		return "", fmt.Errorf("ollama %d: %s", resp.StatusCode, abbrev(string(rb), 200))
	}
	var out struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
		Done       bool   `json:"done"`
		DoneReason string `json:"done_reason"`
	}
	if err := json.Unmarshal(rb, &out); err != nil {
		return "", fmt.Errorf("ollama decode: %w (body=%s)", err, abbrev(string(rb), 200))
	}
	return out.Message.Content, nil
}

// listTags hits /api/tags and returns the loaded-model name list.
func (o *OllamaProvider) listTags(ctx context.Context) ([]string, error) {
	cctx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()
	req, _ := http.NewRequestWithContext(cctx, "GET", o.baseURL+"/api/tags", nil)
	resp, err := o.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode/100 != 2 {
		return nil, fmt.Errorf("status %d", resp.StatusCode)
	}
	rb, _ := io.ReadAll(resp.Body)
	var out struct {
		Models []struct {
			Name string `json:"name"`
		} `json:"models"`
	}
	if err := json.Unmarshal(rb, &out); err != nil {
		return nil, err
	}
	names := make([]string, 0, len(out.Models))
	for _, m := range out.Models {
		names = append(names, m.Name)
	}
	return names, nil
}

func abbrev(s string, n int) string {
	if len(s) <= n {
		return s
	}
	return s[:n] + "…"
}