package matrix

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"strings"
	"time"
)

// LLMJudgeGate is an InjectGate implementation backed by an LLM judge.
// It posts the (query, candidate) pair to an Ollama-compatible chat
// endpoint (or chatd's /v1/chat), asks for a rating on a 1-5 rubric,
// and approves the injection iff rating >= MinRating.
//
// The HTTP path is intentionally generic: any endpoint that speaks
// Ollama's /api/chat JSON shape works — bare Ollama, chatd's /v1/chat,
// or anything else honoring the same request/response format.
//
// Timeouts: each judge call runs under its own 10s request context
// (no caller context is threaded through Approve), in addition to
// whatever timeout the configured http.Client carries.
//
// Best-effort posture: a judge call that fails (network, JSON decode,
// anything) is scored 0 and therefore not approved by a gate built
// with NewLLMJudgeGate. Same fail-closed default as the inject path's
// distance gate — when the judge can't speak, don't inject (better a
// silent miss than a confident wrong-domain answer).
//
// Usage from retrieve.go:
//
//	gate := matrix.NewLLMJudgeGate(req.JudgeURL, req.JudgeModel,
//		req.JudgeMinRating, hc)
//	results, injected = matrix.InjectPlaybookMisses(req.QueryText,
//		results, hits, maxInjectDist, gate)
type LLMJudgeGate struct {
	// URL is the full chat endpoint, path included.
	URL string
	// Model is the model name sent in the request payload.
	Model string
	// MinRating is the lowest judge rating that still approves.
	MinRating int
	// HTTPClient performs the judge request.
	HTTPClient *http.Client
}

// NewLLMJudgeGate builds a judge gate, filling in defaults for the
// optional arguments: a minRating <= 0 becomes 3, and a nil hc becomes
// a client with a 10s timeout. url must include the path (e.g.
// "http://localhost:11434/api/chat" for bare Ollama).
//
// It returns nil when url or model is empty; the caller treats a nil
// InjectGate as "no judge configured, default-approve" per the
// InjectPlaybookMisses contract.
func NewLLMJudgeGate(url, model string, minRating int, hc *http.Client) *LLMJudgeGate {
	if len(url) == 0 || len(model) == 0 {
		return nil
	}

	// Start from the defaults, then layer the caller's choices on top.
	gate := &LLMJudgeGate{
		URL:        url,
		Model:      model,
		MinRating:  3,
		HTTPClient: hc,
	}
	if minRating > 0 {
		gate.MinRating = minRating
	}
	if gate.HTTPClient == nil {
		gate.HTTPClient = &http.Client{Timeout: 10 * time.Second}
	}
	return gate
}

// Approve asks the LLM judge to rate the (query, hit) pair and reports
// whether that rating meets MinRating. A nil gate or an empty query
// approves without calling the judge. A judge call that fails yields a
// rating of 0, which is below any MinRating the constructor can
// produce (fail-closed — see type doc).
func (g *LLMJudgeGate) Approve(query string, hit PlaybookHit) bool { if g == nil || query == "" { // No judge or no query to judge against — treat as approve. // Empty-query case mirrors InjectPlaybookMisses' contract: // callers without a query string can't usefully judge. return true } rating := g.rate(query, hit) return rating >= g.MinRating } func (g *LLMJudgeGate) rate(query string, hit PlaybookHit) int { system := `You rate retrieval results for a staffing co-pilot. Rate the result 1-5 against the query: 5 = perfect match (this person/role IS what was asked for) 4 = strong match (right field, right level, minor mismatches) 3 = adjacent match (related field or partial overlap) 2 = weak/tangential match 1 = irrelevant Output JSON only: {"rating": N, "reason": ""}.` // We pass the recorded query text + answer ID to give the judge // minimal context. Production might also fetch the answer's // metadata, but that requires a second HTTP hop; the recorded // query is usually enough to sniff wrong-domain matches. 
user := fmt.Sprintf("Query: %q\n\nCandidate playbook entry:\n recorded_query: %q\n answer_id: %s\n answer_corpus: %s\n recorded_score: %.2f", query, hit.Entry.QueryText, hit.Entry.AnswerID, hit.Entry.AnswerCorpus, hit.Entry.Score) body, _ := json.Marshal(map[string]any{ "model": g.Model, "stream": false, "format": "json", "messages": []map[string]string{ {"role": "system", "content": system}, {"role": "user", "content": user}, }, "options": map[string]any{"temperature": 0}, }) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() req, err := http.NewRequestWithContext(ctx, "POST", g.URL, bytes.NewReader(body)) if err != nil { slog.Warn("matrix.judge: build request", "err", err) return 0 } req.Header.Set("Content-Type", "application/json") resp, err := g.HTTPClient.Do(req) if err != nil { slog.Warn("matrix.judge: HTTP", "err", err, "url", g.URL) return 0 } defer resp.Body.Close() if resp.StatusCode/100 != 2 { slog.Warn("matrix.judge: non-2xx", "status", resp.StatusCode, "url", g.URL) return 0 } rb, _ := io.ReadAll(resp.Body) var ollamaResp struct { Message struct { Content string `json:"content"` } `json:"message"` } if err := json.Unmarshal(rb, &ollamaResp); err != nil { slog.Warn("matrix.judge: decode envelope", "err", err) return 0 } var v struct { Rating int `json:"rating"` } // Some chat endpoints wrap content in markdown code fences even // with format=json. Strip leading/trailing whitespace + fences. content := strings.TrimSpace(ollamaResp.Message.Content) content = strings.TrimPrefix(content, "```json") content = strings.TrimPrefix(content, "```") content = strings.TrimSuffix(content, "```") content = strings.TrimSpace(content) if err := json.Unmarshal([]byte(content), &v); err != nil { slog.Warn("matrix.judge: decode rating", "err", err, "content", content) return 0 } if v.Rating < 1 || v.Rating > 5 { return 0 } return v.Rating }