root 5a3364f539 matrix: judge-gated Shape B inject — closes lift-suite tail issues
Lift suite run #004 left two unresolved tail issues:
- Q6 ("Forklift loader") ↔ Q7 ("Hazmat warehouse, cold storage")
  swap recordings as warm top-1 because their embeddings are within
  0.20 cosine of each other. Distance gate can't tell them apart.
- Q9 + Q15 lose paraphrase recovery when qwen2.5 rephrases past the
  0.20 threshold. Distance says "drift too far"; sometimes the drift
  is real (skip), sometimes the paraphrase is still on-domain (don't
  want to skip).
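For reference, the 0.20 cosine gate both bullets describe amounts to a plain threshold on embedding distance. A minimal standalone sketch (the vector values here are invented for illustration; the real embedder lives elsewhere):

```go
package main

import (
	"fmt"
	"math"
)

// cosineDistance returns 1 - cosine similarity of two vectors.
// Illustrative only; not the repo's actual distance code.
func cosineDistance(a, b []float64) float64 {
	var dot, na, nb float64
	for i := range a {
		dot += a[i] * b[i]
		na += a[i] * a[i]
		nb += b[i] * b[i]
	}
	return 1 - dot/(math.Sqrt(na)*math.Sqrt(nb))
}

func main() {
	const maxInjectDist = 0.20 // the gate named above
	// Two invented embeddings standing in for Q6/Q7: close enough
	// that the distance gate alone cannot tell them apart.
	q6 := []float64{0.9, 0.1, 0.4}
	q7 := []float64{0.8, 0.2, 0.5}
	d := cosineDistance(q6, q7)
	fmt.Printf("distance %.3f, within gate: %v\n", d, d <= maxInjectDist)
}
```

The point of the bullets is exactly that this scalar check passes for wrong-domain pairs and fails for valid paraphrases; the judge gate below is the second opinion.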

Multi-coord run #008's judge re-rating proved the LLM can
distinguish: Q3 crane case landed at distance 0.23 (looks tight)
but rating 1 (irrelevant). The judge sees domain mismatch the
embedder doesn't.

This commit lifts that pattern into the matrix substrate. Shape B
inject now optionally routes every candidate through a judge gate
before the rank insert lands. Distance + judge BOTH have to approve.

internal/matrix/playbook.go:
- InjectPlaybookMisses signature gains a query string + an
  optional InjectGate. nil gate preserves pre-judge-gating
  behavior (current tests already pass with nil).
- New InjectGate interface + InjectGateFunc adapter for tests
  and non-LLM callers.
- Per-candidate gate.Approve(query, hit) call inserted between
  the dedup and the inject. Rejected candidates skip silently;
  injected count reflects post-gate decision.
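The gate shapes described above can be sketched standalone roughly as follows. This assumes an http.HandlerFunc-style adapter; the PlaybookHit fields here are invented stand-ins, not the real struct:

```go
package main

import "fmt"

// PlaybookHit is a hypothetical stand-in for the real type in
// internal/matrix; only the concept (a candidate for injection)
// is taken from the commit.
type PlaybookHit struct {
	RecordedQuery string
	Distance      float64
}

// InjectGate approves or rejects one candidate before the rank
// insert. A nil gate means "no judge configured": the caller
// default-approves, preserving pre-judge-gating behavior.
type InjectGate interface {
	Approve(query string, hit PlaybookHit) bool
}

// InjectGateFunc adapts a plain func to InjectGate, in the spirit
// of http.HandlerFunc; handy for rejectAll/approveAll test gates.
type InjectGateFunc func(query string, hit PlaybookHit) bool

func (f InjectGateFunc) Approve(query string, hit PlaybookHit) bool {
	return f(query, hit)
}

func main() {
	rejectAll := InjectGateFunc(func(string, PlaybookHit) bool { return false })
	// Even a tight-distance candidate is dropped when the gate says no.
	fmt.Println(rejectAll.Approve("forklift loader", PlaybookHit{Distance: 0.05}))
}
```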

internal/matrix/judge.go (new, ~140 lines):
- LLMJudgeGate calls an Ollama-shape /api/chat endpoint with the
  same 1-5 staffing-rubric prompt that worked in multi_coord
  run #008. Fails closed on HTTP/JSON errors (don't inject if the
  judge can't speak — better a miss than a wrong-domain inject).
- NewLLMJudgeGate returns nil when URL or Model is empty,
  matching InjectGate's nil-means-no-judge semantics.

internal/matrix/retrieve.go:
- SearchRequest gains JudgeURL, JudgeModel, JudgeMinRating
  fields. Run() builds an LLMJudgeGate when set; passes nil
  otherwise. Backward compatible — existing callers see no
  behavior change.
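The nil-means-no-judge wiring can be sketched standalone like this. The JudgeURL/JudgeModel/JudgeMinRating names come from this commit; SearchRequest's other fields and the stubGate type are invented for illustration:

```go
package main

import "fmt"

// SearchRequest is a stand-in; only the Judge* field names are
// taken from this commit.
type SearchRequest struct {
	QueryText      string
	JudgeURL       string
	JudgeModel     string
	JudgeMinRating int
}

type stubGate struct{ minRating int }

// buildGate mirrors Run()'s wiring: construct a gate only when both
// URL and model are set, otherwise return nil so existing callers
// keep the pre-judge-gating behavior.
func buildGate(req SearchRequest) *stubGate {
	if req.JudgeURL == "" || req.JudgeModel == "" {
		return nil
	}
	mr := req.JudgeMinRating
	if mr <= 0 {
		mr = 3 // same default NewLLMJudgeGate applies
	}
	return &stubGate{minRating: mr}
}

func main() {
	fmt.Println(buildGate(SearchRequest{QueryText: "forklift loader"}) == nil)
	fmt.Println(buildGate(SearchRequest{
		JudgeURL:   "http://localhost:11434/api/chat",
		JudgeModel: "qwen2.5",
	}).minRating)
}
```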

Tests:
- TestInjectPlaybookMisses_GateRejectsCandidate (rejectAll → 0
  injected, even with tight distance)
- TestInjectPlaybookMisses_GateApprovesCandidate (approveAll →
  same as nil-gate behavior)
- TestInjectPlaybookMisses_GateSeesCorrectQuery (gate receives
  CURRENT query + RECORDED query separately so it can score
  the (current, candidate) pair)
- All 5 existing inject tests updated to new signature

go test ./internal/matrix → all 8 inject tests pass.
go test ./internal/matrix ./internal/shared ./cmd/{matrixd,
queryd,pathwayd,observerd} → all green.

STATE_OF_PLAY:
- OPEN item #1 (judge-gated injection) closed.
- DO NOT RELITIGATE adds the substrate-level judge-gate lock.
- OPEN list now 5 rows (was 6).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 19:38:12 -05:00


package matrix

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"strings"
	"time"
)

// LLMJudgeGate is an InjectGate implementation that uses an Ollama-
// compatible chat endpoint (or chatd's /v1/chat) to rate the
// (query, candidate) pair on a 1-5 rubric, then approves the
// injection iff rating >= MinRating.
//
// The HTTP path is intentionally generic — works against any
// endpoint that speaks Ollama's /api/chat shape: bare Ollama,
// chatd's /v1/chat, or anything else honoring the same JSON.
// Per-call timeout is bounded by a 10s request context plus the
// http.Client's own timeout.
//
// Fail-closed posture: a judge call that fails (network, JSON
// decode, anything) returns Approve=false. Same fail-closed default
// as the inject path's distance gate — when the judge can't speak,
// don't inject (better silent miss than confident wrong-domain).
//
// Usage from retrieve.go:
//
//	gate := matrix.NewLLMJudgeGate(req.JudgeURL, req.JudgeModel,
//		req.JudgeMinRating, hc)
//	results, injected = matrix.InjectPlaybookMisses(req.QueryText,
//		results, hits, maxInjectDist, gate)
type LLMJudgeGate struct {
	URL        string
	Model      string
	MinRating  int
	HTTPClient *http.Client
}

// NewLLMJudgeGate is the constructor. Defaults: minRating 3, 10s
// HTTP timeout. URL must include the path (e.g.
// "http://localhost:11434/api/chat" for bare Ollama). Returns nil
// when URL or Model is empty — caller treats nil InjectGate as
// "no judge configured, default-approve" per InjectPlaybookMisses
// contract.
func NewLLMJudgeGate(url, model string, minRating int, hc *http.Client) *LLMJudgeGate {
	if url == "" || model == "" {
		return nil
	}
	if minRating <= 0 {
		minRating = 3
	}
	if hc == nil {
		hc = &http.Client{Timeout: 10 * time.Second}
	}
	return &LLMJudgeGate{
		URL:        url,
		Model:      model,
		MinRating:  minRating,
		HTTPClient: hc,
	}
}

// Approve calls the LLM judge with a query+candidate prompt; returns
// true iff the judge's rating meets MinRating. Errors return false
// (fail-closed — see type doc).
func (g *LLMJudgeGate) Approve(query string, hit PlaybookHit) bool {
	if g == nil || query == "" {
		// No judge or no query to judge against — treat as approve.
		// Empty-query case mirrors InjectPlaybookMisses' contract:
		// callers without a query string can't usefully judge.
		return true
	}
	rating := g.rate(query, hit)
	return rating >= g.MinRating
}

func (g *LLMJudgeGate) rate(query string, hit PlaybookHit) int {
	system := `You rate retrieval results for a staffing co-pilot.
Rate the result 1-5 against the query:
5 = perfect match (this person/role IS what was asked for)
4 = strong match (right field, right level, minor mismatches)
3 = adjacent match (related field or partial overlap)
2 = weak/tangential match
1 = irrelevant
Output JSON only: {"rating": N, "reason": "<one sentence>"}.`
	// We pass the recorded query text + answer ID to give the judge
	// minimal context. Production might also fetch the answer's
	// metadata, but that requires a second HTTP hop; the recorded
	// query is usually enough to sniff wrong-domain matches.
	user := fmt.Sprintf("Query: %q\n\nCandidate playbook entry:\n recorded_query: %q\n answer_id: %s\n answer_corpus: %s\n recorded_score: %.2f",
		query, hit.Entry.QueryText, hit.Entry.AnswerID, hit.Entry.AnswerCorpus, hit.Entry.Score)
	body, _ := json.Marshal(map[string]any{
		"model":  g.Model,
		"stream": false,
		"format": "json",
		"messages": []map[string]string{
			{"role": "system", "content": system},
			{"role": "user", "content": user},
		},
		"options": map[string]any{"temperature": 0},
	})
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "POST", g.URL, bytes.NewReader(body))
	if err != nil {
		slog.Warn("matrix.judge: build request", "err", err)
		return 0
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := g.HTTPClient.Do(req)
	if err != nil {
		slog.Warn("matrix.judge: HTTP", "err", err, "url", g.URL)
		return 0
	}
	defer resp.Body.Close()
	if resp.StatusCode/100 != 2 {
		slog.Warn("matrix.judge: non-2xx", "status", resp.StatusCode, "url", g.URL)
		return 0
	}
	rb, _ := io.ReadAll(resp.Body)
	var ollamaResp struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
	}
	if err := json.Unmarshal(rb, &ollamaResp); err != nil {
		slog.Warn("matrix.judge: decode envelope", "err", err)
		return 0
	}
	var v struct {
		Rating int `json:"rating"`
	}
	// Some chat endpoints wrap content in markdown code fences even
	// with format=json. Strip leading/trailing whitespace + fences.
	content := strings.TrimSpace(ollamaResp.Message.Content)
	content = strings.TrimPrefix(content, "```json")
	content = strings.TrimPrefix(content, "```")
	content = strings.TrimSuffix(content, "```")
	content = strings.TrimSpace(content)
	if err := json.Unmarshal([]byte(content), &v); err != nil {
		slog.Warn("matrix.judge: decode rating", "err", err, "content", content)
		return 0
	}
	if v.Rating < 1 || v.Rating > 5 {
		return 0
	}
	return v.Rating
}