// golangLAKEHOUSE/internal/matrix/playbook.go

package matrix

// Playbook memory — SPEC §3.4 component 5 (learning-loop integration).
//
// Concept: every time an external system confirms "(query → answer_id)
// was a successful match," record it. Future similar queries get that
// answer's score boosted, so the matrix indexer learns from outcomes
// rather than relying solely on the base embedder's geometry.
//
// Per feedback_meta_index_vision.md: this is the north star — a
// meta-index that LEARNS from playbooks over time, not a static
// hybrid search engine.
//
// Storage shape: a vectord index named DefaultPlaybookCorpus where:
//   - The vector is embed(query_text)
//   - The metadata is a serialized PlaybookEntry
// Retrieval shape: at /matrix/search time, when use_playbook=true,
// matrixd searches the playbook corpus with the same query vector,
// looks up each hit's answer_id, and if that answer is in the current
// matrix-search results, applies a boost to its distance.
//
// Composition: this layer is additive on top of the existing
// retrieve+merge — when use_playbook=false, behavior is unchanged.
// The boost only re-ranks results that ALREADY surfaced from the
// regular retrieval. Injecting playbook hits directly even when they
// weren't in the top-K (Shape B from the design conversation) is
// implemented separately by InjectPlaybookMisses, which applies the
// tighter DefaultPlaybookMaxInjectDistance ceiling.

import (
	"encoding/json"
	"errors"
	"sort"
	"time"
)

// DefaultPlaybookCorpus is the vectord index name where playbook
// entries land by default. Callers can override per-request, but
// having one default makes the system observable from the outside
// (operator hits /vectors/index and sees this corpus in the list).
const DefaultPlaybookCorpus = "playbook_memory"

// DefaultPlaybookTopK is how many similar past queries to consider
// when applying boost. 3 keeps the influence focused — we want the
// boost to reward consistent matches, not let one stale playbook
// dominate. Caller can override.
const DefaultPlaybookTopK = 3

// DefaultPlaybookMaxDistance is the cosine ceiling for "this past
// query is similar enough to count." 0.5 lets in genuinely related
// queries while excluding pure-coincidence neighbors. Caller can
// override per-request as we learn what works for staffing data.
//
// This threshold gates the BOOST path (re-rank in place), which is
// safe at loose thresholds because boost only modifies results already
// in regular retrieval. The INJECT path uses a tighter ceiling — see
// DefaultPlaybookMaxInjectDistance.
const DefaultPlaybookMaxDistance = 0.5

// DefaultPlaybookMaxInjectDistance is the SHAPE B cosine ceiling for
// "this past query is similar enough to FORCE its answer into the
// result set." Tighter than DefaultPlaybookMaxDistance because inject
// is structurally riskier than boost: it adds a result the embedding
// didn't surface, so a loose match can cross-pollinate the wrong
// answer into unrelated queries.
//
// Empirical motivation (playbook_lift_003): Q2's recording for an
// OSHA-30 forklift operator surfaced as warm top-1 for the dental
// hygienist / RN / software engineer OOD queries because their text
// vectors fell within 0.5 cosine of "OSHA-30 forklift Wisconsin."
// 0.20 would have rejected those (implied playbook distances 0.38-0.46)
// while keeping all 6 paraphrase recoveries (≤ 0.30 implied).
//
// Boost path stays at 0.5 — re-ranking results that already retrieved
// by their own merits is safe even when the playbook match is loose.
const DefaultPlaybookMaxInjectDistance = 0.20

// PlaybookEntry is what gets stored as metadata on each playbook
// vector: one recorded (query → answer) outcome.
//
// Fields:
//   - QueryText: the recorded query text; required by Validate.
//   - AnswerID / AnswerCorpus: identify the recorded answer. Both are
//     required by Validate, and ApplyPlaybookBoost and
//     InjectPlaybookMisses compare them against Result.ID and
//     Result.Corpus.
//   - Score: outcome quality in [0, 1], higher = better. It drives
//     BoostFactor.
//   - RecordedAtNs: Unix time in nanoseconds. NewPlaybookEntry stamps
//     it at construction time, so callers going through that
//     constructor should not set it themselves.
//   - Tags: optional; omitted from the JSON encoding when empty.
type PlaybookEntry struct {
	QueryText    string   `json:"query_text"`
	AnswerID     string   `json:"answer_id"`
	AnswerCorpus string   `json:"answer_corpus"`
	Score        float64  `json:"score"`         // 0..1; higher = better outcome
	RecordedAtNs int64    `json:"recorded_at_ns"`
	Tags         []string `json:"tags,omitempty"`
}

// Validate returns an error if the entry is missing a required field
// or carries a score outside [0, 1]. Callers should validate before
// storage so bad data doesn't pollute the corpus.
//
// Checks run in a fixed order (query_text, answer_id, answer_corpus,
// score) and the first failure is returned. A NaN score is rejected:
// it is not a number in [0, 1], and it would turn every boosted
// distance into NaN.
func (p PlaybookEntry) Validate() error {
	switch {
	case p.QueryText == "":
		return errors.New("playbook: query_text required")
	case p.AnswerID == "":
		return errors.New("playbook: answer_id required")
	case p.AnswerCorpus == "":
		return errors.New("playbook: answer_corpus required")
	case !(p.Score >= 0 && p.Score <= 1):
		// Written as a negated range check on purpose: every
		// comparison with NaN is false, so "Score < 0 || Score > 1"
		// would let NaN through.
		return errors.New("playbook: score must be in [0, 1]")
	}
	return nil
}

// BoostFactor returns the multiplier applied to a result's distance
// when this playbook entry matches it. Lower is better:
//
//	score = 0   → 1.0  (no boost)
//	score = 0.5 → 0.75 (mild boost)
//	score = 1.0 → 0.5  (halve the distance — strong boost)
//
// Math: 1 - 0.5*score, with the score clamped to [0, 1] so the
// result always lies in [0.5, 1.0]. A NaN score is treated like 0
// (no boost) so that an unvalidated entry can never turn a distance
// into NaN.
//
// Why halving as the maximum boost: a perfect-confidence playbook
// entry shouldn't completely override the base embedding (that
// invites runaway feedback loops where one early playbook
// dominates forever). Halving is enough to move a mid-rank result
// to the top in most cases without erasing the base ranking
// signal.
func (p PlaybookEntry) BoostFactor() float64 {
	switch {
	case !(p.Score > 0):
		// Negative, zero and NaN all land here: comparisons with
		// NaN are false, so the negated form is what catches it.
		return 1.0
	case p.Score >= 1:
		return 0.5
	}
	return 1.0 - 0.5*p.Score
}

// MarshalMetadata encodes the entry as JSON and hands it back as the
// json.RawMessage that vectord stores per item. It exists as a
// convenience for the recorder; any encoding error is returned as-is
// with a nil payload.
func (p PlaybookEntry) MarshalMetadata() (json.RawMessage, error) {
	encoded, err := json.Marshal(p)
	if err != nil {
		return nil, err
	}
	return json.RawMessage(encoded), nil
}

// UnmarshalPlaybookMetadata is the inverse of MarshalMetadata — used
// when fetching playbook hits to decode their metadata back into
// entries.
//
// It returns an error when raw is empty or is not valid JSON for a
// PlaybookEntry. On any error the returned entry is the zero value:
// encoding/json may have filled some fields before it hit the
// problem, and handing that half-decoded entry back would invite
// callers to use it by accident. The decoded entry is NOT validated
// here; call Validate if that matters.
func UnmarshalPlaybookMetadata(raw json.RawMessage) (PlaybookEntry, error) {
	if len(raw) == 0 {
		return PlaybookEntry{}, errors.New("playbook: empty metadata")
	}
	var e PlaybookEntry
	if err := json.Unmarshal(raw, &e); err != nil {
		// Discard whatever was partially decoded into e.
		return PlaybookEntry{}, err
	}
	return e, nil
}

// NewPlaybookEntry builds a PlaybookEntry from the given fields and
// sets RecordedAtNs to the current wall-clock time in nanoseconds.
// It performs no validation — that is left to the storage path — and
// it keeps the caller's tags slice as-is rather than copying it.
func NewPlaybookEntry(query, answerID, answerCorpus string, score float64, tags []string) PlaybookEntry {
	var entry PlaybookEntry
	entry.QueryText = query
	entry.AnswerID = answerID
	entry.AnswerCorpus = answerCorpus
	entry.Score = score
	entry.Tags = tags
	entry.RecordedAtNs = time.Now().UnixNano()
	return entry
}

// PlaybookHit is one similarity-search result from the playbook
// corpus, paired with its decoded entry.
//
// Fields:
//   - PlaybookID: identifier of the playbook item that matched;
//     InjectPlaybookMisses copies it into the synthetic result's
//     metadata as "playbook_id". (Presumably the vectord item ID —
//     confirm against the code that builds hits.)
//   - Distance: the cosine distance between the current query and
//     this past playbook's query vector. Callers use it to filter out
//     "too far" matches (DefaultPlaybookMaxDistance for the boost
//     path; InjectPlaybookMisses applies its own maxInjectDist).
//   - Entry: the decoded PlaybookEntry stored with the matched item.
type PlaybookHit struct {
	PlaybookID string        `json:"playbook_id"`
	Distance   float32       `json:"distance"`
	Entry      PlaybookEntry `json:"entry"`
}

// InjectPlaybookMisses appends synthetic Results for playbook hits
// whose (AnswerCorpus, AnswerID) doesn't already appear in results.
// This is "Shape B" from the doc comment at the top of this file:
// the boost-only stance (ApplyPlaybookBoost) can't promote a
// recorded answer that wasn't already in the regular retrieval's
// top-K. Paraphrase queries broke this — different embedding ⇒
// different top-K ⇒ recorded answer drops out ⇒ no boost can save
// it. Reality test playbook_lift_002 showed 0/2 paraphrase top-1
// lifts because of exactly that.
//
// Synthetic distance = playbook_hit_distance × BoostFactor — same
// formula as ApplyPlaybookBoost, applied to the playbook hit's own
// distance instead of a result's. Lower playbook hit distance
// (current query is similar to recorded query) AND higher score
// (recorded outcome was strong) push the injection toward top-1.
//
// Parameters:
//   - query is the current search's query text — passed to the gate
//     so it can score (query, candidate) pairs without re-deriving
//     from SearchRequest. Empty when the caller doesn't have it
//     (gate implementations should treat empty query as "skip judge,
//     allow").
//   - results is the regular retrieval output; it is only appended to.
//   - hits are the playbook matches for the current query.
//   - maxInjectDist filters which hits qualify for injection — hits
//     whose playbook-corpus cosine distance exceeds it (or is NaN)
//     are skipped; the boost path may still re-rank them in place.
//     Pass 0 (or any non-positive value) to use
//     DefaultPlaybookMaxInjectDistance.
//   - gate is an optional approval callback called once per
//     CANDIDATE (post-distance-filter, post-dedup) before injection.
//     Returning false rejects that candidate. Use nil for the
//     historical "all distance-eligible candidates inject" behavior.
//
// Why a gate: multi-coord run #008's judge re-rating proved that
// distance + LLM rating disagree often enough to matter (Q3 crane:
// dist 0.23 looks confident, judge says 1/5 = irrelevant). Lift-suite
// tail issues (Q6↔Q7 swap, Q9/Q15 paraphrase drift) are exactly this
// shape — embedding-tight but wrong-domain. The gate lets callers
// route those candidates through a judge before the inject lands.
//
// When several hits point at the same answer they collapse to one
// injection: the hit with the lowest BoostFactor (highest score)
// wins, and on a tie the earlier hit is kept.
//
// Candidates are gated and appended in the order their answer first
// appears in hits, so the output — and the sequence of gate calls —
// is deterministic for a given input. A candidate whose provenance
// metadata cannot be JSON-encoded (only possible with a non-finite
// score or distance) is skipped rather than injected without
// metadata.
//
// Returns the (possibly extended) results slice and how many synthetic
// rows were appended. Caller MUST re-sort + truncate to K afterwards.
func InjectPlaybookMisses(query string, results []Result, hits []PlaybookHit, maxInjectDist float32, gate InjectGate) ([]Result, int) {
	if len(hits) == 0 {
		return results, 0
	}
	if maxInjectDist <= 0 {
		maxInjectDist = float32(DefaultPlaybookMaxInjectDistance)
	}

	// A struct key cannot collide the way a "corpus|id" string can
	// when either part happens to contain the separator.
	type answerKey struct{ corpus, id string }

	present := make(map[answerKey]struct{}, len(results))
	for i := range results {
		present[answerKey{corpus: results[i].Corpus, id: results[i].ID}] = struct{}{}
	}

	// best holds the winning hit per missing answer; order remembers
	// first-seen order because ranging over a map is randomized.
	best := make(map[answerKey]PlaybookHit, len(hits))
	order := make([]answerKey, 0, len(hits))
	for _, h := range hits {
		// Inject-specific tighter threshold (boost path's threshold is
		// looser; this prevents cross-pollination of wrong-domain
		// answers into queries whose text happens to fall within
		// boost-distance of an unrelated recording). The negated form
		// also rejects NaN, which a plain ">" would let through.
		if !(h.Distance <= maxInjectDist) {
			continue
		}
		key := answerKey{corpus: h.Entry.AnswerCorpus, id: h.Entry.AnswerID}
		if _, ok := present[key]; ok {
			continue
		}
		existing, seen := best[key]
		if !seen {
			order = append(order, key)
			best[key] = h
			continue
		}
		if h.Entry.BoostFactor() < existing.Entry.BoostFactor() {
			best[key] = h
		}
	}

	injected := 0
	for _, key := range order {
		h := best[key]
		// Judge gate: post-distance-filter, ask the gate whether the
		// candidate actually fits the current query before letting it
		// inject. Closes the lift-suite tail issues where embedding
		// said "tight" but a judge said "wrong domain."
		if gate != nil && !gate.Approve(query, h) {
			continue
		}
		// Synthesize metadata that flags the injection so callers
		// (driver/UI/observer) can distinguish "regular retrieval"
		// from "playbook injection." Production consumers needing
		// the actual worker metadata can fetch from vectord by
		// (Corpus, ID) — synthetic results carry only provenance.
		meta, err := json.Marshal(map[string]any{
			"playbook_injected":       true,
			"playbook_id":             h.PlaybookID,
			"playbook_score":          h.Entry.Score,
			"playbook_query_text":     h.Entry.QueryText,
			"playbook_recorded_at_ns": h.Entry.RecordedAtNs,
			"playbook_hit_distance":   h.Distance,
		})
		if err != nil {
			// Only non-finite floats can fail here; such a candidate
			// would also carry a meaningless distance, so drop it.
			continue
		}
		results = append(results, Result{
			ID:       h.Entry.AnswerID,
			Corpus:   h.Entry.AnswerCorpus,
			Distance: h.Distance * float32(h.Entry.BoostFactor()),
			Metadata: meta,
		})
		injected++
	}

	return results, injected
}

// InjectGate is the optional approval callback for Shape B inject.
// Called once per candidate (after distance filter, after dedup).
// Returning false rejects that candidate. Implementations:
//   - LLMJudgeGate (this package, see judge.go): Ollama LLM rates the
//     (query, candidate) pair against a 1-5 rubric.
//   - InjectGateFunc (this package): zero-deps adapter for arbitrary
//     caller logic — useful in tests + when callers want non-LLM
//     gating (e.g. metadata-only filters).
//
// nil InjectGate = pre-judge-gating behavior (all distance-eligible
// candidates inject); preserves backward compatibility.
type InjectGate interface {
	// Approve reports whether hit may be injected into the results
	// for query. InjectPlaybookMisses passes its own query argument
	// through unchanged (it may be empty) together with the winning
	// hit for the candidate answer; a false return makes it skip
	// that candidate.
	Approve(query string, hit PlaybookHit) bool
}

// InjectGateFunc lets an ordinary function value act as an
// InjectGate. Used heavily in tests; production callers usually use
// LLMJudgeGate.
type InjectGateFunc func(query string, hit PlaybookHit) bool

// Approve satisfies InjectGate by calling the wrapped function with
// the same arguments and returning its verdict unchanged.
func (fn InjectGateFunc) Approve(query string, hit PlaybookHit) bool {
	verdict := fn(query, hit)
	return verdict
}

// ApplyPlaybookBoost rescales and re-orders results in place based
// on the playbook hits that point at them.
//
// A hit matches a result when its entry's AnswerID and AnswerCorpus
// equal the result's ID and Corpus. Each matched result has its
// distance multiplied by the smallest BoostFactor among its matching
// hits (i.e. the highest-score hit wins); unmatched results are left
// alone. Afterwards the slice is stably sorted by ascending distance.
//
// The return value is how many results were matched by at least one
// hit — a signal callers can log for "how much the playbook
// influenced this query." When either slice is empty nothing is
// touched (not even the ordering) and 0 is returned.
func ApplyPlaybookBoost(results []Result, hits []PlaybookHit) int {
	if len(results) == 0 || len(hits) == 0 {
		return 0
	}

	type answerKey struct{ id, corpus string }

	// Pass 1: strongest (smallest) factor recorded per answer.
	strongest := make(map[answerKey]float64, len(hits))
	for _, hit := range hits {
		key := answerKey{id: hit.Entry.AnswerID, corpus: hit.Entry.AnswerCorpus}
		factor := hit.Entry.BoostFactor()
		if prev, seen := strongest[key]; seen && !(factor < prev) {
			continue
		}
		strongest[key] = factor
	}

	// Pass 2: scale every result that some hit points at.
	boosted := 0
	for i := range results {
		factor, matched := strongest[answerKey{id: results[i].ID, corpus: results[i].Corpus}]
		if !matched {
			continue
		}
		results[i].Distance = float32(float64(results[i].Distance) * factor)
		boosted++
	}

	sort.SliceStable(results, func(a, b int) bool {
		return results[a].Distance < results[b].Distance
	})

	return boosted
}