package matrix // Playbook memory — SPEC §3.4 component 5 (learning-loop integration). // // Concept: every time an external system confirms "(query → answer_id) // was a successful match," record it. Future similar queries get that // answer's score boosted, so the matrix indexer learns from outcomes // rather than relying solely on the base embedder's geometry. // // Per feedback_meta_index_vision.md: this is the north star — a // meta-index that LEARNS from playbooks over time, not a static // hybrid search engine. // // Storage shape: a vectord index named DefaultPlaybookCorpus where: // - The vector is embed(query_text) // - The metadata is a serialized PlaybookEntry // Retrieval shape: at /matrix/search time, when use_playbook=true, // matrixd searches the playbook corpus with the same query vector, // looks up each hit's answer_id, and if that answer is in the current // matrix-search results, applies a boost to its distance. // // Composition: this layer is additive on top of the existing // retrieve+merge — when use_playbook=false, behavior is unchanged. // The boost only re-ranks results that ALREADY surfaced from the // regular retrieval. A v1 enhancement would inject playbook hits // directly even when they weren't in the top-K (Shape B from the // design conversation), but v0 keeps the safer "boost-only" stance. import ( "encoding/json" "errors" "sort" "strings" "time" ) // DefaultPlaybookCorpus is the vectord index name where playbook // entries land by default. Callers can override per-request, but // having one default makes the system observable from the outside // (operator hits /vectors/index and sees this corpus in the list). const DefaultPlaybookCorpus = "playbook_memory" // DefaultPlaybookTopK is how many similar past queries to consider // when applying boost. 3 keeps the influence focused — we want the // boost to reward consistent matches, not let one stale playbook // dominate. Caller can override. 
const DefaultPlaybookTopK = 3

// DefaultPlaybookMaxDistance is the cosine ceiling for "this past
// query is similar enough to count." 0.5 lets in genuinely related
// queries while excluding pure-coincidence neighbors. Caller can
// override per-request as we learn what works for staffing data.
//
// This threshold gates the BOOST path (re-rank in place), which is
// safe at loose thresholds because boost only modifies results already
// in regular retrieval. The INJECT path uses a tighter ceiling — see
// DefaultPlaybookMaxInjectDistance.
const DefaultPlaybookMaxDistance = 0.5

// DefaultPlaybookMaxInjectDistance is the SHAPE B cosine ceiling for
// "this past query is similar enough to FORCE its answer into the
// result set." Tighter than DefaultPlaybookMaxDistance because inject
// is structurally riskier than boost: it adds a result the embedding
// didn't surface, so a loose match can cross-pollinate the wrong
// answer into unrelated queries.
//
// Empirical motivation (playbook_lift_003): Q2's recording for an
// OSHA-30 forklift operator surfaced as warm top-1 for the dental
// hygienist / RN / software engineer OOD queries because their text
// vectors fell within 0.5 cosine of "OSHA-30 forklift Wisconsin."
// 0.20 would have rejected those (implied playbook distances 0.38-0.46)
// while keeping all 6 paraphrase recoveries (≤ 0.30 implied).
//
// Boost path stays at 0.5 — re-ranking results that already retrieved
// by their own merits is safe even when the playbook match is loose.
const DefaultPlaybookMaxInjectDistance = 0.20

// PlaybookEntry is what gets stored as metadata on each playbook
// vector. RecordedAt is captured at write time; callers should not
// set it (the recorder fills it in).
//
// Role is an optional caller-provided hint that names the query's
// staffing role at record time (e.g. "Forklift Operator", "CNC
// Operator"). When set, the matrix retrieve layer gates Shape A boost
// and Shape B inject so a recorded entry only fires for queries with
// the same role. Empty Role = no gate (caller didn't know the role,
// or the query has no clean role concept — e.g. lift-suite multi-
// constraint queries). Reality test real_001 surfaced same-client+city
// queries bleeding across roles when the embedder collapsed them
// within the inject distance threshold; the role gate closes that
// without rejecting legitimate intra-role boosts.
type PlaybookEntry struct {
	QueryText    string   `json:"query_text"`
	Role         string   `json:"role,omitempty"`
	AnswerID     string   `json:"answer_id"`
	AnswerCorpus string   `json:"answer_corpus"`
	Score        float64  `json:"score"` // 0..1; higher = better outcome
	RecordedAtNs int64    `json:"recorded_at_ns"`
	Tags         []string `json:"tags,omitempty"`
}

// Validate returns an error if the entry is missing required fields
// or carries a score outside [0, 1] (NaN included). Callers should
// validate before storage so bad data doesn't pollute the corpus.
func (p PlaybookEntry) Validate() error {
	switch {
	case p.QueryText == "":
		return errors.New("playbook: query_text required")
	case p.AnswerID == "":
		return errors.New("playbook: answer_id required")
	case p.AnswerCorpus == "":
		return errors.New("playbook: answer_corpus required")
	}
	// Deliberately a negated in-range test rather than
	// "Score < 0 || Score > 1": every ordered comparison against NaN
	// is false, so the latter form would let a NaN score through.
	if !(p.Score >= 0 && p.Score <= 1) {
		return errors.New("playbook: score must be in [0, 1]")
	}
	return nil
}

// BoostFactor returns the multiplier applied to a result's distance
// when this playbook entry matches it. Lower is better:
//
//	score = 0   → 1.0  (no boost)
//	score = 0.5 → 0.75 (mild boost)
//	score = 1.0 → 0.5  (halve the distance — strong boost)
//
// Math: 1 - 0.5*score. Capped to [0.5, 1.0] for safety; a NaN score
// is treated as 0 (no boost) so it can never turn a distance into NaN
// and corrupt the subsequent sort.
//
// Why halving as the maximum boost: a perfect-confidence playbook
// entry shouldn't completely override the base embedding (that
// invites runaway feedback loops where one early playbook
// dominates forever). Halving is enough to move a mid-rank result
// to the top in most cases without erasing the base ranking
// signal.
func (p PlaybookEntry) BoostFactor() float64 {
	score := p.Score
	// "!(score > 0)" covers negatives, zero and NaN in one test.
	if !(score > 0) {
		score = 0
	}
	if score > 1 {
		score = 1
	}
	return 1.0 - 0.5*score
}

// MarshalMetadata serializes the entry as the JSON RawMessage that
// vectord stores per item. Convenience for the recorder.
func (p PlaybookEntry) MarshalMetadata() (json.RawMessage, error) {
	return json.Marshal(p)
}

// UnmarshalPlaybookMetadata is the inverse — used when fetching
// playbook hits to decode their metadata back into entries. On any
// error the returned entry is the zero value, never a half-decoded one.
func UnmarshalPlaybookMetadata(raw json.RawMessage) (PlaybookEntry, error) {
	if len(raw) == 0 {
		return PlaybookEntry{}, errors.New("playbook: empty metadata")
	}
	var e PlaybookEntry
	if err := json.Unmarshal(raw, &e); err != nil {
		return PlaybookEntry{}, err
	}
	return e, nil
}

// NewPlaybookEntry stamps RecordedAtNs to now and returns the entry.
// Validation happens at storage; this is just construction.
//
// Role-aware overload is NewPlaybookEntryWithRole — callers that don't
// know the role can keep using this constructor and the role gate
// will degrade to current (un-gated) behavior.
func NewPlaybookEntry(query, answerID, answerCorpus string, score float64, tags []string) PlaybookEntry {
	return PlaybookEntry{
		QueryText:    query,
		AnswerID:     answerID,
		AnswerCorpus: answerCorpus,
		Score:        score,
		RecordedAtNs: time.Now().UnixNano(),
		Tags:         tags,
	}
}

// NewPlaybookEntryWithRole is the role-aware variant of NewPlaybookEntry.
// role should be a stable role token (see roleEqual / role-token notes
// in the cross-role gate); empty disables the gate for this entry.
func NewPlaybookEntryWithRole(query, role, answerID, answerCorpus string, score float64, tags []string) PlaybookEntry {
	e := NewPlaybookEntry(query, answerID, answerCorpus, score, tags)
	e.Role = role
	return e
}

// PlaybookHit is one similarity-search result from the playbook
// corpus, paired with its decoded entry. Distance is the cosine
// distance between the current query and this past playbook's
// query vector — used by the caller to filter out "too far"
// matches via PlaybookMaxDistance.
type PlaybookHit struct {
	PlaybookID string        `json:"playbook_id"`
	Distance   float32       `json:"distance"`
	Entry      PlaybookEntry `json:"entry"`
}

// roleEqual is the cross-role gate's match function. Returns true when
// EITHER side is empty or whitespace-only (gate disabled — current
// behavior preserved) OR the role tokens compare equal under a
// normalization that ignores case, surrounding/repeated whitespace and
// a plural ending on the last word (so "Forklift Operator" matches
// "forklift operator" matches "Forklift Operators"). Robust enough for
// the staffing-domain plurals seen in fill_events; tighter
// normalization (Levenshtein, role-graph) is a separate concern and
// not blocking the bleed fix.
//
// Why this lives near PlaybookEntry: the gate is a property of the
// entry/query pair, not of the boost or inject mechanism — both call
// sites in this file delegate to the same function so they can't
// drift apart.
func roleEqual(a, b string) bool {
	// Emptiness is judged on the trimmed input, not on the normalized
	// token, so a blank role means "no gate" while a degenerate token
	// still has to match.
	if strings.TrimSpace(a) == "" || strings.TrimSpace(b) == "" {
		return true
	}
	return roleNormalize(a) == roleNormalize(b)
}

// nonPluralSWords is the allowlist of staffing-domain tokens that end
// in 's' but are NOT plurals — naïve trailing-'s' stripping would
// mangle these into wrong tokens ("Sales" → "Sale", "Logistics" →
// "Logistic"). Per scrum review role_gate_v1 (Opus + Kimi + Qwen
// convergent finding 2026-04-30): expand this list as new false-
// positives surface from production traffic.
var nonPluralSWords = map[string]bool{
	"sales":       true,
	"logistics":   true,
	"operations":  true,
	"facilities":  true,
	"premises":    true,
	"news":        true,
	"physics":     true,
	"economics":   true,
	"mathematics": true,
	"analytics":   true,
}

// roleNormalize lowercases the role, collapses every run of whitespace
// to a single space (dropping leading/trailing whitespace), and
// best-effort-singularizes the last word so "Forklift Operators"
// matches "Forklift Operator" but "Sales Associate" doesn't lose its
// "s".
//
// Only the LAST word is plural-checked because staffing-domain role
// names follow English head-noun-last conventions ("Sales Associate"
// → head is "Associate"; "Forklift Operators" → head is "Operators").
//
// Singularization rules for the last word (in order):
//  1. Word ∈ nonPluralSWords → keep ("Sales" stays "sales").
//  2. Word ends in "ss" → keep ("Press" stays; "Boss" stays).
//  3. Word ends in "xes", "sses", "ches" or "shes" → strip "es"
//     ("Boxes" → "box", "Bosses" → "boss", "Dishes" → "dish").
//  4. Word ends in "s" → strip "s" ("Operators" → "operator",
//     "Nurses" → "nurse").
//  5. Otherwise keep.
//
// Rule 3 intentionally requires a double "s" (and "ch"/"sh" rather
// than any "h"): words such as "nurses" or "warehouses" have a
// singular ending in "e", so stripping "es" would produce "nurs" /
// "warehous" and fail to match their own singular.
func roleNormalize(s string) string {
	words := strings.Fields(strings.ToLower(s))
	if len(words) == 0 {
		return ""
	}
	last := len(words) - 1
	words[last] = roleSingularize(words[last])
	return strings.Join(words, " ")
}

// roleSingularize applies roleNormalize's singularization rules to a
// single, already-lowercased word.
func roleSingularize(w string) string {
	switch {
	case nonPluralSWords[w]:
		return w
	case strings.HasSuffix(w, "ss"):
		return w
	case strings.HasSuffix(w, "xes"),
		strings.HasSuffix(w, "sses"),
		strings.HasSuffix(w, "ches"),
		strings.HasSuffix(w, "shes"):
		return w[:len(w)-2]
	case strings.HasSuffix(w, "s"):
		return w[:len(w)-1]
	}
	return w
}

// InjectPlaybookMisses appends synthetic Results for playbook hits
// whose (AnswerCorpus, AnswerID) doesn't already appear in results.
// This is "Shape B" from the doc comment at the top of this file:
// the v0 boost-only stance (ApplyPlaybookBoost) can't promote a
// recorded answer that wasn't already in the regular retrieval's
// top-K. Paraphrase queries broke this — different embedding ⇒
// different top-K ⇒ recorded answer drops out ⇒ no boost can save
// it. Reality test playbook_lift_002 showed 0/2 paraphrase top-1
// lifts because of exactly that.
//
// Synthetic distance = playbook_hit_distance × BoostFactor — same
// formula as ApplyPlaybookBoost, applied to the playbook hit's own
// distance instead of a result's.
Lower playbook hit distance // (current query is similar to recorded query) AND higher score // (recorded outcome was strong) push the injection toward top-1. // // fetchPlaybookHits has already filtered hits to those within // DefaultPlaybookMaxDistance (0.5), so injected results land in the // same distance range as regular retrieval — they don't dominate // top-K from out-of-distribution playbooks. // // Returns the (possibly extended) results slice and how many synthetic // rows were appended. Caller MUST re-sort + truncate to K afterwards. // // maxInjectDist filters which hits qualify for injection — hits whose // playbook-corpus cosine distance exceeds it are skipped (the boost // path may still re-rank them in place). Pass 0 (or any non-positive // value) to use DefaultPlaybookMaxInjectDistance. // // gate is an optional approval callback called once per CANDIDATE // (post-distance-filter, post-dedup) before injection. Returning // false rejects that candidate. Use nil for the historical "all // distance-eligible candidates inject" behavior. // // Multi-coord run #008's judge re-rating proved that distance + LLM // rating disagree often enough to matter (Q3 crane: dist 0.23 looks // confident, judge says 1/5 = irrelevant). Lift-suite tail issues // (Q6↔Q7 swap, Q9/Q15 paraphrase drift) are exactly this shape — // embedding-tight but wrong-domain. The gate parameter lets callers // route those candidates through a judge before the inject lands. // // query is the current search's query text — passed to the gate so // it can score (query, candidate) pairs without re-deriving from // SearchRequest. Empty when the caller doesn't have it (gate // implementations should treat empty query as "skip judge, allow"). // // queryRole, when non-empty, gates injection on role match: a playbook // hit whose Entry.Role is set and DIFFERENT from queryRole is rejected // before the judge gate runs. 
Closes the same-client+city cross-role // bleed (real_001 finding) where the inject distance threshold (0.20) // wasn't tight enough to prevent Forklift-Operator playbook entries // from injecting on CNC-Operator queries that happened to embed close // (same city + client + count-token + time-token dominate cosine). // Empty queryRole OR empty hit.Entry.Role = no gate (preserves // behavior for callers that don't supply role). func InjectPlaybookMisses(query, queryRole string, results []Result, hits []PlaybookHit, maxInjectDist float32, gate InjectGate) ([]Result, int) { if len(hits) == 0 { return results, 0 } if maxInjectDist <= 0 { maxInjectDist = float32(DefaultPlaybookMaxInjectDistance) } present := make(map[string]bool, len(results)) for _, r := range results { present[r.Corpus+"|"+r.ID] = true } // For each (corpus, id) NOT in results, keep the playbook hit // with the largest boost (lowest BoostFactor = highest score). // Multiple hits to the same answer collapse to one injection. bestForKey := make(map[string]PlaybookHit) for _, h := range hits { // Inject-specific tighter threshold (boost path's threshold is // looser; this prevents cross-pollination of wrong-domain // answers into queries whose text happens to fall within // boost-distance of an unrelated recording). if h.Distance > maxInjectDist { continue } // Cross-role gate (real_001 bleed fix). Reject before the // judge gate so we don't burn a slow LLM call to immediately // reject the candidate on a structural mismatch. if !roleEqual(queryRole, h.Entry.Role) { continue } key := h.Entry.AnswerCorpus + "|" + h.Entry.AnswerID if present[key] { continue } if existing, ok := bestForKey[key]; !ok || h.Entry.BoostFactor() < existing.Entry.BoostFactor() { bestForKey[key] = h } } injected := 0 for _, h := range bestForKey { // Judge gate (per OPEN item #1, closed by this commit): // post-distance-filter, ask the gate whether the candidate // actually fits the current query before letting it inject. 
// Closes the lift-suite tail issues where embedding said // "tight" but a judge said "wrong domain." if gate != nil && !gate.Approve(query, h) { continue } injectedDist := h.Distance * float32(h.Entry.BoostFactor()) // Synthesize metadata that flags the injection so callers // (driver/UI/observer) can distinguish "regular retrieval" // from "playbook injection." Production consumers needing // the actual worker metadata can fetch from vectord by // (Corpus, ID) — synthetic results carry only provenance. meta, _ := json.Marshal(map[string]any{ "playbook_injected": true, "playbook_id": h.PlaybookID, "playbook_score": h.Entry.Score, "playbook_query_text": h.Entry.QueryText, "playbook_recorded_at_ns": h.Entry.RecordedAtNs, "playbook_hit_distance": h.Distance, }) results = append(results, Result{ ID: h.Entry.AnswerID, Corpus: h.Entry.AnswerCorpus, Distance: injectedDist, Metadata: meta, }) injected++ } return results, injected } // InjectGate is the optional approval callback for Shape B inject. // Called once per candidate (after distance filter, after dedup). // Returning false rejects that candidate. Implementations: // - LLMJudgeGate (this package, see judge.go): Ollama LLM rates the // (query, candidate) pair against a 1-5 rubric. // - InjectGateFunc (this package): zero-deps adapter for arbitrary // caller logic — useful in tests + when callers want non-LLM // gating (e.g. metadata-only filters). // // nil InjectGate = pre-judge-gating behavior (all distance-eligible // candidates inject); preserves backward compatibility. type InjectGate interface { Approve(query string, hit PlaybookHit) bool } // InjectGateFunc adapts a plain function to the InjectGate interface. // Used heavily in tests; production callers usually use LLMJudgeGate. type InjectGateFunc func(query string, hit PlaybookHit) bool // Approve makes InjectGateFunc satisfy InjectGate. 
func (f InjectGateFunc) Approve(query string, hit PlaybookHit) bool {
	return f(query, hit)
}

// ApplyPlaybookBoost re-ranks results in place using matched
// playbook hits. Every result whose (ID, Corpus) equals some hit's
// (AnswerID, AnswerCorpus) has its distance multiplied by that hit's
// BoostFactor; when several hits point at the same result, the one
// with the smallest factor (highest score, greatest reduction in
// distance) is used.
//
// queryRole, when non-empty, gates the boost on role match — same
// semantics as InjectPlaybookMisses' queryRole. Defense in depth:
// even if a recorded entry's answer happens to surface in the
// regular retrieve top-K for a different-role query, role-mismatch
// shouldn't fire a boost. Empty role on either side disables the gate.
//
// After the boosts are applied, results are stably re-sorted in
// ascending distance order.
//
// The return value is the number of results that matched at least one
// role-eligible hit. Callers can log it as a signal of "how much the
// playbook influenced this query."
func ApplyPlaybookBoost(queryRole string, results []Result, hits []PlaybookHit) int {
	if len(results) == 0 || len(hits) == 0 {
		return 0
	}

	type answerKey struct{ corpus, id string }

	// One pass over the hits: remember, per answer, the smallest
	// BoostFactor among role-eligible hits (BoostFactor is
	// 1-0.5*score, so smallest factor = highest score). Hits failing
	// the cross-role gate (real_001 bleed fix) never contribute.
	factorFor := make(map[answerKey]float64, len(hits))
	for _, hit := range hits {
		if !roleEqual(queryRole, hit.Entry.Role) {
			continue
		}
		key := answerKey{corpus: hit.Entry.AnswerCorpus, id: hit.Entry.AnswerID}
		factor := hit.Entry.BoostFactor()
		if prev, seen := factorFor[key]; seen && !(factor < prev) {
			continue
		}
		factorFor[key] = factor
	}

	// Scale each matched result exactly once.
	boosted := 0
	for idx := range results {
		factor, matched := factorFor[answerKey{corpus: results[idx].Corpus, id: results[idx].ID}]
		if !matched {
			continue
		}
		results[idx].Distance = float32(float64(results[idx].Distance) * factor)
		boosted++
	}

	sort.SliceStable(results, func(a, b int) bool {
		return results[a].Distance < results[b].Distance
	})
	return boosted
}