package matrix

import (
	"encoding/json"
	"testing"
)

// TestPlaybookEntry_Validate checks that a fully populated entry with
// Score 0.5 validates, and that entries missing QueryText, AnswerID or
// AnswerCorpus, or carrying a score of 1.5 or -0.1, are rejected.
func TestPlaybookEntry_Validate(t *testing.T) {
	good := PlaybookEntry{
		QueryText:    "x",
		AnswerID:     "y",
		AnswerCorpus: "z",
		Score:        0.5,
	}
	if err := good.Validate(); err != nil {
		t.Errorf("good entry should validate: %v", err)
	}

	cases := []struct {
		name  string
		entry PlaybookEntry
	}{
		{"empty query", PlaybookEntry{AnswerID: "y", AnswerCorpus: "z", Score: 0.5}},
		{"empty answer id", PlaybookEntry{QueryText: "x", AnswerCorpus: "z", Score: 0.5}},
		{"empty corpus", PlaybookEntry{QueryText: "x", AnswerID: "y", Score: 0.5}},
		{"score too high", PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: 1.5}},
		{"score negative", PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: -0.1}},
	}
	for _, c := range cases {
		// Each named case runs as its own subtest so a failure is
		// attributed to the case and can be targeted with -run. The
		// closure runs synchronously (no t.Parallel), so capturing c
		// is safe on every Go version.
		t.Run(c.name, func(t *testing.T) {
			if err := c.entry.Validate(); err == nil {
				t.Errorf("%s: expected validation error, got nil", c.name)
			}
		})
	}
}

// TestPlaybookEntry_BoostFactor pins BoostFactor at sample scores:
// 0 → 1.0, 0.5 → 0.75, 1 → 0.5. The out-of-range scores -0.1 and 1.5
// are expected to produce the same factors as 0 and 1 respectively.
func TestPlaybookEntry_BoostFactor(t *testing.T) {
	cases := []struct {
		score float64
		want  float64
	}{
		{0.0, 1.0},
		{0.5, 0.75},
		{1.0, 0.5},
		{-0.1, 1.0}, // clamped
		{1.5, 0.5},  // clamped
	}
	for _, c := range cases {
		got := PlaybookEntry{Score: c.score}.BoostFactor()
		if abs(got-c.want) > 1e-9 {
			t.Errorf("BoostFactor(score=%.2f): want %.4f, got %.4f", c.score, c.want, got)
		}
	}
}

// TestApplyPlaybookBoost_NoHitsLeaveResultsAlone checks that calling
// ApplyPlaybookBoost with nil hits reports zero boosts and leaves the
// result order untouched.
func TestApplyPlaybookBoost_NoHitsLeaveResultsAlone(t *testing.T) {
	results := []Result{
		{ID: "a", Distance: 0.1, Corpus: "x"},
		{ID: "b", Distance: 0.2, Corpus: "x"},
	}
	n := ApplyPlaybookBoost("", results, nil)
	if n != 0 {
		t.Errorf("expected 0 boosted, got %d", n)
	}
	if results[0].ID != "a" || results[1].ID != "b" {
		t.Errorf("results reordered without hits: %v", results)
	}
}

// TestApplyPlaybookBoost_BoostMovesResultUp checks that a boosted
// result has its distance scaled and is re-ranked in place.
func TestApplyPlaybookBoost_BoostMovesResultUp(t *testing.T) {
	// Initial: a (0.10) beats b (0.20) beats c (0.30).
	// Playbook says (answer=c, score=1.0) should be boosted → c's
	// distance becomes 0.30 * 0.5 = 0.15. New ordering: a, c, b.
	results := []Result{
		{ID: "a", Distance: 0.10, Corpus: "x"},
		{ID: "b", Distance: 0.20, Corpus: "x"},
		{ID: "c", Distance: 0.30, Corpus: "x"},
	}
	hits := []PlaybookHit{
		{PlaybookID: "p1", Distance: 0.05, Entry: PlaybookEntry{
			AnswerID: "c", AnswerCorpus: "x", Score: 1.0,
		}},
	}
	n := ApplyPlaybookBoost("", results, hits)
	if n != 1 {
		t.Errorf("expected 1 boosted, got %d", n)
	}
	if results[0].ID != "a" || results[1].ID != "c" || results[2].ID != "b" {
		t.Errorf("expected order a,c,b after boost; got %v", idsOf(results))
	}
	if abs(float64(results[1].Distance)-0.15) > 1e-6 {
		t.Errorf("expected c distance 0.15 after boost; got %.4f", results[1].Distance)
	}
}

// TestApplyPlaybookBoost_HighestScoreWinsForSameAnswer checks that when
// several hits target the same answer, the strongest score determines
// the applied boost.
func TestApplyPlaybookBoost_HighestScoreWinsForSameAnswer(t *testing.T) {
	results := []Result{
		{ID: "a", Distance: 0.30, Corpus: "x"},
	}
	// Two playbook hits both pointing at "a". Score=0.4 (weak boost)
	// + Score=0.9 (strong boost). Strong should win — distance gets
	// multiplied by 1-0.5*0.9 = 0.55, not by 1-0.5*0.4 = 0.80.
	hits := []PlaybookHit{
		{PlaybookID: "p_weak", Distance: 0.05, Entry: PlaybookEntry{
			AnswerID: "a", AnswerCorpus: "x", Score: 0.4,
		}},
		{PlaybookID: "p_strong", Distance: 0.05, Entry: PlaybookEntry{
			AnswerID: "a", AnswerCorpus: "x", Score: 0.9,
		}},
	}
	ApplyPlaybookBoost("", results, hits)
	wantDist := 0.30 * 0.55
	if abs(float64(results[0].Distance)-wantDist) > 1e-6 {
		t.Errorf("strong-score boost should win: want %.4f, got %.4f", wantDist, results[0].Distance)
	}
}

// TestApplyPlaybookBoost_CorpusAttributionRespected checks that a hit
// only boosts a result when both the answer ID and the corpus match.
func TestApplyPlaybookBoost_CorpusAttributionRespected(t *testing.T) {
	// Playbook references answer_id="a" in corpus="x".
	// Results have answer_id="a" in corpus="y" — DIFFERENT corpus.
	// Boost should NOT apply; the (id, corpus) tuple is the join key,
	// not just id (otherwise different-corpus collisions would create
	// false positives).
	results := []Result{
		{ID: "a", Distance: 0.30, Corpus: "y"},
	}
	hits := []PlaybookHit{
		{PlaybookID: "p1", Distance: 0.05, Entry: PlaybookEntry{
			AnswerID: "a", AnswerCorpus: "x", Score: 1.0,
		}},
	}
	n := ApplyPlaybookBoost("", results, hits)
	if n != 0 {
		t.Errorf("cross-corpus collision should not boost: got %d", n)
	}
	if abs(float64(results[0].Distance)-0.30) > 1e-6 {
		// 1e-6 tolerance accounts for float32→float64 conversion;
		// the assertion that matters is "unchanged from input."
		t.Errorf("distance should be unchanged: got %.6f", results[0].Distance)
	}
}

// TestPlaybookEntry_RoundTripJSON checks that an entry built by
// NewPlaybookEntry survives MarshalMetadata followed by
// UnmarshalPlaybookMetadata with its query, answer, score and tags
// intact, and that the decoded RecordedAtNs is non-zero.
func TestPlaybookEntry_RoundTripJSON(t *testing.T) {
	wantTags := []string{"chicago", "verified"}
	e := NewPlaybookEntry("forklift query", "w-12345", "workers", 0.85, []string{"chicago", "verified"})
	raw, err := e.MarshalMetadata()
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	got, err := UnmarshalPlaybookMetadata(raw)
	if err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if got.QueryText != e.QueryText || got.AnswerID != e.AnswerID ||
		got.AnswerCorpus != e.AnswerCorpus || got.Score != e.Score {
		t.Errorf("round-trip mismatch: want %+v, got %+v", e, got)
	}
	// Compare every tag, not just the first, so a codec that drops or
	// corrupts a later element cannot slip through.
	tagsOK := len(got.Tags) == len(wantTags)
	for i := 0; tagsOK && i < len(wantTags); i++ {
		tagsOK = got.Tags[i] == wantTags[i]
	}
	if !tagsOK {
		t.Errorf("tags lost in round-trip: %v", got.Tags)
	}
	if got.RecordedAtNs == 0 {
		t.Error("RecordedAtNs not set by NewPlaybookEntry")
	}
}

// TestUnmarshalPlaybookMetadata_RejectsEmpty checks that an empty
// json.RawMessage is reported as an error rather than decoded.
func TestUnmarshalPlaybookMetadata_RejectsEmpty(t *testing.T) {
	if _, err := UnmarshalPlaybookMetadata(json.RawMessage{}); err == nil {
		t.Error("empty metadata should error")
	}
}

// TestInjectPlaybookMisses_AddsMissingAnswers locks Shape B's primary
// claim: when a playbook hit's answer isn't already in regular
// retrieval results, InjectPlaybookMisses appends a synthetic Result
// for it. Reality test playbook_lift_002 surfaced 0/2 paraphrase
// recoveries because the v0 boost-only stance couldn't promote
// answers that dropped out of the paraphrase's top-K.
func TestInjectPlaybookMisses_AddsMissingAnswers(t *testing.T) { results := []Result{ {ID: "w-1", Corpus: "workers", Distance: 0.30}, {ID: "w-2", Corpus: "workers", Distance: 0.35}, } hits := []PlaybookHit{ { PlaybookID: "pb-x", Distance: 0.20, // current query is close to recorded query Entry: PlaybookEntry{ QueryText: "recorded query", AnswerID: "w-99", // NOT in results AnswerCorpus: "workers", Score: 1.0, // strong outcome → boost factor 0.5 }, }, } out, injected := InjectPlaybookMisses("test query", "", results, hits, 0, nil) if injected != 1 { t.Fatalf("expected 1 injected, got %d", injected) } if len(out) != 3 { t.Fatalf("expected len=3, got %d (%v)", len(out), idsOf(out)) } // The injected result should be findable + carry the playbook // provenance metadata flag. var injectedResult *Result for i := range out { if out[i].ID == "w-99" { injectedResult = &out[i] break } } if injectedResult == nil { t.Fatal("w-99 not present in output") } // distance = 0.20 * 0.5 = 0.10 → near-top after caller re-sorts if injectedResult.Distance < 0.099 || injectedResult.Distance > 0.101 { t.Errorf("expected injected distance ~0.10, got %f", injectedResult.Distance) } var meta map[string]any if err := json.Unmarshal(injectedResult.Metadata, &meta); err != nil { t.Fatalf("decode meta: %v", err) } if v, _ := meta["playbook_injected"].(bool); !v { t.Errorf("expected playbook_injected=true marker, got %v", meta) } if v, _ := meta["playbook_query_text"].(string); v != "recorded query" { t.Errorf("expected recorded query in meta, got %v", v) } } // TestInjectPlaybookMisses_SkipsAnswersAlreadyPresent locks the // boost-only-when-present property. If a playbook hit's answer is // ALREADY in results, we don't duplicate-inject — ApplyPlaybookBoost // has handled that case via in-place re-rank. 
func TestInjectPlaybookMisses_SkipsAnswersAlreadyPresent(t *testing.T) { results := []Result{ {ID: "w-1", Corpus: "workers", Distance: 0.30}, {ID: "w-99", Corpus: "workers", Distance: 0.40}, // ALREADY HERE } hits := []PlaybookHit{ { PlaybookID: "pb-x", Distance: 0.20, Entry: PlaybookEntry{ QueryText: "x", AnswerID: "w-99", AnswerCorpus: "workers", Score: 1.0, }, }, } out, injected := InjectPlaybookMisses("test query", "", results, hits, 0, nil) if injected != 0 { t.Errorf("expected 0 injected (answer already present), got %d", injected) } if len(out) != 2 { t.Errorf("expected results unchanged at len=2, got %d", len(out)) } } // TestInjectPlaybookMisses_DedupesPerAnswer locks: multiple playbook // hits all pointing to the same missing answer collapse to ONE // injection (the highest-scoring hit wins). func TestInjectPlaybookMisses_DedupesPerAnswer(t *testing.T) { results := []Result{{ID: "w-1", Corpus: "workers", Distance: 0.30}} hits := []PlaybookHit{ { PlaybookID: "pb-low", Distance: 0.30, Entry: PlaybookEntry{QueryText: "q1", AnswerID: "w-99", AnswerCorpus: "workers", Score: 0.4}, }, { PlaybookID: "pb-high", Distance: 0.30, Entry: PlaybookEntry{QueryText: "q2", AnswerID: "w-99", AnswerCorpus: "workers", Score: 1.0}, }, } out, injected := InjectPlaybookMisses("test query", "", results, hits, 0.5, nil) // explicit loose threshold so 0.30 hits qualify if injected != 1 { t.Errorf("expected 1 injection (deduped), got %d", injected) } // Score=1.0 (the high one) wins → boost factor 0.5 → distance 0.15 for _, r := range out { if r.ID == "w-99" { if r.Distance < 0.149 || r.Distance > 0.151 { t.Errorf("expected distance from highest-score hit (~0.15), got %f", r.Distance) } } } } // TestInjectPlaybookMisses_GateRejectsCandidate locks the judge-gate // path (OPEN item #1, closed by this commit). When the InjectGate // returns false on a candidate, the candidate is skipped — even if // distance would otherwise allow it. 
Closes the lift-suite tail // issues where embedding said "tight" but a judge said "wrong domain." func TestInjectPlaybookMisses_GateRejectsCandidate(t *testing.T) { results := []Result{{ID: "w-1", Corpus: "workers", Distance: 0.30}} hits := []PlaybookHit{ { PlaybookID: "pb-x", Distance: 0.10, // tight in cosine — would inject without gate Entry: PlaybookEntry{ QueryText: "recorded crane operator query", AnswerID: "w-99", AnswerCorpus: "workers", Score: 1.0, }, }, } rejectAll := InjectGateFunc(func(string, PlaybookHit) bool { return false }) out, injected := InjectPlaybookMisses("forklift loader query", "", results, hits, 0, rejectAll) if injected != 0 { t.Errorf("rejectAll gate should skip injection, got %d injected", injected) } if len(out) != 1 { t.Errorf("results should be unchanged at len=1, got %d", len(out)) } } // TestInjectPlaybookMisses_GateApprovesCandidate locks the // always-approve gate path: behavior matches nil-gate (current // distance-only filter). Useful for tests that want to assert // "judge-gate API is wired" without an actual decision. func TestInjectPlaybookMisses_GateApprovesCandidate(t *testing.T) { results := []Result{{ID: "w-1", Corpus: "workers", Distance: 0.30}} hits := []PlaybookHit{ { PlaybookID: "pb-x", Distance: 0.10, Entry: PlaybookEntry{ QueryText: "x", AnswerID: "w-99", AnswerCorpus: "workers", Score: 1.0, }, }, } approveAll := InjectGateFunc(func(string, PlaybookHit) bool { return true }) out, injected := InjectPlaybookMisses("test query", "", results, hits, 0, approveAll) if injected != 1 { t.Errorf("approveAll gate should inject, got %d", injected) } if len(out) != 2 { t.Errorf("results should grow to 2, got %d", len(out)) } } // TestInjectPlaybookMisses_GateSeesCorrectQuery locks the gate's // query+hit visibility — the gate must receive the CURRENT search's // query (not the recorded one) so it can judge the (current_query, // candidate) pair. The recorded query lives on hit.Entry.QueryText. 
func TestInjectPlaybookMisses_GateSeesCorrectQuery(t *testing.T) { results := []Result{{ID: "w-1", Corpus: "workers", Distance: 0.30}} hits := []PlaybookHit{ { PlaybookID: "pb-x", Distance: 0.10, Entry: PlaybookEntry{ QueryText: "RECORDED", AnswerID: "w-99", AnswerCorpus: "workers", Score: 1.0, }, }, } var seenQuery, seenRecordedQuery string gate := InjectGateFunc(func(q string, h PlaybookHit) bool { seenQuery = q seenRecordedQuery = h.Entry.QueryText return true }) _, _ = InjectPlaybookMisses("CURRENT", "", results, hits, 0, gate) if seenQuery != "CURRENT" { t.Errorf("gate received query=%q, want CURRENT", seenQuery) } if seenRecordedQuery != "RECORDED" { t.Errorf("gate received recorded=%q, want RECORDED", seenRecordedQuery) } } // TestInjectPlaybookMisses_RespectsInjectThreshold locks the // cross-pollination defense added after run #003: hits whose playbook // distance exceeds the inject threshold are skipped, preventing the // "OSHA-30 forklift" recording from surfacing as warm top-1 for an // unrelated dental-hygienist query just because their text vectors // happened to fall within boost-threshold (0.5). func TestInjectPlaybookMisses_RespectsInjectThreshold(t *testing.T) { results := []Result{{ID: "w-1", Corpus: "workers", Distance: 0.30}} // Two hits: one within tight inject threshold, one beyond it but // within boost threshold. Only the tight one should inject. 
hits := []PlaybookHit{ { PlaybookID: "tight", Distance: 0.10, // within inject (true paraphrase territory) Entry: PlaybookEntry{QueryText: "q1", AnswerID: "w-tight", AnswerCorpus: "workers", Score: 1.0}, }, { PlaybookID: "loose", Distance: 0.40, // boost-eligible but inject-rejected Entry: PlaybookEntry{QueryText: "q2", AnswerID: "w-loose", AnswerCorpus: "workers", Score: 1.0}, }, } // Default threshold (0 → DefaultPlaybookMaxInjectDistance = 0.20) out, injected := InjectPlaybookMisses("test query", "", results, hits, 0, nil) if injected != 1 { t.Errorf("expected 1 injection (only the tight hit qualifies), got %d", injected) } gotTight := false for _, r := range out { if r.ID == "w-tight" { gotTight = true } if r.ID == "w-loose" { t.Errorf("loose hit (distance > inject threshold) was injected anyway") } } if !gotTight { t.Error("tight hit should have been injected") } } // TestInjectPlaybookMisses_EmptyHits is a fast-path no-op check. func TestInjectPlaybookMisses_EmptyHits(t *testing.T) { results := []Result{{ID: "w-1", Corpus: "workers", Distance: 0.30}} out, injected := InjectPlaybookMisses("test query", "", results, nil, 0, nil) if injected != 0 { t.Errorf("expected 0 injection, got %d", injected) } if len(out) != 1 { t.Errorf("results should be unchanged, got len=%d", len(out)) } } // Cross-role gate tests — reproduce the real_001 bleed scenario at // the unit-test level so regressions can't ship without notice. // // real_001 scenario: Q#2 records playbook entry for e-6193 fitting // "Forklift Operator at Beacon Freight Detroit MI". Q#10 (CNC Operator // at same client + city) embeds within 0.135 cosine of Q#2's // playbook entry — well inside the 0.20 inject threshold. Without the // role gate, e-6193 injects on Q#10 at warm-top-1, demoting the // cold-pass-correct w-3759. 
func TestInjectPlaybookMisses_RoleGateRejectsCrossRole(t *testing.T) { results := []Result{{ID: "w-3759", Corpus: "workers", Distance: 0.50}} hits := []PlaybookHit{{ PlaybookID: "pb-q2", Distance: 0.135, // tight; well within DefaultPlaybookMaxInjectDistance Entry: PlaybookEntry{ QueryText: "Need 1 Forklift Operator in Detroit MI ...", Role: "Forklift Operator", AnswerID: "e-6193", AnswerCorpus: "ethereal_workers", Score: 1.0, }, }} out, injected := InjectPlaybookMisses( "Need 1 CNC Operator in Detroit MI ...", "CNC Operator", // queryRole differs results, hits, 0, nil, ) if injected != 0 { t.Errorf("expected 0 cross-role injections, got %d", injected) } if len(out) != 1 { t.Errorf("results should be unchanged when gate rejects, got len=%d", len(out)) } } func TestInjectPlaybookMisses_RoleGateAllowsSameRole(t *testing.T) { results := []Result{{ID: "e-5617", Corpus: "ethereal_workers", Distance: 0.50}} hits := []PlaybookHit{{ PlaybookID: "pb-q2", Distance: 0.135, Entry: PlaybookEntry{ QueryText: "Need 1 Forklift Operator in Detroit MI ...", Role: "Forklift Operator", AnswerID: "e-6193", AnswerCorpus: "ethereal_workers", Score: 1.0, }, }} out, injected := InjectPlaybookMisses( "Need 2 Forklift Operators in Detroit MI ...", "Forklift Operators", // plural — roleEqual normalizes to match results, hits, 0, nil, ) if injected != 1 { t.Errorf("expected 1 same-role injection, got %d", injected) } if len(out) != 2 { t.Errorf("results should be extended with the injection, got len=%d", len(out)) } } func TestInjectPlaybookMisses_RoleGateBackwardCompat(t *testing.T) { // Empty role on either side disables the gate — preserves // behavior for callers (lift suite, etc.) that don't supply role. 
results := []Result{{ID: "w-3759", Corpus: "workers", Distance: 0.50}} hits := []PlaybookHit{{ PlaybookID: "pb-q2", Distance: 0.135, Entry: PlaybookEntry{ QueryText: "...", Role: "Forklift Operator", // entry has role AnswerID: "e-6193", AnswerCorpus: "ethereal_workers", Score: 1.0, }, }} // Caller didn't pass queryRole — gate must NOT fire. _, injected := InjectPlaybookMisses("...", "", results, hits, 0, nil) if injected != 1 { t.Errorf("expected gate disabled when queryRole empty (injected=1), got %d", injected) } // Inverse: queryRole set but entry has no role. hits[0].Entry.Role = "" _, injected = InjectPlaybookMisses("...", "Forklift Operator", results, hits, 0, nil) if injected != 1 { t.Errorf("expected gate disabled when entry.Role empty (injected=1), got %d", injected) } } func TestApplyPlaybookBoost_RoleGateRejectsCrossRole(t *testing.T) { results := []Result{ {ID: "w-3759", Corpus: "workers", Distance: 0.50}, {ID: "e-6193", Corpus: "ethereal_workers", Distance: 0.55}, } hits := []PlaybookHit{{ PlaybookID: "pb-q2", Distance: 0.18, Entry: PlaybookEntry{ QueryText: "Forklift Operator query", Role: "Forklift Operator", AnswerID: "e-6193", AnswerCorpus: "ethereal_workers", Score: 1.0, }, }} // Different-role query → boost must NOT fire on e-6193 even though // it's in the result set. 
n := ApplyPlaybookBoost("CNC Operator", results, hits) if n != 0 { t.Errorf("expected 0 cross-role boosts, got %d", n) } if results[1].Distance != 0.55 { t.Errorf("e-6193 distance should be unchanged (0.55), got %v", results[1].Distance) } } func TestRoleEqual_PluralAndCase(t *testing.T) { cases := []struct { a, b string want bool }{ {"Forklift Operator", "forklift operator", true}, {"Forklift Operator", "Forklift Operators", true}, // plural {"CNC Operator", "Forklift Operator", false}, {"Pickers", "Picker", true}, {"", "Forklift Operator", true}, // gate disabled {"CNC Operator", "", true}, // gate disabled (other side empty) {"", "", true}, // both empty = both disabled {"Boxes", "Box", true}, // -es plural } for _, c := range cases { got := roleEqual(c.a, c.b) if got != c.want { t.Errorf("roleEqual(%q, %q) = %v, want %v", c.a, c.b, got, c.want) } } } // TestRoleNormalize_NonPluralS locks the convergent finding from the // role_gate_v1 scrum (Opus + Kimi + Qwen): words ending in 's' that // aren't plurals must NOT be stripped. Bug shape per Opus' write-up: // the original implementation collapsed "Sales" to "Sale", "Logistics" // to "Logistic", etc., which would silently fail role-equality on // every staffing client whose roles include those tokens. func TestRoleNormalize_NonPluralS(t *testing.T) { cases := []struct { in, want string }{ // Allowlist: end in 's' but represent role names as-is. {"Sales", "sales"}, {"Sales Associate", "sales associate"}, {"Sales Associates", "sales associate"}, // plural-stripped on the head noun only {"Logistics", "logistics"}, {"Logistics Coordinator", "logistics coordinator"}, {"Operations Manager", "operations manager"}, {"Facilities", "facilities"}, // -ss (not plural): must not strip. {"Press", "press"}, {"Press Operator", "press operator"}, {"Boss", "boss"}, // Real plurals still strip correctly. 
{"Operators", "operator"}, {"Pickers", "picker"}, {"Boxes", "box"}, {"Dishes", "dish"}, // Multi-word real plurals: strip head only. {"Forklift Operators", "forklift operator"}, {"Production Workers", "production worker"}, // Whitespace + case tolerance. {" CNC Operator ", "cnc operator"}, {"FORKLIFT OPERATOR", "forklift operator"}, // Empty. {"", ""}, } for _, c := range cases { got := roleNormalize(c.in) if got != c.want { t.Errorf("roleNormalize(%q) = %q, want %q", c.in, got, c.want) } } } // TestRoleEqual_NonPluralS is the gate-level pairing for // TestRoleNormalize_NonPluralS — proves that two equal-shape role // strings compare equal AND two different-role strings (one plural, // one allowlisted) compare different. func TestRoleEqual_NonPluralS(t *testing.T) { cases := []struct { a, b string want bool }{ // Same role, allowlisted token: must stay equal. {"Sales", "sales", true}, {"Logistics Coordinator", "logistics coordinator", true}, {"Press Operator", "PRESS OPERATOR", true}, // Allowlisted vs naïve-singular (the original bug): Sales // would have falsely matched "Sale" if normalize stripped 's'. // "Sale" isn't in the allowlist + isn't a plural-of-something, // so they should NOT match. {"Sales", "Sale", false}, // Plural ↔ singular cross-comparison still works for legit plurals. {"Sales Associates", "Sales Associate", true}, {"Forklift Operators", "Forklift Operator", true}, } for _, c := range cases { got := roleEqual(c.a, c.b) if got != c.want { t.Errorf("roleEqual(%q, %q) = %v, want %v", c.a, c.b, got, c.want) } } } func abs(f float64) float64 { if f < 0 { return -f } return f } func idsOf(rs []Result) []string { out := make([]string, len(rs)) for i, r := range rs { out[i] = r.ID } return out }