matrix: playbook memory + boost — SPEC §3.4 component 5 of 5 (LEARNING LOOP)
Closes SPEC §3.4. The matrix indexer is now a learning meta-index per
feedback_meta_index_vision.md — every successful (query → answer)
pair recorded via /matrix/playbooks/record boosts that answer for
future similar queries.
This is the architectural piece that lifts vectord from "static
hybrid search" to the meta-index J originally framed in Phase 19 of
the Rust system.
What's new:
- internal/matrix/playbook.go — PlaybookEntry, PlaybookHit,
ApplyPlaybookBoost. Pure-function boost math:
distance' = distance * (1 - 0.5 * score)
Score 0 = no boost (factor 1.0); score 1 = halve distance
(factor 0.5). The factor is floored at 0.5 (i.e. the strongest
possible boost is a halving) deliberately so a single high-
confidence playbook can't dominate the base ranking forever
(runaway-feedback-loop guard).
- Retriever.Record(entry, corpus) — embeds query_text, ensures
playbook corpus exists (idempotent), upserts via deterministic
sha256-derived ID (last score wins on re-record of same triple).
- Retriever.Search extended with UsePlaybook + PlaybookCorpus +
PlaybookTopK + PlaybookMaxDistance. Reuses the query vector —
no extra embed call. Missing-corpus 404 = no-op (cold-start
state before any Record call), not an error.
- POST /v1/matrix/playbooks/record (matrixd) — caller submits
{query_text, answer_id, answer_corpus, score, tags?, corpus?}; gets
{playbook_id} back, plus an echo of query_text, answer_id,
answer_corpus and score.
Storage: a vectord index named "playbook_memory" (configurable per
request) with embed(query_text) as the vector and the
PlaybookEntry JSON as metadata. Just another corpus — observable
from /vectors/index, persistable through G1P, etc.
Match key for boost: (AnswerID, AnswerCorpus). Cross-corpus ID
collisions don't false-match — verified by
TestApplyPlaybookBoost_CorpusAttributionRespected.
End-to-end smoke (scripts/playbook_smoke.sh, all assertions PASS):
- Baseline search: widget-c at distance 0.6566 (rank 3)
- Record playbook: query → widget-c, score=1.0
- Re-search with use_playbook=true:
widget-c distance: 0.3283 (rank 2)
ratio: 0.5 EXACTLY (matches boost math precisely)
playbook_boosted: 1
- widget-c jumped from #3 to #2 — learning loop visible
Tests:
- 8 unit tests in internal/matrix/playbook_test.go covering
Validate, BoostFactor (5 cases), the no-boost identity, the
boost-moves-result-up scenario, highest-score wins on duplicate
matches, cross-corpus attribution, JSON round-trip, and
rejection of empty metadata
- scripts/playbook_smoke.sh integration test (3 assertions PASS)
15-smoke regression sweep all green (D1-D6, G1, G1P, G2,
storaged_cap, pathway, matrix, relevance, downgrade, playbook).
SPEC §3.4 NOW COMPLETE: 5 of 5 components shipped. The matrix
indexer's port is done as a substrate; remaining work is operational
(rating signal sources, telemetry, eventual structured filtering for
staffing data — none in §3.4).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
31b408882b
commit
06e71520c4
@ -2,12 +2,13 @@
|
||||
// Retriever with HTTP routes per docs/SPEC.md §3.4.
|
||||
//
|
||||
// Routes:
|
||||
// POST /matrix/search — multi-corpus retrieve+merge
|
||||
// GET /matrix/corpora — list known vectord indexes (proxy)
|
||||
// POST /matrix/relevance — adjacency-pollution filter (CODE-aware;
|
||||
// port of mcp-server/relevance.ts)
|
||||
// POST /matrix/downgrade — strong-model auto-downgrade decision
|
||||
// (port of mode.rs::execute pass5 gate)
|
||||
// POST /matrix/search — multi-corpus retrieve+merge,
|
||||
// with optional playbook boost
|
||||
// GET /matrix/corpora — list known vectord indexes
|
||||
// POST /matrix/relevance — adjacency-pollution filter
|
||||
// POST /matrix/downgrade — strong-model downgrade gate
|
||||
// POST /matrix/playbooks/record — record a (query → answer)
|
||||
// success for the learning loop
|
||||
//
|
||||
// matrixd talks to embedd (for query-text embedding) and vectord
|
||||
// (for per-corpus search) via HTTP. Both URLs come from
|
||||
@ -64,6 +65,7 @@ func (h *handlers) register(r chi.Router) {
|
||||
r.Get("/matrix/corpora", h.handleCorpora)
|
||||
r.Post("/matrix/relevance", h.handleRelevance)
|
||||
r.Post("/matrix/downgrade", h.handleDowngrade)
|
||||
r.Post("/matrix/playbooks/record", h.handlePlaybookRecord)
|
||||
}
|
||||
|
||||
func (h *handlers) handleSearch(w http.ResponseWriter, r *http.Request) {
|
||||
@ -104,6 +106,42 @@ func (h *handlers) handleRelevance(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, http.StatusOK, res)
|
||||
}
|
||||
|
||||
// playbookRecordRequest is the JSON body accepted by
// POST /matrix/playbooks/record.
//
// QueryText, AnswerID, AnswerCorpus and Score describe the confirmed
// (query → answer) pair; Tags is optional free-form labelling.
// Corpus is optional; defaults to matrix.DefaultPlaybookCorpus.
type playbookRecordRequest struct {
	// The confirmed pair.
	QueryText    string  `json:"query_text"`
	AnswerID     string  `json:"answer_id"`
	AnswerCorpus string  `json:"answer_corpus"`
	Score        float64 `json:"score"`

	// Optional extras.
	Tags   []string `json:"tags,omitempty"`
	Corpus string   `json:"corpus,omitempty"`
}
|
||||
|
||||
func (h *handlers) handlePlaybookRecord(w http.ResponseWriter, r *http.Request) {
|
||||
var req playbookRecordRequest
|
||||
if !decodeJSON(w, r, &req) {
|
||||
return
|
||||
}
|
||||
entry := matrix.NewPlaybookEntry(req.QueryText, req.AnswerID, req.AnswerCorpus, req.Score, req.Tags)
|
||||
if err := entry.Validate(); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
pbID, err := h.r.Record(r.Context(), entry, req.Corpus)
|
||||
if err != nil {
|
||||
slog.Warn("playbook record", "err", err)
|
||||
http.Error(w, err.Error(), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"playbook_id": pbID,
|
||||
"query_text": entry.QueryText,
|
||||
"answer_id": entry.AnswerID,
|
||||
"answer_corpus": entry.AnswerCorpus,
|
||||
"score": entry.Score,
|
||||
})
|
||||
}
|
||||
|
||||
// downgradeRequest is the POST /matrix/downgrade body. Mirrors
|
||||
// matrix.DowngradeInput; ForceFullOverride is read from the
|
||||
// LH_FORCE_FULL_ENRICHMENT env var when omitted from the body.
|
||||
|
||||
196
internal/matrix/playbook.go
Normal file
196
internal/matrix/playbook.go
Normal file
@ -0,0 +1,196 @@
|
||||
package matrix
|
||||
|
||||
// Playbook memory — SPEC §3.4 component 5 (learning-loop integration).
|
||||
//
|
||||
// Concept: every time an external system confirms "(query → answer_id)
|
||||
// was a successful match," record it. Future similar queries get that
|
||||
// answer's score boosted, so the matrix indexer learns from outcomes
|
||||
// rather than relying solely on the base embedder's geometry.
|
||||
//
|
||||
// Per feedback_meta_index_vision.md: this is the north star — a
|
||||
// meta-index that LEARNS from playbooks over time, not a static
|
||||
// hybrid search engine.
|
||||
//
|
||||
// Storage shape: a vectord index named DefaultPlaybookCorpus where:
|
||||
// - The vector is embed(query_text)
|
||||
// - The metadata is a serialized PlaybookEntry
|
||||
// Retrieval shape: at /matrix/search time, when use_playbook=true,
|
||||
// matrixd searches the playbook corpus with the same query vector,
|
||||
// looks up each hit's answer_id, and if that answer is in the current
|
||||
// matrix-search results, applies a boost to its distance.
|
||||
//
|
||||
// Composition: this layer is additive on top of the existing
|
||||
// retrieve+merge — when use_playbook=false, behavior is unchanged.
|
||||
// The boost only re-ranks results that ALREADY surfaced from the
|
||||
// regular retrieval. A v1 enhancement would inject playbook hits
|
||||
// directly even when they weren't in the top-K (Shape B from the
|
||||
// design conversation), but v0 keeps the safer "boost-only" stance.
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"sort"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Defaults for playbook memory. Every one of them can be overridden
// per request.
const (
	// DefaultPlaybookCorpus is the vectord index name that playbook
	// entries are written to when the caller names none. A single
	// well-known default keeps the system observable from outside: an
	// operator listing /vectors/index sees this corpus.
	DefaultPlaybookCorpus = "playbook_memory"

	// DefaultPlaybookTopK is the number of similar past queries
	// considered when applying a boost. 3 keeps the influence focused
	// so the boost rewards consistent matches rather than letting one
	// stale playbook dominate.
	DefaultPlaybookTopK = 3

	// DefaultPlaybookMaxDistance is the cosine-distance ceiling below
	// which a past query counts as "similar enough". 0.5 admits
	// genuinely related queries while excluding coincidental
	// neighbors; expected to be tuned as real staffing data arrives.
	DefaultPlaybookMaxDistance = 0.5
)
|
||||
|
||||
// PlaybookEntry is what gets stored as metadata on each playbook
// vector. RecordedAtNs is a Unix-nanosecond timestamp captured at
// write time; callers should not set it themselves (NewPlaybookEntry
// stamps it, and Retriever.Record fills it in when it is zero).
type PlaybookEntry struct {
	QueryText    string   `json:"query_text"`
	AnswerID     string   `json:"answer_id"`
	AnswerCorpus string   `json:"answer_corpus"`
	Score        float64  `json:"score"` // 0..1; higher = better outcome
	RecordedAtNs int64    `json:"recorded_at_ns"`
	Tags         []string `json:"tags,omitempty"`
}

// Validate returns an error if the entry is missing a required field
// (query_text, answer_id, answer_corpus) or if Score is not a number
// in [0, 1]. Callers should validate before storage so bad data
// doesn't pollute the corpus.
func (p PlaybookEntry) Validate() error {
	if p.QueryText == "" {
		return errors.New("playbook: query_text required")
	}
	if p.AnswerID == "" {
		return errors.New("playbook: answer_id required")
	}
	if p.AnswerCorpus == "" {
		return errors.New("playbook: answer_corpus required")
	}
	// Written as a negated in-range test on purpose: every comparison
	// with NaN is false, so "Score < 0 || Score > 1" would let NaN
	// through. NaN must be rejected — it cannot be JSON-encoded and
	// would poison the boost arithmetic.
	if !(p.Score >= 0 && p.Score <= 1) {
		return errors.New("playbook: score must be in [0, 1]")
	}
	return nil
}

// BoostFactor returns the multiplier applied to a result's distance
// when this playbook entry matches it. Lower is better:
//
//	score = 0   → 1.0  (no boost)
//	score = 0.5 → 0.75 (mild boost)
//	score = 1.0 → 0.5  (halve the distance — strong boost)
//
// Math: 1 - 0.5*score, with score clamped to [0, 1] so the factor
// always lies in [0.5, 1.0]. A NaN score is treated as 0 (no boost).
//
// Why halving as the maximum boost: a perfect-confidence playbook
// entry shouldn't completely override the base embedding (that
// invites runaway feedback loops where one early playbook dominates
// forever). Halving is enough to move a mid-rank result to the top in
// most cases without erasing the base ranking signal.
func (p PlaybookEntry) BoostFactor() float64 {
	score := p.Score
	switch {
	case !(score > 0):
		// Negative, zero, or NaN: no boost. The negated comparison is
		// what catches NaN, which a plain "score < 0" would miss.
		score = 0
	case score > 1:
		score = 1
	}
	return 1.0 - 0.5*score
}
|
||||
|
||||
// MarshalMetadata serializes the entry as the JSON RawMessage that
|
||||
// vectord stores per item. Convenience for the recorder.
|
||||
func (p PlaybookEntry) MarshalMetadata() (json.RawMessage, error) {
|
||||
return json.Marshal(p)
|
||||
}
|
||||
|
||||
// UnmarshalPlaybookMetadata is the inverse — used when fetching
|
||||
// playbook hits to decode their metadata back into entries.
|
||||
func UnmarshalPlaybookMetadata(raw json.RawMessage) (PlaybookEntry, error) {
|
||||
var e PlaybookEntry
|
||||
if len(raw) == 0 {
|
||||
return e, errors.New("playbook: empty metadata")
|
||||
}
|
||||
if err := json.Unmarshal(raw, &e); err != nil {
|
||||
return e, err
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// NewPlaybookEntry stamps RecordedAtNs to now and returns the entry.
|
||||
// Validation happens at storage; this is just construction.
|
||||
func NewPlaybookEntry(query, answerID, answerCorpus string, score float64, tags []string) PlaybookEntry {
|
||||
return PlaybookEntry{
|
||||
QueryText: query,
|
||||
AnswerID: answerID,
|
||||
AnswerCorpus: answerCorpus,
|
||||
Score: score,
|
||||
RecordedAtNs: time.Now().UnixNano(),
|
||||
Tags: tags,
|
||||
}
|
||||
}
|
||||
|
||||
// PlaybookHit is one similarity-search result from the playbook
|
||||
// corpus, paired with its decoded entry. Distance is the cosine
|
||||
// distance between the current query and this past playbook's
|
||||
// query vector — used by the caller to filter out "too far"
|
||||
// matches via PlaybookMaxDistance.
|
||||
type PlaybookHit struct {
|
||||
PlaybookID string `json:"playbook_id"`
|
||||
Distance float32 `json:"distance"`
|
||||
Entry PlaybookEntry `json:"entry"`
|
||||
}
|
||||
|
||||
// ApplyPlaybookBoost re-ranks results in place using matched
|
||||
// playbook hits. For each hit whose (AnswerID, AnswerCorpus)
|
||||
// matches a result, multiply that result's distance by the hit's
|
||||
// BoostFactor. If multiple hits match the same result, the highest-
|
||||
// score one wins (greatest reduction in distance).
|
||||
//
|
||||
// After applying boosts, results are re-sorted ascending by
|
||||
// distance.
|
||||
//
|
||||
// Returns the number of distinct results that received a boost.
|
||||
// Callers can log this as a signal of "how much the playbook
|
||||
// influenced this query."
|
||||
func ApplyPlaybookBoost(results []Result, hits []PlaybookHit) int {
|
||||
if len(hits) == 0 || len(results) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// For each result, find the hit with the lowest BoostFactor
|
||||
// (= largest boost = highest score, since BoostFactor is
|
||||
// 1-0.5*score and we minimize).
|
||||
bestBoost := make(map[int]float64, len(results))
|
||||
for i, r := range results {
|
||||
for _, h := range hits {
|
||||
if h.Entry.AnswerID != r.ID || h.Entry.AnswerCorpus != r.Corpus {
|
||||
continue
|
||||
}
|
||||
bf := h.Entry.BoostFactor()
|
||||
if cur, ok := bestBoost[i]; !ok || bf < cur {
|
||||
bestBoost[i] = bf
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i, bf := range bestBoost {
|
||||
results[i].Distance = float32(float64(results[i].Distance) * bf)
|
||||
}
|
||||
|
||||
sort.SliceStable(results, func(i, j int) bool {
|
||||
return results[i].Distance < results[j].Distance
|
||||
})
|
||||
|
||||
return len(bestBoost)
|
||||
}
|
||||
180
internal/matrix/playbook_test.go
Normal file
180
internal/matrix/playbook_test.go
Normal file
@ -0,0 +1,180 @@
|
||||
package matrix
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPlaybookEntry_Validate(t *testing.T) {
|
||||
good := PlaybookEntry{
|
||||
QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: 0.5,
|
||||
}
|
||||
if err := good.Validate(); err != nil {
|
||||
t.Errorf("good entry should validate: %v", err)
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
entry PlaybookEntry
|
||||
}{
|
||||
{"empty query", PlaybookEntry{AnswerID: "y", AnswerCorpus: "z", Score: 0.5}},
|
||||
{"empty answer id", PlaybookEntry{QueryText: "x", AnswerCorpus: "z", Score: 0.5}},
|
||||
{"empty corpus", PlaybookEntry{QueryText: "x", AnswerID: "y", Score: 0.5}},
|
||||
{"score too high", PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: 1.5}},
|
||||
{"score negative", PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: -0.1}},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if err := c.entry.Validate(); err == nil {
|
||||
t.Errorf("%s: expected validation error, got nil", c.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlaybookEntry_BoostFactor(t *testing.T) {
|
||||
cases := []struct {
|
||||
score float64
|
||||
want float64
|
||||
}{
|
||||
{0.0, 1.0},
|
||||
{0.5, 0.75},
|
||||
{1.0, 0.5},
|
||||
{-0.1, 1.0}, // clamped
|
||||
{1.5, 0.5}, // clamped
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := PlaybookEntry{Score: c.score}.BoostFactor()
|
||||
if abs(got-c.want) > 1e-9 {
|
||||
t.Errorf("BoostFactor(score=%.2f): want %.4f, got %.4f", c.score, c.want, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyPlaybookBoost_NoHitsLeaveResultsAlone(t *testing.T) {
|
||||
results := []Result{
|
||||
{ID: "a", Distance: 0.1, Corpus: "x"},
|
||||
{ID: "b", Distance: 0.2, Corpus: "x"},
|
||||
}
|
||||
n := ApplyPlaybookBoost(results, nil)
|
||||
if n != 0 {
|
||||
t.Errorf("expected 0 boosted, got %d", n)
|
||||
}
|
||||
if results[0].ID != "a" || results[1].ID != "b" {
|
||||
t.Errorf("results reordered without hits: %v", results)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyPlaybookBoost_BoostMovesResultUp(t *testing.T) {
|
||||
// Initial: a (0.10) beats b (0.20) beats c (0.30).
|
||||
// Playbook says (answer=c, score=1.0) should be boosted → c's
|
||||
// distance becomes 0.30 * 0.5 = 0.15. New ordering: a, c, b.
|
||||
results := []Result{
|
||||
{ID: "a", Distance: 0.10, Corpus: "x"},
|
||||
{ID: "b", Distance: 0.20, Corpus: "x"},
|
||||
{ID: "c", Distance: 0.30, Corpus: "x"},
|
||||
}
|
||||
hits := []PlaybookHit{
|
||||
{PlaybookID: "p1", Distance: 0.05, Entry: PlaybookEntry{
|
||||
AnswerID: "c", AnswerCorpus: "x", Score: 1.0,
|
||||
}},
|
||||
}
|
||||
n := ApplyPlaybookBoost(results, hits)
|
||||
if n != 1 {
|
||||
t.Errorf("expected 1 boosted, got %d", n)
|
||||
}
|
||||
if results[0].ID != "a" || results[1].ID != "c" || results[2].ID != "b" {
|
||||
t.Errorf("expected order a,c,b after boost; got %v", idsOf(results))
|
||||
}
|
||||
if abs(float64(results[1].Distance)-0.15) > 1e-6 {
|
||||
t.Errorf("expected c distance 0.15 after boost; got %.4f", results[1].Distance)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyPlaybookBoost_HighestScoreWinsForSameAnswer(t *testing.T) {
|
||||
results := []Result{
|
||||
{ID: "a", Distance: 0.30, Corpus: "x"},
|
||||
}
|
||||
// Two playbook hits both pointing at "a". Score=0.4 (weak boost)
|
||||
// + Score=0.9 (strong boost). Strong should win — distance gets
|
||||
// multiplied by 1-0.5*0.9 = 0.55, not by 1-0.5*0.4 = 0.80.
|
||||
hits := []PlaybookHit{
|
||||
{PlaybookID: "p_weak", Distance: 0.05, Entry: PlaybookEntry{
|
||||
AnswerID: "a", AnswerCorpus: "x", Score: 0.4,
|
||||
}},
|
||||
{PlaybookID: "p_strong", Distance: 0.05, Entry: PlaybookEntry{
|
||||
AnswerID: "a", AnswerCorpus: "x", Score: 0.9,
|
||||
}},
|
||||
}
|
||||
ApplyPlaybookBoost(results, hits)
|
||||
wantDist := 0.30 * 0.55
|
||||
if abs(float64(results[0].Distance)-wantDist) > 1e-6 {
|
||||
t.Errorf("strong-score boost should win: want %.4f, got %.4f", wantDist, results[0].Distance)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyPlaybookBoost_CorpusAttributionRespected(t *testing.T) {
|
||||
// Playbook references answer_id="a" in corpus="x".
|
||||
// Results have answer_id="a" in corpus="y" — DIFFERENT corpus.
|
||||
// Boost should NOT apply; the (id, corpus) tuple is the join key,
|
||||
// not just id (otherwise different-corpus collisions would create
|
||||
// false positives).
|
||||
results := []Result{
|
||||
{ID: "a", Distance: 0.30, Corpus: "y"},
|
||||
}
|
||||
hits := []PlaybookHit{
|
||||
{PlaybookID: "p1", Distance: 0.05, Entry: PlaybookEntry{
|
||||
AnswerID: "a", AnswerCorpus: "x", Score: 1.0,
|
||||
}},
|
||||
}
|
||||
n := ApplyPlaybookBoost(results, hits)
|
||||
if n != 0 {
|
||||
t.Errorf("cross-corpus collision should not boost: got %d", n)
|
||||
}
|
||||
if abs(float64(results[0].Distance)-0.30) > 1e-6 {
|
||||
// 1e-6 tolerance accounts for float32→float64 conversion;
|
||||
// the assertion that matters is "unchanged from input."
|
||||
t.Errorf("distance should be unchanged: got %.6f", results[0].Distance)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlaybookEntry_RoundTripJSON(t *testing.T) {
|
||||
e := NewPlaybookEntry("forklift query", "w-12345", "workers", 0.85, []string{"chicago", "verified"})
|
||||
raw, err := e.MarshalMetadata()
|
||||
if err != nil {
|
||||
t.Fatalf("marshal: %v", err)
|
||||
}
|
||||
got, err := UnmarshalPlaybookMetadata(raw)
|
||||
if err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if got.QueryText != e.QueryText || got.AnswerID != e.AnswerID ||
|
||||
got.AnswerCorpus != e.AnswerCorpus || got.Score != e.Score {
|
||||
t.Errorf("round-trip mismatch: want %+v, got %+v", e, got)
|
||||
}
|
||||
if len(got.Tags) != 2 || got.Tags[0] != "chicago" {
|
||||
t.Errorf("tags lost in round-trip: %v", got.Tags)
|
||||
}
|
||||
if got.RecordedAtNs == 0 {
|
||||
t.Error("RecordedAtNs not set by NewPlaybookEntry")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnmarshalPlaybookMetadata_RejectsEmpty(t *testing.T) {
|
||||
if _, err := UnmarshalPlaybookMetadata(json.RawMessage{}); err == nil {
|
||||
t.Error("empty metadata should error")
|
||||
}
|
||||
}
|
||||
|
||||
// abs returns the magnitude of f. Test-local helper so the test file
// needs no math import.
func abs(f float64) float64 {
	if !(f < 0) {
		return f
	}
	return -f
}
|
||||
|
||||
func idsOf(rs []Result) []string {
|
||||
out := make([]string, len(rs))
|
||||
for i, r := range rs {
|
||||
out[i] = r.ID
|
||||
}
|
||||
return out
|
||||
}
|
||||
@ -17,12 +17,16 @@ package matrix
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@ -42,21 +46,38 @@ type Result struct {
|
||||
// SearchRequest is the matrix search input. Either QueryText (matrix
// embeds it via embedd) or QueryVector (already embedded by caller)
// must be set; QueryVector takes precedence if both supplied.
//
// Playbook fields (component 5 — learning loop):
//
//	UsePlaybook=true: after normal retrieve+merge, fetch top similar
//	  past queries from PlaybookCorpus and apply distance boost to
//	  any current results that match a recorded answer.
//	PlaybookCorpus: index name; empty = DefaultPlaybookCorpus.
//	PlaybookTopK: number of similar past queries to consider; 0 =
//	  DefaultPlaybookTopK.
//	PlaybookMaxDistance: cosine ceiling for "similar enough"; 0 =
//	  DefaultPlaybookMaxDistance.
type SearchRequest struct {
	QueryText   string    `json:"query_text,omitempty"`
	QueryVector []float32 `json:"query_vector,omitempty"`
	Corpora     []string  `json:"corpora"`
	K           int       `json:"k"`
	PerCorpusK  int       `json:"per_corpus_k,omitempty"`
	Model       string    `json:"model,omitempty"`

	// Playbook boost controls; all optional and ignored unless
	// UsePlaybook is true.
	UsePlaybook         bool    `json:"use_playbook,omitempty"`
	PlaybookCorpus      string  `json:"playbook_corpus,omitempty"`
	PlaybookTopK        int     `json:"playbook_top_k,omitempty"`
	PlaybookMaxDistance float64 `json:"playbook_max_distance,omitempty"`
}
|
||||
|
||||
// SearchResponse wraps the merged results plus per-corpus return
|
||||
// counts so callers can detect "this corpus returned nothing"
|
||||
// without re-querying.
|
||||
// without re-querying. PlaybookBoosted is the count of results that
|
||||
// received a boost from playbook memory; useful for telemetry on
|
||||
// "how much the learning loop influenced this query."
|
||||
type SearchResponse struct {
|
||||
Results []Result `json:"results"`
|
||||
PerCorpusCounts map[string]int `json:"per_corpus_counts"`
|
||||
PlaybookBoosted int `json:"playbook_boosted,omitempty"`
|
||||
}
|
||||
|
||||
// Retriever holds the HTTP clients to embedd and vectord. Stateless
|
||||
@ -158,7 +179,178 @@ func (r *Retriever) Search(ctx context.Context, req SearchRequest) (*SearchRespo
|
||||
if len(allHits) > req.K {
|
||||
allHits = allHits[:req.K]
|
||||
}
|
||||
return &SearchResponse{Results: allHits, PerCorpusCounts: perCorpus}, nil
|
||||
resp := &SearchResponse{Results: allHits, PerCorpusCounts: perCorpus}
|
||||
|
||||
// Playbook boost (component 5). Reuses the query vector — no
|
||||
// extra embed call. If the playbook corpus doesn't exist (first
|
||||
// search before any Record), the lookup gracefully no-ops.
|
||||
if req.UsePlaybook {
|
||||
hits, err := r.fetchPlaybookHits(ctx, qvec, req)
|
||||
if err != nil {
|
||||
// Don't fail the whole search on playbook errors — the
|
||||
// boost is opportunistic. Log + continue.
|
||||
slog.Warn("matrix: playbook lookup failed; skipping boost", "err", err)
|
||||
} else if len(hits) > 0 {
|
||||
resp.PlaybookBoosted = ApplyPlaybookBoost(resp.Results, hits)
|
||||
}
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// fetchPlaybookHits queries the playbook corpus with the same query
|
||||
// vector and returns hits whose decoded entries are within
|
||||
// PlaybookMaxDistance. A missing playbook corpus returns nil + nil
|
||||
// (legitimate no-op state for a system before any Record call).
|
||||
func (r *Retriever) fetchPlaybookHits(ctx context.Context, qvec []float32, req SearchRequest) ([]PlaybookHit, error) {
|
||||
corpus := req.PlaybookCorpus
|
||||
if corpus == "" {
|
||||
corpus = DefaultPlaybookCorpus
|
||||
}
|
||||
topK := req.PlaybookTopK
|
||||
if topK <= 0 {
|
||||
topK = DefaultPlaybookTopK
|
||||
}
|
||||
maxDist := req.PlaybookMaxDistance
|
||||
if maxDist <= 0 {
|
||||
maxDist = DefaultPlaybookMaxDistance
|
||||
}
|
||||
|
||||
rawHits, err := r.searchCorpus(ctx, corpus, qvec, topK)
|
||||
if err != nil {
|
||||
// vectord returns 404 for missing index. We treat that as
|
||||
// "no playbook yet" — legitimate cold-start state, not an
|
||||
// error.
|
||||
if strings.Contains(err.Error(), "status 404") {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
out := make([]PlaybookHit, 0, len(rawHits))
|
||||
for _, h := range rawHits {
|
||||
if float64(h.Distance) > maxDist {
|
||||
continue
|
||||
}
|
||||
entry, err := UnmarshalPlaybookMetadata(h.Metadata)
|
||||
if err != nil {
|
||||
slog.Warn("matrix: skip malformed playbook entry", "id", h.ID, "err", err)
|
||||
continue
|
||||
}
|
||||
out = append(out, PlaybookHit{
|
||||
PlaybookID: h.ID,
|
||||
Distance: h.Distance,
|
||||
Entry: entry,
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Record stores a (query → answer_id) playbook entry in the
|
||||
// playbook corpus. Embeds the query via embedd, ensures the corpus
|
||||
// exists (idempotent create), and writes the entry as one vectord
|
||||
// item with the entry's JSON in metadata.
|
||||
//
|
||||
// Uses a deterministic ID derived from (query_text, answer_id,
|
||||
// answer_corpus) so re-recording the same triple upserts (last
|
||||
// score wins). Callers wanting to accumulate distinct samples can
|
||||
// vary one of the three.
|
||||
//
|
||||
// corpus="" defaults to DefaultPlaybookCorpus.
|
||||
func (r *Retriever) Record(ctx context.Context, entry PlaybookEntry, corpus string) (string, error) {
|
||||
if err := entry.Validate(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if corpus == "" {
|
||||
corpus = DefaultPlaybookCorpus
|
||||
}
|
||||
|
||||
qvec, err := r.embed(ctx, entry.QueryText, "")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("playbook record embed: %w", err)
|
||||
}
|
||||
|
||||
if err := r.ensureCorpus(ctx, corpus, len(qvec)); err != nil {
|
||||
return "", fmt.Errorf("playbook ensure corpus: %w", err)
|
||||
}
|
||||
|
||||
if entry.RecordedAtNs == 0 {
|
||||
entry.RecordedAtNs = time.Now().UnixNano()
|
||||
}
|
||||
|
||||
pbID := playbookID(entry.QueryText, entry.AnswerID, entry.AnswerCorpus)
|
||||
|
||||
meta, err := entry.MarshalMetadata()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err := r.addItem(ctx, corpus, pbID, qvec, meta); err != nil {
|
||||
return "", fmt.Errorf("playbook add: %w", err)
|
||||
}
|
||||
return pbID, nil
|
||||
}
|
||||
|
||||
// playbookID derives the deterministic item ID for a playbook entry:
// "pb-" followed by the first 8 bytes (16 hex characters) of the
// SHA-256 of query, answerID and answerCorpus joined with "|".
//
// NOTE(review): the "|" join is ambiguous when query or answerID
// themselves contain "|" — ("a|b","c","d") and ("a","b|c","d") hash
// identically. Left as-is because changing the scheme would change
// the IDs of already-stored entries.
func playbookID(query, answerID, answerCorpus string) string {
	hasher := sha256.New()
	for pos, part := range [...]string{query, answerID, answerCorpus} {
		if pos > 0 {
			hasher.Write([]byte{'|'})
		}
		hasher.Write([]byte(part))
	}
	digest := hasher.Sum(nil)
	return "pb-" + hex.EncodeToString(digest[:8])
}
|
||||
|
||||
// ensureCorpus creates a vectord index if it doesn't exist.
|
||||
// 201 = created; 409 = already exists; both fine for idempotent use.
|
||||
func (r *Retriever) ensureCorpus(ctx context.Context, name string, dim int) error {
|
||||
body, err := json.Marshal(map[string]any{
|
||||
"name": name, "dimension": dim, "distance": "cosine",
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
r.vectordURL+"/vectors/index", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
resp, err := r.httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
io.Copy(io.Discard, resp.Body)
|
||||
if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusConflict {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("ensure %q: status %d", name, resp.StatusCode)
|
||||
}
|
||||
|
||||
// addItem POSTs a single-item batch to /vectors/index/{name}/add.
|
||||
func (r *Retriever) addItem(ctx context.Context, corpus, id string, vec []float32, meta json.RawMessage) error {
|
||||
body, err := json.Marshal(map[string]any{
|
||||
"items": []map[string]any{
|
||||
{"id": id, "vector": vec, "metadata": meta},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
url := r.vectordURL + "/vectors/index/" + corpus + "/add"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
resp, err := r.httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
b, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("add %q: status %d: %s", corpus, resp.StatusCode, b)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Corpora returns the list of vectord index names. Thin proxy to
|
||||
|
||||
175
scripts/playbook_smoke.sh
Executable file
175
scripts/playbook_smoke.sh
Executable file
@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env bash
|
||||
# Playbook smoke — learning-loop integration end-to-end.
|
||||
# All assertions go through gateway :3110.
|
||||
#
|
||||
# Validates the full boost cycle:
|
||||
# 1. Build a test corpus with 3 items
|
||||
# 2. Query → get baseline ranking
|
||||
# 3. Record a playbook: query → bottom-ranked answer with score=1.0
|
||||
# 4. Re-query with use_playbook=true
|
||||
# 5. Assert: the recorded answer's distance ≈ 0.5 × baseline (boost
|
||||
# math: distance' = distance × (1 - 0.5×score))
|
||||
# 6. Assert: PlaybookBoosted >= 1 in the response
|
||||
#
|
||||
# Requires Ollama on :11434 with nomic-embed-text loaded — Record
|
||||
# embeds the query_text. Skips (exit 0) when Ollama is absent.
|
||||
|
||||
# Strict mode: abort on any failing command, unset variable, or failed
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Run from the repository root (this script lives one level below it).
script_dir="$(dirname "$0")"
cd "$script_dir/.."

PATH="$PATH:/usr/local/go/bin"
export PATH

# Without Ollama there is nothing to embed with, so the smoke is skipped
# (exit 0) rather than failed.
curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1 || {
  echo "[playbook-smoke] Ollama not reachable on :11434 — skipping"
  exit 0
}

echo "[playbook-smoke] building stack..."
go build -o bin/ ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway

# Stop any stale instances from a previous run; pkill exits non-zero when
# nothing matched, which is not an error here.
pkill -f "bin/(embedd|vectord|matrixd|gateway)" 2>/dev/null || :
sleep 0.3

# PIDS collects the services launched below; TMP holds the generated config.
declare -a PIDS=()
TMP="$(mktemp -d)"
CFG="${TMP}/playbook.toml"
|
||||
|
||||
# cleanup stops every service recorded in PIDS and removes the scratch
# directory TMP. It is installed as the EXIT trap, so it runs exactly once
# no matter how the script ends.
cleanup() {
  echo "[playbook-smoke] cleanup"
  # "${PIDS[@]:-}" instead of "${PIDS[@]}": under `set -u`, bash < 4.4
  # treats expanding an empty array as an unbound-variable error. With
  # the default, an empty array yields a single empty word, which the
  # -n test below skips.
  for p in "${PIDS[@]:-}"; do
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT
# A trapped signal does not terminate the script by itself: without an
# explicit exit, execution would resume after the handler with the stack
# already torn down. Exiting here also fires the EXIT trap, so cleanup
# still runs (once). 130 / 143 are the conventional 128+signal codes.
trap 'exit 130' INT
trap 'exit 143' TERM
|
||||
|
||||
# Write the service config shared by every daemon in this smoke run. The
# delimiter is quoted because the body contains nothing to expand.
cat <<'EOF' > "$CFG"
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"

[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""

[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
|
||||
|
||||
# poll_health PORT — probe http://127.0.0.1:PORT/health every 50 ms.
# Returns 0 as soon as a probe succeeds, or 1 once roughly five seconds
# (measured in whole seconds via date +%s) have passed without success.
poll_health() {
  local port="$1"
  local give_up_at
  give_up_at=$(($(date +%s) + 5))
  until [ "$(date +%s)" -ge "$give_up_at" ]; do
    curl -sS --max-time 1 "http://127.0.0.1:${port}/health" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
|
||||
|
||||
echo "[playbook-smoke] launching embedd → vectord → matrixd → gateway..."
# Start each service in dependency order, remember its PID for cleanup,
# and wait for its /health endpoint before starting the next one. On a
# failed health check, show the tail of that service's log and abort.
for svc in embedd:3216 vectord:3215 matrixd:3218 gateway:3110; do
  svc_name="${svc%%:*}"
  svc_port="${svc##*:}"
  "./bin/${svc_name}" -config "$CFG" > "/tmp/${svc_name}.log" 2>&1 &
  PIDS+=($!)
  if ! poll_health "$svc_port"; then
    echo "${svc_name} failed"
    tail "/tmp/${svc_name}.log"
    exit 1
  fi
done
|
||||
|
||||
# FAILED flips to 1 when any assertion further down does not hold.
FAILED=0

# Embed three corpus items + the query, all via /v1/embed.
echo "[playbook-smoke] embedding 3 corpus items + query..."
EMBEDS="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
  -H 'Content-Type: application/json' \
  -d '{"texts":["alpha staffing query test","bravo distinct content","charlie unrelated topic","alpha staffing query test full prompt"]}')"

# Stop right here if the embed call did not produce one vector per input
# text. Otherwise the extractions below would quietly yield `null` and the
# failure would only show up later as a confusing ranking assertion.
if ! jq -e '(.vectors | type) == "array" and (.vectors | length) == 4' <<<"$EMBEDS" >/dev/null 2>&1; then
  echo "  ✗ /v1/embed did not return 4 vectors: ${EMBEDS:0:300}"
  exit 1
fi

V_A="$(echo "$EMBEDS" | jq -c '.vectors[0]')"
V_B="$(echo "$EMBEDS" | jq -c '.vectors[1]')"
V_C="$(echo "$EMBEDS" | jq -c '.vectors[2]')"
V_Q="$(echo "$EMBEDS" | jq -c '.vectors[3]')"

# Build corpus. --fail turns an HTTP error status into a non-zero curl
# exit, so (with errexit) a rejected create/add aborts the run instead of
# being discarded together with the response body.
echo "[playbook-smoke] create corpus widgets + add 3 items..."
curl -sS --fail -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index \
  -H 'Content-Type: application/json' \
  -d '{"name":"widgets","dimension":768,"distance":"cosine"}'
curl -sS --fail -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index/widgets/add \
  -H 'Content-Type: application/json' \
  -d "$(jq -n --argjson va "$V_A" --argjson vb "$V_B" --argjson vc "$V_C" \
    '{items:[
      {id:"widget-a", vector:$va, metadata:{label:"a"}},
      {id:"widget-b", vector:$vb, metadata:{label:"b"}},
      {id:"widget-c", vector:$vc, metadata:{label:"c"}}
    ]}')"
|
||||
|
||||
# post_json URL BODY — POST BODY as application/json to URL and print the
# response body on stdout.
post_json() {
  curl -sS -X POST "$1" -H 'Content-Type: application/json' -d "$2"
}

# Step 1 — baseline matrix search without the playbook. The precomputed
# query vector is sent directly, which skips the embedd round-trip and
# keeps the ranking tied to the geometry built above.
echo "[playbook-smoke] baseline search (no playbook):"
BASELINE="$(post_json http://127.0.0.1:3110/v1/matrix/search \
  "$(jq -n --argjson v "$V_Q" '{query_vector:$v, corpora:["widgets"], k:3}')")"
BASE_ORDER="$(jq -r '[.results[].id] | join(",")' <<<"$BASELINE")"
BASE_C_DIST="$(jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1' <<<"$BASELINE")"
echo " baseline order: $BASE_ORDER widget-c distance=$BASE_C_DIST"

# Step 2 — record a playbook entry mapping the query to widget-c. The
# query_text is exactly the text the boosted search below asks with.
QUERY_TEXT="alpha staffing query test full prompt"
echo "[playbook-smoke] record playbook: ($QUERY_TEXT) → widget-c score=1.0"
RECORD_RESP="$(post_json http://127.0.0.1:3110/v1/matrix/playbooks/record \
  "$(jq -n --arg q "$QUERY_TEXT" \
    '{query_text:$q, answer_id:"widget-c", answer_corpus:"widgets", score:1.0, tags:["smoke"]}')")"
PB_ID="$(jq -r '.playbook_id // empty' <<<"$RECORD_RESP")"
if [[ -n "$PB_ID" ]]; then
  echo " ✓ playbook_id=$PB_ID"
else
  echo " ✗ no playbook_id in response: $RECORD_RESP"
  FAILED=1
fi

# Step 3 — search again with use_playbook=true, this time by query_text so
# matrixd embeds it itself (end-to-end path). The text equals the recorded
# entry's query_text, so the playbook hit should sit at cosine distance ~0
# and the boost should land on widget-c.
echo "[playbook-smoke] boosted search (use_playbook=true):"
BOOSTED="$(post_json http://127.0.0.1:3110/v1/matrix/search \
  "$(jq -n --arg q "$QUERY_TEXT" \
    '{query_text:$q, corpora:["widgets"], k:3, use_playbook:true, playbook_max_distance:0.5}')")"
BOOST_ORDER="$(jq -r '[.results[].id] | join(",")' <<<"$BOOSTED")"
BOOST_C_DIST="$(jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1' <<<"$BOOSTED")"
PB_BOOSTED="$(jq -r '.playbook_boosted // 0' <<<"$BOOSTED")"
echo " boosted order: $BOOST_ORDER widget-c distance=$BOOST_C_DIST playbook_boosted=$PB_BOOSTED"
|
||||
|
||||
# ── Assertion 1: the response reports at least one boosted result ──
# A non-numeric value makes the test itself error out, which is treated
# as a failure as well.
if ! [ "$PB_BOOSTED" -ge 1 ]; then
  echo " ✗ playbook_boosted=$PB_BOOSTED (expected ≥ 1)"
  FAILED=1
else
  echo " ✓ playbook_boosted=$PB_BOOSTED ≥ 1"
fi

# ── Assertion 2: widget-c distance is halved (score=1.0 → factor 0.5) ──
# The baseline used a precomputed vector while the boosted search embedded
# the text again, so the two distances need not match exactly; a ±0.10
# band around 0.5 absorbs that. A non-positive baseline distance produces
# the sentinel ratio -1, which is always out of band.
RATIO="$(awk -v b="$BASE_C_DIST" -v c="$BOOST_C_DIST" 'BEGIN{ if (b<=0) print -1; else print c/b }')"
echo " widget-c distance ratio (boosted/baseline) = $RATIO (expect ≈ 0.5)"
WITHIN="$(awk -v r="$RATIO" 'BEGIN{ print (r>=0.40 && r<=0.60) ? "true" : "false" }')"
case "$WITHIN" in
  true)
    echo " ✓ ratio in [0.40, 0.60] — boost applied correctly"
    ;;
  *)
    echo " ✗ ratio out of band: $RATIO"
    FAILED=1
    ;;
esac

# ── Verdict: exit 1 if any assertion failed, 0 otherwise ──
if [ "$FAILED" -ne 0 ]; then
  echo "[playbook-smoke] Playbook acceptance gate: FAILED"
  exit 1
fi
echo "[playbook-smoke] Playbook acceptance gate: PASSED"
exit 0
|
||||
Loading…
x
Reference in New Issue
Block a user