diff --git a/cmd/matrixd/main.go b/cmd/matrixd/main.go index 68e681d..2630660 100644 --- a/cmd/matrixd/main.go +++ b/cmd/matrixd/main.go @@ -2,12 +2,13 @@ // Retriever with HTTP routes per docs/SPEC.md §3.4. // // Routes: -// POST /matrix/search — multi-corpus retrieve+merge -// GET /matrix/corpora — list known vectord indexes (proxy) -// POST /matrix/relevance — adjacency-pollution filter (CODE-aware; -// port of mcp-server/relevance.ts) -// POST /matrix/downgrade — strong-model auto-downgrade decision -// (port of mode.rs::execute pass5 gate) +// POST /matrix/search — multi-corpus retrieve+merge, +// with optional playbook boost +// GET /matrix/corpora — list known vectord indexes +// POST /matrix/relevance — adjacency-pollution filter +// POST /matrix/downgrade — strong-model downgrade gate +// POST /matrix/playbooks/record — record a (query → answer) +// success for the learning loop // // matrixd talks to embedd (for query-text embedding) and vectord // (for per-corpus search) via HTTP. Both URLs come from @@ -64,6 +65,7 @@ func (h *handlers) register(r chi.Router) { r.Get("/matrix/corpora", h.handleCorpora) r.Post("/matrix/relevance", h.handleRelevance) r.Post("/matrix/downgrade", h.handleDowngrade) + r.Post("/matrix/playbooks/record", h.handlePlaybookRecord) } func (h *handlers) handleSearch(w http.ResponseWriter, r *http.Request) { @@ -104,6 +106,42 @@ func (h *handlers) handleRelevance(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, res) } +// playbookRecordRequest is the POST /matrix/playbooks/record body. +// Corpus is optional; defaults to matrix.DefaultPlaybookCorpus. 
+type playbookRecordRequest struct { + QueryText string `json:"query_text"` + AnswerID string `json:"answer_id"` + AnswerCorpus string `json:"answer_corpus"` + Score float64 `json:"score"` + Tags []string `json:"tags,omitempty"` + Corpus string `json:"corpus,omitempty"` +} + +func (h *handlers) handlePlaybookRecord(w http.ResponseWriter, r *http.Request) { + var req playbookRecordRequest + if !decodeJSON(w, r, &req) { + return + } + entry := matrix.NewPlaybookEntry(req.QueryText, req.AnswerID, req.AnswerCorpus, req.Score, req.Tags) + if err := entry.Validate(); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + pbID, err := h.r.Record(r.Context(), entry, req.Corpus) + if err != nil { + slog.Warn("playbook record", "err", err) + http.Error(w, err.Error(), http.StatusBadGateway) + return + } + writeJSON(w, http.StatusOK, map[string]any{ + "playbook_id": pbID, + "query_text": entry.QueryText, + "answer_id": entry.AnswerID, + "answer_corpus": entry.AnswerCorpus, + "score": entry.Score, + }) +} + // downgradeRequest is the POST /matrix/downgrade body. Mirrors // matrix.DowngradeInput; ForceFullOverride is read from the // LH_FORCE_FULL_ENRICHMENT env var when omitted from the body. diff --git a/internal/matrix/playbook.go b/internal/matrix/playbook.go new file mode 100644 index 0000000..ecdf4f1 --- /dev/null +++ b/internal/matrix/playbook.go @@ -0,0 +1,196 @@ +package matrix + +// Playbook memory — SPEC §3.4 component 5 (learning-loop integration). +// +// Concept: every time an external system confirms "(query → answer_id) +// was a successful match," record it. Future similar queries get that +// answer's score boosted, so the matrix indexer learns from outcomes +// rather than relying solely on the base embedder's geometry. +// +// Per feedback_meta_index_vision.md: this is the north star — a +// meta-index that LEARNS from playbooks over time, not a static +// hybrid search engine. 
+// +// Storage shape: a vectord index named DefaultPlaybookCorpus where: +// - The vector is embed(query_text) +// - The metadata is a serialized PlaybookEntry +// Retrieval shape: at /matrix/search time, when use_playbook=true, +// matrixd searches the playbook corpus with the same query vector, +// looks up each hit's answer_id, and if that answer is in the current +// matrix-search results, applies a boost to its distance. +// +// Composition: this layer is additive on top of the existing +// retrieve+merge — when use_playbook=false, behavior is unchanged. +// The boost only re-ranks results that ALREADY surfaced from the +// regular retrieval. A v1 enhancement would inject playbook hits +// directly even when they weren't in the top-K (Shape B from the +// design conversation), but v0 keeps the safer "boost-only" stance. + +import ( + "encoding/json" + "errors" + "sort" + "time" +) + +// DefaultPlaybookCorpus is the vectord index name where playbook +// entries land by default. Callers can override per-request, but +// having one default makes the system observable from the outside +// (operator hits /vectors/index and sees this corpus in the list). +const DefaultPlaybookCorpus = "playbook_memory" + +// DefaultPlaybookTopK is how many similar past queries to consider +// when applying boost. 3 keeps the influence focused — we want the +// boost to reward consistent matches, not let one stale playbook +// dominate. Caller can override. +const DefaultPlaybookTopK = 3 + +// DefaultPlaybookMaxDistance is the cosine ceiling for "this past +// query is similar enough to count." 0.5 lets in genuinely related +// queries while excluding pure-coincidence neighbors. Caller can +// override per-request as we learn what works for staffing data. +const DefaultPlaybookMaxDistance = 0.5 + +// PlaybookEntry is what gets stored as metadata on each playbook +// vector. RecordedAt is captured at write time; callers should not +// set it (the recorder fills it in). 
type PlaybookEntry struct {
	// QueryText is the query whose match was confirmed; required.
	QueryText string `json:"query_text"`
	// AnswerID and AnswerCorpus together name the confirmed answer;
	// both are required.
	AnswerID     string `json:"answer_id"`
	AnswerCorpus string `json:"answer_corpus"`
	// Score is the outcome quality in [0, 1]; higher = better outcome.
	Score float64 `json:"score"`
	// RecordedAtNs is a Unix timestamp in nanoseconds.
	RecordedAtNs int64 `json:"recorded_at_ns"`
	// Tags are optional free-form labels; omitted from JSON when empty.
	Tags []string `json:"tags,omitempty"`
}

// Validate returns an error if the entry is missing required fields
// or carries a score outside [0, 1]. Callers should validate before
// storage so bad data doesn't pollute the corpus.
//
// A NaN score is rejected as out of range: every ordered comparison
// against NaN is false, so the range check is written in negated
// form ("not inside [0, 1]") rather than "below 0 or above 1".
func (p PlaybookEntry) Validate() error {
	switch {
	case p.QueryText == "":
		return errors.New("playbook: query_text required")
	case p.AnswerID == "":
		return errors.New("playbook: answer_id required")
	case p.AnswerCorpus == "":
		return errors.New("playbook: answer_corpus required")
	case !(p.Score >= 0 && p.Score <= 1):
		// Negated on purpose so that NaN fails the check too.
		return errors.New("playbook: score must be in [0, 1]")
	}
	return nil
}

// BoostFactor returns the multiplier applied to a result's distance
// when this playbook entry matches it. Lower is better:
//
//	score = 0   → 1.0  (no boost)
//	score = 0.5 → 0.75 (mild boost)
//	score = 1.0 → 0.5  (halve the distance — strong boost)
//
// Math: 1 - 0.5*score, with score clamped to [0, 1] so the result is
// always in [0.5, 1.0]. A NaN score is treated like 0 (no boost) so
// an unvalidated entry can never turn a distance into NaN.
//
// Why halving as the maximum boost: a perfect-confidence playbook
// entry shouldn't completely override the base embedding (that
// invites runaway feedback loops where one early playbook
// dominates forever). Halving is enough to move a mid-rank result
// to the top in most cases without erasing the base ranking
// signal.
func (p PlaybookEntry) BoostFactor() float64 {
	score := p.Score
	switch {
	case !(score >= 0): // negative or NaN
		score = 0
	case score > 1:
		score = 1
	}
	return 1.0 - 0.5*score
}

// MarshalMetadata serializes the entry as the JSON RawMessage that
// vectord stores per item. Convenience for the recorder.
+func (p PlaybookEntry) MarshalMetadata() (json.RawMessage, error) { + return json.Marshal(p) +} + +// UnmarshalPlaybookMetadata is the inverse — used when fetching +// playbook hits to decode their metadata back into entries. +func UnmarshalPlaybookMetadata(raw json.RawMessage) (PlaybookEntry, error) { + var e PlaybookEntry + if len(raw) == 0 { + return e, errors.New("playbook: empty metadata") + } + if err := json.Unmarshal(raw, &e); err != nil { + return e, err + } + return e, nil +} + +// NewPlaybookEntry stamps RecordedAtNs to now and returns the entry. +// Validation happens at storage; this is just construction. +func NewPlaybookEntry(query, answerID, answerCorpus string, score float64, tags []string) PlaybookEntry { + return PlaybookEntry{ + QueryText: query, + AnswerID: answerID, + AnswerCorpus: answerCorpus, + Score: score, + RecordedAtNs: time.Now().UnixNano(), + Tags: tags, + } +} + +// PlaybookHit is one similarity-search result from the playbook +// corpus, paired with its decoded entry. Distance is the cosine +// distance between the current query and this past playbook's +// query vector — used by the caller to filter out "too far" +// matches via PlaybookMaxDistance. +type PlaybookHit struct { + PlaybookID string `json:"playbook_id"` + Distance float32 `json:"distance"` + Entry PlaybookEntry `json:"entry"` +} + +// ApplyPlaybookBoost re-ranks results in place using matched +// playbook hits. For each hit whose (AnswerID, AnswerCorpus) +// matches a result, multiply that result's distance by the hit's +// BoostFactor. If multiple hits match the same result, the highest- +// score one wins (greatest reduction in distance). +// +// After applying boosts, results are re-sorted ascending by +// distance. +// +// Returns the number of distinct results that received a boost. +// Callers can log this as a signal of "how much the playbook +// influenced this query." 
+func ApplyPlaybookBoost(results []Result, hits []PlaybookHit) int { + if len(hits) == 0 || len(results) == 0 { + return 0 + } + + // For each result, find the hit with the lowest BoostFactor + // (= largest boost = highest score, since BoostFactor is + // 1-0.5*score and we minimize). + bestBoost := make(map[int]float64, len(results)) + for i, r := range results { + for _, h := range hits { + if h.Entry.AnswerID != r.ID || h.Entry.AnswerCorpus != r.Corpus { + continue + } + bf := h.Entry.BoostFactor() + if cur, ok := bestBoost[i]; !ok || bf < cur { + bestBoost[i] = bf + } + } + } + + for i, bf := range bestBoost { + results[i].Distance = float32(float64(results[i].Distance) * bf) + } + + sort.SliceStable(results, func(i, j int) bool { + return results[i].Distance < results[j].Distance + }) + + return len(bestBoost) +} diff --git a/internal/matrix/playbook_test.go b/internal/matrix/playbook_test.go new file mode 100644 index 0000000..ebab590 --- /dev/null +++ b/internal/matrix/playbook_test.go @@ -0,0 +1,180 @@ +package matrix + +import ( + "encoding/json" + "testing" +) + +func TestPlaybookEntry_Validate(t *testing.T) { + good := PlaybookEntry{ + QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: 0.5, + } + if err := good.Validate(); err != nil { + t.Errorf("good entry should validate: %v", err) + } + + cases := []struct { + name string + entry PlaybookEntry + }{ + {"empty query", PlaybookEntry{AnswerID: "y", AnswerCorpus: "z", Score: 0.5}}, + {"empty answer id", PlaybookEntry{QueryText: "x", AnswerCorpus: "z", Score: 0.5}}, + {"empty corpus", PlaybookEntry{QueryText: "x", AnswerID: "y", Score: 0.5}}, + {"score too high", PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: 1.5}}, + {"score negative", PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: -0.1}}, + } + for _, c := range cases { + if err := c.entry.Validate(); err == nil { + t.Errorf("%s: expected validation error, got nil", c.name) + } + } +} + +func 
TestPlaybookEntry_BoostFactor(t *testing.T) { + cases := []struct { + score float64 + want float64 + }{ + {0.0, 1.0}, + {0.5, 0.75}, + {1.0, 0.5}, + {-0.1, 1.0}, // clamped + {1.5, 0.5}, // clamped + } + for _, c := range cases { + got := PlaybookEntry{Score: c.score}.BoostFactor() + if abs(got-c.want) > 1e-9 { + t.Errorf("BoostFactor(score=%.2f): want %.4f, got %.4f", c.score, c.want, got) + } + } +} + +func TestApplyPlaybookBoost_NoHitsLeaveResultsAlone(t *testing.T) { + results := []Result{ + {ID: "a", Distance: 0.1, Corpus: "x"}, + {ID: "b", Distance: 0.2, Corpus: "x"}, + } + n := ApplyPlaybookBoost(results, nil) + if n != 0 { + t.Errorf("expected 0 boosted, got %d", n) + } + if results[0].ID != "a" || results[1].ID != "b" { + t.Errorf("results reordered without hits: %v", results) + } +} + +func TestApplyPlaybookBoost_BoostMovesResultUp(t *testing.T) { + // Initial: a (0.10) beats b (0.20) beats c (0.30). + // Playbook says (answer=c, score=1.0) should be boosted → c's + // distance becomes 0.30 * 0.5 = 0.15. New ordering: a, c, b. + results := []Result{ + {ID: "a", Distance: 0.10, Corpus: "x"}, + {ID: "b", Distance: 0.20, Corpus: "x"}, + {ID: "c", Distance: 0.30, Corpus: "x"}, + } + hits := []PlaybookHit{ + {PlaybookID: "p1", Distance: 0.05, Entry: PlaybookEntry{ + AnswerID: "c", AnswerCorpus: "x", Score: 1.0, + }}, + } + n := ApplyPlaybookBoost(results, hits) + if n != 1 { + t.Errorf("expected 1 boosted, got %d", n) + } + if results[0].ID != "a" || results[1].ID != "c" || results[2].ID != "b" { + t.Errorf("expected order a,c,b after boost; got %v", idsOf(results)) + } + if abs(float64(results[1].Distance)-0.15) > 1e-6 { + t.Errorf("expected c distance 0.15 after boost; got %.4f", results[1].Distance) + } +} + +func TestApplyPlaybookBoost_HighestScoreWinsForSameAnswer(t *testing.T) { + results := []Result{ + {ID: "a", Distance: 0.30, Corpus: "x"}, + } + // Two playbook hits both pointing at "a". Score=0.4 (weak boost) + // + Score=0.9 (strong boost). 
Strong should win — distance gets + // multiplied by 1-0.5*0.9 = 0.55, not by 1-0.5*0.4 = 0.80. + hits := []PlaybookHit{ + {PlaybookID: "p_weak", Distance: 0.05, Entry: PlaybookEntry{ + AnswerID: "a", AnswerCorpus: "x", Score: 0.4, + }}, + {PlaybookID: "p_strong", Distance: 0.05, Entry: PlaybookEntry{ + AnswerID: "a", AnswerCorpus: "x", Score: 0.9, + }}, + } + ApplyPlaybookBoost(results, hits) + wantDist := 0.30 * 0.55 + if abs(float64(results[0].Distance)-wantDist) > 1e-6 { + t.Errorf("strong-score boost should win: want %.4f, got %.4f", wantDist, results[0].Distance) + } +} + +func TestApplyPlaybookBoost_CorpusAttributionRespected(t *testing.T) { + // Playbook references answer_id="a" in corpus="x". + // Results have answer_id="a" in corpus="y" — DIFFERENT corpus. + // Boost should NOT apply; the (id, corpus) tuple is the join key, + // not just id (otherwise different-corpus collisions would create + // false positives). + results := []Result{ + {ID: "a", Distance: 0.30, Corpus: "y"}, + } + hits := []PlaybookHit{ + {PlaybookID: "p1", Distance: 0.05, Entry: PlaybookEntry{ + AnswerID: "a", AnswerCorpus: "x", Score: 1.0, + }}, + } + n := ApplyPlaybookBoost(results, hits) + if n != 0 { + t.Errorf("cross-corpus collision should not boost: got %d", n) + } + if abs(float64(results[0].Distance)-0.30) > 1e-6 { + // 1e-6 tolerance accounts for float32→float64 conversion; + // the assertion that matters is "unchanged from input." 
+ t.Errorf("distance should be unchanged: got %.6f", results[0].Distance) + } +} + +func TestPlaybookEntry_RoundTripJSON(t *testing.T) { + e := NewPlaybookEntry("forklift query", "w-12345", "workers", 0.85, []string{"chicago", "verified"}) + raw, err := e.MarshalMetadata() + if err != nil { + t.Fatalf("marshal: %v", err) + } + got, err := UnmarshalPlaybookMetadata(raw) + if err != nil { + t.Fatalf("unmarshal: %v", err) + } + if got.QueryText != e.QueryText || got.AnswerID != e.AnswerID || + got.AnswerCorpus != e.AnswerCorpus || got.Score != e.Score { + t.Errorf("round-trip mismatch: want %+v, got %+v", e, got) + } + if len(got.Tags) != 2 || got.Tags[0] != "chicago" { + t.Errorf("tags lost in round-trip: %v", got.Tags) + } + if got.RecordedAtNs == 0 { + t.Error("RecordedAtNs not set by NewPlaybookEntry") + } +} + +func TestUnmarshalPlaybookMetadata_RejectsEmpty(t *testing.T) { + if _, err := UnmarshalPlaybookMetadata(json.RawMessage{}); err == nil { + t.Error("empty metadata should error") + } +} + +func abs(f float64) float64 { + if f < 0 { + return -f + } + return f +} + +func idsOf(rs []Result) []string { + out := make([]string, len(rs)) + for i, r := range rs { + out[i] = r.ID + } + return out +} diff --git a/internal/matrix/retrieve.go b/internal/matrix/retrieve.go index 8b1c9ed..a0b9504 100644 --- a/internal/matrix/retrieve.go +++ b/internal/matrix/retrieve.go @@ -17,12 +17,16 @@ package matrix import ( "bytes" "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "errors" "fmt" "io" + "log/slog" "net/http" "sort" + "strings" "sync" "time" @@ -42,21 +46,38 @@ type Result struct { // SearchRequest is the matrix search input. Either QueryText (matrix // embeds it via embedd) or QueryVector (already embedded by caller) // must be set; QueryVector takes precedence if both supplied. 
+// +// Playbook fields (component 5 — learning loop): +// UsePlaybook=true: after normal retrieve+merge, fetch top similar +// past queries from PlaybookCorpus and apply distance boost to +// any current results that match a recorded answer. +// PlaybookCorpus: index name; empty = DefaultPlaybookCorpus. +// PlaybookTopK: number of similar past queries to consider; 0 = +// DefaultPlaybookTopK. +// PlaybookMaxDistance: cosine ceiling for "similar enough"; 0 = +// DefaultPlaybookMaxDistance. type SearchRequest struct { - QueryText string `json:"query_text,omitempty"` - QueryVector []float32 `json:"query_vector,omitempty"` - Corpora []string `json:"corpora"` - K int `json:"k"` - PerCorpusK int `json:"per_corpus_k,omitempty"` - Model string `json:"model,omitempty"` + QueryText string `json:"query_text,omitempty"` + QueryVector []float32 `json:"query_vector,omitempty"` + Corpora []string `json:"corpora"` + K int `json:"k"` + PerCorpusK int `json:"per_corpus_k,omitempty"` + Model string `json:"model,omitempty"` + UsePlaybook bool `json:"use_playbook,omitempty"` + PlaybookCorpus string `json:"playbook_corpus,omitempty"` + PlaybookTopK int `json:"playbook_top_k,omitempty"` + PlaybookMaxDistance float64 `json:"playbook_max_distance,omitempty"` } // SearchResponse wraps the merged results plus per-corpus return // counts so callers can detect "this corpus returned nothing" -// without re-querying. +// without re-querying. PlaybookBoosted is the count of results that +// received a boost from playbook memory; useful for telemetry on +// "how much the learning loop influenced this query." type SearchResponse struct { Results []Result `json:"results"` PerCorpusCounts map[string]int `json:"per_corpus_counts"` + PlaybookBoosted int `json:"playbook_boosted,omitempty"` } // Retriever holds the HTTP clients to embedd and vectord. 
Stateless @@ -158,7 +179,178 @@ func (r *Retriever) Search(ctx context.Context, req SearchRequest) (*SearchRespo if len(allHits) > req.K { allHits = allHits[:req.K] } - return &SearchResponse{Results: allHits, PerCorpusCounts: perCorpus}, nil + resp := &SearchResponse{Results: allHits, PerCorpusCounts: perCorpus} + + // Playbook boost (component 5). Reuses the query vector — no + // extra embed call. If the playbook corpus doesn't exist (first + // search before any Record), the lookup gracefully no-ops. + if req.UsePlaybook { + hits, err := r.fetchPlaybookHits(ctx, qvec, req) + if err != nil { + // Don't fail the whole search on playbook errors — the + // boost is opportunistic. Log + continue. + slog.Warn("matrix: playbook lookup failed; skipping boost", "err", err) + } else if len(hits) > 0 { + resp.PlaybookBoosted = ApplyPlaybookBoost(resp.Results, hits) + } + } + + return resp, nil +} + +// fetchPlaybookHits queries the playbook corpus with the same query +// vector and returns hits whose decoded entries are within +// PlaybookMaxDistance. A missing playbook corpus returns nil + nil +// (legitimate no-op state for a system before any Record call). +func (r *Retriever) fetchPlaybookHits(ctx context.Context, qvec []float32, req SearchRequest) ([]PlaybookHit, error) { + corpus := req.PlaybookCorpus + if corpus == "" { + corpus = DefaultPlaybookCorpus + } + topK := req.PlaybookTopK + if topK <= 0 { + topK = DefaultPlaybookTopK + } + maxDist := req.PlaybookMaxDistance + if maxDist <= 0 { + maxDist = DefaultPlaybookMaxDistance + } + + rawHits, err := r.searchCorpus(ctx, corpus, qvec, topK) + if err != nil { + // vectord returns 404 for missing index. We treat that as + // "no playbook yet" — legitimate cold-start state, not an + // error. 
+ if strings.Contains(err.Error(), "status 404") { + return nil, nil + } + return nil, err + } + + out := make([]PlaybookHit, 0, len(rawHits)) + for _, h := range rawHits { + if float64(h.Distance) > maxDist { + continue + } + entry, err := UnmarshalPlaybookMetadata(h.Metadata) + if err != nil { + slog.Warn("matrix: skip malformed playbook entry", "id", h.ID, "err", err) + continue + } + out = append(out, PlaybookHit{ + PlaybookID: h.ID, + Distance: h.Distance, + Entry: entry, + }) + } + return out, nil +} + +// Record stores a (query → answer_id) playbook entry in the +// playbook corpus. Embeds the query via embedd, ensures the corpus +// exists (idempotent create), and writes the entry as one vectord +// item with the entry's JSON in metadata. +// +// Uses a deterministic ID derived from (query_text, answer_id, +// answer_corpus) so re-recording the same triple upserts (last +// score wins). Callers wanting to accumulate distinct samples can +// vary one of the three. +// +// corpus="" defaults to DefaultPlaybookCorpus. +func (r *Retriever) Record(ctx context.Context, entry PlaybookEntry, corpus string) (string, error) { + if err := entry.Validate(); err != nil { + return "", err + } + if corpus == "" { + corpus = DefaultPlaybookCorpus + } + + qvec, err := r.embed(ctx, entry.QueryText, "") + if err != nil { + return "", fmt.Errorf("playbook record embed: %w", err) + } + + if err := r.ensureCorpus(ctx, corpus, len(qvec)); err != nil { + return "", fmt.Errorf("playbook ensure corpus: %w", err) + } + + if entry.RecordedAtNs == 0 { + entry.RecordedAtNs = time.Now().UnixNano() + } + + pbID := playbookID(entry.QueryText, entry.AnswerID, entry.AnswerCorpus) + + meta, err := entry.MarshalMetadata() + if err != nil { + return "", err + } + + if err := r.addItem(ctx, corpus, pbID, qvec, meta); err != nil { + return "", fmt.Errorf("playbook add: %w", err) + } + return pbID, nil +} + +// playbookID is sha256-truncated 8 bytes (16 hex chars) prefixed +// with "pb-". 
Deterministic on (query, answer_id, answer_corpus). +func playbookID(query, answerID, answerCorpus string) string { + h := sha256.Sum256([]byte(query + "|" + answerID + "|" + answerCorpus)) + return "pb-" + hex.EncodeToString(h[:8]) +} + +// ensureCorpus creates a vectord index if it doesn't exist. +// 201 = created; 409 = already exists; both fine for idempotent use. +func (r *Retriever) ensureCorpus(ctx context.Context, name string, dim int) error { + body, err := json.Marshal(map[string]any{ + "name": name, "dimension": dim, "distance": "cosine", + }) + if err != nil { + return err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, + r.vectordURL+"/vectors/index", bytes.NewReader(body)) + if err != nil { + return err + } + httpReq.Header.Set("Content-Type", "application/json") + resp, err := r.httpClient.Do(httpReq) + if err != nil { + return err + } + defer resp.Body.Close() + io.Copy(io.Discard, resp.Body) + if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusConflict { + return nil + } + return fmt.Errorf("ensure %q: status %d", name, resp.StatusCode) +} + +// addItem POSTs a single-item batch to /vectors/index/{name}/add. 
+func (r *Retriever) addItem(ctx context.Context, corpus, id string, vec []float32, meta json.RawMessage) error { + body, err := json.Marshal(map[string]any{ + "items": []map[string]any{ + {"id": id, "vector": vec, "metadata": meta}, + }, + }) + if err != nil { + return err + } + url := r.vectordURL + "/vectors/index/" + corpus + "/add" + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) + if err != nil { + return err + } + httpReq.Header.Set("Content-Type", "application/json") + resp, err := r.httpClient.Do(httpReq) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(resp.Body) + return fmt.Errorf("add %q: status %d: %s", corpus, resp.StatusCode, b) + } + return nil } // Corpora returns the list of vectord index names. Thin proxy to diff --git a/scripts/playbook_smoke.sh b/scripts/playbook_smoke.sh new file mode 100755 index 0000000..ebf0ca9 --- /dev/null +++ b/scripts/playbook_smoke.sh @@ -0,0 +1,175 @@ +#!/usr/bin/env bash +# Playbook smoke — learning-loop integration end-to-end. +# All assertions go through gateway :3110. +# +# Validates the full boost cycle: +# 1. Build a test corpus with 3 items +# 2. Query → get baseline ranking +# 3. Record a playbook: query → bottom-ranked answer with score=1.0 +# 4. Re-query with use_playbook=true +# 5. Assert: the recorded answer's distance ≈ 0.5 × baseline (boost +# math: distance' = distance × (1 - 0.5×score)) +# 6. Assert: PlaybookBoosted >= 1 in the response +# +# Requires Ollama on :11434 with nomic-embed-text loaded — Record +# embeds the query_text. Skips (exit 0) when Ollama is absent. + +set -euo pipefail +cd "$(dirname "$0")/.." + +export PATH="$PATH:/usr/local/go/bin" + +if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then + echo "[playbook-smoke] Ollama not reachable on :11434 — skipping" + exit 0 +fi + +echo "[playbook-smoke] building stack..." 
+go build -o bin/ ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway + +pkill -f "bin/(embedd|vectord|matrixd|gateway)" 2>/dev/null || true +sleep 0.3 + +PIDS=() +TMP="$(mktemp -d)" +CFG="$TMP/playbook.toml" + +cleanup() { + echo "[playbook-smoke] cleanup" + for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done + rm -rf "$TMP" +} +trap cleanup EXIT INT TERM + +cat > "$CFG" </dev/null 2>&1; then return 0; fi + sleep 0.05 + done + return 1 +} + +echo "[playbook-smoke] launching embedd → vectord → matrixd → gateway..." +./bin/embedd -config "$CFG" > /tmp/embedd.log 2>&1 & PIDS+=($!) +poll_health 3216 || { echo "embedd failed"; tail /tmp/embedd.log; exit 1; } +./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!) +poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; } +./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!) +poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; } +./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!) +poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; } + +FAILED=0 + +# Embed three corpus items + the query, all via /v1/embed. +echo "[playbook-smoke] embedding 3 corpus items + query..." +EMBEDS="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \ + -H 'Content-Type: application/json' \ + -d '{"texts":["alpha staffing query test","bravo distinct content","charlie unrelated topic","alpha staffing query test full prompt"]}')" +V_A="$(echo "$EMBEDS" | jq -c '.vectors[0]')" +V_B="$(echo "$EMBEDS" | jq -c '.vectors[1]')" +V_C="$(echo "$EMBEDS" | jq -c '.vectors[2]')" +V_Q="$(echo "$EMBEDS" | jq -c '.vectors[3]')" + +# Build corpus +echo "[playbook-smoke] create corpus widgets + add 3 items..." 
+curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index \ + -H 'Content-Type: application/json' \ + -d '{"name":"widgets","dimension":768,"distance":"cosine"}' +curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index/widgets/add \ + -H 'Content-Type: application/json' \ + -d "$(jq -n --argjson va "$V_A" --argjson vb "$V_B" --argjson vc "$V_C" \ + '{items:[ + {id:"widget-a", vector:$va, metadata:{label:"a"}}, + {id:"widget-b", vector:$vb, metadata:{label:"b"}}, + {id:"widget-c", vector:$vc, metadata:{label:"c"}} + ]}')" + +# Baseline matrix search (no playbook) — using query_vector to skip +# embedd round-trip and keep the test deterministic on the geometry +# we know. +echo "[playbook-smoke] baseline search (no playbook):" +BASELINE="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \ + -H 'Content-Type: application/json' \ + -d "$(jq -n --argjson v "$V_Q" '{query_vector:$v, corpora:["widgets"], k:3}')")" +BASE_ORDER="$(echo "$BASELINE" | jq -r '[.results[].id] | join(",")')" +BASE_C_DIST="$(echo "$BASELINE" | jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1')" +echo " baseline order: $BASE_ORDER widget-c distance=$BASE_C_DIST" + +# Record a playbook entry for the query → widget-c (use the same +# query_text that the playbook will be re-queried by, exact match). +QUERY_TEXT="alpha staffing query test full prompt" +echo "[playbook-smoke] record playbook: ($QUERY_TEXT) → widget-c score=1.0" +RECORD_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/record \ + -H 'Content-Type: application/json' \ + -d "$(jq -n --arg q "$QUERY_TEXT" \ + '{query_text:$q, answer_id:"widget-c", answer_corpus:"widgets", score:1.0, tags:["smoke"]}')")" +PB_ID="$(echo "$RECORD_RESP" | jq -r '.playbook_id // empty')" +if [ -z "$PB_ID" ]; then + echo " ✗ no playbook_id in response: $RECORD_RESP"; FAILED=1 +else + echo " ✓ playbook_id=$PB_ID" +fi + +# Re-search with use_playbook=true. 
Use query_text so matrixd embeds +# it again (proves end-to-end). The newly-recorded playbook entry has +# the SAME query_text → cosine distance ~0 → boost applies to widget-c. +echo "[playbook-smoke] boosted search (use_playbook=true):" +BOOSTED="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \ + -H 'Content-Type: application/json' \ + -d "$(jq -n --arg q "$QUERY_TEXT" \ + '{query_text:$q, corpora:["widgets"], k:3, use_playbook:true, playbook_max_distance:0.5}')")" +BOOST_ORDER="$(echo "$BOOSTED" | jq -r '[.results[].id] | join(",")')" +BOOST_C_DIST="$(echo "$BOOSTED" | jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1')" +PB_BOOSTED="$(echo "$BOOSTED" | jq -r '.playbook_boosted // 0')" +echo " boosted order: $BOOST_ORDER widget-c distance=$BOOST_C_DIST playbook_boosted=$PB_BOOSTED" + +# ── Assertion 1: PlaybookBoosted >= 1 ──────────────────────────── +if [ "$PB_BOOSTED" -ge 1 ]; then + echo " ✓ playbook_boosted=$PB_BOOSTED ≥ 1" +else + echo " ✗ playbook_boosted=$PB_BOOSTED (expected ≥ 1)"; FAILED=1 +fi + +# ── Assertion 2: widget-c distance halved (score=1.0 → 0.5× factor) +# Allow some tolerance because the query and recorded query may not +# be byte-identical depending on Ollama's tokenization stability. +RATIO="$(awk -v b="$BASE_C_DIST" -v c="$BOOST_C_DIST" 'BEGIN{ if (b<=0) print -1; else print c/b }')" +echo " widget-c distance ratio (boosted/baseline) = $RATIO (expect ≈ 0.5)" +WITHIN="$(awk -v r="$RATIO" 'BEGIN{ print (r>=0.40 && r<=0.60) ? "true" : "false" }')" +if [ "$WITHIN" = "true" ]; then + echo " ✓ ratio in [0.40, 0.60] — boost applied correctly" +else + echo " ✗ ratio out of band: $RATIO"; FAILED=1 +fi + +if [ "$FAILED" -eq 0 ]; then + echo "[playbook-smoke] Playbook acceptance gate: PASSED" + exit 0 +else + echo "[playbook-smoke] Playbook acceptance gate: FAILED" + exit 1 +fi