// Package matrix is the multi-corpus retrieval layer above vectord.
// Per docs/SPEC.md §3.4: the matrix indexer composes N single-corpus
// vectord indexes into one retrieve+merge surface, with corpus
// attribution preserved per result. Future work in the same package:
// relevance filter, strong-model downgrade gate, learning-loop
// integration. This file started as component 2 of the
// dependency-ordered port plan (multi-corpus retrieve+merge); it now
// also carries the ExcludeIDs gate, the post-retrieval metadata
// filter, and the playbook boost/inject hooks used by Search.
//
// Why corpus-as-shard rather than hash-shard a single index:
// different corpora have distinct topology and distinct retrieval
// intent (workers vs candidates vs scrum_findings vs lakehouse_arch).
// Multi-corpus search merges across them by distance — that IS the
// matrix indexer's whole purpose. See feedback_meta_index_vision.md
// and project_small_model_pipeline_vision.md.
package matrix

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"sort"
	"sync"
	"time"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/vectord"
)

// Result is one merged hit with corpus attribution. The corpus field
// is load-bearing — losing it would defeat the matrix's purpose
// (knowing WHICH corpus contributed each hit is half the signal).
type Result struct {
	// ID is the item ID as returned by vectord for this hit.
	ID string `json:"id"`
	// Distance is the hit's distance to the query vector; Search
	// orders results by ascending Distance.
	Distance float32 `json:"distance"`
	// Corpus is the name of the corpus (vectord index) the hit came from.
	Corpus string `json:"corpus"`
	// Metadata is the item's raw JSON metadata, passed through from
	// vectord without being decoded.
	Metadata json.RawMessage `json:"metadata,omitempty"`
}

// SearchRequest is the matrix search input. Either QueryText (matrix
// embeds it via embedd) or QueryVector (already embedded by caller)
// must be set; QueryVector takes precedence if both supplied.
//
// Playbook fields (component 5 — learning loop):
//
//	UsePlaybook=true: after normal retrieve+merge, fetch top similar
//	  past queries from PlaybookCorpus and apply distance boost to
//	  any current results that match a recorded answer.
//	PlaybookCorpus: index name; empty = DefaultPlaybookCorpus.
//	PlaybookTopK: number of similar past queries to consider; 0 =
//	  DefaultPlaybookTopK.
//	PlaybookMaxDistance: cosine ceiling for "similar enough" on the
//	  BOOST path (re-rank in place); 0 = DefaultPlaybookMaxDistance.
//	PlaybookMaxInjectDistance: tighter cosine ceiling for the SHAPE B
//	  INJECT path; 0 = DefaultPlaybookMaxInjectDistance. Splitting the
//	  two thresholds is intentional — boost is safe at loose thresholds
//	  because it only re-ranks results that already retrieved on their
//	  own merits, while inject forces results in and so cross-pollinates
//	  wrong-domain answers if the threshold is too loose.
//
// Metadata filter (post-retrieval structured gate):
//
//	MetadataFilter: map of metadata-field → expected value. Results
//	  whose metadata doesn't match every key are dropped. Addresses
//	  the reality-test gap surfaced in the candidates/workers
//	  experiments — pure semantic retrieval can't gate by status,
//	  state, etc. Caller can compensate for filter shrinkage by
//	  requesting larger PerCorpusK.
//	  Each filter value can be a single value (string|number|bool —
//	  whatever JSON unmarshals to `any`) or a []any meaning "any
//	  of these values" (OR semantics within one key, AND across keys).
type SearchRequest struct {
	// QueryText is embedded via embedd when QueryVector is empty.
	QueryText string `json:"query_text,omitempty"`
	// QueryVector, when non-empty, is used as-is and no embed call is made.
	QueryVector []float32 `json:"query_vector,omitempty"`
	// Corpora lists the vectord indexes to search; Search rejects an
	// empty list with ErrEmptyCorpora.
	Corpora []string `json:"corpora"`
	// K is the size of the merged result list; Search rejects K <= 0.
	K int `json:"k"`
	// PerCorpusK is the k requested from each corpus; <= 0 means "use K".
	PerCorpusK int `json:"per_corpus_k,omitempty"`
	// Model is forwarded to embedd when QueryText has to be embedded.
	Model string `json:"model,omitempty"`

	UsePlaybook               bool    `json:"use_playbook,omitempty"`
	PlaybookCorpus            string  `json:"playbook_corpus,omitempty"`
	PlaybookTopK              int     `json:"playbook_top_k,omitempty"`
	PlaybookMaxDistance       float64 `json:"playbook_max_distance,omitempty"`
	PlaybookMaxInjectDistance float64 `json:"playbook_max_inject_distance,omitempty"`

	// JudgeURL: when set, every Shape B injection candidate is
	// rated by an LLM at this Ollama-shape /api/chat endpoint
	// (chatd's /v1/chat works too). Candidates with rating <
	// JudgeMinRating are skipped. Empty = no judge gate (current
	// behavior — distance-only filter).
	JudgeURL string `json:"judge_url,omitempty"`
	// JudgeModel and JudgeMinRating are handed to the judge gate
	// together with JudgeURL.
	JudgeModel     string `json:"judge_model,omitempty"`
	JudgeMinRating int    `json:"judge_min_rating,omitempty"`

	// QueryRole is an optional role hint that gates Shape A boost +
	// Shape B inject on role match (real_001 cross-role bleed fix).
	// When set, a recorded playbook entry only fires for queries with
	// the same role under roleEqual's normalization. Empty here OR
	// empty on the recorded entry = gate disabled (preserves behavior
	// for callers that don't supply role — e.g. lift-suite multi-
	// constraint queries that have no clean single role).
	QueryRole string `json:"query_role,omitempty"`

	// MetadataFilter is the post-retrieval structured gate described
	// in the type comment; nil or empty disables it.
	MetadataFilter map[string]any `json:"metadata_filter,omitempty"`

	// ExcludeIDs filters out specific worker IDs post-retrieval.
	// Real-world driver: a coordinator places 200 workers at a
	// contract, then mid-day the client asks for a different set —
	// the next query should NOT return the already-placed workers.
	// Filter runs after merge but before metadata filter, so an
	// excluded ID never wastes a slot in the post-filter top-K.
	// Also applies to playbook boost + Shape B inject — excluded
	// answers are skipped at injection time.
	ExcludeIDs []string `json:"exclude_ids,omitempty"`
}

// SearchResponse wraps the merged results plus per-corpus return
// counts so callers can detect "this corpus returned nothing"
// without re-querying. PlaybookBoosted is the count of results that
// received a boost from playbook memory; useful for telemetry on
// "how much the learning loop influenced this query."
// MetadataFilterDropped is the count of results dropped by the
// post-retrieval structured filter (when set in the request).
type SearchResponse struct { Results []Result `json:"results"` PerCorpusCounts map[string]int `json:"per_corpus_counts"` PlaybookBoosted int `json:"playbook_boosted,omitempty"` // PlaybookInjected is Shape B's per-query metric: synthetic // results inserted from playbook hits whose answer wasn't already // in the regular retrieval. Distinct from PlaybookBoosted (which // counts in-place re-ranks of results that WERE present). PlaybookInjected int `json:"playbook_injected,omitempty"` MetadataFilterDropped int `json:"metadata_filter_dropped,omitempty"` } // Retriever holds the HTTP clients to embedd and vectord. Stateless // otherwise — safe to share across goroutines. type Retriever struct { httpClient *http.Client embeddURL string vectordURL string } // New returns a Retriever configured to call embedd at embeddURL // and vectord at vectordURL (both gateway-internal upstreams, // usually 127.0.0.1:3216 and :3215 respectively). func New(embeddURL, vectordURL string) *Retriever { return &Retriever{ httpClient: &http.Client{Timeout: 30 * time.Second}, embeddURL: embeddURL, vectordURL: vectordURL, } } // Errors surfaced to HTTP handlers. var ( ErrEmptyCorpora = errors.New("matrix: corpora must be non-empty") ErrEmptyQuery = errors.New("matrix: query_text or query_vector required") ErrCorpus = errors.New("matrix: corpus search failed") // wraps vectord errors ErrEmbed = errors.New("matrix: embed failed") ErrCorpusNotFound = errors.New("matrix: corpus not found") // distinct sentinel for vectord 404 ) // Search runs the matrix retrieve+merge. // // Error policy: fail-loud on any corpus error. Silent partial results // would lie about what was actually searched, which defeats the // indexer's coverage guarantee. Callers that want best-effort can // catch the error and re-issue with a smaller corpora list. 
func (r *Retriever) Search(ctx context.Context, req SearchRequest) (*SearchResponse, error) { if len(req.Corpora) == 0 { return nil, ErrEmptyCorpora } if req.K <= 0 { return nil, errors.New("matrix: k must be > 0") } if req.PerCorpusK <= 0 { req.PerCorpusK = req.K } // Resolve query → vector. qvec := req.QueryVector if len(qvec) == 0 { if req.QueryText == "" { return nil, ErrEmptyQuery } v, err := r.embed(ctx, req.QueryText, req.Model) if err != nil { return nil, fmt.Errorf("%w: %v", ErrEmbed, err) } qvec = v } // Parallel search across corpora. Each shard is independent; // fan-out + collect with WaitGroup is cleaner than channels-only. type shardResult struct { corpus string hits []vectord.Result err error } results := make([]shardResult, len(req.Corpora)) var wg sync.WaitGroup for i, c := range req.Corpora { wg.Add(1) go func(i int, corpus string) { defer wg.Done() hits, err := r.searchCorpus(ctx, corpus, qvec, req.PerCorpusK) results[i] = shardResult{corpus: corpus, hits: hits, err: err} }(i, c) } wg.Wait() var allHits []Result perCorpus := make(map[string]int, len(req.Corpora)) for _, s := range results { if s.err != nil { return nil, fmt.Errorf("%w: %s: %v", ErrCorpus, s.corpus, s.err) } perCorpus[s.corpus] = len(s.hits) for _, h := range s.hits { allHits = append(allHits, Result{ ID: h.ID, Distance: h.Distance, Corpus: s.corpus, Metadata: h.Metadata, }) } } // Stable sort so equal-distance ties keep input order (which is // per-corpus order from vectord's HNSW result heap). This matters // for deterministic test assertions. sort.SliceStable(allHits, func(i, j int) bool { return allHits[i].Distance < allHits[j].Distance }) // ExcludeIDs filter — applied first so excluded IDs don't waste // a slot in the post-filter top-K. Real-world driver: coordinator // has placed N workers at a contract; mid-day the client asks for // alternatives, so this query passes ExcludeIDs= and // gets back fresh candidates instead of the same N. 
if len(req.ExcludeIDs) > 0 { excludeSet := make(map[string]bool, len(req.ExcludeIDs)) for _, id := range req.ExcludeIDs { excludeSet[id] = true } kept := make([]Result, 0, len(allHits)) for _, h := range allHits { if !excludeSet[h.ID] { kept = append(kept, h) } } allHits = kept } // Metadata filter (component B — staffing-side structured gate). // Applied BEFORE top-K truncation so the filter doesn't accidentally // reduce coverage further. Caller can request larger PerCorpusK to // compensate when filters are aggressive. var dropped int if len(req.MetadataFilter) > 0 { filtered := make([]Result, 0, len(allHits)) for _, h := range allHits { if matchesMetadataFilter(h.Metadata, req.MetadataFilter) { filtered = append(filtered, h) } else { dropped++ } } allHits = filtered } if len(allHits) > req.K { allHits = allHits[:req.K] } resp := &SearchResponse{ Results: allHits, PerCorpusCounts: perCorpus, MetadataFilterDropped: dropped, } // Playbook (component 5) — both boost (re-rank existing) and // inject (Shape B: bring in answers that aren't in regular // retrieval). Reuses the query vector — no extra embed call. // Missing playbook corpus is a legitimate cold-start no-op. if req.UsePlaybook { hits, err := r.fetchPlaybookHits(ctx, qvec, req) if err != nil { slog.Warn("matrix: playbook lookup failed; skipping boost+inject", "err", err) } else if len(hits) > 0 { // Filter playbook hits to honor ExcludeIDs — without this, // an excluded answer in a playbook recording would re-enter // the result set via Shape B inject, defeating the swap // semantics that the exclude list exists to enforce. 
if len(req.ExcludeIDs) > 0 { excludeSet := make(map[string]bool, len(req.ExcludeIDs)) for _, id := range req.ExcludeIDs { excludeSet[id] = true } keptHits := make([]PlaybookHit, 0, len(hits)) for _, h := range hits { if !excludeSet[h.Entry.AnswerID] { keptHits = append(keptHits, h) } } hits = keptHits } resp.PlaybookBoosted = ApplyPlaybookBoost(req.QueryRole, resp.Results, hits) maxInjectDist := float32(req.PlaybookMaxInjectDistance) if maxInjectDist <= 0 { maxInjectDist = float32(DefaultPlaybookMaxInjectDistance) } // Optional LLM judge gate (per OPEN item #1). nil when // JudgeURL/JudgeModel are unset → distance-only filter. var gate InjectGate if g := NewLLMJudgeGate(req.JudgeURL, req.JudgeModel, req.JudgeMinRating, nil); g != nil { gate = g } var injected int resp.Results, injected = InjectPlaybookMisses(req.QueryText, req.QueryRole, resp.Results, hits, maxInjectDist, gate) resp.PlaybookInjected = injected if injected > 0 { // Re-sort + truncate after injection. ApplyPlaybookBoost // already sorted, but injection appends past the end — // resort to merge, then enforce K. sort.SliceStable(resp.Results, func(i, j int) bool { return resp.Results[i].Distance < resp.Results[j].Distance }) if len(resp.Results) > req.K { resp.Results = resp.Results[:req.K] } } } } return resp, nil } // fetchPlaybookHits queries the playbook corpus with the same query // vector and returns hits whose decoded entries are within // PlaybookMaxDistance. A missing playbook corpus returns nil + nil // (legitimate no-op state for a system before any Record call). 
func (r *Retriever) fetchPlaybookHits(ctx context.Context, qvec []float32, req SearchRequest) ([]PlaybookHit, error) { corpus := req.PlaybookCorpus if corpus == "" { corpus = DefaultPlaybookCorpus } topK := req.PlaybookTopK if topK <= 0 { topK = DefaultPlaybookTopK } maxDist := req.PlaybookMaxDistance if maxDist <= 0 { maxDist = DefaultPlaybookMaxDistance } rawHits, err := r.searchCorpus(ctx, corpus, qvec, topK) if errors.Is(err, ErrCorpusNotFound) { // Cold-start state: no Record call has happened yet, so the // playbook corpus doesn't exist. Legit no-op, not an error. return nil, nil } if err != nil { return nil, err } out := make([]PlaybookHit, 0, len(rawHits)) for _, h := range rawHits { if float64(h.Distance) > maxDist { continue } entry, err := UnmarshalPlaybookMetadata(h.Metadata) if err != nil { slog.Warn("matrix: skip malformed playbook entry", "id", h.ID, "err", err) continue } out = append(out, PlaybookHit{ PlaybookID: h.ID, Distance: h.Distance, Entry: entry, }) } return out, nil } // Record stores a (query → answer_id) playbook entry in the // playbook corpus. Embeds the query via embedd, ensures the corpus // exists (idempotent create), and writes the entry as one vectord // item with the entry's JSON in metadata. // // Uses a deterministic ID derived from (query_text, answer_id, // answer_corpus) so re-recording the same triple upserts (last // score wins). Callers wanting to accumulate distinct samples can // vary one of the three. // // corpus="" defaults to DefaultPlaybookCorpus. 
func (r *Retriever) Record(ctx context.Context, entry PlaybookEntry, corpus string) (string, error) { if err := entry.Validate(); err != nil { return "", err } if corpus == "" { corpus = DefaultPlaybookCorpus } qvec, err := r.embed(ctx, entry.QueryText, "") if err != nil { return "", fmt.Errorf("playbook record embed: %w", err) } if err := r.ensureCorpus(ctx, corpus, len(qvec)); err != nil { return "", fmt.Errorf("playbook ensure corpus: %w", err) } if entry.RecordedAtNs == 0 { entry.RecordedAtNs = time.Now().UnixNano() } pbID := playbookID(entry.QueryText, entry.AnswerID, entry.AnswerCorpus) meta, err := entry.MarshalMetadata() if err != nil { return "", err } if err := r.addItem(ctx, corpus, pbID, qvec, meta); err != nil { return "", fmt.Errorf("playbook add: %w", err) } return pbID, nil } // playbookID is sha256-truncated 8 bytes (16 hex chars) prefixed // with "pb-". Deterministic on (query, answer_id, answer_corpus). func playbookID(query, answerID, answerCorpus string) string { h := sha256.Sum256([]byte(query + "|" + answerID + "|" + answerCorpus)) return "pb-" + hex.EncodeToString(h[:8]) } // ensureCorpus creates a vectord index if it doesn't exist. // 201 = created; 409 = already exists; both fine for idempotent use. 
func (r *Retriever) ensureCorpus(ctx context.Context, name string, dim int) error { body, err := json.Marshal(map[string]any{ "name": name, "dimension": dim, "distance": "cosine", }) if err != nil { return err } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, r.vectordURL+"/vectors/index", bytes.NewReader(body)) if err != nil { return err } httpReq.Header.Set("Content-Type", "application/json") resp, err := r.httpClient.Do(httpReq) if err != nil { return err } defer resp.Body.Close() io.Copy(io.Discard, resp.Body) if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusConflict { return nil } return fmt.Errorf("ensure %q: status %d", name, resp.StatusCode) } // addItem POSTs a single-item batch to /vectors/index/{name}/add. func (r *Retriever) addItem(ctx context.Context, corpus, id string, vec []float32, meta json.RawMessage) error { body, err := json.Marshal(map[string]any{ "items": []map[string]any{ {"id": id, "vector": vec, "metadata": meta}, }, }) if err != nil { return err } url := r.vectordURL + "/vectors/index/" + corpus + "/add" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return err } httpReq.Header.Set("Content-Type", "application/json") resp, err := r.httpClient.Do(httpReq) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { b, _ := io.ReadAll(resp.Body) return fmt.Errorf("add %q: status %d: %s", corpus, resp.StatusCode, b) } return nil } // matchesMetadataFilter reports whether a result's metadata satisfies // the filter. Each filter key must be present in the metadata; the // value must equal (or for a list filter, contain) the metadata // value. Missing key = drop. Type mismatches are JSON-equality // checked (e.g. filter wants 1 but metadata has 1.0 → match via // canonical JSON form). 
//
// Filter value semantics:
//
//	string|number|bool → exact equality (after JSON normalization)
//	[]any              → OR within key (any element matching wins)
//
// AND across keys: every filter key must match.
func matchesMetadataFilter(rawMeta json.RawMessage, filter map[string]any) bool {
	switch {
	case len(filter) == 0:
		// Nothing to enforce, so even metadata-less results pass.
		return true
	case len(rawMeta) == 0:
		// A non-empty filter can never be satisfied without metadata.
		return false
	}

	var fields map[string]any
	if json.Unmarshal(rawMeta, &fields) != nil {
		// Metadata that is not a JSON object cannot match anything.
		return false
	}
	for key, want := range filter {
		if have, ok := fields[key]; !ok || !valueMatches(have, want) {
			return false
		}
	}
	return true
}

// valueMatches compares one metadata value against one filter value.
// A []any filter value means "any of these"; anything else is a
// single JSON-canonical equality check, so 1 ≡ 1.0 and "true" != true.
func valueMatches(got, expected any) bool {
	candidates, isList := expected.([]any)
	if !isList {
		return jsonEqual(got, expected)
	}
	for i := range candidates {
		if jsonEqual(got, candidates[i]) {
			return true
		}
	}
	return false
}

// jsonEqual reports whether a and b encode to the same JSON bytes.
// Running both sides through the same encoder sidesteps the
// float64-vs-int mismatch of decoded JSON numbers (1 == 1.0 when both
// are numbers). A value that cannot be marshaled never equals anything.
func jsonEqual(a, b any) bool {
	left, leftErr := json.Marshal(a)
	right, rightErr := json.Marshal(b)
	if leftErr != nil || rightErr != nil {
		return false
	}
	return bytes.Equal(left, right)
}

// Corpora returns the list of vectord index names. Thin proxy to
// GET /vectors/index — exposed at the matrix layer so callers don't
// need direct vectord access.
func (r *Retriever) Corpora(ctx context.Context) ([]string, error) { url := r.vectordURL + "/vectors/index" httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, err } resp, err := r.httpClient.Do(httpReq) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { b, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("vectord index list: status %d: %s", resp.StatusCode, b) } var out struct { Names []string `json:"names"` } if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { return nil, err } return out.Names, nil } // embed POSTs a single-text /embed call. Reuses embedd's batched // /embed shape with len(texts)==1; embedd's LRU cache absorbs // repeat queries (commit 56844c3). func (r *Retriever) embed(ctx context.Context, text, model string) ([]float32, error) { body, err := json.Marshal(map[string]any{"texts": []string{text}, "model": model}) if err != nil { return nil, err } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, r.embeddURL+"/embed", bytes.NewReader(body)) if err != nil { return nil, err } httpReq.Header.Set("Content-Type", "application/json") resp, err := r.httpClient.Do(httpReq) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { b, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("embed status %d: %s", resp.StatusCode, b) } var out struct { Vectors [][]float32 `json:"vectors"` } if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { return nil, err } if len(out.Vectors) == 0 { return nil, errors.New("embed returned no vectors") } return out.Vectors[0], nil } // searchCorpus calls vectord /vectors/index/{name}/search. Returns // ErrCorpusNotFound (wrapped) on HTTP 404 so callers can distinguish // "this corpus doesn't exist" from "this corpus errored." 
Per // 2026-04-29 cross-lineage scrum (Opus + Kimi convergent): caught // the original strings.Contains "status 404" detection that would // silently break if the error format changed. func (r *Retriever) searchCorpus(ctx context.Context, corpus string, vec []float32, k int) ([]vectord.Result, error) { body, err := json.Marshal(map[string]any{"vector": vec, "k": k}) if err != nil { return nil, err } url := r.vectordURL + "/vectors/index/" + corpus + "/search" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return nil, err } httpReq.Header.Set("Content-Type", "application/json") resp, err := r.httpClient.Do(httpReq) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode == http.StatusNotFound { return nil, fmt.Errorf("%w: %s", ErrCorpusNotFound, corpus) } if resp.StatusCode != http.StatusOK { b, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("status %d: %s", resp.StatusCode, b) } var out struct { Results []vectord.Result `json:"results"` } if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { return nil, err } return out.Results, nil }