root c1d96b7b60 matrixd: multi-corpus retrieve+merge — SPEC §3.4 component 2 of 5
Lands the matrix indexer's first piece per docs/SPEC.md §3.4:
multi-corpus retrieve+merge with corpus attribution per result.
Future components (relevance filter, downgrade gate, learning-loop
integration) layer on top of this surface.

Architecture:
  - internal/matrix/retrieve.go — Retriever takes (query, corpora,
    k, per_corpus_k), parallel-fans across vectord indexes, merges
    by distance ascending, preserves corpus origin per hit
  - cmd/matrixd — HTTP service on :3217, fronts /v1/matrix/*
  - gateway proxy + [matrixd] config + lakehouse.toml entry
  - Either query_text (matrix calls embedd) or query_vector
    (caller pre-embedded) — vector takes precedence if both set

Error policy: fail-loud on any corpus error. Silent partial returns
would lie about coverage, defeating the matrix's whole purpose.
Bubbles vectord errors as 502 (upstream), validation as 400.

Smoke (scripts/matrix_smoke.sh, 6 assertions PASS first try):
  - /matrix/corpora lists indexes
  - Multi-corpus search returns hits from BOTH corpora
  - Top hit is the globally-closest across all corpora
    (b-near beats a-near at distance 0.05 vs 0.1 — proves merge)
  - Metadata round-trips through the merge
  - Distances ascending in result list
  - Negative paths: empty corpora → 400, missing corpus → 502,
    no query → 400

12-smoke regression sweep all green (D1-D6, G1, G1P, G2,
storaged_cap, pathway, matrix).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 18:39:17 -05:00

254 lines
8.0 KiB
Go

// Package matrix is the multi-corpus retrieval layer above vectord.
// Per docs/SPEC.md §3.4: the matrix indexer composes N single-corpus
// vectord indexes into one retrieve+merge surface, with corpus
// attribution preserved per result. Future work in the same package:
// relevance filter, strong-model downgrade gate, learning-loop
// integration. This file is component 2 of the dependency-ordered
// port plan — multi-corpus retrieve+merge, no filter yet.
//
// Why corpus-as-shard rather than hash-shard a single index:
// different corpora have distinct topology and distinct retrieval
// intent (workers vs candidates vs scrum_findings vs lakehouse_arch).
// Multi-corpus search merges across them by distance — that IS the
// matrix indexer's whole purpose. See feedback_meta_index_vision.md
// and project_small_model_pipeline_vision.md.
package matrix
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"sort"
	"sync"
	"time"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/vectord"
)
// Result is one merged hit with corpus attribution. The corpus field
// is load-bearing — losing it would defeat the matrix's purpose
// (knowing WHICH corpus contributed each hit is half the signal).
type Result struct {
ID string `json:"id"`
Distance float32 `json:"distance"`
Corpus string `json:"corpus"`
Metadata json.RawMessage `json:"metadata,omitempty"`
}
// SearchRequest is the matrix search input. Either QueryText (matrix
// embeds it via embedd) or QueryVector (already embedded by caller)
// must be set; QueryVector takes precedence if both supplied.
type SearchRequest struct {
QueryText string `json:"query_text,omitempty"`
QueryVector []float32 `json:"query_vector,omitempty"`
Corpora []string `json:"corpora"`
K int `json:"k"`
PerCorpusK int `json:"per_corpus_k,omitempty"`
Model string `json:"model,omitempty"`
}
// SearchResponse wraps the merged results plus per-corpus return
// counts so callers can detect "this corpus returned nothing"
// without re-querying.
type SearchResponse struct {
	// Results is the merged hit list, sorted by ascending distance.
	Results []Result `json:"results"`
	// PerCorpusCounts maps each searched corpus to how many hits it
	// contributed before the global K cut.
	PerCorpusCounts map[string]int `json:"per_corpus_counts"`
}
// Retriever holds the HTTP clients to embedd and vectord. Stateless
// otherwise — safe to share across goroutines.
type Retriever struct {
	httpClient *http.Client // shared client with a hard timeout, reused for every upstream call
	embeddURL  string       // base URL of the embedd upstream
	vectordURL string       // base URL of the vectord upstream
}
// New returns a Retriever configured to call embedd at embeddURL
// and vectord at vectordURL (both gateway-internal upstreams,
// usually 127.0.0.1:3216 and :3215 respectively).
func New(embeddURL, vectordURL string) *Retriever {
	// One shared client with a hard deadline so a stuck upstream can't
	// wedge matrix requests indefinitely.
	client := &http.Client{Timeout: 30 * time.Second}
	return &Retriever{
		httpClient: client,
		embeddURL:  embeddURL,
		vectordURL: vectordURL,
	}
}
// Errors surfaced to HTTP handlers.
var (
	// ErrEmptyCorpora: the request named no corpora at all.
	ErrEmptyCorpora = errors.New("matrix: corpora must be non-empty")
	// ErrEmptyQuery: neither query_text nor query_vector was supplied.
	ErrEmptyQuery = errors.New("matrix: query_text or query_vector required")
	// ErrCorpus wraps any per-corpus vectord failure.
	ErrCorpus = errors.New("matrix: corpus search failed")
	// ErrEmbed wraps failures from the embedd upstream.
	ErrEmbed = errors.New("matrix: embed failed")
)
// Search runs the matrix retrieve+merge.
//
// Error policy: fail-loud on any corpus error. Silent partial results
// would lie about what was actually searched, which defeats the
// indexer's coverage guarantee. Callers that want best-effort can
// catch the error and re-issue with a smaller corpora list.
func (r *Retriever) Search(ctx context.Context, req SearchRequest) (*SearchResponse, error) {
	// Validate before doing any network work.
	if len(req.Corpora) == 0 {
		return nil, ErrEmptyCorpora
	}
	if req.K <= 0 {
		return nil, errors.New("matrix: k must be > 0")
	}
	perK := req.PerCorpusK
	if perK <= 0 {
		perK = req.K
	}

	// Resolve the query to a vector: a caller-supplied vector wins;
	// otherwise embed the text via embedd.
	vec := req.QueryVector
	if len(vec) == 0 {
		if req.QueryText == "" {
			return nil, ErrEmptyQuery
		}
		embedded, err := r.embed(ctx, req.QueryText, req.Model)
		if err != nil {
			return nil, fmt.Errorf("%w: %v", ErrEmbed, err)
		}
		vec = embedded
	}

	// Fan out one goroutine per corpus. Each goroutine writes only its
	// own slot, so no mutex is needed; wg.Wait gives the happens-before.
	n := len(req.Corpora)
	hitsByShard := make([][]vectord.Result, n)
	errByShard := make([]error, n)
	var wg sync.WaitGroup
	for i, c := range req.Corpora {
		wg.Add(1)
		go func(slot int, corpus string) {
			defer wg.Done()
			hitsByShard[slot], errByShard[slot] = r.searchCorpus(ctx, corpus, vec, perK)
		}(i, c)
	}
	wg.Wait()

	// Fail loud on the first corpus error (input order), then flatten
	// all hits with their corpus attribution attached.
	var merged []Result
	perCorpus := make(map[string]int, n)
	for i, corpus := range req.Corpora {
		if err := errByShard[i]; err != nil {
			return nil, fmt.Errorf("%w: %s: %v", ErrCorpus, corpus, err)
		}
		hits := hitsByShard[i]
		perCorpus[corpus] = len(hits)
		for _, h := range hits {
			merged = append(merged, Result{
				ID:       h.ID,
				Distance: h.Distance,
				Corpus:   corpus,
				Metadata: h.Metadata,
			})
		}
	}

	// Stable sort so equal-distance ties keep input order (which is
	// per-corpus order from vectord's HNSW result heap). This matters
	// for deterministic test assertions.
	sort.SliceStable(merged, func(a, b int) bool {
		return merged[a].Distance < merged[b].Distance
	})
	if len(merged) > req.K {
		merged = merged[:req.K]
	}
	return &SearchResponse{Results: merged, PerCorpusCounts: perCorpus}, nil
}
// Corpora returns the list of vectord index names. Thin proxy to
// GET /vectors/index — exposed at the matrix layer so callers don't
// need direct vectord access.
func (r *Retriever) Corpora(ctx context.Context) ([]string, error) {
	endpoint := r.vectordURL + "/vectors/index"
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	resp, err := r.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		// Include the upstream body in the error — it usually carries
		// vectord's own explanation of the failure.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("vectord index list: status %d: %s", resp.StatusCode, body)
	}
	var payload struct {
		Names []string `json:"names"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return nil, err
	}
	return payload.Names, nil
}
// embed POSTs a single-text /embed call. Reuses embedd's batched
// /embed shape with len(texts)==1; embedd's LRU cache absorbs
// repeat queries (commit 56844c3).
func (r *Retriever) embed(ctx context.Context, text, model string) ([]float32, error) {
	payload := map[string]any{"texts": []string{text}, "model": model}
	body, err := json.Marshal(payload)
	if err != nil {
		return nil, err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, r.embeddURL+"/embed", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := r.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		msg, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("embed status %d: %s", resp.StatusCode, msg)
	}
	var decoded struct {
		Vectors [][]float32 `json:"vectors"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&decoded); err != nil {
		return nil, err
	}
	// Defensive: a 200 with an empty vectors array would otherwise
	// surface as a confusing index-out-of-range panic below.
	if len(decoded.Vectors) == 0 {
		return nil, errors.New("embed returned no vectors")
	}
	return decoded.Vectors[0], nil
}
// searchCorpus calls vectord /vectors/index/{name}/search.
//
// The corpus name is path-escaped before being spliced into the URL:
// a name containing '/', '%', '?', or '#' would otherwise be parsed
// as extra path components or a query string by vectord's router,
// silently addressing the wrong endpoint.
func (r *Retriever) searchCorpus(ctx context.Context, corpus string, vec []float32, k int) ([]vectord.Result, error) {
	body, err := json.Marshal(map[string]any{"vector": vec, "k": k})
	if err != nil {
		return nil, err
	}
	// url.PathEscape leaves plain names (letters, digits, _ , -) intact,
	// so well-behaved corpus names produce the same URL as before.
	endpoint := r.vectordURL + "/vectors/index/" + url.PathEscape(corpus) + "/search"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	httpReq.Header.Set("Content-Type", "application/json")
	resp, err := r.httpClient.Do(httpReq)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		b, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, b)
	}
	var out struct {
		Results []vectord.Result `json:"results"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, err
	}
	return out.Results, nil
}