// Package matrix is the multi-corpus retrieval layer above vectord.
// Per docs/SPEC.md §3.4: the matrix indexer composes N single-corpus
// vectord indexes into one retrieve+merge surface, with corpus
// attribution preserved per result. Future work in the same package:
// relevance filter, strong-model downgrade gate, learning-loop
// integration. This file is component 2 of the dependency-ordered
// port plan — multi-corpus retrieve+merge, no filter yet.
//
// Why corpus-as-shard rather than hash-shard a single index:
// different corpora have distinct topology and distinct retrieval
// intent (workers vs candidates vs scrum_findings vs lakehouse_arch).
// Multi-corpus search merges across them by distance — that IS the
// matrix indexer's whole purpose. See feedback_meta_index_vision.md
// and project_small_model_pipeline_vision.md.
package matrix

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"sort"
	"sync"
	"time"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/vectord"
)

// Result is one merged hit with corpus attribution. The corpus field
// is load-bearing — losing it would defeat the matrix's purpose
// (knowing WHICH corpus contributed each hit is half the signal).
type Result struct {
	// ID is the hit identifier, copied unchanged from the vectord result.
	ID string `json:"id"`

	// Distance is the value reported by vectord for this hit. Search
	// orders the merged list ascending on this field.
	Distance float32 `json:"distance"`

	// Corpus is the name of the corpus (as given in the request) whose
	// search produced this hit.
	Corpus string `json:"corpus"`

	// Metadata is the hit's metadata passed through as raw JSON without
	// being decoded here; it is omitted from the output when empty.
	Metadata json.RawMessage `json:"metadata,omitempty"`
}

// SearchRequest is the matrix search input. Either QueryText (matrix
// embeds it via embedd) or QueryVector (already embedded by caller)
// must be set; QueryVector takes precedence if both supplied.
type SearchRequest struct { QueryText string `json:"query_text,omitempty"` QueryVector []float32 `json:"query_vector,omitempty"` Corpora []string `json:"corpora"` K int `json:"k"` PerCorpusK int `json:"per_corpus_k,omitempty"` Model string `json:"model,omitempty"` } // SearchResponse wraps the merged results plus per-corpus return // counts so callers can detect "this corpus returned nothing" // without re-querying. type SearchResponse struct { Results []Result `json:"results"` PerCorpusCounts map[string]int `json:"per_corpus_counts"` } // Retriever holds the HTTP clients to embedd and vectord. Stateless // otherwise — safe to share across goroutines. type Retriever struct { httpClient *http.Client embeddURL string vectordURL string } // New returns a Retriever configured to call embedd at embeddURL // and vectord at vectordURL (both gateway-internal upstreams, // usually 127.0.0.1:3216 and :3215 respectively). func New(embeddURL, vectordURL string) *Retriever { return &Retriever{ httpClient: &http.Client{Timeout: 30 * time.Second}, embeddURL: embeddURL, vectordURL: vectordURL, } } // Errors surfaced to HTTP handlers. var ( ErrEmptyCorpora = errors.New("matrix: corpora must be non-empty") ErrEmptyQuery = errors.New("matrix: query_text or query_vector required") ErrCorpus = errors.New("matrix: corpus search failed") // wraps vectord errors ErrEmbed = errors.New("matrix: embed failed") ) // Search runs the matrix retrieve+merge. // // Error policy: fail-loud on any corpus error. Silent partial results // would lie about what was actually searched, which defeats the // indexer's coverage guarantee. Callers that want best-effort can // catch the error and re-issue with a smaller corpora list. 
func (r *Retriever) Search(ctx context.Context, req SearchRequest) (*SearchResponse, error) { if len(req.Corpora) == 0 { return nil, ErrEmptyCorpora } if req.K <= 0 { return nil, errors.New("matrix: k must be > 0") } if req.PerCorpusK <= 0 { req.PerCorpusK = req.K } // Resolve query → vector. qvec := req.QueryVector if len(qvec) == 0 { if req.QueryText == "" { return nil, ErrEmptyQuery } v, err := r.embed(ctx, req.QueryText, req.Model) if err != nil { return nil, fmt.Errorf("%w: %v", ErrEmbed, err) } qvec = v } // Parallel search across corpora. Each shard is independent; // fan-out + collect with WaitGroup is cleaner than channels-only. type shardResult struct { corpus string hits []vectord.Result err error } results := make([]shardResult, len(req.Corpora)) var wg sync.WaitGroup for i, c := range req.Corpora { wg.Add(1) go func(i int, corpus string) { defer wg.Done() hits, err := r.searchCorpus(ctx, corpus, qvec, req.PerCorpusK) results[i] = shardResult{corpus: corpus, hits: hits, err: err} }(i, c) } wg.Wait() var allHits []Result perCorpus := make(map[string]int, len(req.Corpora)) for _, s := range results { if s.err != nil { return nil, fmt.Errorf("%w: %s: %v", ErrCorpus, s.corpus, s.err) } perCorpus[s.corpus] = len(s.hits) for _, h := range s.hits { allHits = append(allHits, Result{ ID: h.ID, Distance: h.Distance, Corpus: s.corpus, Metadata: h.Metadata, }) } } // Stable sort so equal-distance ties keep input order (which is // per-corpus order from vectord's HNSW result heap). This matters // for deterministic test assertions. sort.SliceStable(allHits, func(i, j int) bool { return allHits[i].Distance < allHits[j].Distance }) if len(allHits) > req.K { allHits = allHits[:req.K] } return &SearchResponse{Results: allHits, PerCorpusCounts: perCorpus}, nil } // Corpora returns the list of vectord index names. Thin proxy to // GET /vectors/index — exposed at the matrix layer so callers don't // need direct vectord access. 
func (r *Retriever) Corpora(ctx context.Context) ([]string, error) { url := r.vectordURL + "/vectors/index" httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, err } resp, err := r.httpClient.Do(httpReq) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { b, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("vectord index list: status %d: %s", resp.StatusCode, b) } var out struct { Names []string `json:"names"` } if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { return nil, err } return out.Names, nil } // embed POSTs a single-text /embed call. Reuses embedd's batched // /embed shape with len(texts)==1; embedd's LRU cache absorbs // repeat queries (commit 56844c3). func (r *Retriever) embed(ctx context.Context, text, model string) ([]float32, error) { body, err := json.Marshal(map[string]any{"texts": []string{text}, "model": model}) if err != nil { return nil, err } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, r.embeddURL+"/embed", bytes.NewReader(body)) if err != nil { return nil, err } httpReq.Header.Set("Content-Type", "application/json") resp, err := r.httpClient.Do(httpReq) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { b, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("embed status %d: %s", resp.StatusCode, b) } var out struct { Vectors [][]float32 `json:"vectors"` } if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { return nil, err } if len(out.Vectors) == 0 { return nil, errors.New("embed returned no vectors") } return out.Vectors[0], nil } // searchCorpus calls vectord /vectors/index/{name}/search. 
func (r *Retriever) searchCorpus(ctx context.Context, corpus string, vec []float32, k int) ([]vectord.Result, error) { body, err := json.Marshal(map[string]any{"vector": vec, "k": k}) if err != nil { return nil, err } url := r.vectordURL + "/vectors/index/" + corpus + "/search" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return nil, err } httpReq.Header.Set("Content-Type", "application/json") resp, err := r.httpClient.Do(httpReq) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { b, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("status %d: %s", resp.StatusCode, b) } var out struct { Results []vectord.Result `json:"results"` } if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { return nil, err } return out.Results, nil }