root 06e71520c4 matrix: playbook memory + boost — SPEC §3.4 component 5 of 5 (LEARNING LOOP)
Closes SPEC §3.4. The matrix indexer is now a learning meta-index per
feedback_meta_index_vision.md — every successful (query → answer)
pair recorded via /matrix/playbooks/record boosts that answer for
future similar queries.

This is the architectural piece that lifts vectord from "static
hybrid search" to the meta-index originally framed in Phase 19 of
the Rust system.

What's new:
  - internal/matrix/playbook.go — PlaybookEntry, PlaybookHit,
    ApplyPlaybookBoost. Pure-function boost math:
      distance' = distance * (1 - 0.5 * score)
    Score 0 = no boost (factor 1.0); score 1 = halve distance
    (factor 0.5). Capped at 0.5 deliberately so a single high-
    confidence playbook can't dominate the base ranking forever
    (runaway-feedback-loop guard).
  - Retriever.Record(entry, corpus) — embeds query_text, ensures
    playbook corpus exists (idempotent), upserts via deterministic
    sha256-derived ID (last score wins on re-record of same triple).
  - Retriever.Search extended with UsePlaybook + PlaybookCorpus +
    PlaybookTopK + PlaybookMaxDistance. Reuses the query vector —
    no extra embed call. Missing-corpus 404 = no-op (cold-start
    state before any Record call), not an error.
  - POST /v1/matrix/playbooks/record (matrixd) — caller submits
    {query_text, answer_id, answer_corpus, score, tags?}; gets
    {playbook_id} back.

Storage: a vectord index named "playbook_memory" (configurable per
request) with embed(query_text) as the vector and the
PlaybookEntry JSON as metadata. Just another corpus — observable
from /vectors/index, persistable through G1P, etc.

Match key for boost: (AnswerID, AnswerCorpus). Cross-corpus ID
collisions don't false-match — verified by
TestApplyPlaybookBoost_CorpusAttributionRespected.

End-to-end smoke (scripts/playbook_smoke.sh, all assertions PASS):
  - Baseline search: widget-c at distance 0.6566 (rank 3)
  - Record playbook: query → widget-c, score=1.0
  - Re-search with use_playbook=true:
      widget-c distance: 0.3283 (rank 2)
      ratio: 0.5 EXACTLY (matches boost math precisely)
      playbook_boosted: 1
  - widget-c jumped from #3 to #2 — learning loop visible

Tests:
  - 8 unit tests in internal/matrix/playbook_test.go covering
    Validate, BoostFactor (5 cases), the no-boost identity, the
    boost-moves-result-up scenario, highest-score wins on duplicate
    matches, cross-corpus attribution, JSON round-trip, and
    rejection of empty metadata
  - scripts/playbook_smoke.sh integration test (3 assertions PASS)

15-smoke regression sweep all green (D1-D6, G1, G1P, G2,
storaged_cap, pathway, matrix, relevance, downgrade, playbook).

SPEC §3.4 NOW COMPLETE: 5 of 5 components shipped. The matrix
indexer's port is done as a substrate; remaining work is operational
(rating signal sources, telemetry, eventual structured filtering for
staffing data — none in §3.4).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 19:34:24 -05:00

224 lines
7.1 KiB
Go

// matrixd is the matrix indexer service. Wraps internal/matrix's
// Retriever with HTTP routes per docs/SPEC.md §3.4.
//
// Routes:
// POST /matrix/search — multi-corpus retrieve+merge,
// with optional playbook boost
// GET /matrix/corpora — list known vectord indexes
// POST /matrix/relevance — adjacency-pollution filter
// POST /matrix/downgrade — strong-model downgrade gate
// POST /matrix/playbooks/record — record a (query → answer)
// success for the learning loop
//
// matrixd talks to embedd (for query-text embedding) and vectord
// (for per-corpus search) via HTTP. Both URLs come from
// [matrixd] config; gateway sets them to its own upstream URLs so
// matrixd inherits the same provider topology.
package main
import (
"encoding/json"
"errors"
"flag"
"log/slog"
"net/http"
"os"
"strings"
"github.com/go-chi/chi/v5"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/matrix"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
)
// maxRequestBytes caps every request body read by decodeJSON;
// larger bodies are rejected with 413 Request Entity Too Large.
const maxRequestBytes = 4 << 20 // 4 MiB cap on request bodies
// main wires matrixd together: parse flags, load the TOML config,
// validate the mandatory [matrixd] upstream URLs, then serve the
// HTTP routes via shared.Run until shutdown or fatal error.
func main() {
	configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
	flag.Parse()

	cfg, err := shared.LoadConfig(*configPath)
	if err != nil {
		slog.Error("config", "err", err)
		os.Exit(1)
	}

	// Both upstreams are required: embedd for query-text embedding,
	// vectord for per-corpus search.
	if cfg.Matrixd.EmbeddURL == "" || cfg.Matrixd.VectordURL == "" {
		slog.Error("matrixd: embedd_url and vectord_url required in [matrixd]")
		os.Exit(1)
	}

	h := &handlers{r: matrix.New(cfg.Matrixd.EmbeddURL, cfg.Matrixd.VectordURL)}
	if err := shared.Run("matrixd", cfg.Matrixd.Bind, h.register, cfg.Auth); err != nil {
		slog.Error("server", "err", err)
		os.Exit(1)
	}
}
// handlers groups the HTTP route implementations around the single
// matrix.Retriever instance shared by every request.
type handlers struct {
r *matrix.Retriever
}
// register mounts every matrixd route on the supplied router.
func (h *handlers) register(r chi.Router) {
	routes := []struct {
		method, pattern string
		fn              http.HandlerFunc
	}{
		{http.MethodPost, "/matrix/search", h.handleSearch},
		{http.MethodGet, "/matrix/corpora", h.handleCorpora},
		{http.MethodPost, "/matrix/relevance", h.handleRelevance},
		{http.MethodPost, "/matrix/downgrade", h.handleDowngrade},
		{http.MethodPost, "/matrix/playbooks/record", h.handlePlaybookRecord},
	}
	for _, rt := range routes {
		r.Method(rt.method, rt.pattern, rt.fn)
	}
}
// handleSearch implements POST /matrix/search: decode the search
// request, run the multi-corpus retrieve+merge, and write either
// the merged result or an error status mapped by writeMatrixError.
func (h *handlers) handleSearch(w http.ResponseWriter, r *http.Request) {
	var req matrix.SearchRequest
	if !decodeJSON(w, r, &req) {
		return
	}
	resp, err := h.r.Search(r.Context(), req)
	if err == nil {
		writeJSON(w, http.StatusOK, resp)
		return
	}
	writeMatrixError(w, err)
}
// relevanceRequest is the POST /matrix/relevance body. Threshold
// defaults to matrix.DefaultRelevanceThreshold when zero — note
// this means an explicit threshold of 0 is indistinguishable from
// "unset" and gets the default.
type relevanceRequest struct {
Focus matrix.FocusFile `json:"focus"`
Chunks []matrix.CandidateChunk `json:"chunks"`
Threshold float64 `json:"threshold,omitempty"`
}
// handleRelevance implements POST /matrix/relevance: run the
// adjacency-pollution filter over the submitted chunks against the
// focus file. An empty chunk list is rejected with 400.
func (h *handlers) handleRelevance(w http.ResponseWriter, r *http.Request) {
	var req relevanceRequest
	if !decodeJSON(w, r, &req) {
		return
	}
	if len(req.Chunks) == 0 {
		http.Error(w, "chunks must be non-empty", http.StatusBadRequest)
		return
	}
	// Zero means "unset": fall back to the package-level default.
	th := req.Threshold
	if th == 0 {
		th = matrix.DefaultRelevanceThreshold
	}
	writeJSON(w, http.StatusOK, matrix.FilterChunks(req.Focus, req.Chunks, th))
}
// playbookRecordRequest is the POST /matrix/playbooks/record body.
// Corpus is optional; defaults to matrix.DefaultPlaybookCorpus.
type playbookRecordRequest struct {
// QueryText is the successful query being recorded.
QueryText string `json:"query_text"`
// AnswerID and AnswerCorpus together identify the answer to boost
// on future similar queries.
AnswerID string `json:"answer_id"`
AnswerCorpus string `json:"answer_corpus"`
// Score is the success rating passed to matrix.NewPlaybookEntry;
// higher scores produce a stronger boost.
Score float64 `json:"score"`
Tags []string `json:"tags,omitempty"`
// Corpus optionally overrides the playbook storage index name.
Corpus string `json:"corpus,omitempty"`
}
// handlePlaybookRecord implements POST /matrix/playbooks/record:
// build a PlaybookEntry from the request, validate it (400 on bad
// input), persist it via Retriever.Record (502 when the upstream
// fails), and echo the stored fields plus the new playbook_id.
func (h *handlers) handlePlaybookRecord(w http.ResponseWriter, r *http.Request) {
	var req playbookRecordRequest
	if !decodeJSON(w, r, &req) {
		return
	}

	entry := matrix.NewPlaybookEntry(req.QueryText, req.AnswerID, req.AnswerCorpus, req.Score, req.Tags)
	if err := entry.Validate(); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	id, err := h.r.Record(r.Context(), entry, req.Corpus)
	if err != nil {
		slog.Warn("playbook record", "err", err)
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}

	out := map[string]any{
		"playbook_id":   id,
		"query_text":    entry.QueryText,
		"answer_id":     entry.AnswerID,
		"answer_corpus": entry.AnswerCorpus,
		"score":         entry.Score,
	}
	writeJSON(w, http.StatusOK, out)
}
// downgradeRequest is the POST /matrix/downgrade body. Mirrors
// matrix.DowngradeInput; ForceFullOverride is read from the
// LH_FORCE_FULL_ENRICHMENT env var when omitted from the body
// (it is a *bool precisely so "absent" and "false" differ).
type downgradeRequest struct {
Mode string `json:"mode"`
Model string `json:"model"`
ForcedMode bool `json:"forced_mode,omitempty"`
ForceFullOverride *bool `json:"force_full_override,omitempty"`
}
// handleDowngrade implements POST /matrix/downgrade: validate the
// required mode/model pair, seed the gate input from the env, apply
// any explicit body override, and return the gate's decision.
func (h *handlers) handleDowngrade(w http.ResponseWriter, r *http.Request) {
	var req downgradeRequest
	if !decodeJSON(w, r, &req) {
		return
	}
	if req.Mode == "" || req.Model == "" {
		http.Error(w, "mode and model are required", http.StatusBadRequest)
		return
	}

	in := matrix.NewDowngradeInputFromEnv(req.Mode, req.Model, req.ForcedMode)
	// An explicit body override beats the env value — lets tooling ask
	// "what would the gate do under these conditions" without env
	// pollution.
	if o := req.ForceFullOverride; o != nil {
		in.ForceFullOverride = *o
	}
	writeJSON(w, http.StatusOK, matrix.MaybeDowngrade(in))
}
// handleCorpora implements GET /matrix/corpora: list the vectord
// indexes known to the retriever, with a count. A failed upstream
// call surfaces as 502.
func (h *handlers) handleCorpora(w http.ResponseWriter, r *http.Request) {
	names, err := h.r.Corpora(r.Context())
	if err != nil {
		slog.Error("matrix corpora", "err", err)
		http.Error(w, "vectord unavailable", http.StatusBadGateway)
		return
	}
	body := map[string]any{
		"corpora": names,
		"count":   len(names),
	}
	writeJSON(w, http.StatusOK, body)
}
// decodeJSON parses the request body into v, capping reads at
// maxRequestBytes. On failure it writes the error response itself
// (413 for an oversized body, 400 otherwise) and returns false, so
// callers can simply return.
func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
	defer r.Body.Close()
	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)

	err := json.NewDecoder(r.Body).Decode(v)
	if err == nil {
		return true
	}

	// MaxBytesReader signals oversize via *http.MaxBytesError; the
	// string match also catches the same condition when it has been
	// rewrapped into a plain error by the decoder.
	var tooBig *http.MaxBytesError
	switch {
	case errors.As(err, &tooBig),
		strings.Contains(err.Error(), "http: request body too large"):
		http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
	default:
		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
	}
	return false
}
func writeJSON(w http.ResponseWriter, code int, v any) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(code)
if err := json.NewEncoder(w).Encode(v); err != nil {
slog.Warn("matrix write json", "err", err)
}
}
// writeMatrixError maps internal/matrix sentinel errors to HTTP
// statuses. Validation sentinels (empty corpora / empty query) are
// the caller's fault and return 400; corpus and embed sentinels mean
// an upstream service is what's wrong and return 502; everything
// else is a 500 with a generic body so internals don't leak.
func writeMatrixError(w http.ResponseWriter, err error) {
	if errors.Is(err, matrix.ErrEmptyCorpora) || errors.Is(err, matrix.ErrEmptyQuery) {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	if errors.Is(err, matrix.ErrCorpus) || errors.Is(err, matrix.ErrEmbed) {
		slog.Warn("matrix upstream", "err", err)
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}
	slog.Error("matrix", "err", err)
	http.Error(w, "internal", http.StatusInternalServerError)
}