53 changed files with 275 additions and 11539 deletions
--- a/cmd/gateway/main.go
+++ b/cmd/gateway/main.go
@@ -44,9 +44,6 @@ func main() {
 		"queryd_url":   cfg.Gateway.QuerydURL,
 		"vectord_url":  cfg.Gateway.VectordURL,
 		"embedd_url":   cfg.Gateway.EmbeddURL,
-		"pathwayd_url": cfg.Gateway.PathwaydURL,
-		"matrixd_url":  cfg.Gateway.MatrixdURL,
-		"observerd_url": cfg.Gateway.ObserverdURL,
 	}
 	for k, v := range upstreams {
 		if v == "" {
@@ -66,9 +63,6 @@ func main() {
 	querydURL := mustParseUpstream("queryd_url", cfg.Gateway.QuerydURL)
 	vectordURL := mustParseUpstream("vectord_url", cfg.Gateway.VectordURL)
 	embeddURL := mustParseUpstream("embedd_url", cfg.Gateway.EmbeddURL)
-	pathwaydURL := mustParseUpstream("pathwayd_url", cfg.Gateway.PathwaydURL)
-	matrixdURL := mustParseUpstream("matrixd_url", cfg.Gateway.MatrixdURL)
-	observerdURL := mustParseUpstream("observerd_url", cfg.Gateway.ObserverdURL)

 	storagedProxy := gateway.NewProxyHandler(storagedURL)
 	catalogdProxy := gateway.NewProxyHandler(catalogdURL)
@@ -76,9 +70,6 @@ func main() {
 	querydProxy := gateway.NewProxyHandler(querydURL)
 	vectordProxy := gateway.NewProxyHandler(vectordURL)
 	embeddProxy := gateway.NewProxyHandler(embeddURL)
-	pathwaydProxy := gateway.NewProxyHandler(pathwaydURL)
-	matrixdProxy := gateway.NewProxyHandler(matrixdURL)
-	observerdProxy := gateway.NewProxyHandler(observerdURL)

 	if err := shared.Run("gateway", cfg.Gateway.Bind, func(r chi.Router) {

@@ -97,12 +88,6 @@ func main() {
 		r.Handle("/v1/vectors/*", vectordProxy)
 		// Embedding service — /v1/embed
 		r.Handle("/v1/embed", embeddProxy)
-		// Pathway memory — /v1/pathway/*
-		r.Handle("/v1/pathway/*", pathwaydProxy)
-		// Matrix indexer — /v1/matrix/*  (multi-corpus retrieve+merge per SPEC §3.4)
-		r.Handle("/v1/matrix/*", matrixdProxy)
-		// Observer — /v1/observer/*  (autonomous-iteration witness loop)
-		r.Handle("/v1/observer/*", observerdProxy)
 	}, cfg.Auth); err != nil {
 		slog.Error("server", "err", err)
 		os.Exit(1)
--- a/cmd/matrixd/main.go
+++ b/cmd/matrixd/main.go
@@ -1,295 +0,0 @@
-// matrixd is the matrix indexer service. Wraps internal/matrix's
-// Retriever with HTTP routes per docs/SPEC.md §3.4.
-//
-// Routes:
-//   POST /matrix/search             — multi-corpus retrieve+merge,
-//                                      with optional playbook boost
-//   GET  /matrix/corpora            — list known vectord indexes
-//   POST /matrix/relevance          — adjacency-pollution filter
-//   POST /matrix/downgrade          — strong-model downgrade gate
-//   POST /matrix/playbooks/record   — record a single (query → answer)
-//                                      success for the learning loop
-//   POST /matrix/playbooks/bulk     — bulk-record N successes; useful
-//                                      for backfilling historical
-//                                      placement data into the
-//                                      playbook substrate
-//
-// matrixd talks to embedd (for query-text embedding) and vectord
-// (for per-corpus search) via HTTP. Both URLs come from
-// [matrixd] config; gateway sets them to its own upstream URLs so
-// matrixd inherits the same provider topology.
-package main
-
-import (
-	"encoding/json"
-	"errors"
-	"flag"
-	"log/slog"
-	"net/http"
-	"os"
-	"strings"
-
-	"github.com/go-chi/chi/v5"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/matrix"
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
-)
-
-const maxRequestBytes = 4 << 20 // 4 MiB cap on request bodies
-
-func main() {
-	configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
-	flag.Parse()
-
-	cfg, err := shared.LoadConfig(*configPath)
-	if err != nil {
-		slog.Error("config", "err", err)
-		os.Exit(1)
-	}
-	if cfg.Matrixd.EmbeddURL == "" || cfg.Matrixd.VectordURL == "" {
-		slog.Error("matrixd: embedd_url and vectord_url required in [matrixd]")
-		os.Exit(1)
-	}
-
-	retriever := matrix.New(cfg.Matrixd.EmbeddURL, cfg.Matrixd.VectordURL)
-	h := &handlers{r: retriever}
-
-	if err := shared.Run("matrixd", cfg.Matrixd.Bind, h.register, cfg.Auth); err != nil {
-		slog.Error("server", "err", err)
-		os.Exit(1)
-	}
-}
-
-type handlers struct {
-	r *matrix.Retriever
-}
-
-func (h *handlers) register(r chi.Router) {
-	r.Post("/matrix/search", h.handleSearch)
-	r.Get("/matrix/corpora", h.handleCorpora)
-	r.Post("/matrix/relevance", h.handleRelevance)
-	r.Post("/matrix/downgrade", h.handleDowngrade)
-	r.Post("/matrix/playbooks/record", h.handlePlaybookRecord)
-	r.Post("/matrix/playbooks/bulk", h.handlePlaybookBulk)
-}
-
-func (h *handlers) handleSearch(w http.ResponseWriter, r *http.Request) {
-	var req matrix.SearchRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	resp, err := h.r.Search(r.Context(), req)
-	if err != nil {
-		writeMatrixError(w, err)
-		return
-	}
-	writeJSON(w, http.StatusOK, resp)
-}
-
-// relevanceRequest is the POST /matrix/relevance body. Threshold
-// defaults to matrix.DefaultRelevanceThreshold when zero.
-type relevanceRequest struct {
-	Focus     matrix.FocusFile        `json:"focus"`
-	Chunks    []matrix.CandidateChunk `json:"chunks"`
-	Threshold float64                 `json:"threshold,omitempty"`
-}
-
-func (h *handlers) handleRelevance(w http.ResponseWriter, r *http.Request) {
-	var req relevanceRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	if len(req.Chunks) == 0 {
-		http.Error(w, "chunks must be non-empty", http.StatusBadRequest)
-		return
-	}
-	threshold := req.Threshold
-	if threshold == 0 {
-		threshold = matrix.DefaultRelevanceThreshold
-	}
-	res := matrix.FilterChunks(req.Focus, req.Chunks, threshold)
-	writeJSON(w, http.StatusOK, res)
-}
-
-// playbookRecordRequest is the POST /matrix/playbooks/record body.
-// Corpus is optional; defaults to matrix.DefaultPlaybookCorpus.
-type playbookRecordRequest struct {
-	QueryText    string   `json:"query_text"`
-	AnswerID     string   `json:"answer_id"`
-	AnswerCorpus string   `json:"answer_corpus"`
-	Score        float64  `json:"score"`
-	Tags         []string `json:"tags,omitempty"`
-	Corpus       string   `json:"corpus,omitempty"`
-}
-
-func (h *handlers) handlePlaybookRecord(w http.ResponseWriter, r *http.Request) {
-	var req playbookRecordRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	entry := matrix.NewPlaybookEntry(req.QueryText, req.AnswerID, req.AnswerCorpus, req.Score, req.Tags)
-	if err := entry.Validate(); err != nil {
-		http.Error(w, err.Error(), http.StatusBadRequest)
-		return
-	}
-	pbID, err := h.r.Record(r.Context(), entry, req.Corpus)
-	if err != nil {
-		slog.Warn("playbook record", "err", err)
-		http.Error(w, err.Error(), http.StatusBadGateway)
-		return
-	}
-	writeJSON(w, http.StatusOK, map[string]any{
-		"playbook_id":   pbID,
-		"query_text":    entry.QueryText,
-		"answer_id":     entry.AnswerID,
-		"answer_corpus": entry.AnswerCorpus,
-		"score":         entry.Score,
-	})
-}
-
-// playbookBulkRequest is the POST /matrix/playbooks/bulk body —
-// component C (operational rating wiring). Used to backfill
-// historical placement data, or batch-record a session's worth of
-// coordinator click-tracking. Each Entry is recorded independently;
-// failures are reported per-entry without aborting the batch.
-type playbookBulkRequest struct {
-	Entries []playbookRecordRequest `json:"entries"`
-	Corpus  string                  `json:"corpus,omitempty"` // applies to all if entry-level not set
-}
-
-// playbookBulkResult reports per-entry outcomes plus the aggregate
-// count. Errors include the entry index so callers can locate the
-// offending record without diffing.
-type playbookBulkResult struct {
-	Recorded int                      `json:"recorded"`
-	Failed   int                      `json:"failed"`
-	Results  []playbookBulkItemResult `json:"results"`
-}
-
-type playbookBulkItemResult struct {
-	Index      int    `json:"index"`
-	PlaybookID string `json:"playbook_id,omitempty"`
-	Error      string `json:"error,omitempty"`
-}
-
-func (h *handlers) handlePlaybookBulk(w http.ResponseWriter, r *http.Request) {
-	var req playbookBulkRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	if len(req.Entries) == 0 {
-		http.Error(w, "entries must be non-empty", http.StatusBadRequest)
-		return
-	}
-
-	out := playbookBulkResult{
-		Results: make([]playbookBulkItemResult, len(req.Entries)),
-	}
-	for i, item := range req.Entries {
-		corpus := item.Corpus
-		if corpus == "" {
-			corpus = req.Corpus
-		}
-		entry := matrix.NewPlaybookEntry(item.QueryText, item.AnswerID, item.AnswerCorpus, item.Score, item.Tags)
-		if err := entry.Validate(); err != nil {
-			out.Results[i] = playbookBulkItemResult{Index: i, Error: err.Error()}
-			out.Failed++
-			continue
-		}
-		pbID, err := h.r.Record(r.Context(), entry, corpus)
-		if err != nil {
-			out.Results[i] = playbookBulkItemResult{Index: i, Error: err.Error()}
-			out.Failed++
-			continue
-		}
-		out.Results[i] = playbookBulkItemResult{Index: i, PlaybookID: pbID}
-		out.Recorded++
-	}
-	writeJSON(w, http.StatusOK, out)
-}
-
-// downgradeRequest is the POST /matrix/downgrade body. Mirrors
-// matrix.DowngradeInput. When ForceFullOverride is omitted from
-// the body, the value falls back to matrixd's process env
-// (LH_FORCE_FULL_ENRICHMENT) — an opinionated default that lets
-// operators set the env var on the matrixd unit and have every
-// gate decision honor it without per-request changes. Per
-// 2026-04-29 cross-lineage scrum (Opus WARN): callers that want
-// deterministic gate behavior independent of matrixd's env should
-// pass ForceFullOverride explicitly in the body.
-type downgradeRequest struct {
-	Mode              string `json:"mode"`
-	Model             string `json:"model"`
-	ForcedMode        bool   `json:"forced_mode,omitempty"`
-	ForceFullOverride *bool  `json:"force_full_override,omitempty"`
-}
-
-func (h *handlers) handleDowngrade(w http.ResponseWriter, r *http.Request) {
-	var req downgradeRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	if req.Mode == "" || req.Model == "" {
-		http.Error(w, "mode and model are required", http.StatusBadRequest)
-		return
-	}
-	in := matrix.NewDowngradeInputFromEnv(req.Mode, req.Model, req.ForcedMode)
-	if req.ForceFullOverride != nil {
-		// Explicit body override beats env, useful for tooling that
-		// wants to ask "what would the gate do under these conditions"
-		// without env pollution.
-		in.ForceFullOverride = *req.ForceFullOverride
-	}
-	writeJSON(w, http.StatusOK, matrix.MaybeDowngrade(in))
-}
-
-func (h *handlers) handleCorpora(w http.ResponseWriter, r *http.Request) {
-	names, err := h.r.Corpora(r.Context())
-	if err != nil {
-		slog.Error("matrix corpora", "err", err)
-		http.Error(w, "vectord unavailable", http.StatusBadGateway)
-		return
-	}
-	writeJSON(w, http.StatusOK, map[string]any{"corpora": names, "count": len(names)})
-}
-
-func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
-	defer r.Body.Close()
-	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)
-	if err := json.NewDecoder(r.Body).Decode(v); err != nil {
-		var maxErr *http.MaxBytesError
-		if errors.As(err, &maxErr) || strings.Contains(err.Error(), "http: request body too large") {
-			http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
-			return false
-		}
-		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
-		return false
-	}
-	return true
-}
-
-func writeJSON(w http.ResponseWriter, code int, v any) {
-	w.Header().Set("Content-Type", "application/json")
-	w.WriteHeader(code)
-	if err := json.NewEncoder(w).Encode(v); err != nil {
-		slog.Warn("matrix write json", "err", err)
-	}
-}
-
-// writeMatrixError maps internal/matrix sentinels to HTTP statuses.
-// Corpus / embed failures bubble up as 502 (the upstream service is
-// what's wrong); validation errors are 400.
-func writeMatrixError(w http.ResponseWriter, err error) {
-	switch {
-	case errors.Is(err, matrix.ErrEmptyCorpora),
-		errors.Is(err, matrix.ErrEmptyQuery):
-		http.Error(w, err.Error(), http.StatusBadRequest)
-	case errors.Is(err, matrix.ErrCorpus),
-		errors.Is(err, matrix.ErrEmbed):
-		slog.Warn("matrix upstream", "err", err)
-		http.Error(w, err.Error(), http.StatusBadGateway)
-	default:
-		slog.Error("matrix", "err", err)
-		http.Error(w, "internal", http.StatusInternalServerError)
-	}
-}
--- a/cmd/observerd/main.go
+++ b/cmd/observerd/main.go
@@ -1,263 +0,0 @@
-// observerd is the autonomous-iteration witness service. Port of
-// the load-bearing pieces of mcp-server/observer.ts (Rust system).
-//
-// Routes (all under /observer):
-//   GET  /observer/health        — service liveness + ring size
-//   GET  /observer/stats         — aggregate counters + recent scenarios
-//   POST /observer/event         — record one observed op
-//
-// Deferred to follow-up commits (see internal/observer doc):
-//   - POST /observer/review (cloud-LLM hand review fall-back)
-//   - background loops (analyzeErrors, consolidatePlaybooks,
-//     tailOverseerCorrections)
-//   - failure-cluster escalation to LLM Team
-//
-// /relevance was already ported to internal/matrix in 9588bd8 and is
-// not duplicated here.
-
-package main
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"flag"
-	"fmt"
-	"log/slog"
-	"net/http"
-	"os"
-	"strings"
-	"time"
-
-	"github.com/go-chi/chi/v5"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/observer"
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/workflow"
-)
-
-const maxRequestBytes = 4 << 20 // 4 MiB cap on request bodies
-
-func main() {
-	configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
-	flag.Parse()
-
-	cfg, err := shared.LoadConfig(*configPath)
-	if err != nil {
-		slog.Error("config", "err", err)
-		os.Exit(1)
-	}
-
-	// Persistence is optional — empty path = ephemeral (matches the
-	// pathwayd pattern). Production sets a stable path under
-	// /var/lib/lakehouse/observer/ops.jsonl.
-	var persistor *observer.Persistor
-	if cfg.Observerd.PersistPath != "" {
-		persistor, err = observer.NewPersistor(cfg.Observerd.PersistPath)
-		if err != nil {
-			slog.Error("observer persistor", "err", err)
-			os.Exit(1)
-		}
-	}
-
-	store := observer.NewStore(persistor)
-	if persistor != nil {
-		n, err := store.Load()
-		if err != nil {
-			slog.Warn("observer load", "err", err, "loaded", n)
-		} else {
-			slog.Info("observer loaded", "ops", n, "path", cfg.Observerd.PersistPath)
-		}
-	}
-
-	runner := workflow.NewRunner()
-	// matrixd URL: prefer explicit observerd config field, fall back
-	// to gateway's matrixd_url so a single-toml deploy works without
-	// duplicating the address.
-	matrixdURL := cfg.Gateway.MatrixdURL
-	registerBuiltinModes(runner, store, matrixdURL)
-
-	h := &handlers{store: store, runner: runner}
-	if err := shared.Run("observerd", cfg.Observerd.Bind, h.register, cfg.Auth); err != nil {
-		slog.Error("server", "err", err)
-		os.Exit(1)
-	}
-}
-
-type handlers struct {
-	store  *observer.Store
-	runner *workflow.Runner
-}
-
-func (h *handlers) register(r chi.Router) {
-	r.Get("/observer/stats", h.handleStats)
-	r.Post("/observer/event", h.handleEvent)
-	r.Post("/observer/workflow/run", h.handleWorkflowRun)
-	r.Get("/observer/workflow/modes", h.handleWorkflowModes)
-}
-
-func (h *handlers) handleStats(w http.ResponseWriter, _ *http.Request) {
-	writeJSON(w, http.StatusOK, h.store.Stats())
-}
-
-func (h *handlers) handleEvent(w http.ResponseWriter, r *http.Request) {
-	var op observer.ObservedOp
-	if !decodeJSON(w, r, &op) {
-		return
-	}
-	if err := h.store.Record(op); err != nil {
-		if errors.Is(err, observer.ErrInvalidOp) {
-			http.Error(w, err.Error(), http.StatusBadRequest)
-			return
-		}
-		slog.Error("observer record", "err", err)
-		http.Error(w, "internal", http.StatusInternalServerError)
-		return
-	}
-	stats := h.store.Stats()
-	writeJSON(w, http.StatusOK, map[string]any{
-		"accepted":  true,
-		"ring_size": stats.Total,
-	})
-}
-
-// workflowRunRequest is the POST /observer/workflow/run body — a
-// Workflow definition in JSON form (matches Archon's YAML shape but
-// JSON-serialized for the HTTP path).
-type workflowRunRequest struct {
-	Workflow workflow.Workflow `json:"workflow"`
-}
-
-func (h *handlers) handleWorkflowRun(r http.ResponseWriter, req *http.Request) {
-	var body workflowRunRequest
-	if !decodeJSON(r, req, &body) {
-		return
-	}
-	res, err := h.runner.Run(req.Context(), body.Workflow)
-	// Record per-node provenance into the observer ring AS the
-	// workflow runs — same shape as any other ObservedOp so the
-	// existing /observer/stats aggregation surfaces workflow ops
-	// alongside scenario ops without a schema change.
-	for _, n := range res.Nodes {
-		op := observer.ObservedOp{
-			Endpoint:      "/observer/workflow/run/" + body.Workflow.Name + "/" + n.NodeID,
-			InputSummary:  fmt.Sprintf("workflow=%s node=%s mode=%s", body.Workflow.Name, n.NodeID, n.Mode),
-			Success:       n.Error == "",
-			DurationMs:    n.DurationMs,
-			OutputSummary: summarizeOutput(n.Output),
-			Source:        observer.Source("workflow"),
-			Error:         n.Error,
-			Timestamp:     n.StartedAt.UTC().Format(time.RFC3339Nano),
-		}
-		if recErr := h.store.Record(op); recErr != nil {
-			slog.Warn("workflow run: provenance record failed", "err", recErr)
-		}
-	}
-	if err != nil {
-		// Aborting errors (cycle, missing dep, unknown mode) — surface
-		// as 4xx because the workflow definition itself is wrong.
-		slog.Warn("workflow run aborted", "err", err)
-		writeJSON(r, http.StatusBadRequest, map[string]any{
-			"error":  err.Error(),
-			"result": res,
-		})
-		return
-	}
-	writeJSON(r, http.StatusOK, res)
-}
-
-func (h *handlers) handleWorkflowModes(w http.ResponseWriter, _ *http.Request) {
-	modes := h.runner.Modes()
-	writeJSON(w, http.StatusOK, map[string]any{
-		"modes": modes,
-		"count": len(modes),
-	})
-}
-
-// summarizeOutput renders a workflow node's output map for the
-// ObservedOp's OutputSummary string. Best-effort — long values get
-// truncated rather than ballooning the ring buffer's memory.
-func summarizeOutput(output map[string]any) string {
-	if output == nil {
-		return "(nil)"
-	}
-	bs, err := json.Marshal(output)
-	if err != nil {
-		return fmt.Sprintf("(marshal err: %v)", err)
-	}
-	if len(bs) > 256 {
-		return string(bs[:256]) + "...(truncated)"
-	}
-	return string(bs)
-}
-
-// registerBuiltinModes wires the modes the runner knows about. The
-// pure-function wrappers (matrix.relevance, matrix.downgrade,
-// distillation.score, drift.scorer) are direct Go calls. matrix.search
-// is HTTP-backed, pointed at the configured matrixd_url so workflows
-// can compose retrieval into multi-pass measurement chains.
-//
-// Fixture modes (fixture.echo, fixture.upper) stay registered for
-// the workflow_smoke that proves the runner mechanics independently
-// of the real modes' availability.
-//
-// Real-mode follow-ups still pending:
-//   - playbook.record (HTTP to matrixd)
-//   - playbook.lookup (HTTP to matrixd)
-//   - llm.chat (HTTP to gateway /v1/chat)
-func registerBuiltinModes(r *workflow.Runner, store *observer.Store, matrixdURL string) {
-	// Fixture modes for runner mechanics smokes.
-	r.RegisterMode("fixture.echo", func(_ workflow.Context, input map[string]any) (map[string]any, error) {
-		out := make(map[string]any, len(input))
-		for k, v := range input {
-			out[k] = v
-		}
-		return out, nil
-	})
-	r.RegisterMode("fixture.upper", func(_ workflow.Context, input map[string]any) (map[string]any, error) {
-		prompt, _ := input["prompt"].(string)
-		return map[string]any{"upper": strings.ToUpper(prompt)}, nil
-	})
-
-	// Real modes — pure-function wrappers (no I/O).
-	r.RegisterMode("matrix.relevance", workflow.MatrixRelevance)
-	r.RegisterMode("matrix.downgrade", workflow.MatrixDowngrade)
-	r.RegisterMode("distillation.score", workflow.DistillationScore)
-	r.RegisterMode("drift.scorer", workflow.DriftScorer)
-
-	// HTTP-backed modes — only register when their backend URL is set.
-	// matrixd_url defaults to a known address but tests/dev may run
-	// without matrixd.
-	if matrixdURL != "" {
-		hc := &http.Client{Timeout: 30 * time.Second}
-		r.RegisterMode("matrix.search", workflow.MatrixSearch(matrixdURL, hc))
-	}
-
-	_ = store // reserved for future modes that need self-provenance
-}
-
-// context still used in decodeJSON via http.Request.Context().
-var _ = context.Background
-
-func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
-	defer r.Body.Close()
-	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)
-	if err := json.NewDecoder(r.Body).Decode(v); err != nil {
-		var maxErr *http.MaxBytesError
-		if errors.As(err, &maxErr) || strings.Contains(err.Error(), "http: request body too large") {
-			http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
-			return false
-		}
-		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
-		return false
-	}
-	return true
-}
-
-func writeJSON(w http.ResponseWriter, code int, v any) {
-	w.Header().Set("Content-Type", "application/json")
-	w.WriteHeader(code)
-	if err := json.NewEncoder(w).Encode(v); err != nil {
-		slog.Warn("observer write json", "err", err)
-	}
-}
--- a/cmd/pathwayd/main.go
+++ b/cmd/pathwayd/main.go
@@ -1,278 +0,0 @@
-// pathwayd is the pathway memory service. Wraps internal/pathway's
-// Store with HTTP routes for the Mem0-style operations defined in
-// ADR-004.
-//
-// Routes (all under /pathway):
-//   POST /pathway/add              — new trace with fresh UID
-//   POST /pathway/add_idempotent   — UID-keyed add or replay-bump
-//   POST /pathway/update           — replace content for an existing UID
-//   POST /pathway/revise           — new revision linked to predecessor
-//   POST /pathway/retire           — mark trace retired (excluded from search)
-//   GET  /pathway/get/{uid}        — fetch one trace (incl. retired)
-//   GET  /pathway/history/{uid}    — backward chain via predecessor links
-//   POST /pathway/search           — filter-based listing
-//   GET  /pathway/stats            — total/active/retired counters
-//
-// Persistence: optional. Empty [pathwayd].persist_path = in-memory
-// only (matches vectord G1's pattern). Set a path for durable
-// per-trace JSONL append.
-package main
-
-import (
-	"encoding/json"
-	"errors"
-	"flag"
-	"log/slog"
-	"net/http"
-	"os"
-	"strings"
-
-	"github.com/go-chi/chi/v5"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/pathway"
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
-)
-
-const maxRequestBytes = 4 << 20 // 4 MiB cap on request bodies
-
-func main() {
-	configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
-	flag.Parse()
-
-	cfg, err := shared.LoadConfig(*configPath)
-	if err != nil {
-		slog.Error("config", "err", err)
-		os.Exit(1)
-	}
-
-	// Persistence is optional — empty path = in-memory ephemeral.
-	var persistor *pathway.Persistor
-	if cfg.Pathwayd.PersistPath != "" {
-		persistor, err = pathway.NewPersistor(cfg.Pathwayd.PersistPath)
-		if err != nil {
-			slog.Error("pathway persistor", "err", err)
-			os.Exit(1)
-		}
-	}
-
-	store := pathway.NewStore(persistor)
-	if persistor != nil {
-		n, err := store.Load()
-		if err != nil {
-			slog.Warn("pathway load", "err", err, "loaded", n)
-		} else {
-			slog.Info("pathway loaded", "events", n, "path", cfg.Pathwayd.PersistPath)
-		}
-	}
-
-	h := &handlers{store: store}
-
-	if err := shared.Run("pathwayd", cfg.Pathwayd.Bind, h.register, cfg.Auth); err != nil {
-		slog.Error("server", "err", err)
-		os.Exit(1)
-	}
-}
-
-type handlers struct {
-	store *pathway.Store
-}
-
-func (h *handlers) register(r chi.Router) {
-	r.Post("/pathway/add", h.handleAdd)
-	r.Post("/pathway/add_idempotent", h.handleAddIdempotent)
-	r.Post("/pathway/update", h.handleUpdate)
-	r.Post("/pathway/revise", h.handleRevise)
-	r.Post("/pathway/retire", h.handleRetire)
-	r.Get("/pathway/get/{uid}", h.handleGet)
-	r.Get("/pathway/history/{uid}", h.handleHistory)
-	r.Post("/pathway/search", h.handleSearch)
-	r.Get("/pathway/stats", h.handleStats)
-}
-
-// ── request shapes ───────────────────────────────────────────────
-
-type addRequest struct {
-	Content json.RawMessage `json:"content"`
-	Tags    []string        `json:"tags,omitempty"`
-}
-
-type addIdempotentRequest struct {
-	UID     string          `json:"uid"`
-	Content json.RawMessage `json:"content"`
-	Tags    []string        `json:"tags,omitempty"`
-}
-
-type updateRequest struct {
-	UID     string          `json:"uid"`
-	Content json.RawMessage `json:"content"`
-}
-
-type reviseRequest struct {
-	PredecessorUID string          `json:"predecessor_uid"`
-	Content        json.RawMessage `json:"content"`
-	Tags           []string        `json:"tags,omitempty"`
-}
-
-type retireRequest struct {
-	UID string `json:"uid"`
-}
-
-type searchRequest struct {
-	Tag             string `json:"tag,omitempty"`
-	ContentContains string `json:"content_contains,omitempty"`
-	CreatedAfterNs  int64  `json:"created_after_ns,omitempty"`
-	CreatedBeforeNs int64  `json:"created_before_ns,omitempty"`
-	IncludeRetired  bool   `json:"include_retired,omitempty"`
-}
-
-// ── handlers ────────────────────────────────────────────────────
-
-func (h *handlers) handleAdd(w http.ResponseWriter, r *http.Request) {
-	var req addRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	tr, err := h.store.Add(req.Content, req.Tags...)
-	if writeStoreError(w, err) {
-		return
-	}
-	writeJSON(w, http.StatusCreated, tr)
-}
-
-func (h *handlers) handleAddIdempotent(w http.ResponseWriter, r *http.Request) {
-	var req addIdempotentRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	tr, err := h.store.AddIdempotent(req.UID, req.Content, req.Tags...)
-	if writeStoreError(w, err) {
-		return
-	}
-	writeJSON(w, http.StatusOK, tr)
-}
-
-func (h *handlers) handleUpdate(w http.ResponseWriter, r *http.Request) {
-	var req updateRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	if err := h.store.Update(req.UID, req.Content); writeStoreError(w, err) {
-		return
-	}
-	writeJSON(w, http.StatusOK, map[string]any{"status": "updated"})
-}
-
-func (h *handlers) handleRevise(w http.ResponseWriter, r *http.Request) {
-	var req reviseRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	tr, err := h.store.Revise(req.PredecessorUID, req.Content, req.Tags...)
-	if writeStoreError(w, err) {
-		return
-	}
-	writeJSON(w, http.StatusCreated, tr)
-}
-
-func (h *handlers) handleRetire(w http.ResponseWriter, r *http.Request) {
-	var req retireRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	if err := h.store.Retire(req.UID); writeStoreError(w, err) {
-		return
-	}
-	w.WriteHeader(http.StatusNoContent)
-}
-
-func (h *handlers) handleGet(w http.ResponseWriter, r *http.Request) {
-	uid := chi.URLParam(r, "uid")
-	tr, err := h.store.Get(uid)
-	if writeStoreError(w, err) {
-		return
-	}
-	writeJSON(w, http.StatusOK, tr)
-}
-
-func (h *handlers) handleHistory(w http.ResponseWriter, r *http.Request) {
-	uid := chi.URLParam(r, "uid")
-	chain, err := h.store.History(uid)
-	if writeStoreError(w, err) {
-		return
-	}
-	writeJSON(w, http.StatusOK, map[string]any{
-		"chain":  chain,
-		"length": len(chain),
-	})
-}
-
-func (h *handlers) handleSearch(w http.ResponseWriter, r *http.Request) {
-	var req searchRequest
-	if !decodeJSON(w, r, &req) {
-		return
-	}
-	results := h.store.Search(pathway.SearchFilter{
-		Tag:             req.Tag,
-		ContentContains: req.ContentContains,
-		CreatedAfterNs:  req.CreatedAfterNs,
-		CreatedBeforeNs: req.CreatedBeforeNs,
-		IncludeRetired:  req.IncludeRetired,
-	})
-	writeJSON(w, http.StatusOK, map[string]any{
-		"results": results,
-		"count":   len(results),
-	})
-}
-
-func (h *handlers) handleStats(w http.ResponseWriter, _ *http.Request) {
-	writeJSON(w, http.StatusOK, h.store.Stats())
-}
-
-// ── helpers ────────────────────────────────────────────────────
-
-func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
-	defer r.Body.Close()
-	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)
-	if err := json.NewDecoder(r.Body).Decode(v); err != nil {
-		var maxErr *http.MaxBytesError
-		if errors.As(err, &maxErr) || strings.Contains(err.Error(), "http: request body too large") {
-			http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
-			return false
-		}
-		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
-		return false
-	}
-	return true
-}
-
-func writeJSON(w http.ResponseWriter, code int, v any) {
-	w.Header().Set("Content-Type", "application/json")
-	w.WriteHeader(code)
-	if err := json.NewEncoder(w).Encode(v); err != nil {
-		slog.Warn("pathway write json", "err", err)
-	}
-}
-
-// writeStoreError maps internal/pathway sentinel errors to HTTP
-// status codes. Returns true if a response was written (caller
-// should return). Returns false on success (caller continues).
-func writeStoreError(w http.ResponseWriter, err error) bool {
-	if err == nil {
-		return false
-	}
-	switch {
-	case errors.Is(err, pathway.ErrNotFound):
-		http.Error(w, err.Error(), http.StatusNotFound)
-	case errors.Is(err, pathway.ErrPredecessorMissing):
-		http.Error(w, err.Error(), http.StatusNotFound)
-	case errors.Is(err, pathway.ErrEmptyUID),
-		errors.Is(err, pathway.ErrInvalidContent):
-		http.Error(w, err.Error(), http.StatusBadRequest)
-	case errors.Is(err, pathway.ErrCycle):
-		http.Error(w, err.Error(), http.StatusConflict)
-	default:
-		slog.Error("pathway store", "err", err)
-		http.Error(w, "internal", http.StatusInternalServerError)
-	}
-	return true
-}
--- a/cmd/vectord/main.go
+++ b/cmd/vectord/main.go
@@ -274,18 +274,21 @@ func (h *handlers) handleAdd(w http.ResponseWriter, r *http.Request) {
 			return
 		}
 	}
-	// Pre-validation above is exhaustive (id, dim, finite, zero-norm),
-	// so BatchAdd takes the write-lock once and pushes the whole batch
-	// into coder/hnsw via one variadic Graph.Add. Saves N-1 lock
-	// acquisitions per HTTP batch.
-	batch := make([]vectord.BatchItem, len(req.Items))
 	for j, it := range req.Items {
-		batch[j] = vectord.BatchItem{ID: it.ID, Vector: it.Vector, Metadata: it.Metadata}
-	}
-	if err := idx.BatchAdd(batch); err != nil {
-		slog.Error("batch add", "name", name, "err", err)
-		http.Error(w, "internal", http.StatusInternalServerError)
-		return
+		if err := idx.Add(it.ID, it.Vector, it.Metadata); err != nil {
+			// Vector-validation errors (NaN/Inf, zero-norm under
+			// cosine) only surface here; pre-validation is intentionally
+			// minimal in scope (id + dim only).
+			if errors.Is(err, vectord.ErrDimensionMismatch) ||
+				strings.Contains(err.Error(), "non-finite") ||
+				strings.Contains(err.Error(), "zero-norm") {
+				http.Error(w, "items["+strconv.Itoa(j)+"]: "+err.Error(), http.StatusBadRequest)
+				return
+			}
+			slog.Error("add", "name", name, "id", it.ID, "err", err)
+			http.Error(w, "internal", http.StatusInternalServerError)
+			return
+		}
 	}
 	// One save per batch (post-loop), not per item. Per scrum
 	// O-W4-style discipline: HTTP-batch boundary is the natural unit.
--- a/docs/DECISIONS.md
+++ b/docs/DECISIONS.md
@ -242,123 +242,6 @@ need rotate-without-restart.

 ---

-## ADR-004: Pathway memory data model — Mem0-style versioned traces
-**Date:** 2026-04-29
-**Decided by:** J + Claude
-**Status:** Decided — substrate landing in `internal/pathway/`
-
-**Decision:** Pathway memory is an append-only event log of opaque
-traces with Mem0-style semantics: Add / Update / Revise / Retire /
-History / Search. Each trace has a UID; revisions chain backward
-via `predecessor_uid` so the full history is reconstructible.
-Persistence is JSONL append-only with full-replay on load;
-corruption recovery skips bad lines without halting startup.
-
-### Operations
-
-| Op | Effect |
-|---|---|
-| `Add(content, tags...)` | New UID, stored fresh, replay_count=1. |
-| `AddIdempotent(uid, content, tags...)` | If UID exists → replay_count++. Else → Add with that UID. |
-| `Update(uid, content)` | In-place content replacement (same UID). Bumps `updated_at_ns`. NOT a revision — same trace, new content. |
-| `Revise(predecessorUID, content, tags...)` | New UID with `predecessor_uid` set. Old trace stays accessible via History. Failure modes: predecessor missing → error; predecessor retired → still allowed (revisions of retired traces are valid). |
-| `Retire(uid)` | Sets `retired=true`. Excluded from `Search` by default; still accessible via `Get` and `History`. |
-| `Get(uid)` | Returns the trace (including if retired); error on missing. |
-| `History(uid)` | Walks `predecessor_uid` chain backward, returns slice [self, parent, grandparent, ...]. Cycle-detected via visited-set; returns error on cycle (which only happens if persistence file was hand-edited). |
-| `Search(filter)` | Returns matching traces. Default excludes retired; opt in via `IncludeRetired: true`. Filters: tag-match, content-substring, time range. |
-
-### Why Mem0-style + Why these specific ops
-
- **Mem0** (memory pattern from the OpenAI Memories paper / Mem0 lib)
-  is the canonical "agent memory" interface for the same reason
-  Markdown is the canonical text format: it's the lowest-common-
-  denominator that the entire ecosystem assumes. Adopting it lets
-  agent loops written against any Mem0-aware substrate work here.
- Update vs Revise are deliberately separate. Update is "I noticed
-  a typo in my note." Revise is "I now believe something different
-  than I did when I wrote this; preserve the old belief for audit."
-  Conflating them loses the audit trail.
- Retire vs Delete is deliberate. Retire stops a trace from
-  surfacing in search but preserves it for history reconstruction.
-  Delete (which we don't expose) would break references.
-
-### Trace data shape
-
-```go
-type Trace struct {
-    UID            string          // UUID v4 unless caller provides one
-    Content        json.RawMessage // opaque, schema is caller's contract
-    PredecessorUID string          // empty if root revision
-    CreatedAtNs    int64
-    UpdatedAtNs    int64
-    Retired        bool
-    ReplayCount    int             // ≥1 for any stored trace
-    Tags           []string        // for Search
-}
-```
-
-`Content` is opaque JSON (not a struct) so callers can store any
-shape — the data model doesn't constrain semantics. Callers add
-their own validators on top.
-
-### Persistence
-
-JSONL append-only log under `_pathway/<store_name>.jsonl`. Each
-mutation appends one JSON line:
-
-```
-{"op":"add",     "trace":{...}}
-{"op":"update",  "uid":"…",   "content":"…"}
-{"op":"revise",  "trace":{…}}    # trace.PredecessorUID is set
-{"op":"retire",  "uid":"…"}
-{"op":"replay",  "uid":"…"}     # idempotent re-add hit
-```
-
-On startup, replay every line in order, building in-memory state.
-A malformed line logs a warn and is skipped; load continues.
-Corruption tolerance is non-optional — partial state is better
-than no state for an agent substrate.
-
-Compaction is a future concern. A 100K-trace log replays in
-seconds; below that scale, JSONL append is the simplest correct
-choice. When compaction lands, the format will be: snapshot file
-(full state JSON) + tail JSONL since snapshot. Detect snapshot,
-load it, then replay tail.
-
-### Cycle safety
-
-UIDs are generated server-side via `uuid.New()` (existing dep —
-catalogd uses it). New UID for every Add and Revise. The data
-model itself can't form cycles — every Revise points at an
-EXISTING uid, and the new uid didn't exist a moment ago.
-
-History walks defensively anyway: visited-set tracks UIDs seen
-this walk; if we encounter a duplicate, return error. Protects
-against corruption (manual edit, bug in a future op) without
-constraining the happy path.
-
-### Storage location
-
-JSONL file path is configurable per store. Default:
-`/var/lib/lakehouse/pathway/<name>.jsonl` for prod; tests use
-`t.TempDir()`. Persistence is OPTIONAL — empty path means
-in-memory only (matches vectord G1's pattern).
-
-### What this ADR does NOT do
-
- **No HTTP surface decision.** Whether `cmd/pathwayd` is its own
-  binary or routes get added to `cmd/vectord` is the next ADR's
-  concern. The substrate is a pure library either way.
- **No vector index integration.** Pathway traces can carry a
-  vector embedding in `Content` (caller decides), but this ADR
-  doesn't define how the substrate integrates with `vectord`'s
-  HNSW indexes. That's the staffing co-pilot's design problem
-  when those layers compose.
- **No agent-loop semantics.** "When does an agent ADD vs
-  REVISE?" is a workflow decision, not a substrate decision.
-
---
-
-(Future ADRs from ADR-005 onward will be added as the Go
-implementation accrues design decisions — e.g. observer fail-safe
-semantics, distillation rebuild, gRPC adapter wire format, etc.)
+(Future ADRs from ADR-004 onward will be added as the Go
+implementation accrues design decisions — e.g. HNSW parameter
+choices, pathway-memory hash function, auditor model rotation.)
--- a/docs/PRD.md
+++ b/docs/PRD.md
@ -9,61 +9,6 @@ estimates, library choices, and acceptance gates.

 ---

-## Product vision — what we're actually building
-
-**The Go refactor isn't the goal. The goal is a small-model-driven autonomous pipeline that gets better with each run, with frontier models in audit/oversight and humans triaged in only for the genuinely abstract cases.**
-
-The Rust Lakehouse already has most of the pieces:
- **Pathway memory** (`internal/pathway` in Go, 88 Rust traces preserved) — what we tried, what worked
- **Matrix indexer** (SPEC §3.4) — multi-corpus retrieve+merge that gives the small model the right knowledge slice for *this* task
- **Observer** — watches runs, refines configs, escalates
- **Distillation v1.0.0** (`e7636f2`) — turns successful runs into denser playbooks
- **Auditor cross-lineage fabric** — Kimi/Haiku/Opus oversight on small-model outputs
-
-What the Go refactor is FOR: a second-language pass surfaces architectural weaknesses that Rust hid. The pipeline has to pull together cleanly *as a pipeline* — not as 15 crates that happen to interact.
-
-### The five-loop substrate
-
-1. **Knowledge pathway loop** — pathway memory + matrix indexer give the small model context for the task. Pathway answers "what worked last time?"; matrix answers "what's relevant now?"
-2. **Execution loop** — small model runs on focused context. Frontier API calls are reserved for audit/escalation, not the inner loop. Cost + rate limits stay sane.
-3. **Observer loop** — watches each run, refines the configs (matrix corpus picks, downgrade gate, prompt mold) that got the model to a good pathway. Outputs new config, not new prompt.
-4. **Rating + distillation loop** — successful outcomes get scored and folded back into the playbook substrate. The playbook gets denser; the next run starts smarter.
-5. **Drift loop** — quantify when the distilled playbook stops matching reality (codebase changed, contracts shifted, profiles updated). Drift is a *measured* signal, not "hope nothing broke."
-
-### The gate
-
-**The playbook + matrix indexer must produce the results we're looking for.** That's the single load-bearing acceptance criterion. Throughput, scaling, code elegance — all secondary. If a deep-field reality test on the 500K corpus surfaces wrong answers, the loop isn't working and we fix that before adding anything else.
-
-### Observer as system resource (clarified 2026-04-29)
-
-The observer is not a service among services — it's a *system
-resource*. Its job is to be objective about the process: watch
-everything, record measurements, surface what worked vs what
-didn't, feed the KB so the playbook substrate can decide the
-right pathway to the correct outcome.
-
-The bare-bones observerd shipped in `bc9ab93` (event ingest +
-stats) is the substrate for this. The architectural pattern
-that grows it into the full "objective measurement engine" is
-the **multi-pass workflow runner** documented in SPEC §3.8 —
-inspired by Archon (`/home/profit/external/Archon`) and proven
-in the Rust `observer-kb` branch's Python prototypes (`deep_analysis.py`,
-`extract_knowledge.py`, `process_knowledge.py`).
-
-The pipeline mode-chain (extract → validator → hallucination →
-consensus → redteam → pipeline → render) IS how the observer
-makes actionable decisions: each mode pass is a deterministic
-measurement; what survives the gauntlet is what feeds the KB.
-
-### Triage / human-in-loop
-
-Most cases are abstract enough that small-model + pathway + matrix can complete them. Some can't — they need a human. The system's job is to **identify which is which** and only escalate the second class. Frontier models partially solve this internally with their thinking loops; we're externalizing it so:
- Small models are swappable (vendor independence)
- Drift is measurable (quantitative signal, not vibes)
- Each loop iteration is auditable (the pathway memory IS the audit trail)
-
-This is what the auditor cross-lineage fabric proves out in Rust — Opus auto-promote on diffs >100k chars is the same pattern: triage by signal, not by guesswork.
-
 ## Direction pivot — why this PRD exists

 The Rust-first Lakehouse (15 crates, ~24 unmerged commits past PR #11,
--- a/docs/SPEC.md
+++ b/docs/SPEC.md
@ -28,7 +28,6 @@ Effort scale (one engineer-week = ~40h focused work):
 | `queryd` | datafusion, arrow | `cmd/queryd` | **`duckdb/duckdb-go/v2`** (cgo, official) | **HARD** | high — see §3 |
 | `ingestd` | csv, json, lopdf, postgres | `cmd/ingestd` | stdlib `encoding/csv`, `encoding/json`, `pdfcpu/pdfcpu`, `jackc/pgx/v5` | **L** | low |
 | `vectord` | hora, arrow, hnsw | `cmd/vectord` | `coder/hnsw`, `apache/arrow-go/v18` | **L** | medium — re-validate HNSW recall |
-| **matrix indexer** (emergent in Rust — `mode.rs` + `build_*_corpus.ts` + observer `/relevance`) | scripts/build_*_corpus.ts, crates/gateway/src/v1/mode.rs, mcp-server/observer.ts | `internal/matrix/` + gateway routes (`/v1/matrix/*`) | stdlib + vectord client | **L** | medium — see §3.4. Corpus-as-shard composer; relevance filter; strong-model downgrade gate; multi-corpus retrieve+merge. The learning-loop layer that lifts vectord from "static index" to "meta-index that learns from playbooks." |
 | `vectord-lance` | lance | **DROPPED** | n/a | n/a | n/a — Parquet+HNSW only |
 | `journald` | parquet, arrow | `cmd/journald` | `apache/arrow-go/v18` | **M** | low |
 | `aibridge` | reqwest | library | `net/http` + connection pool · `anthropics/anthropic-sdk-go` available for direct Claude calls (currently routed via opencode) | **S** | low |
@ -117,287 +116,6 @@ needs revisiting in Go to confirm the sidecar format we ship.
 - G3.2.C — Recall@10 within 2% of Rust baseline on
  `lakehouse_arch_v1`

-### §3.4 — Matrix indexer (corpus-as-shard composer)
-
-**What it is.** The matrix indexer is the layer above `vectord` that
-turns a fleet of single-corpus HNSW indexes into a learning meta-index.
-In the Rust system this is emergent — split between corpus builders
-(`scripts/build_*_corpus.ts`), the mode runner (`crates/gateway/src/v1/mode.rs`),
-the observer relevance endpoint (`mcp-server/observer.ts`), and the
-strong-model downgrade gate (`mode.rs::execute`). In Go we name it
-explicitly so future sessions don't reduce it to "vectord."
-
-**Why corpus-as-shard, not shard-by-id.** Sharding a single index by
-hash(id) is a pure throughput hack with a recall tax. Sharding by
-corpus is the existing retrieval shape — `lakehouse_arch_v1`,
-`lakehouse_symbols_v1`, `scrum_findings_v1`, `lakehouse_answers_v1`,
-`kb_team_runs_v1`, `successful_playbooks_live`, etc. — each with
-distinct topology and a distinct retrieval intent. Concurrent Adds
-parallelize naturally because they go to different corpora; the
-matrix layer's job is to retrieve+merge across them, filter for
-relevance, and downgrade composition when strong models prove the
-matrix is anti-additive.
-
-**Components to port (in dependency order):**
-
-1. **Corpus builders** — Go equivalents of `scripts/build_*_corpus.ts`.
-   For each named corpus, a builder that reads source, splits into
-   chunks per the corpus's schema, embeds via `/v1/embed`, and adds
-   to a vectord index of the same name. Effort: **M** for the first
-   builder, **S** for each subsequent.
-
-2. **Multi-corpus retrieve+merge** (`internal/matrix/retrieve.go`) —
-   given a query and a list of corpus names, search each at top_k=K,
-   merge by score, return top N globally. Match Rust's pattern:
-   top_k=6 per corpus, top 8 globally before relevance filter.
-
-3. **Relevance filter** (`internal/matrix/relevance.go`) — port the
-   threshold-based filter from `mcp-server/observer.ts:/relevance`.
-   Drops adjacency-pollution chunks that share a corpus with the hit
-   but aren't actually about the query. `LH_RELEVANCE_FILTER` /
-   `LH_RELEVANCE_THRESHOLD` env knobs preserved.
-
-4. **Strong-model downgrade gate** (`internal/matrix/downgrade.go`) —
-   port `is_weak_model` + the `codereview_lakehouse → codereview_isolation`
-   flip from `mode.rs::execute`. Pass5 proved composed corpora lose
-   5/5 vs isolation on grok-4.1-fast (p=0.031); the gate is
-   load-bearing for paid-model retrieval quality.
-
-5. **Learning-loop integration** — write outcomes back to a
-   playbook-memory corpus (probably `lakehouse_answers_v1` analogue).
-   This is what makes the matrix INDEX a learning system rather than
-   static retrieval. Per `feedback_meta_index_vision.md`: this is the
-   north star, not the data structure.
-
-**Gateway routes:** `/v1/matrix/search` (multi-corpus retrieve+merge),
-`/v1/matrix/corpora` (list + metadata), `/v1/matrix/relevance` (filter
-endpoint, used by both internal callers and external tooling).
-
-**Acceptance gates:**
- G3.4.A — `/v1/matrix/search` against ≥3 corpora returns merged top-N
-  with corpus attribution per result.
- G3.4.B — Relevance filter drops at least the threshold-margin chunks
-  on a known adjacency-pollution test case.
- G3.4.C — Strong-model downgrade gate flips composed→isolation when
-  the model is non-weak; bypassed when caller sets `force_mode`.
- G3.4.D — Concurrent Adds across N=4 corpora parallelize (no shared
-  write-lock); Add throughput scales near-linearly with corpus count.
-
-**Persistence:** each corpus's vectord index persists via the existing
-G1P LHV1 format. The matrix layer is stateless above that — corpus
-list lives in catalog, retrieval params in config.
-
-**Why this is its own §3.x:** in Rust the matrix indexer was emergent
-and got reduced to "we have vectord" in earlier port-planning. The
-SPEC names it explicitly so the port preserves the multi-corpus
-retrieval shape AND the learning loop, not just the HNSW substrate.
-
-### §3.5 — Drift quantification (loop 5 of the PRD)
-
-**What it is.** PRD names "drift" as the 5th loop: quantify when
-historical decisions stop matching current reality. Distinct from
-the rating+distillation loop because drift is MEASUREMENT, not
-LEARNING. The learning loop says "this match worked, remember it";
-the drift loop says "this 4-month-old playbook entry — does it
-still match what the substrate would surface today?"
-
-**What's shipped (commit `be65f85`):**
-  - SCORER drift: re-runs current `distillation.ScoreRecord` over
-    historical (EvidenceRecord, persisted_category) pairs and
-    reports mismatches + a sorted shift matrix
-  - `internal/drift/drift.go` — pure-function `ComputeScorerDrift`
-  - 6 unit tests covering no-drift, shift detection, multi-shift
-    sorted-by-count, includeEntries flag, empty input, scorer-version
-    stamping
-
-**Future drift shapes (not shipped):**
-  - PLAYBOOK drift: re-run playbook queries through current
-    matrix-search; recorded answer not in top-K = drift
-  - EMBEDDING drift: KS-test on vector distribution at T1 vs T2
-  - AUDIT BASELINE drift: matches Rust `audit_baselines.jsonl`
-    longitudinal signal
-
-**Acceptance gates:**
- G3.5.A — A scorer-version bump triggers a non-zero `Drifted` count
-  on a corpus of historical ScoredRuns where the new logic produces
-  different categories than the persisted ones.
- G3.5.B — `ScorerDriftReport.ShiftMatrix` is deterministic-ordered
-  (count desc, ties broken alphabetically) so JSON output is stable
-  across runs.
-
-### §3.6 — Staffing-side structured filter
-
-**What it is.** Reality tests on the candidates + workers corpora
-(commits `0d1553c`, `a97881d`) surfaced that pure semantic retrieval
-can't gate by location/status/availability — the matrix indexer
-returns Production Workers for a Forklift+OSHA-30 query because
-nomic-embed-text's geometry doesn't separate the role labels well.
-Structured filtering is the addressable piece: pre-filter the
-candidate set on metadata fields BEFORE semantic ranking.
-
-**What's shipped (commit `b199093`):**
-  - `SearchRequest.MetadataFilter` — `map[string]any` of metadata
-    field → expected value (single value or list-of-values for OR
-    semantics within a key, AND across keys)
-  - Post-retrieval filter applied before top-K truncation in
-    `internal/matrix/retrieve.go`
-  - `SearchResponse.MetadataFilterDropped` for telemetry on filter
-    aggressiveness
-  - 7 unit tests covering nil filter, missing metadata, exact match,
-    AND across keys, OR within list, bool match, malformed JSON
-
-**Deferred:**
-  - Pre-retrieval SQL gate via `queryd` (the actual hybrid). The
-    post-retrieval filter is an MVP that helps when the candidate
-    set is mostly relevant; for aggressive filters that drop most
-    results, a SQL pre-filter into matrix retrieval would surface
-    the right candidates with less wasted embedding work.
-  - Filter language richer than equality (e.g. range, prefix, regex).
-
-**Acceptance gates:**
- G3.6.A — `MetadataFilter: {"state": "IL"}` against a mixed-state
-  corpus drops every non-IL result; `MetadataFilterDropped` reports
-  the count.
- G3.6.B — List filter `{"state": ["IL", "WI"]}` keeps both states,
-  drops the rest (OR within key).
- G3.6.C — Multi-key filter is AND: a result missing any key is
-  dropped, no exception.
-
-### §3.7 — Operational rating wiring
-
-**What it is.** PRD loop 4 (rating + distillation) needs real
-inflows to be a learning system rather than a substrate. The
-playbook-record endpoint (`06e7152`) takes one (query, answer,
-score) per call; productizing it into actual signal sources is what
-makes the system get smarter with use.
-
-**What's shipped (commit `6392772`):**
-  - `POST /v1/matrix/playbooks/bulk` — bulk-record N successes;
-    per-entry success/failure response so callers can see which of
-    a 4,701-row historical placement import succeeded vs which
-    failed validation.
-  - Single-record path from `06e7152` unchanged.
-
-**Deferred:**
-  - UI shim for click-tracking (no Go demo UI yet — the Bun demo at
-    `devop.live/lakehouse/` is still serving the public surface).
-    When the Go UI lands or a feedback API is added to the Bun UI,
-    every coordinator click → bulk-batched POST → playbook entry.
-  - Negative feedback (this match didn't work). Currently only
-    positive scores are recorded; a rejection signal would help the
-    learning loop avoid pushing bad matches.
-  - Time-decay on playbook scores so stale recommendations attenuate.
-
-**Acceptance gates:**
- G3.7.A — Bulk POST of N entries returns `{recorded, failed,
-  results[]}` with per-entry IDs/errors, no single-entry failure
-  aborting the batch.
- G3.7.B — Each recorded entry surfaces in `/v1/matrix/search` with
-  `use_playbook=true` after a re-query.
-
-### §3.8 — Observer-KB workflow runner (Archon-style multi-pass)
-
-**What it is.** The architectural pattern documented in the Rust
-`observer-kb` branch (10 commits ahead of main, never merged) and
-proven by `/home/profit/external/Archon`'s workflow engine. Multiple
-mode passes processing data, with each pass an objective measurement
-that contributes to the KB:
-
-```
-Raw data
-   ↓ Mode: EXTRACT       structured facts/entities/relationships
-   ↓ Mode: VALIDATOR     fact-check, confidence 1-10
-   ↓ Mode: HALLUCINATION verify each claim, flag likely fabrications
-   ↓ Mode: CONSENSUS     multiple passes until extraction converges
-   ↓ Mode: REDTEAM       attack what survived, patch what fails
-   ↓ Mode: PIPELINE      clean → Q&A structure → topic group → rank
-   ↓ RENDER              curated doc anchored on questions
-```
-
-This is the *orchestrator* missing from §3.4 components 1-5: each
-SPEC §3.4 piece (relevance, downgrade, scorer, drift) is a "mode";
-what's missing is the workflow engine that chains them.
-
-**Why it matters.** Per the PRD's product vision: the observer
-should make actionable decisions based on watching what's
-successful. The workflow runner is how observers compose modes
-into multi-pass pipelines that score outcomes rigorously enough
-to feed the KB and inform the playbook substrate.
-
-**Reference materials on the system:**
-  - `/home/profit/lakehouse/.archon/workflows/lakehouse-architect-review.yaml`
-    (committed `69919d9` in main) — proves Archon-via-Lakehouse
-    works with a 3-node `shape → weakness → improvement` workflow
-  - `/home/profit/external/Archon` — the upstream workflow engine
-    (cloned 2026-04-26); `packages/providers/src/community/pi/provider.ts`
-    has the local Lakehouse-routing mod committed locally as
-    `3f2afc8` (not pushed to upstream `coleam00/Archon`)
-  - Rust `observer-kb` branch (10 commits, +4338/-55506 LoC) —
-    `apps/observer-kb/docs/PRD.md` documents the multi-pass
-    architecture; `scripts/{deep_analysis,extract_knowledge,process_knowledge}.py`
-    are the Python prototypes that proved it on real ChatGPT/Claude
-    PDF data (496 topics, 300 decisions, 100 insights extracted)
-
-**Components to port (in dependency order):**
-
-1. **Workflow definition** (`internal/workflow/types.go`) — YAML
-   schema matching Archon's shape: `name`, `description`, `provider`,
-   `model`, list of `nodes` each with `id`, `prompt`, `allowed_tools`,
-   `effort`, `idle_timeout`, `depends_on`. The depends_on edges form
-   a DAG; the runner resolves topologically.
-
-2. **Node executor** (`internal/workflow/runner.go`) — given a
-   workflow and a starting context, walks the DAG, executes each
-   node by dispatching to the configured backend (matrix.Search,
-   distillation.ScoreRecord, drift.ComputeScorerDrift, or a generic
-   prompt-against-LLM via gateway `/v1/chat`), captures per-node
-   output, makes it available as `$<node_id>.output` in subsequent
-   nodes.
-
-3. **Provenance recording** — every node execution lands an
-   ObservedOp (via the observerd substrate from `bc9ab93`) with
-   `source: "workflow"`, the workflow name + node ID, input/output
-   summaries, and timing. The ring buffer + JSONL log become the
-   substrate for the rating+distillation loop's KB feed.
-
-4. **Mode catalog** (`internal/workflow/modes.go`) — registry of
-   the modes the runner can dispatch to. Each mode is a Go function
-   matching a uniform `func(ctx, input map[string]any) (map[string]any, error)`
-   signature so workflows can compose them. Initial modes from
-   §3.4: `matrix.search`, `matrix.relevance`, `matrix.downgrade`,
-   `playbook.record`, `playbook.lookup`, `distillation.score`,
-   `drift.scorer`. Plus `llm.chat` for free-form mode prompts.
-
-5. **HTTP surface** — `POST /v1/observer/workflow/run` accepts a
-   workflow YAML body + a starting context; returns the per-node
-   results + the chain of ObservedOps generated. `GET
-   /v1/observer/workflow/list` lists workflows in a known directory
-   for operator discoverability.
-
-**Why integrate into observerd, not a new service.** The observer
-is the system resource that watches and records. Workflows ARE
-observation patterns — multi-step processes whose every step is
-recorded. Putting the runner inside observerd keeps the
-"measurement → KB feed" wiring tight; a separate service would
-re-implement the recording layer.
-
-**Acceptance gates:**
- G3.8.A — Load a workflow YAML matching the Archon `lakehouse-architect-review.yaml`
-  shape; runner executes the 3-node DAG topologically.
- G3.8.B — Each node execution lands an ObservedOp with
-  `source: "workflow"` and the node's input/output. Stats endpoint
-  shows the workflow ops.
- G3.8.C — A node referencing `$<prior_node>.output` in its prompt
-  resolves correctly; missing reference is a clear error not a
-  silent empty string.
- G3.8.D — Mode catalog dispatches `matrix.search` invocation to
-  the matrixd backend without going through HTTP (in-process
-  function call when matrixd is co-resident).
-
-**Status:** PORT TARGET, not yet started. SPEC commits the design;
-implementation is its own wave (estimated **L** effort given the
-DAG runner + mode dispatch + provenance recording).
-
 ### §3.3 — UI (HTMX)

 **Approach:** server-rendered Go templates using `html/template`,
--- a/internal/corpusingest/ingest.go
+++ b/internal/corpusingest/ingest.go
@ -1,437 +0,0 @@
-// Package corpusingest is the generalized text→vector ingestion
-// pipeline. Originally extracted from scripts/staffing_500k/main.go;
-// reusable by any corpus-builder script that needs to embed a stream
-// of (id, text, metadata) rows and push them into a vectord index.
-//
-// Design: per-corpus Source impls own the parsing/column-mapping;
-// this package owns the parallel-embed dispatcher, batching, vectord
-// index lifecycle, and progress reporting. Adding a corpus is one
-// Source struct + one main.go that calls Run; no copy-pasted pipeline.
-//
-// Per docs/SPEC.md §3.4 component 1 (corpus builders): this is the
-// substrate the rest of the matrix indexer's value depends on. Get
-// the pipeline right, then iterate on builders.
-package corpusingest
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"log/slog"
-	"net/http"
-	"sync"
-	"sync/atomic"
-	"time"
-)
-
-// Row is one logical document in a corpus. Metadata may be any
-// JSON-marshalable value (struct, map, json.RawMessage); the library
-// marshals once per row before pushing to vectord.
-type Row struct {
-	ID       string
-	Text     string
-	Metadata any
-}
-
-// Source produces a stream of rows. Source lifecycle (open/close) is
-// owned by the caller; this package only consumes Next() until io.EOF.
-type Source interface {
-	// Next returns the next row or io.EOF when the source is drained.
-	// Other errors cause Run to abort with the error wrapped.
-	Next() (Row, error)
-}
-
-// Config drives one Run. Defaults match the Ollama-on-A4000 sweet
-// spot from the 500K validation; override per-deployment if needed.
-type Config struct {
-	GatewayURL   string // default "http://127.0.0.1:3110"
-	IndexName    string // required
-	Dimension    int    // required, must match the embed model output
-	Distance     string // default "cosine"
-	EmbedModel   string // optional; empty = embedd's default
-	EmbedBatch   int    // default 16, texts per /v1/embed call
-	EmbedWorkers int    // default 8, parallel embed goroutines
-	AddBatch     int    // default 1000, items per /v1/vectors/index/add call
-	Limit        int    // 0 = no limit (process all rows)
-	DropExisting bool   // true = DELETE index first; false = idempotent reuse
-	HTTPClient   *http.Client
-	// LogProgress is the interval between progress logs. 0 disables.
-	LogProgress time.Duration
-}
-
-// Stats reports run outcomes. FailedBatches counts embed-or-add
-// batches that errored out and were skipped (partial-failure
-// semantics). When non-zero, Run returns ErrPartialFailure so
-// callers can't accidentally treat "1 of 313 batches succeeded"
-// as a successful run.
-type Stats struct {
-	Scanned       int64
-	Embedded      int64
-	Added         int64
-	Wall          time.Duration
-	FailedBatches int64
-}
-
-// ErrPartialFailure signals that one or more batches errored during
-// Run. Stats.FailedBatches has the count; the caller decides
-// whether to retry / log / abort. Per 2026-04-29 cross-lineage
-// scrum (Opus WARN): the original behavior returned nil even when
-// 100% of batches failed silently, making "embedded=0/scanned=N"
-// look like an empty corpus rather than a broken pipeline.
-var ErrPartialFailure = errors.New("corpusingest: one or more batches failed")
-
-// Run executes the ingest pipeline. Returns on source EOF after all
-// in-flight jobs drain, on context cancellation, or on the first
-// embed/add error (errors are logged via slog and the pipeline
-// continues — partial-failure semantics; see comment inside).
-func Run(ctx context.Context, cfg Config, src Source) (Stats, error) {
-	cfg = applyDefaults(cfg)
-	if err := validateConfig(cfg); err != nil {
-		return Stats{}, err
-	}
-
-	t0 := time.Now()
-	if err := prepareIndex(ctx, cfg); err != nil {
-		return Stats{}, fmt.Errorf("prepare index: %w", err)
-	}
-
-	jobs := make(chan job, cfg.EmbedWorkers*2)
-
-	var (
-		totalEmbedded int64
-		totalAdded    int64
-		failedBatches int64
-	)
-
-	var wg sync.WaitGroup
-	for i := 0; i < cfg.EmbedWorkers; i++ {
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			for j := range jobs {
-				vecs, err := embedBatch(ctx, cfg, j.texts)
-				if err != nil {
-					// Partial-failure semantics: log + continue. A wedged
-					// embed batch shouldn't kill 8 workers' worth of
-					// progress; Run returns ErrPartialFailure on any
-					// failure so callers can't miss the signal.
-					slog.Warn("corpusingest: embed batch failed",
-						"index", cfg.IndexName, "items", len(j.texts), "err", err)
-					atomic.AddInt64(&failedBatches, 1)
-					continue
-				}
-				// Defense against a degraded embed backend that returns
-				// fewer vectors than texts: vecs[i] would panic in
-				// addBatch otherwise. Caught by ContextCancel unit test.
-				if len(vecs) != len(j.ids) {
-					slog.Warn("corpusingest: embed returned wrong count",
-						"index", cfg.IndexName, "want", len(j.ids), "got", len(vecs))
-					atomic.AddInt64(&failedBatches, 1)
-					continue
-				}
-				atomic.AddInt64(&totalEmbedded, int64(len(vecs)))
-				if err := addBatch(ctx, cfg, j.ids, vecs, j.metas); err != nil {
-					slog.Warn("corpusingest: add batch failed",
-						"index", cfg.IndexName, "items", len(j.ids), "err", err)
-					atomic.AddInt64(&failedBatches, 1)
-					continue
-				}
-				atomic.AddInt64(&totalAdded, int64(len(j.ids)))
-			}
-		}()
-	}
-
-	stopProgress := make(chan struct{})
-	progressDone := make(chan struct{})
-	if cfg.LogProgress > 0 {
-		go func() {
-			defer close(progressDone)
-			ticker := time.NewTicker(cfg.LogProgress)
-			defer ticker.Stop()
-			for {
-				select {
-				case <-ticker.C:
-					slog.Info("corpusingest: progress",
-						"index", cfg.IndexName,
-						"embedded", atomic.LoadInt64(&totalEmbedded),
-						"added", atomic.LoadInt64(&totalAdded))
-				case <-stopProgress:
-					return
-				case <-ctx.Done():
-					return
-				}
-			}
-		}()
-	} else {
-		close(progressDone)
-	}
-
-	scanned, err := drainSource(ctx, cfg, src, jobs)
-	close(jobs)
-	wg.Wait()
-	close(stopProgress) // tell the progress goroutine to exit; would otherwise hang Run forever (caught by candidates e2e 2026-04-29)
-	<-progressDone
-
-	stats := Stats{
-		Scanned:       scanned,
-		Embedded:      atomic.LoadInt64(&totalEmbedded),
-		Added:         atomic.LoadInt64(&totalAdded),
-		Wall:          time.Since(t0),
-		FailedBatches: atomic.LoadInt64(&failedBatches),
-	}
-	if err != nil {
-		return stats, err
-	}
-	if stats.FailedBatches > 0 {
-		return stats, fmt.Errorf("%w: %d batches failed (embedded=%d added=%d scanned=%d)",
-			ErrPartialFailure, stats.FailedBatches, stats.Embedded, stats.Added, stats.Scanned)
-	}
-	return stats, nil
-}
-
-// drainSource pulls rows, batches them, and dispatches into jobs.
-// Returns when source EOFs, ctx cancels, or limit is hit.
-func drainSource(ctx context.Context, cfg Config, src Source, jobs chan<- job) (int64, error) {
-	curIDs := make([]string, 0, cfg.EmbedBatch)
-	curTexts := make([]string, 0, cfg.EmbedBatch)
-	curMetas := make([]json.RawMessage, 0, cfg.EmbedBatch)
-
-	flush := func() {
-		if len(curIDs) == 0 {
-			return
-		}
-		jobs <- job{ids: curIDs, texts: curTexts, metas: curMetas}
-		curIDs = make([]string, 0, cfg.EmbedBatch)
-		curTexts = make([]string, 0, cfg.EmbedBatch)
-		curMetas = make([]json.RawMessage, 0, cfg.EmbedBatch)
-	}
-
-	var scanned int64
-	for {
-		if ctx.Err() != nil {
-			flush()
-			return scanned, ctx.Err()
-		}
-		row, err := src.Next()
-		if err == io.EOF {
-			flush()
-			return scanned, nil
-		}
-		if err != nil {
-			flush()
-			return scanned, fmt.Errorf("source row %d: %w", scanned, err)
-		}
-		if row.ID == "" {
-			return scanned, fmt.Errorf("source row %d: empty id", scanned)
-		}
-		// Empty Text would 400 at embedd; skip-with-warn rather than
-		// abort the whole run — a stray empty row shouldn't kill 500K.
-		if row.Text == "" {
-			slog.Warn("corpusingest: skipping row with empty text",
-				"index", cfg.IndexName, "id", row.ID)
-			scanned++
-			continue
-		}
-		meta, err := marshalMeta(row.Metadata)
-		if err != nil {
-			return scanned, fmt.Errorf("row %s: marshal metadata: %w", row.ID, err)
-		}
-		curIDs = append(curIDs, row.ID)
-		curTexts = append(curTexts, row.Text)
-		curMetas = append(curMetas, meta)
-		scanned++
-
-		if len(curIDs) >= cfg.EmbedBatch {
-			flush()
-		}
-		if cfg.Limit > 0 && scanned >= int64(cfg.Limit) {
-			flush()
-			return scanned, nil
-		}
-	}
-}
-
-// job is the unit of work between drainSource and the embed workers.
-// Internal type; kept small so the channel buffer doesn't bloat.
-type job struct {
-	ids   []string
-	texts []string
-	metas []json.RawMessage
-}
-
-func marshalMeta(v any) (json.RawMessage, error) {
-	if v == nil {
-		return nil, nil
-	}
-	if rm, ok := v.(json.RawMessage); ok {
-		return rm, nil
-	}
-	return json.Marshal(v)
-}
-
-// prepareIndex creates the vectord index, optionally dropping a
-// preexisting one. Idempotent on matching params: 409 from create is
-// treated as "already exists, reuse." If DropExisting is set, DELETE
-// fires first to give a clean slate.
-func prepareIndex(ctx context.Context, cfg Config) error {
-	if cfg.DropExisting {
-		if err := httpDelete(ctx, cfg.HTTPClient,
-			cfg.GatewayURL+"/v1/vectors/index/"+cfg.IndexName); err != nil {
-			// 404 (not found) is fine — drop-existing is idempotent.
-			slog.Debug("corpusingest: drop existing", "err", err)
-		}
-	}
-	body, _ := json.Marshal(map[string]any{
-		"name":      cfg.IndexName,
-		"dimension": cfg.Dimension,
-		"distance":  cfg.Distance,
-	})
-	code, msg, err := httpPost(ctx, cfg.HTTPClient, cfg.GatewayURL+"/v1/vectors/index", body)
-	if err != nil {
-		return err
-	}
-	switch code {
-	case http.StatusCreated:
-		slog.Info("corpusingest: created index",
-			"name", cfg.IndexName, "dim", cfg.Dimension, "distance", cfg.Distance)
-	case http.StatusConflict:
-		// Already exists — vectord didn't change params on conflict.
-		// Caller's responsibility to ensure existing dim/distance match.
-		slog.Info("corpusingest: index already exists, reusing", "name", cfg.IndexName)
-	default:
-		return fmt.Errorf("create index %d: %s", code, msg)
-	}
-	return nil
-}
-
-func embedBatch(ctx context.Context, cfg Config, texts []string) ([][]float32, error) {
-	body := map[string]any{"texts": texts}
-	if cfg.EmbedModel != "" {
-		body["model"] = cfg.EmbedModel
-	}
-	bs, _ := json.Marshal(body)
-	code, msg, raw, err := httpPostRaw(ctx, cfg.HTTPClient, cfg.GatewayURL+"/v1/embed", bs)
-	if err != nil {
-		return nil, err
-	}
-	if code != http.StatusOK {
-		return nil, fmt.Errorf("embed status %d: %s", code, msg)
-	}
-	var er struct {
-		Vectors [][]float32 `json:"vectors"`
-	}
-	if err := json.Unmarshal(raw, &er); err != nil {
-		return nil, fmt.Errorf("embed decode: %w", err)
-	}
-	return er.Vectors, nil
-}
-
-func addBatch(ctx context.Context, cfg Config, ids []string, vecs [][]float32, metas []json.RawMessage) error {
-	type addItem struct {
-		ID       string          `json:"id"`
-		Vector   []float32       `json:"vector"`
-		Metadata json.RawMessage `json:"metadata,omitempty"`
-	}
-	// Add-batch may exceed cfg.AddBatch when EmbedBatch divides into it
-	// non-evenly; vectord handles that fine. Keep one HTTP per job.
-	items := make([]addItem, len(ids))
-	for i := range ids {
-		items[i] = addItem{ID: ids[i], Vector: vecs[i], Metadata: metas[i]}
-	}
-	bs, _ := json.Marshal(map[string]any{"items": items})
-	code, msg, err := httpPost(ctx, cfg.HTTPClient,
-		cfg.GatewayURL+"/v1/vectors/index/"+cfg.IndexName+"/add", bs)
-	if err != nil {
-		return err
-	}
-	if code != http.StatusOK {
-		return fmt.Errorf("add status %d: %s", code, msg)
-	}
-	return nil
-}
-
-// ── HTTP helpers — small, no extra deps ─────────────────────────
-
-func httpPost(ctx context.Context, hc *http.Client, url string, body []byte) (int, string, error) {
-	code, msg, _, err := httpPostRaw(ctx, hc, url, body)
-	return code, msg, err
-}
-
-func httpPostRaw(ctx context.Context, hc *http.Client, url string, body []byte) (int, string, []byte, error) {
-	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
-	if err != nil {
-		return 0, "", nil, err
-	}
-	req.Header.Set("Content-Type", "application/json")
-	resp, err := hc.Do(req)
-	if err != nil {
-		return 0, "", nil, err
-	}
-	defer resp.Body.Close()
-	raw, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return resp.StatusCode, "", nil, err
-	}
-	preview := raw
-	if len(preview) > 256 {
-		preview = preview[:256]
-	}
-	return resp.StatusCode, string(preview), raw, nil
-}
-
-func httpDelete(ctx context.Context, hc *http.Client, url string) error {
-	req, err := http.NewRequestWithContext(ctx, http.MethodDelete, url, nil)
-	if err != nil {
-		return err
-	}
-	resp, err := hc.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-	io.Copy(io.Discard, resp.Body)
-	if resp.StatusCode >= 400 && resp.StatusCode != http.StatusNotFound {
-		return fmt.Errorf("delete status %d", resp.StatusCode)
-	}
-	return nil
-}
-
-// ── config validation + defaults ────────────────────────────────
-
-func applyDefaults(cfg Config) Config {
-	if cfg.GatewayURL == "" {
-		cfg.GatewayURL = "http://127.0.0.1:3110"
-	}
-	if cfg.Distance == "" {
-		cfg.Distance = "cosine"
-	}
-	if cfg.EmbedBatch <= 0 {
-		cfg.EmbedBatch = 16
-	}
-	if cfg.EmbedWorkers <= 0 {
-		cfg.EmbedWorkers = 8
-	}
-	if cfg.AddBatch <= 0 {
-		cfg.AddBatch = 1000
-	}
-	if cfg.HTTPClient == nil {
-		cfg.HTTPClient = &http.Client{Timeout: 5 * time.Minute}
-	}
-	if cfg.LogProgress < 0 {
-		cfg.LogProgress = 0
-	}
-	return cfg
-}
-
-func validateConfig(cfg Config) error {
-	if cfg.IndexName == "" {
-		return errors.New("corpusingest: IndexName is required")
-	}
-	if cfg.Dimension <= 0 {
-		return errors.New("corpusingest: Dimension must be > 0")
-	}
-	return nil
-}
--- a/internal/corpusingest/ingest_test.go
+++ b/internal/corpusingest/ingest_test.go
@ -1,455 +0,0 @@
-package corpusingest
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"sync"
-	"testing"
-	"time"
-)
-
-// fakeGateway records the embed + add calls corpusingest fires and
-// returns canned responses. The whole point of the unit test is to
-// validate the pipeline shape (request payloads, batching, stats)
-// without needing live embedd/vectord.
-type fakeGateway struct {
-	mu             sync.Mutex
-	embedCalls     int
-	embedTexts     [][]string  // texts per call
-	addCalls       int
-	addItems       [][]addItem // items per call
-	createCalled   bool
-	deleteCalled   bool
-	indexConflict  bool        // simulate "index already exists" → 409
-	embedDimension int
-}
-
-type addItem struct {
-	ID       string          `json:"id"`
-	Vector   []float32       `json:"vector"`
-	Metadata json.RawMessage `json:"metadata,omitempty"`
-}
-
-func newFakeGateway(dim int) *fakeGateway {
-	return &fakeGateway{embedDimension: dim}
-}
-
-func (f *fakeGateway) handler() http.Handler {
-	mux := http.NewServeMux()
-
-	mux.HandleFunc("/v1/vectors/index", func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != http.MethodPost {
-			http.Error(w, "wrong method", http.StatusMethodNotAllowed)
-			return
-		}
-		f.mu.Lock()
-		f.createCalled = true
-		conflict := f.indexConflict
-		f.mu.Unlock()
-		if conflict {
-			http.Error(w, "exists", http.StatusConflict)
-			return
-		}
-		w.WriteHeader(http.StatusCreated)
-	})
-
-	mux.HandleFunc("/v1/embed", func(w http.ResponseWriter, r *http.Request) {
-		var req struct {
-			Texts []string `json:"texts"`
-		}
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), http.StatusBadRequest)
-			return
-		}
-		// Synthesize deterministic vectors: vector[i] = float32(i+1).
-		vecs := make([][]float32, len(req.Texts))
-		for i := range vecs {
-			v := make([]float32, f.embedDimension)
-			for j := range v {
-				v[j] = float32(i + j + 1)
-			}
-			vecs[i] = v
-		}
-		f.mu.Lock()
-		f.embedCalls++
-		// Copy because we'll release the slice after returning.
-		texts := append([]string(nil), req.Texts...)
-		f.embedTexts = append(f.embedTexts, texts)
-		f.mu.Unlock()
-		w.Header().Set("Content-Type", "application/json")
-		_ = json.NewEncoder(w).Encode(map[string]any{
-			"vectors":   vecs,
-			"dimension": f.embedDimension,
-			"model":     "fake-embed",
-		})
-	})
-
-	mux.HandleFunc("/v1/vectors/index/", func(w http.ResponseWriter, r *http.Request) {
-		// /v1/vectors/index/{name}/add
-		if !strings.HasSuffix(r.URL.Path, "/add") {
-			if r.Method == http.MethodDelete {
-				f.mu.Lock()
-				f.deleteCalled = true
-				f.mu.Unlock()
-				w.WriteHeader(http.StatusNoContent)
-				return
-			}
-			http.Error(w, "unhandled "+r.URL.Path, http.StatusNotFound)
-			return
-		}
-		var req struct {
-			Items []addItem `json:"items"`
-		}
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), http.StatusBadRequest)
-			return
-		}
-		f.mu.Lock()
-		f.addCalls++
-		f.addItems = append(f.addItems, append([]addItem(nil), req.Items...))
-		f.mu.Unlock()
-		_, _ = io.WriteString(w, `{"added":`+fmt.Sprint(len(req.Items))+`}`)
-	})
-
-	return mux
-}
-
-// staticSource yields a fixed slice of rows.
-type staticSource struct {
-	rows []Row
-	i    int
-}
-
-func (s *staticSource) Next() (Row, error) {
-	if s.i >= len(s.rows) {
-		return Row{}, io.EOF
-	}
-	r := s.rows[s.i]
-	s.i++
-	return r, nil
-}
-
-func TestRun_PipelineShapeAndStats(t *testing.T) {
-	const dim = 4
-	fg := newFakeGateway(dim)
-	srv := httptest.NewServer(fg.handler())
-	defer srv.Close()
-
-	rows := make([]Row, 50)
-	for i := range rows {
-		rows[i] = Row{
-			ID:       fmt.Sprintf("r-%03d", i),
-			Text:     fmt.Sprintf("row %d text", i),
-			Metadata: map[string]any{"i": i, "kind": "test"},
-		}
-	}
-
-	stats, err := Run(context.Background(), Config{
-		GatewayURL:   srv.URL,
-		IndexName:    "test_corpus",
-		Dimension:    dim,
-		Distance:     "cosine",
-		EmbedBatch:   16,
-		EmbedWorkers: 4,
-		HTTPClient:   srv.Client(),
-		LogProgress:  0,
-	}, &staticSource{rows: rows})
-	if err != nil {
-		t.Fatalf("Run: %v", err)
-	}
-
-	if stats.Scanned != 50 {
-		t.Errorf("Scanned: want 50, got %d", stats.Scanned)
-	}
-	if stats.Embedded != 50 {
-		t.Errorf("Embedded: want 50, got %d", stats.Embedded)
-	}
-	if stats.Added != 50 {
-		t.Errorf("Added: want 50, got %d", stats.Added)
-	}
-	if !fg.createCalled {
-		t.Error("expected create-index to be called")
-	}
-	// 50 rows / 16 batch = ceil(50/16) = 4 batches → 4 embed calls + 4 add calls
-	if fg.embedCalls != 4 {
-		t.Errorf("embedCalls: want 4 (50 rows / 16 batch), got %d", fg.embedCalls)
-	}
-	if fg.addCalls != 4 {
-		t.Errorf("addCalls: want 4, got %d", fg.addCalls)
-	}
-
-	// Sum of texts across embed calls must be 50, and IDs across add
-	// calls must be every r-NNN exactly once.
-	totalTexts := 0
-	for _, ts := range fg.embedTexts {
-		totalTexts += len(ts)
-	}
-	if totalTexts != 50 {
-		t.Errorf("total embedded texts: want 50, got %d", totalTexts)
-	}
-	seen := make(map[string]bool)
-	for _, items := range fg.addItems {
-		for _, it := range items {
-			if seen[it.ID] {
-				t.Errorf("duplicate id in add stream: %s", it.ID)
-			}
-			seen[it.ID] = true
-			if len(it.Vector) != dim {
-				t.Errorf("vector dim: want %d, got %d", dim, len(it.Vector))
-			}
-		}
-	}
-	if len(seen) != 50 {
-		t.Errorf("unique ids added: want 50, got %d", len(seen))
-	}
-}
-
-func TestRun_DropExistingFiresDelete(t *testing.T) {
-	fg := newFakeGateway(4)
-	srv := httptest.NewServer(fg.handler())
-	defer srv.Close()
-
-	_, err := Run(context.Background(), Config{
-		GatewayURL:   srv.URL,
-		IndexName:    "drops_first",
-		Dimension:    4,
-		DropExisting: true,
-		HTTPClient:   srv.Client(),
-	}, &staticSource{rows: []Row{{ID: "x", Text: "y", Metadata: nil}}})
-	if err != nil {
-		t.Fatalf("Run: %v", err)
-	}
-	if !fg.deleteCalled {
-		t.Error("expected delete-index to fire when DropExisting=true")
-	}
-}
-
-func TestRun_IndexAlreadyExistsIsReused(t *testing.T) {
-	fg := newFakeGateway(4)
-	fg.indexConflict = true // first POST /v1/vectors/index → 409
-	srv := httptest.NewServer(fg.handler())
-	defer srv.Close()
-
-	stats, err := Run(context.Background(), Config{
-		GatewayURL:   srv.URL,
-		IndexName:    "exists_already",
-		Dimension:    4,
-		HTTPClient:   srv.Client(),
-		EmbedWorkers: 1,
-	}, &staticSource{rows: []Row{{ID: "x", Text: "y", Metadata: nil}}})
-	if err != nil {
-		t.Fatalf("Run with existing index should succeed: %v", err)
-	}
-	if stats.Added != 1 {
-		t.Errorf("Added: want 1, got %d", stats.Added)
-	}
-}
-
-func TestRun_LimitStopsEarly(t *testing.T) {
-	fg := newFakeGateway(4)
-	srv := httptest.NewServer(fg.handler())
-	defer srv.Close()
-
-	rows := make([]Row, 100)
-	for i := range rows {
-		rows[i] = Row{ID: fmt.Sprintf("r-%d", i), Text: "t", Metadata: nil}
-	}
-
-	stats, err := Run(context.Background(), Config{
-		GatewayURL:   srv.URL,
-		IndexName:    "limited",
-		Dimension:    4,
-		Limit:        25,
-		EmbedBatch:   8,
-		EmbedWorkers: 2,
-		HTTPClient:   srv.Client(),
-	}, &staticSource{rows: rows})
-	if err != nil {
-		t.Fatalf("Run: %v", err)
-	}
-	if stats.Scanned != 25 {
-		t.Errorf("Scanned: want 25 (limit), got %d", stats.Scanned)
-	}
-}
-
-func TestRun_EmptyTextSkipped(t *testing.T) {
-	fg := newFakeGateway(4)
-	srv := httptest.NewServer(fg.handler())
-	defer srv.Close()
-
-	rows := []Row{
-		{ID: "a", Text: "real text", Metadata: nil},
-		{ID: "b", Text: "", Metadata: nil}, // skipped
-		{ID: "c", Text: "more text", Metadata: nil},
-	}
-
-	stats, err := Run(context.Background(), Config{
-		GatewayURL: srv.URL, IndexName: "skip", Dimension: 4,
-		HTTPClient: srv.Client(),
-	}, &staticSource{rows: rows})
-	if err != nil {
-		t.Fatalf("Run: %v", err)
-	}
-	if stats.Scanned != 3 {
-		t.Errorf("Scanned: want 3 (b is skipped but counted as scanned), got %d", stats.Scanned)
-	}
-	if stats.Added != 2 {
-		t.Errorf("Added: want 2 (b excluded from embed), got %d", stats.Added)
-	}
-}
-
-// TestRun_ProgressLoggerExits guards the bug caught 2026-04-29 in
-// the candidates e2e: when LogProgress > 0, the progress goroutine's
-// only exit was ctx.Done(). With context.Background() in the
-// production driver, Run hung forever after the pipeline finished.
-// This test bounds Run's wall to a few hundred ms — if it regresses,
-// the test deadline kicks in.
-func TestRun_ProgressLoggerExits(t *testing.T) {
-	fg := newFakeGateway(4)
-	srv := httptest.NewServer(fg.handler())
-	defer srv.Close()
-
-	rows := []Row{
-		{ID: "a", Text: "x", Metadata: nil},
-		{ID: "b", Text: "y", Metadata: nil},
-	}
-
-	done := make(chan error, 1)
-	go func() {
-		_, err := Run(context.Background(), Config{
-			GatewayURL:  srv.URL,
-			IndexName:   "progress_test",
-			Dimension:   4,
-			HTTPClient:  srv.Client(),
-			LogProgress: 50 * time.Millisecond,
-		}, &staticSource{rows: rows})
-		done <- err
-	}()
-
-	select {
-	case err := <-done:
-		if err != nil {
-			t.Fatalf("Run: %v", err)
-		}
-	case <-time.After(2 * time.Second):
-		t.Fatal("Run did not return within 2s — progress goroutine likely hanging")
-	}
-}
-
-// TestRun_NonzeroFailedBatchesReturnsError guards the 2026-04-29
-// scrum WARN: original behavior returned nil even when 100% of
-// batches failed, making "embedded=0/scanned=N" look like an empty
-// corpus rather than a broken pipeline.
-func TestRun_NonzeroFailedBatchesReturnsError(t *testing.T) {
-	// Fake gateway that fails every embed call.
-	mux := http.NewServeMux()
-	mux.HandleFunc("/v1/vectors/index", func(w http.ResponseWriter, r *http.Request) {
-		w.WriteHeader(http.StatusCreated)
-	})
-	mux.HandleFunc("/v1/embed", func(w http.ResponseWriter, r *http.Request) {
-		http.Error(w, "embed failure injected", http.StatusBadGateway)
-	})
-	mux.HandleFunc("/v1/vectors/index/", func(w http.ResponseWriter, r *http.Request) {
-		// shouldn't reach here since embed fails first
-		http.Error(w, "should not be called", http.StatusInternalServerError)
-	})
-	srv := httptest.NewServer(mux)
-	defer srv.Close()
-
-	rows := make([]Row, 5)
-	for i := range rows {
-		rows[i] = Row{ID: fmt.Sprintf("r-%d", i), Text: "x"}
-	}
-
-	stats, err := Run(context.Background(), Config{
-		GatewayURL: srv.URL, IndexName: "fail_only", Dimension: 4,
-		EmbedBatch: 1, EmbedWorkers: 1, HTTPClient: srv.Client(),
-	}, &staticSource{rows: rows})
-
-	if !errors.Is(err, ErrPartialFailure) {
-		t.Errorf("want ErrPartialFailure, got %v", err)
-	}
-	if stats.FailedBatches == 0 {
-		t.Error("FailedBatches should be > 0 when embeds fail")
-	}
-	if stats.Added != 0 {
-		t.Errorf("Added: want 0 (all failed), got %d", stats.Added)
-	}
-}
-
-func TestRun_RequiresIndexName(t *testing.T) {
-	_, err := Run(context.Background(), Config{Dimension: 4},
-		&staticSource{rows: nil})
-	if err == nil || !strings.Contains(err.Error(), "IndexName") {
-		t.Errorf("want IndexName-required error, got %v", err)
-	}
-}
-
-func TestRun_RequiresDimension(t *testing.T) {
-	_, err := Run(context.Background(), Config{IndexName: "x"},
-		&staticSource{rows: nil})
-	if err == nil || !strings.Contains(err.Error(), "Dimension") {
-		t.Errorf("want Dimension-required error, got %v", err)
-	}
-}
-
-// TestRun_ContextCancel verifies the pipeline drains cleanly when
-// ctx is cancelled mid-run. Source returns rows fast enough that
-// without ctx the run would complete; cancelling early should stop
-// well before all 1000 rows are processed.
-func TestRun_ContextCancel(t *testing.T) {
-	fg := newFakeGateway(4)
-	// Slow embed handler: each call sleeps 50ms.
-	mux := http.NewServeMux()
-	mux.HandleFunc("/v1/vectors/index", func(w http.ResponseWriter, r *http.Request) {
-		w.WriteHeader(http.StatusCreated)
-	})
-	mux.HandleFunc("/v1/embed", func(w http.ResponseWriter, r *http.Request) {
-		var req struct {
-			Texts []string `json:"texts"`
-		}
-		_ = json.NewDecoder(r.Body).Decode(&req)
-		// Simulate slow-but-valid backend so we test ctx cancel, not
-		// degraded-payload handling (that's covered in production by
-		// the len-mismatch guard in Run's worker).
-		time.Sleep(50 * time.Millisecond)
-		_ = fg
-		vecs := make([][]float32, len(req.Texts))
-		for i := range vecs {
-			vecs[i] = []float32{1, 2, 3, 4}
-		}
-		_ = json.NewEncoder(w).Encode(map[string]any{
-			"vectors":   vecs,
-			"dimension": 4,
-			"model":     "x",
-		})
-	})
-	mux.HandleFunc("/v1/vectors/index/", func(w http.ResponseWriter, r *http.Request) {
-		_, _ = io.WriteString(w, `{}`)
-	})
-	srv := httptest.NewServer(mux)
-	defer srv.Close()
-
-	rows := make([]Row, 1000)
-	for i := range rows {
-		rows[i] = Row{ID: fmt.Sprintf("r-%d", i), Text: "t"}
-	}
-
-	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
-	defer cancel()
-
-	stats, err := Run(ctx, Config{
-		GatewayURL: srv.URL, IndexName: "cancel_me", Dimension: 4,
-		EmbedBatch: 1, EmbedWorkers: 1, HTTPClient: srv.Client(),
-	}, &staticSource{rows: rows})
-	// Either an error or a partial stats; the point is "didn't process all 1000."
-	if stats.Scanned >= 1000 {
-		t.Errorf("ctx cancel did not stop early: scanned=%d err=%v", stats.Scanned, err)
-	}
-}
--- a/internal/distillation/scorer.go
+++ b/internal/distillation/scorer.go
@ -1,410 +0,0 @@
-package distillation
-
-// scorer.go — pure deterministic Success Scorer (port of Rust
-// scripts/distillation/scorer.ts at e7636f2).
-//
-// Takes one EvidenceRecord, returns category + reasons + sub_scores.
-// NO I/O, NO LLM, NO clock reads, NO mutable state. Identical input
-// → identical output forever. Same contract as the Rust source —
-// future scoring-rule changes bump ScorerVersion atomically with
-// the logic.
-//
-// Three-class strategy mirrors the Rust source taxonomy
-// (docs/recon/local-distillation-recon.md + data/_kb/evidence_health.md):
-//
-//   CLASS A — verdict-bearing
-//     scrum_reviews, observer_reviews, audits, contract_analyses
-//     Direct scoring from existing markers / observer_verdict
-//
-//   CLASS B — telemetry-rich
-//     auto_apply, outcomes, mode_experiments
-//     Markers exist but partial; needs_human_review fills the gap
-//
-//   CLASS C — pure-extraction (no native scoring signal)
-//     distilled_*, audit_facts, observer_escalations
-//     Default needs_human_review; v2 will JOIN to parent verdict
-
-import (
-	"crypto/sha256"
-	"encoding/hex"
-	"encoding/json"
-	"fmt"
-	"strconv"
-	"strings"
-)
-
-// sourceClass categorizes an EvidenceRecord's source_file for the
-// scorer's three-class dispatch.
-type sourceClass string
-
-const (
-	classVerdict    sourceClass = "verdict"
-	classTelemetry  sourceClass = "telemetry"
-	classExtraction sourceClass = "extraction"
-)
-
-// sourceClassFor maps a source_file (from provenance) to a class.
-// Centralized so adding a new source is a one-line change. Mirrors
-// the Rust switch on the stem (data/_kb/X.jsonl → X).
-func sourceClassFor(sourceFile string) sourceClass {
-	stem := strings.TrimSuffix(strings.TrimPrefix(sourceFile, "data/_kb/"), ".jsonl")
-	switch stem {
-	case "scrum_reviews", "observer_reviews", "audits", "contract_analyses":
-		return classVerdict
-	case "auto_apply", "outcomes", "mode_experiments":
-		return classTelemetry
-	case "distilled_facts", "distilled_procedures", "distilled_config_hints",
-		"audit_facts", "observer_escalations":
-		return classExtraction
-	default:
-		// Unknown source → most conservative path (forces
-		// needs_human_review until a transform is added).
-		return classExtraction
-	}
-}
-
-// stemOf extracts the stable corpus identifier from a source_file.
-// E.g. "data/_kb/scrum_reviews.jsonl" → "scrum_reviews".
-func stemOf(sourceFile string) string {
-	return strings.TrimSuffix(strings.TrimPrefix(sourceFile, "data/_kb/"), ".jsonl")
-}
-
-// ScoreOutput is the scorer's return shape — category + reasons +
-// the captured sub-signals. Reasons is always non-empty (validator
-// requires it).
-type ScoreOutput struct {
-	Category  ScoreCategory
-	Reasons   []string
-	SubScores *SubScores
-}
-
-// ScoreRecord dispatches an EvidenceRecord to the appropriate class
-// scorer and returns the verdict + reasons + sub-scores. Pure
-// function. Caller wraps the output in a ScoredRun via BuildScoredRun
-// for the on-wire shape.
-func ScoreRecord(rec EvidenceRecord) ScoreOutput {
-	cls := sourceClassFor(rec.Provenance.SourceFile)
-	stem := stemOf(rec.Provenance.SourceFile)
-
-	switch cls {
-	case classVerdict:
-		switch stem {
-		case "scrum_reviews":
-			return scoreScrumReview(rec)
-		case "observer_reviews":
-			return scoreObserverReview(rec)
-		case "audits":
-			return scoreAudit(rec)
-		case "contract_analyses":
-			return scoreContractAnalysis(rec)
-		}
-	case classTelemetry:
-		switch stem {
-		case "auto_apply":
-			return scoreAutoApply(rec)
-		case "outcomes":
-			return scoreOutcomes(rec)
-		case "mode_experiments":
-			return scoreModeExperiment(rec)
-		}
-	}
-	return scoreExtraction()
-}
-
-// BuildScoredRun composes a complete ScoredRun for persistence.
-// Caller supplies recorded_at + the source file path/line offset.
-// SigHash is computed deterministically from the EvidenceRecord
-// JSON; ScoredRun traces to the materialized evidence row.
-func BuildScoredRun(rec EvidenceRecord, sourceFile string, lineOffset int64, recordedAt string) (ScoredRun, error) {
-	out := ScoreRecord(rec)
-	sig, err := canonicalSha256(rec)
-	if err != nil {
-		return ScoredRun{}, fmt.Errorf("scoredrun sig hash: %w", err)
-	}
-	return ScoredRun{
-		SchemaVersion:  ScoredRunSchemaVersion,
-		EvidenceRunID:  rec.RunID,
-		EvidenceTaskID: rec.TaskID,
-		Category:       out.Category,
-		Reasons:        out.Reasons,
-		ScoredAt:       recordedAt,
-		ScorerVersion:  ScorerVersion,
-		SubScores:      out.SubScores,
-		Provenance: Provenance{
-			SourceFile: sourceFile,
-			LineOffset: lineOffset,
-			SigHash:    sig,
-			RecordedAt: recordedAt,
-		},
-	}, nil
-}
-
-// canonicalSha256 hashes a value's canonical JSON encoding. Used
-// for ScoredRun.Provenance.SigHash. Matches the Rust pattern of
-// "hash the structured object, not the raw source bytes" so
-// re-materialization with same logic produces same hash.
-func canonicalSha256(v any) (string, error) {
-	bs, err := json.Marshal(v)
-	if err != nil {
-		return "", err
-	}
-	sum := sha256.Sum256(bs)
-	return hex.EncodeToString(sum[:]), nil
-}
-
-// ─── Class A: verdict-bearing ────────────────────────────────────
-
-func scoreScrumReview(r EvidenceRecord) ScoreOutput {
-	subs := &SubScores{}
-	successMarker := findPrefix(r.SuccessMarkers, "accepted_on_attempt_")
-	if successMarker == "" {
-		return ScoreOutput{
-			Category: CategoryNeedsHumanReview,
-			Reasons:  []string{"scrum_review missing accepted_on_attempt_* success marker"},
-			SubScores: subs,
-		}
-	}
-	attemptStr := strings.TrimPrefix(successMarker, "accepted_on_attempt_")
-	attempt, err := strconv.Atoi(attemptStr)
-	if err != nil {
-		return ScoreOutput{
-			Category: CategoryNeedsHumanReview,
-			Reasons:  []string{"scrum_review accepted_on_attempt_* marker has non-integer suffix: " + attemptStr},
-			SubScores: subs,
-		}
-	}
-	subs.AcceptedOnAttempt = &attempt
-	switch {
-	case attempt == 1:
-		return ScoreOutput{
-			Category: CategoryAccepted,
-			Reasons:  []string{"scrum: accepted on first attempt"},
-			SubScores: subs,
-		}
-	case attempt <= 3:
-		return ScoreOutput{
-			Category: CategoryPartiallyAccepted,
-			Reasons:  []string{fmt.Sprintf("scrum: accepted after %d attempts", attempt)},
-			SubScores: subs,
-		}
-	default:
-		return ScoreOutput{
-			Category: CategoryPartiallyAccepted,
-			Reasons:  []string{fmt.Sprintf("scrum: accepted only after %d attempts (high-cost path)", attempt)},
-			SubScores: subs,
-		}
-	}
-}
-
-func scoreObserverReview(r EvidenceRecord) ScoreOutput {
-	subs := &SubScores{}
-	switch r.ObserverVerdict {
-	case VerdictAccept:
-		subs.ObserverVerdict = VerdictAccept
-		return ScoreOutput{
-			Category: CategoryAccepted,
-			Reasons:  []string{"observer accepted the reviewed attempt"},
-			SubScores: subs,
-		}
-	case VerdictReject:
-		subs.ObserverVerdict = VerdictReject
-		return ScoreOutput{
-			Category: CategoryRejected,
-			Reasons:  []string{"observer rejected the reviewed attempt"},
-			SubScores: subs,
-		}
-	case VerdictCycle:
-		subs.ObserverVerdict = VerdictCycle
-		return ScoreOutput{
-			Category: CategoryPartiallyAccepted,
-			Reasons:  []string{"observer flagged the attempt as cycling — partial signal"},
-			SubScores: subs,
-		}
-	default:
-		return ScoreOutput{
-			Category: CategoryNeedsHumanReview,
-			Reasons:  []string{fmt.Sprintf("observer_verdict missing or unrecognized: %q", r.ObserverVerdict)},
-			SubScores: subs,
-		}
-	}
-}
-
-func scoreAudit(r EvidenceRecord) ScoreOutput {
-	subs := &SubScores{}
-	succ := r.SuccessMarkers
-	fail := r.FailureMarkers
-
-	// Legacy markers (back-compat with pre-fix materializations).
-	if contains(succ, "approved") {
-		return ScoreOutput{Category: CategoryAccepted,
-			Reasons: []string{"audit overall=approved (legacy marker)"}, SubScores: subs}
-	}
-	if contains(fail, "blocked") {
-		return ScoreOutput{Category: CategoryRejected,
-			Reasons: []string{"audit overall=block (legacy marker)"}, SubScores: subs}
-	}
-	if contains(fail, "request_changes") {
-		return ScoreOutput{Category: CategoryPartiallyAccepted,
-			Reasons: []string{"audit overall=request_changes (legacy marker)"}, SubScores: subs}
-	}
-
-	// Severity-derived markers (Phase 2 transform).
-	sevSucc := findPrefix(succ, "audit_severity_")
-	sevFail := findPrefix(fail, "audit_severity_")
-	if sevSucc != "" {
-		return ScoreOutput{Category: CategoryAccepted,
-			Reasons: []string{sevSucc + " → minor finding"}, SubScores: subs}
-	}
-	if sevFail == "audit_severity_medium" {
-		return ScoreOutput{Category: CategoryPartiallyAccepted,
-			Reasons: []string{"audit_severity_medium → finding warrants review"}, SubScores: subs}
-	}
-	if sevFail == "audit_severity_high" || sevFail == "audit_severity_critical" {
-		return ScoreOutput{Category: CategoryRejected,
-			Reasons: []string{sevFail + " → blocking finding"}, SubScores: subs}
-	}
-	return ScoreOutput{Category: CategoryNeedsHumanReview,
-		Reasons: []string{"audit row has no severity or overall marker"}, SubScores: subs}
-}
-
-func scoreContractAnalysis(r EvidenceRecord) ScoreOutput {
-	subs := &SubScores{}
-	// failure_markers takes precedence: explicit rejection beats absent verdict.
-	if contains(r.FailureMarkers, "observer_rejected") || r.ObserverVerdict == VerdictReject {
-		subs.ObserverVerdict = VerdictReject
-		return ScoreOutput{Category: CategoryRejected,
-			Reasons: []string{"contract analysis: observer rejected"}, SubScores: subs}
-	}
-	switch r.ObserverVerdict {
-	case VerdictAccept:
-		subs.ObserverVerdict = VerdictAccept
-		return ScoreOutput{Category: CategoryAccepted,
-			Reasons: []string{"contract analysis: observer accepted"}, SubScores: subs}
-	case VerdictCycle:
-		subs.ObserverVerdict = VerdictCycle
-		return ScoreOutput{Category: CategoryPartiallyAccepted,
-			Reasons: []string{"contract analysis: observer cycled (partial)"}, SubScores: subs}
-	}
-	return ScoreOutput{Category: CategoryNeedsHumanReview,
-		Reasons: []string{"contract analysis: no observer verdict signal"}, SubScores: subs}
-}
-
-// ─── Class B: telemetry-rich ─────────────────────────────────────
-
-func scoreAutoApply(r EvidenceRecord) ScoreOutput {
-	subs := &SubScores{}
-	if contains(r.SuccessMarkers, "committed") {
-		t := true
-		subs.CargoGreen = &t
-		return ScoreOutput{Category: CategoryAccepted,
-			Reasons: []string{"auto_apply: patch committed (cargo green + warning baseline + rationale alignment passed)"},
-			SubScores: subs}
-	}
-	reverted := findContaining(r.FailureMarkers, "reverted")
-	if reverted != "" {
-		if strings.Contains(reverted, "build_red") {
-			f := false
-			subs.CargoGreen = &f
-		}
-		return ScoreOutput{Category: CategoryRejected,
-			Reasons: []string{"auto_apply: " + reverted}, SubScores: subs}
-	}
-	return ScoreOutput{Category: CategoryNeedsHumanReview,
-		Reasons: []string{"auto_apply: no commit + no revert (likely no_patches or dry_run)"},
-		SubScores: subs}
-}
-
-func scoreOutcomes(r EvidenceRecord) ScoreOutput {
-	subs := &SubScores{}
-	if contains(r.SuccessMarkers, "all_events_ok") {
-		return ScoreOutput{Category: CategoryAccepted,
-			Reasons: []string{"outcomes: all events ok"}, SubScores: subs}
-	}
-	if gap := numericFromMap(r.ValidationResults, "gap_signals"); gap > 0 {
-		return ScoreOutput{Category: CategoryPartiallyAccepted,
-			Reasons: []string{fmt.Sprintf("outcomes: %d gap signal(s) detected", int(gap))},
-			SubScores: subs}
-	}
-	return ScoreOutput{Category: CategoryNeedsHumanReview,
-		Reasons: []string{"outcomes: no decisive marker — defer to human"},
-		SubScores: subs}
-}
-
-func scoreModeExperiment(r EvidenceRecord) ScoreOutput {
-	subs := &SubScores{}
-	if strings.TrimSpace(r.Text) == "" {
-		return ScoreOutput{Category: CategoryRejected,
-			Reasons: []string{"mode_experiment: empty response text"}, SubScores: subs}
-	}
-	if r.LatencyMs > 120_000 {
-		return ScoreOutput{Category: CategoryPartiallyAccepted,
-			Reasons: []string{fmt.Sprintf("mode_experiment: latency %dms exceeds 2-minute soft cap", r.LatencyMs)},
-			SubScores: subs}
-	}
-	return ScoreOutput{Category: CategoryNeedsHumanReview,
-		Reasons: []string{"mode_experiment: response present, latency within bounds; verdict not yet wired"},
-		SubScores: subs}
-}
-
-// ─── Class C: pure-extraction ────────────────────────────────────
-
-func scoreExtraction() ScoreOutput {
-	return ScoreOutput{
-		Category: CategoryNeedsHumanReview,
-		Reasons:  []string{"extraction-class source has no native scoring signal — JOIN to parent verdict pending Phase 3 v2"},
-		SubScores: &SubScores{},
-	}
-}
-
-// ─── Internal helpers ────────────────────────────────────────────
-
-func contains(slice []string, want string) bool {
-	for _, s := range slice {
-		if s == want {
-			return true
-		}
-	}
-	return false
-}
-
-func findPrefix(slice []string, prefix string) string {
-	for _, s := range slice {
-		if strings.HasPrefix(s, prefix) {
-			return s
-		}
-	}
-	return ""
-}
-
-func findContaining(slice []string, sub string) string {
-	for _, s := range slice {
-		if strings.Contains(s, sub) {
-			return s
-		}
-	}
-	return ""
-}
-
-func numericFromMap(m map[string]any, key string) float64 {
-	if m == nil {
-		return 0
-	}
-	v, ok := m[key]
-	if !ok {
-		return 0
-	}
-	switch n := v.(type) {
-	case int:
-		return float64(n)
-	case int64:
-		return float64(n)
-	case float32:
-		return float64(n)
-	case float64:
-		return n
-	case json.Number:
-		f, _ := n.Float64()
-		return f
-	}
-	return 0
-}
--- a/internal/distillation/scorer_test.go
+++ b/internal/distillation/scorer_test.go
@ -1,375 +0,0 @@
-package distillation
-
-import (
-	"errors"
-	"strings"
-	"testing"
-)
-
-func mkRecord(sourceFile string) EvidenceRecord {
-	return EvidenceRecord{
-		RunID:         "run-1",
-		TaskID:        "task-1",
-		Timestamp:     "2026-04-29T12:00:00Z",
-		SchemaVersion: EvidenceSchemaVersion,
-		Provenance: Provenance{
-			SourceFile: sourceFile,
-			SigHash:    "deadbeef",
-			RecordedAt: "2026-04-29T12:00:01Z",
-		},
-	}
-}
-
-func TestSourceClassFor(t *testing.T) {
-	cases := []struct {
-		path string
-		want sourceClass
-	}{
-		{"data/_kb/scrum_reviews.jsonl", classVerdict},
-		{"data/_kb/observer_reviews.jsonl", classVerdict},
-		{"data/_kb/audits.jsonl", classVerdict},
-		{"data/_kb/contract_analyses.jsonl", classVerdict},
-		{"data/_kb/auto_apply.jsonl", classTelemetry},
-		{"data/_kb/outcomes.jsonl", classTelemetry},
-		{"data/_kb/mode_experiments.jsonl", classTelemetry},
-		{"data/_kb/distilled_facts.jsonl", classExtraction},
-		{"data/_kb/audit_facts.jsonl", classExtraction},
-		{"data/_kb/observer_escalations.jsonl", classExtraction},
-		{"data/_kb/wholly_unknown.jsonl", classExtraction}, // unknown → extraction (conservative)
-	}
-	for _, c := range cases {
-		got := sourceClassFor(c.path)
-		if got != c.want {
-			t.Errorf("sourceClassFor(%q): want %q, got %q", c.path, c.want, got)
-		}
-	}
-}
-
-func TestScoreScrumReview(t *testing.T) {
-	cases := []struct {
-		name           string
-		successMarkers []string
-		wantCategory   ScoreCategory
-		wantReasonSub  string
-	}{
-		{
-			name:           "first attempt → accepted",
-			successMarkers: []string{"accepted_on_attempt_1"},
-			wantCategory:   CategoryAccepted,
-			wantReasonSub:  "first attempt",
-		},
-		{
-			name:           "second attempt → partial",
-			successMarkers: []string{"accepted_on_attempt_2"},
-			wantCategory:   CategoryPartiallyAccepted,
-			wantReasonSub:  "after 2 attempts",
-		},
-		{
-			name:           "fourth attempt → partial (high-cost)",
-			successMarkers: []string{"accepted_on_attempt_4"},
-			wantCategory:   CategoryPartiallyAccepted,
-			wantReasonSub:  "high-cost",
-		},
-		{
-			name:           "missing marker → needs_human_review",
-			successMarkers: []string{},
-			wantCategory:   CategoryNeedsHumanReview,
-			wantReasonSub:  "missing accepted_on_attempt",
-		},
-	}
-	for _, c := range cases {
-		t.Run(c.name, func(t *testing.T) {
-			rec := mkRecord("data/_kb/scrum_reviews.jsonl")
-			rec.SuccessMarkers = c.successMarkers
-			out := ScoreRecord(rec)
-			if out.Category != c.wantCategory {
-				t.Errorf("category: want %q, got %q (reasons=%v)", c.wantCategory, out.Category, out.Reasons)
-			}
-			if !reasonsContain(out.Reasons, c.wantReasonSub) {
-				t.Errorf("reasons missing %q: %v", c.wantReasonSub, out.Reasons)
-			}
-		})
-	}
-}
-
-func TestScoreObserverReview(t *testing.T) {
-	cases := []struct {
-		verdict ObserverVerdict
-		want    ScoreCategory
-	}{
-		{VerdictAccept, CategoryAccepted},
-		{VerdictReject, CategoryRejected},
-		{VerdictCycle, CategoryPartiallyAccepted},
-		{"", CategoryNeedsHumanReview},
-		{"weird-verdict", CategoryNeedsHumanReview},
-	}
-	for _, c := range cases {
-		rec := mkRecord("data/_kb/observer_reviews.jsonl")
-		rec.ObserverVerdict = c.verdict
-		out := ScoreRecord(rec)
-		if out.Category != c.want {
-			t.Errorf("verdict=%q: want %q, got %q", c.verdict, c.want, out.Category)
-		}
-	}
-}
-
-func TestScoreAudit_LegacyAndSeverityMarkers(t *testing.T) {
-	cases := []struct {
-		name string
-		succ []string
-		fail []string
-		want ScoreCategory
-	}{
-		{"legacy approved", []string{"approved"}, nil, CategoryAccepted},
-		{"legacy blocked", nil, []string{"blocked"}, CategoryRejected},
-		{"legacy request_changes", nil, []string{"request_changes"}, CategoryPartiallyAccepted},
-		{"severity_low → accepted", []string{"audit_severity_low"}, nil, CategoryAccepted},
-		{"severity_info → accepted", []string{"audit_severity_info"}, nil, CategoryAccepted},
-		{"severity_medium fail → partial", nil, []string{"audit_severity_medium"}, CategoryPartiallyAccepted},
-		{"severity_high → rejected", nil, []string{"audit_severity_high"}, CategoryRejected},
-		{"severity_critical → rejected", nil, []string{"audit_severity_critical"}, CategoryRejected},
-		{"no markers", nil, nil, CategoryNeedsHumanReview},
-	}
-	for _, c := range cases {
-		t.Run(c.name, func(t *testing.T) {
-			rec := mkRecord("data/_kb/audits.jsonl")
-			rec.SuccessMarkers = c.succ
-			rec.FailureMarkers = c.fail
-			out := ScoreRecord(rec)
-			if out.Category != c.want {
-				t.Errorf("want %q, got %q (reasons=%v)", c.want, out.Category, out.Reasons)
-			}
-		})
-	}
-}
-
-func TestScoreAutoApply(t *testing.T) {
-	cases := []struct {
-		name string
-		succ []string
-		fail []string
-		want ScoreCategory
-	}{
-		{"committed → accepted", []string{"committed"}, nil, CategoryAccepted},
-		{"reverted_build_red → rejected", nil, []string{"reverted_build_red"}, CategoryRejected},
-		{"reverted other → rejected", nil, []string{"reverted_warning_count_up"}, CategoryRejected},
-		{"no signal → needs_human", nil, nil, CategoryNeedsHumanReview},
-	}
-	for _, c := range cases {
-		t.Run(c.name, func(t *testing.T) {
-			rec := mkRecord("data/_kb/auto_apply.jsonl")
-			rec.SuccessMarkers = c.succ
-			rec.FailureMarkers = c.fail
-			out := ScoreRecord(rec)
-			if out.Category != c.want {
-				t.Errorf("want %q, got %q", c.want, out.Category)
-			}
-		})
-	}
-}
-
-func TestScoreOutcomes(t *testing.T) {
-	rec := mkRecord("data/_kb/outcomes.jsonl")
-	rec.SuccessMarkers = []string{"all_events_ok"}
-	if out := ScoreRecord(rec); out.Category != CategoryAccepted {
-		t.Errorf("all_events_ok: want accepted, got %q", out.Category)
-	}
-
-	rec2 := mkRecord("data/_kb/outcomes.jsonl")
-	rec2.ValidationResults = map[string]any{"gap_signals": float64(2)}
-	if out := ScoreRecord(rec2); out.Category != CategoryPartiallyAccepted {
-		t.Errorf("gap_signals=2: want partial, got %q (reasons=%v)", out.Category, out.Reasons)
-	}
-
-	rec3 := mkRecord("data/_kb/outcomes.jsonl")
-	if out := ScoreRecord(rec3); out.Category != CategoryNeedsHumanReview {
-		t.Errorf("no signal: want needs_human, got %q", out.Category)
-	}
-}
-
-func TestScoreModeExperiment(t *testing.T) {
-	rec := mkRecord("data/_kb/mode_experiments.jsonl")
-	rec.Text = ""
-	if out := ScoreRecord(rec); out.Category != CategoryRejected {
-		t.Errorf("empty text: want rejected, got %q", out.Category)
-	}
-
-	rec.Text = "real response"
-	rec.LatencyMs = 130_000
-	if out := ScoreRecord(rec); out.Category != CategoryPartiallyAccepted {
-		t.Errorf("over latency cap: want partial, got %q", out.Category)
-	}
-
-	rec.LatencyMs = 5000
-	if out := ScoreRecord(rec); out.Category != CategoryNeedsHumanReview {
-		t.Errorf("normal: want needs_human (verdict not yet wired), got %q", out.Category)
-	}
-}
-
-func TestScoreExtraction_Defaults(t *testing.T) {
-	for _, src := range []string{
-		"data/_kb/distilled_facts.jsonl",
-		"data/_kb/distilled_procedures.jsonl",
-		"data/_kb/audit_facts.jsonl",
-		"data/_kb/observer_escalations.jsonl",
-	} {
-		rec := mkRecord(src)
-		out := ScoreRecord(rec)
-		if out.Category != CategoryNeedsHumanReview {
-			t.Errorf("%s: want needs_human_review, got %q", src, out.Category)
-		}
-	}
-}
-
-// ─── Contamination firewall — the safety-critical guarantee ───────
-
-func TestValidateSftSample_RejectsContaminationCategories(t *testing.T) {
-	for _, contaminated := range []SftQualityScore{
-		SftQualityScore("rejected"),
-		SftQualityScore("needs_human_review"),
-	} {
-		s := goodSftSample()
-		s.QualityScore = contaminated
-		err := ValidateSftSample(s)
-		if err == nil {
-			t.Errorf("contaminated quality_score=%q should fail validation", contaminated)
-			continue
-		}
-		if !errors.Is(err, ErrSftContamination) {
-			t.Errorf("contaminated %q: want errors.Is(err, ErrSftContamination), got %v", contaminated, err)
-		}
-	}
-}
-
-func TestValidateSftSample_AcceptsLegalCategories(t *testing.T) {
-	for _, legal := range []SftQualityScore{SftQualityAccepted, SftQualityPartiallyAccepted} {
-		s := goodSftSample()
-		s.QualityScore = legal
-		if err := ValidateSftSample(s); err != nil {
-			t.Errorf("legal quality_score=%q failed: %v", legal, err)
-		}
-	}
-}
-
-func TestValidateSftSample_RejectsTypoCategory(t *testing.T) {
-	s := goodSftSample()
-	s.QualityScore = "approved" // close to "accepted" but wrong
-	err := ValidateSftSample(s)
-	if err == nil {
-		t.Fatal("typo category should fail validation")
-	}
-	// Typo is NOT contamination — should be a regular ValidationError,
-	// not the firewall sentinel. This distinguishes "you typo'd" from
-	// "you broke the spec."
-	if errors.Is(err, ErrSftContamination) {
-		t.Error("typo should not surface as ErrSftContamination")
-	}
-}
-
-func TestValidateSftSample_RejectsEmptyPair(t *testing.T) {
-	s := goodSftSample()
-	s.Instruction = "  "
-	if err := ValidateSftSample(s); err == nil {
-		t.Error("whitespace-only instruction should fail")
-	}
-
-	s2 := goodSftSample()
-	s2.Response = ""
-	if err := ValidateSftSample(s2); err == nil {
-		t.Error("empty response should fail")
-	}
-}
-
-func TestValidateScoredRun_ReasonsRequired(t *testing.T) {
-	r := ScoredRun{
-		SchemaVersion:  ScoredRunSchemaVersion,
-		EvidenceRunID:  "x",
-		EvidenceTaskID: "y",
-		Category:       CategoryAccepted,
-		Reasons:        nil, // empty — must fail
-		ScoredAt:       "2026-04-29T12:00:00Z",
-		ScorerVersion:  ScorerVersion,
-		Provenance: Provenance{
-			SourceFile: "data/_kb/scrum_reviews.jsonl",
-			SigHash:    "abc",
-			RecordedAt: "2026-04-29T12:00:00Z",
-		},
-	}
-	err := ValidateScoredRun(r)
-	if err == nil {
-		t.Fatal("empty reasons should fail")
-	}
-	if !strings.Contains(err.Error(), "reasons") {
-		t.Errorf("error should mention reasons: %v", err)
-	}
-}
-
-func TestBuildScoredRun_DeterministicSigHash(t *testing.T) {
-	rec := mkRecord("data/_kb/scrum_reviews.jsonl")
-	rec.SuccessMarkers = []string{"accepted_on_attempt_1"}
-
-	r1, err := BuildScoredRun(rec, "data/scored-runs/2026/04/29/x.jsonl", 0, "2026-04-29T12:00:00Z")
-	if err != nil {
-		t.Fatal(err)
-	}
-	r2, err := BuildScoredRun(rec, "data/scored-runs/2026/04/29/x.jsonl", 0, "2026-04-29T12:00:00Z")
-	if err != nil {
-		t.Fatal(err)
-	}
-	if r1.Provenance.SigHash != r2.Provenance.SigHash {
-		t.Errorf("identical EvidenceRecord should produce identical sig_hash: %s vs %s",
-			r1.Provenance.SigHash, r2.Provenance.SigHash)
-	}
-	if r1.Category != CategoryAccepted {
-		t.Errorf("scored category: %q", r1.Category)
-	}
-	if r1.ScorerVersion != ScorerVersion {
-		t.Errorf("scorer version stamped wrong: %q", r1.ScorerVersion)
-	}
-}
-
-func TestScoreRecord_PureFunction_NoMutationOfInput(t *testing.T) {
-	// Belt-and-braces: the contract says "NO mutable state, identical
-	// input → identical output forever." Verify by scoring the same
-	// record twice and ensuring the input hasn't been touched.
-	rec := mkRecord("data/_kb/scrum_reviews.jsonl")
-	rec.SuccessMarkers = []string{"accepted_on_attempt_2"}
-	original := rec
-	out1 := ScoreRecord(rec)
-	out2 := ScoreRecord(rec)
-	if rec.RunID != original.RunID || len(rec.SuccessMarkers) != 1 {
-		t.Error("ScoreRecord mutated its input")
-	}
-	if out1.Category != out2.Category {
-		t.Error("ScoreRecord is non-deterministic")
-	}
-}
-
-// ─── Helpers ─────────────────────────────────────────────────────
-
-func goodSftSample() SftSample {
-	return SftSample{
-		SchemaVersion: SftSampleSchemaVersion,
-		ID:            "sft-1",
-		Instruction:   "summarize the diff",
-		Context:       "diff body...",
-		Response:      "the diff adds a function",
-		SourceRunID:   "run-1",
-		QualityScore:  SftQualityAccepted,
-		CreatedAt:     "2026-04-29T12:00:00Z",
-		Provenance: Provenance{
-			SourceFile: "data/scored-runs/2026/04/29/x.jsonl",
-			SigHash:    "deadbeef",
-			RecordedAt: "2026-04-29T12:00:01Z",
-		},
-	}
-}
-
-func reasonsContain(reasons []string, sub string) bool {
-	for _, r := range reasons {
-		if strings.Contains(r, sub) {
-			return true
-		}
-	}
-	return false
-}
--- a/internal/distillation/types.go
+++ b/internal/distillation/types.go
@ -1,484 +0,0 @@
-// Package distillation is the Go port of the Rust v1.0.0 distillation
-// substrate (frozen at e7636f2). Per ADR-001 #4: port LOGIC, not
-// bit-identical reproducibility.
-//
-// What this package owns (this commit):
-//   - The deterministic scorer: EvidenceRecord → ScoredRun
-//   - Score categories + scorer version constant
-//   - SftSample type + validator with the contamination firewall
-//     (the safety-critical piece — rejected/needs_human_review must
-//     NEVER ship to SFT)
-//
-// What's deferred to follow-up commits:
-//   - Materialization layer (file iteration, jsonl read/write,
-//     date-partitioned storage) — operational tooling on top of
-//     the scorer logic
-//   - export_preference, export_rag (other export shapes)
-//   - acceptance harness (the gate that locks v1.0.0)
-//   - replay, receipts, evidence-index builders
-//
-// The scorer + SftSample validator are the LOAD-BEARING pieces
-// per project_distillation_substrate.md memory. The rest is plumbing
-// that can land incrementally without changing the logic the
-// downstream learning loop depends on.
-
-package distillation
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"strings"
-	"time"
-)
-
-// ScoreCategory is one of the 4 deterministic verdicts. Matches Rust
-// SCORE_CATEGORIES exactly.
-type ScoreCategory string
-
-const (
-	CategoryAccepted          ScoreCategory = "accepted"
-	CategoryPartiallyAccepted ScoreCategory = "partially_accepted"
-	CategoryRejected          ScoreCategory = "rejected"
-	CategoryNeedsHumanReview  ScoreCategory = "needs_human_review"
-)
-
-// AllScoreCategories lists every legal category — used by validators.
-var AllScoreCategories = []ScoreCategory{
-	CategoryAccepted,
-	CategoryPartiallyAccepted,
-	CategoryRejected,
-	CategoryNeedsHumanReview,
-}
-
-// ScorerVersion is hardcoded — the deterministic-output contract
-// requires this. Bump the literal in the same commit as any scoring-
-// rule change so the version stamp moves atomically with logic.
-// Mirrors the Rust SCORER_VERSION (also v1.0.0 at e7636f2).
-const ScorerVersion = "v1.0.0"
-
-// SftQualityScore enumerates the categories LEGAL in SFT exports.
-// SFT_NEVER (defined below) is the inverse — categories that NEVER
-// ship to SFT under any flag combination. The contamination firewall
-// is enforced at the schema layer (ValidateSftSample) AND by the
-// exporter; defense in depth.
-type SftQualityScore string
-
-const (
-	SftQualityAccepted          SftQualityScore = "accepted"
-	SftQualityPartiallyAccepted SftQualityScore = "partially_accepted"
-)
-
-// SftQualityScores lists quality scores legal in SFT samples.
-// Default is SftQualityAccepted only; --include-partial CLI flag
-// expands to both. rejected and needs_human_review are NEVER legal.
-var SftQualityScores = []SftQualityScore{
-	SftQualityAccepted,
-	SftQualityPartiallyAccepted,
-}
-
-// SftNever is the contamination firewall: ScoreCategories that NEVER
-// ship to SFT under ANY caller flag. Enforced at the schema layer
-// (ValidateSftSample) AND at the exporter layer. Per the Rust
-// e7636f2 spec: "Hard non-negotiable: this set never expands. If you
-// find yourself adding 'needs_human_review' or 'rejected' here, stop
-// — that's the contamination the spec forbids."
-//
-// Exported so callers AND the validator share the same source of
-// truth. Modifying this constant changes the contract; reviewers
-// should treat any commit that touches it as a security review.
-var SftNever = []ScoreCategory{
-	CategoryRejected,
-	CategoryNeedsHumanReview,
-}
-
-// SftSampleSchemaVersion bumps when the on-wire SftSample shape
-// changes incompatibly. Match the Rust SFT_SAMPLE_SCHEMA_VERSION.
-const SftSampleSchemaVersion = 1
-
-// ScoredRunSchemaVersion bumps when the on-wire ScoredRun shape
-// changes incompatibly. Match the Rust SCORED_RUN_SCHEMA_VERSION.
-const ScoredRunSchemaVersion = 1
-
-// EvidenceSchemaVersion mirrors the Rust EVIDENCE_SCHEMA_VERSION.
-// This package consumes EvidenceRecord; producing it is a separate
-// concern (the materialization layer not yet ported).
-const EvidenceSchemaVersion = 1
-
-// ModelRole categorizes the kind of model output represented by an
-// EvidenceRecord. Used by the SFT exporter to filter "real model
-// output" from pure-extraction rows.
-type ModelRole string
-
-const (
-	RoleExecutor    ModelRole = "executor"
-	RoleReviewer    ModelRole = "reviewer"
-	RoleExtractor   ModelRole = "extractor"
-	RoleVerifier    ModelRole = "verifier"
-	RoleCategorizer ModelRole = "categorizer"
-	RoleTiebreaker  ModelRole = "tiebreaker"
-	RoleApplier     ModelRole = "applier"
-	RoleEmbedder    ModelRole = "embedder"
-	RoleOther       ModelRole = "other"
-)
-
-// Provenance is the source-linkage every distillation record carries.
-// SourceFile is required (no record without source linkage); other
-// fields are best-effort for de-duplication and trace-back.
-type Provenance struct {
-	SourceFile  string `json:"source_file"`
-	LineOffset  int64  `json:"line_offset,omitempty"`
-	SigHash     string `json:"sig_hash"`
-	RecordedAt  string `json:"recorded_at"` // ISO 8601
-}
-
-// ObserverVerdict is what an observer returned for an executor's
-// output. Matches the Rust enum but as a string type for JSON
-// flexibility.
-type ObserverVerdict string
-
-const (
-	VerdictAccept ObserverVerdict = "accept"
-	VerdictReject ObserverVerdict = "reject"
-	VerdictCycle  ObserverVerdict = "cycle"
-)
-
-// EvidenceRecord is one row in the canonical evidence stream.
-// Producing it (transforms from raw KB streams) is a separate
-// concern; this package consumes it.
-//
-// Fields mirror the Rust EvidenceRecord at e7636f2. Optional fields
-// use Go pointers / slices so missing-vs-empty stays distinguishable
-// for the scorer's heuristics.
-type EvidenceRecord struct {
-	RunID         string `json:"run_id"`
-	TaskID        string `json:"task_id"`
-	Timestamp     string `json:"timestamp"`
-	SchemaVersion int    `json:"schema_version"`
-
-	Provenance Provenance `json:"provenance"`
-
-	ModelName     string    `json:"model_name,omitempty"`
-	ModelProvider string    `json:"model_provider,omitempty"`
-	ModelRole     ModelRole `json:"model_role,omitempty"`
-
-	InputHash  string `json:"input_hash,omitempty"`
-	OutputHash string `json:"output_hash,omitempty"`
-
-	SourceFiles []string `json:"source_files,omitempty"`
-	CommandsRun []string `json:"commands_run,omitempty"`
-
-	RetrievedContext *RetrievedContext `json:"retrieved_context,omitempty"`
-
-	ObserverNotes      []string         `json:"observer_notes,omitempty"`
-	ObserverVerdict    ObserverVerdict  `json:"observer_verdict,omitempty"`
-	ObserverConfidence float64          `json:"observer_confidence,omitempty"`
-	ScratchpadSummary  string           `json:"scratchpad_summary,omitempty"`
-
-	SuccessMarkers []string `json:"success_markers,omitempty"`
-	FailureMarkers []string `json:"failure_markers,omitempty"`
-
-	ValidationResults map[string]any `json:"validation_results,omitempty"`
-
-	HumanOverride *HumanOverride `json:"human_override,omitempty"`
-
-	CostUSD   float64 `json:"cost_usd,omitempty"`
-	LatencyMs int64   `json:"latency_ms,omitempty"`
-	Text      string  `json:"text,omitempty"`
-}
-
-// RetrievedContext captures what the model saw via retrieval. Matches
-// the Rust shape exactly so the JSON round-trips byte-identical (per
-// ADR-001 #4 "logic, not bit-identical" — but on-wire compatibility
-// is desirable for tooling that consumes EvidenceRecord JSONL).
-type RetrievedContext struct {
-	MatrixCorpora             []string `json:"matrix_corpora,omitempty"`
-	MatrixHits                int      `json:"matrix_hits,omitempty"`
-	MatrixChunksKept          int      `json:"matrix_chunks_kept,omitempty"`
-	MatrixChunksDropped       int      `json:"matrix_chunks_dropped,omitempty"`
-	PathwayFingerprintsSeen   int      `json:"pathway_fingerprints_seen,omitempty"`
-}
-
-// HumanOverride captures a human-in-the-loop decision overriding the
-// scorer's verdict. Recorded but doesn't change the scorer's output;
-// downstream consumers (UI, distillation acceptance) decide how to
-// treat it.
-type HumanOverride struct {
-	Overrider     string `json:"overrider"`
-	Decision      string `json:"decision"` // accept|reject|needs_review
-	Reason        string `json:"reason"`
-	OverriddenAt  string `json:"overridden_at"`
-}
-
-// SubScores carries the deterministic scorer's intermediate signals
-// alongside the final ScoreCategory. Persisted on every ScoredRun
-// so a downstream UI can show "why" without re-running the scorer.
-type SubScores struct {
-	CargoGreen              *bool  `json:"cargo_green,omitempty"`
-	AnchorGrounding         *float64 `json:"anchor_grounding,omitempty"`
-	SchemaValid             *bool  `json:"schema_valid,omitempty"`
-	PathwayReplaySucceeded  *bool  `json:"pathway_replay_succeeded,omitempty"`
-	ObserverVerdict         ObserverVerdict `json:"observer_verdict,omitempty"`
-	AcceptedOnAttempt       *int   `json:"accepted_on_attempt,omitempty"`
-	// Extra fields the Rust schema accepted as `[key: string]: unknown`.
-	// Captured here as a free-form map so future signals don't require
-	// type-system changes.
-	Extras map[string]any `json:"-"`
-}
-
-// ScoredRun is the deterministic scorer's output. One per
-// EvidenceRecord. Provenance ties back to the materialized evidence
-// row (not the raw source stream).
-type ScoredRun struct {
-	SchemaVersion    int           `json:"schema_version"`
-	EvidenceRunID    string        `json:"evidence_run_id"`
-	EvidenceTaskID   string        `json:"evidence_task_id"`
-	Category         ScoreCategory `json:"category"`
-	Reasons          []string      `json:"reasons"` // non-empty
-	ScoredAt         string        `json:"scored_at"`
-	ScorerVersion    string        `json:"scorer_version"`
-	SubScores        *SubScores    `json:"sub_scores,omitempty"`
-	Provenance       Provenance    `json:"provenance"`
-}
-
-// SftSample is one entry in exports/sft/instruction_response.jsonl.
-// The contamination firewall lives in ValidateSftSample.
-type SftSample struct {
-	SchemaVersion int             `json:"schema_version"`
-	ID            string          `json:"id"`
-	Instruction   string          `json:"instruction"`
-	Context       string          `json:"context"` // empty allowed; null/missing not
-	Response      string          `json:"response"`
-	SourceRunID   string          `json:"source_run_id"`
-	QualityScore  SftQualityScore `json:"quality_score"`
-	CreatedAt     string          `json:"created_at"`
-	Provenance    Provenance      `json:"provenance"`
-}
-
-// ─── Validators ──────────────────────────────────────────────────
-
-// ValidationError is a single field-level violation.
-type ValidationError struct {
-	Field   string
-	Message string
-}
-
-func (e ValidationError) Error() string {
-	return fmt.Sprintf("%s: %s", e.Field, e.Message)
-}
-
-// ValidationErrors is the joinable error returned by the validators
-// when one or more fields violate the schema.
-type ValidationErrors []ValidationError
-
-func (es ValidationErrors) Error() string {
-	if len(es) == 0 {
-		return "no errors"
-	}
-	parts := make([]string, len(es))
-	for i, e := range es {
-		parts[i] = e.Error()
-	}
-	return strings.Join(parts, "; ")
-}
-
-// HasErrors returns true when one or more errors are present.
-func (es ValidationErrors) HasErrors() bool { return len(es) > 0 }
-
-// ValidateScoredRun mirrors the Rust validateScoredRun. Returns nil
-// on success or a ValidationErrors with the field-level violations.
-func ValidateScoredRun(r ScoredRun) error {
-	var errs ValidationErrors
-	if r.SchemaVersion != ScoredRunSchemaVersion {
-		errs = append(errs, ValidationError{
-			"schema_version",
-			fmt.Sprintf("expected %d, got %d", ScoredRunSchemaVersion, r.SchemaVersion),
-		})
-	}
-	if r.EvidenceRunID == "" {
-		errs = append(errs, ValidationError{"evidence_run_id", "must be non-empty"})
-	}
-	if r.EvidenceTaskID == "" {
-		errs = append(errs, ValidationError{"evidence_task_id", "must be non-empty"})
-	}
-	if !validISOTimestamp(r.ScoredAt) {
-		errs = append(errs, ValidationError{"scored_at", "must be ISO 8601 timestamp"})
-	}
-	if r.ScorerVersion == "" {
-		errs = append(errs, ValidationError{"scorer_version", "must be non-empty"})
-	}
-	if len(r.Reasons) == 0 {
-		errs = append(errs, ValidationError{"reasons", "must be non-empty (every score needs a reason)"})
-	}
-	if !isValidCategory(r.Category) {
-		errs = append(errs, ValidationError{"category", fmt.Sprintf("must be one of %v, got %q", AllScoreCategories, r.Category)})
-	}
-	if err := validateProvenance(r.Provenance, "provenance"); err != nil {
-		errs = append(errs, err...)
-	}
-	if r.SubScores != nil && r.SubScores.AnchorGrounding != nil {
-		ag := *r.SubScores.AnchorGrounding
-		if ag < 0 || ag > 1 {
-			errs = append(errs, ValidationError{"sub_scores.anchor_grounding", "must be in [0, 1]"})
-		}
-	}
-	if errs.HasErrors() {
-		return errs
-	}
-	return nil
-}
-
-// ValidateSftSample is the contamination firewall. Returns ErrSftContamination
-// (wrapped) when quality_score is in SftNever — which is the safety-critical
-// guarantee the spec calls non-negotiable.
-//
-// Other field violations come back as ValidationErrors.
-func ValidateSftSample(s SftSample) error {
-	var errs ValidationErrors
-	if s.SchemaVersion != SftSampleSchemaVersion {
-		errs = append(errs, ValidationError{
-			"schema_version",
-			fmt.Sprintf("expected %d, got %d", SftSampleSchemaVersion, s.SchemaVersion),
-		})
-	}
-	if s.ID == "" {
-		errs = append(errs, ValidationError{"id", "must be non-empty"})
-	}
-	if strings.TrimSpace(s.Instruction) == "" {
-		errs = append(errs, ValidationError{"instruction", "must be non-whitespace (no empty pairs)"})
-	}
-	if strings.TrimSpace(s.Response) == "" {
-		errs = append(errs, ValidationError{"response", "must be non-whitespace (no empty pairs)"})
-	}
-	// Context is required-string but empty is allowed.
-	// (Field is always typed as string in Go, so the only way to
-	// distinguish "set" from "missing" was via the JSON layer; here
-	// empty is fine.)
-	if s.SourceRunID == "" {
-		errs = append(errs, ValidationError{"source_run_id", "must be non-empty"})
-	}
-	if !validISOTimestamp(s.CreatedAt) {
-		errs = append(errs, ValidationError{"created_at", "must be ISO 8601 timestamp"})
-	}
-	if err := validateProvenance(s.Provenance, "provenance"); err != nil {
-		errs = append(errs, err...)
-	}
-
-	// Contamination firewall. Hard non-negotiable per the spec.
-	if !isLegalSftQualityScore(s.QualityScore) {
-		// If it's in SftNever, surface the firewall sentinel — callers
-		// can errors.Is(err, ErrSftContamination) to reliably detect
-		// "the spec said never" as opposed to "you typo'd a category."
-		if isContaminationCategory(s.QualityScore) {
-			return fmt.Errorf("%w: quality_score %q in SftNever (rejected/needs_human_review never legal in SFT)",
-				ErrSftContamination, s.QualityScore)
-		}
-		errs = append(errs, ValidationError{
-			"quality_score",
-			fmt.Sprintf("must be one of %v, got %q", SftQualityScores, s.QualityScore),
-		})
-	}
-
-	if errs.HasErrors() {
-		return errs
-	}
-	return nil
-}
-
-// ErrSftContamination is the firewall sentinel — when ValidateSftSample
-// rejects a sample because its quality_score is in SftNever, callers
-// can errors.Is(err, ErrSftContamination) to reliably distinguish
-// "spec violation" from "typo'd category."
-var ErrSftContamination = errors.New("distillation: SFT contamination — quality_score in SftNever")
-
-// ─── Internal helpers ────────────────────────────────────────────
-
-func isValidCategory(c ScoreCategory) bool {
-	for _, v := range AllScoreCategories {
-		if c == v {
-			return true
-		}
-	}
-	return false
-}
-
-func isLegalSftQualityScore(q SftQualityScore) bool {
-	for _, v := range SftQualityScores {
-		if q == v {
-			return true
-		}
-	}
-	return false
-}
-
-func isContaminationCategory(q SftQualityScore) bool {
-	// Compare as ScoreCategory — the on-wire string is the same; this
-	// just guards the firewall against typos that happen to match
-	// SftNever string-wise.
-	for _, v := range SftNever {
-		if string(v) == string(q) {
-			return true
-		}
-	}
-	return false
-}
-
-func validISOTimestamp(s string) bool {
-	if s == "" {
-		return false
-	}
-	// time.Parse with RFC3339 covers most ISO 8601. We accept both
-	// the basic and nano variants since the Rust producers vary.
-	if _, err := time.Parse(time.RFC3339, s); err == nil {
-		return true
-	}
-	if _, err := time.Parse(time.RFC3339Nano, s); err == nil {
-		return true
-	}
-	return false
-}
-
-func validateProvenance(p Provenance, field string) ValidationErrors {
-	var errs ValidationErrors
-	if p.SourceFile == "" {
-		errs = append(errs, ValidationError{field + ".source_file", "must be non-empty"})
-	}
-	if p.SigHash == "" {
-		errs = append(errs, ValidationError{field + ".sig_hash", "must be non-empty"})
-	}
-	if !validISOTimestamp(p.RecordedAt) {
-		errs = append(errs, ValidationError{field + ".recorded_at", "must be ISO 8601 timestamp"})
-	}
-	return errs
-}
-
-// MarshalSubScores is a shim — Go's encoding/json doesn't merge a
-// "rest" map into the struct's JSON output by default. Callers that
-// need Extras serialized into the same object can use this helper.
-func MarshalSubScores(s *SubScores) ([]byte, error) {
-	if s == nil {
-		return []byte("null"), nil
-	}
-	// First marshal the typed fields normally.
-	type alias SubScores
-	base, err := json.Marshal((*alias)(s))
-	if err != nil {
-		return nil, err
-	}
-	if len(s.Extras) == 0 {
-		return base, nil
-	}
-	// Decode back to a map, merge Extras, re-encode. Less efficient
-	// but keeps the field semantics correct (typed fields override
-	// extras on collision — first-write-wins for known keys).
-	var combined map[string]any
-	if err := json.Unmarshal(base, &combined); err != nil {
-		return nil, err
-	}
-	for k, v := range s.Extras {
-		if _, exists := combined[k]; !exists {
-			combined[k] = v
-		}
-	}
-	return json.Marshal(combined)
-}
--- a/internal/drift/drift.go
+++ b/internal/drift/drift.go
@ -1,151 +0,0 @@
-// Package drift quantifies when historical decisions stop matching
-// current reality. Per the PRD's 5-loop substrate, this is loop 5
-// (drift) — distinct from the rating+distillation loop because
-// drift is about MEASUREMENT, not learning. The learning loop says
-// "this match worked, remember it"; the drift loop says "the
-// playbook entry from 4 months ago — does it still match what the
-// substrate would surface today?"
-//
-// First-shipped drift shape: SCORER drift. When the deterministic
-// scorer's logic changes (ScorerVersion bumped), historical
-// ScoredRuns may no longer match what the current scorer would
-// produce on the same EvidenceRecord. ComputeScorerDrift re-runs
-// the current scorer over a slice of (EvidenceRecord, persisted
-// category) pairs and reports mismatches.
-//
-// Why this matters: the rating+distillation loop only learns
-// forward. Without a drift quantifier, a scorer-rule change
-// silently invalidates the historical training data feeding the
-// loop. With drift quantification, a rule change surfaces a
-// concrete number ("847 of 4701 historical scoredruns now
-// disagree") that triggers a re-score-and-retrain cycle rather
-// than letting the substrate quietly rot.
-//
-// Future drift shapes (not in this commit):
-//   - PLAYBOOK drift: for each playbook entry, re-run its query
-//     through current matrix-search; if the recorded answer is no
-//     longer in top-K, the world has moved.
-//   - EMBEDDING drift: KS-test on the distribution of embedding
-//     vectors at T1 vs T2; large shifts = the corpus has changed
-//     materially.
-//   - AUDIT BASELINE drift: track how PR audit verdicts shift over
-//     scorer/auditor versions; matches the Rust audit_baselines.jsonl
-//     longitudinal signal.
-
-package drift
-
-import (
-	"sort"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
-)
-
-// ScorerDriftEntry is one mismatch — a historical (record, category)
-// pair where the current scorer disagrees with the persisted
-// verdict. Reasons captures the current scorer's explanation so
-// operators can see WHY the verdict changed.
-type ScorerDriftEntry struct {
-	EvidenceRunID    string                       `json:"evidence_run_id"`
-	EvidenceTaskID   string                       `json:"evidence_task_id"`
-	PersistedCategory distillation.ScoreCategory  `json:"persisted_category"`
-	CurrentCategory   distillation.ScoreCategory  `json:"current_category"`
-	CurrentReasons    []string                    `json:"current_reasons"`
-	SourceFile        string                      `json:"source_file"`
-}
-
-// CategoryShift is one cell in the drift matrix — "X persisted
-// records that NOW classify as Y." e.g. "12 records that were
-// 'rejected' yesterday are 'partially_accepted' today."
-type CategoryShift struct {
-	From  distillation.ScoreCategory `json:"from"`
-	To    distillation.ScoreCategory `json:"to"`
-	Count int                        `json:"count"`
-}
-
-// ScorerDriftReport is the summary returned by ComputeScorerDrift.
-// The shape is intentionally machine-readable so a downstream
-// dashboard / alerting layer can threshold on Drifted / TotalChecked
-// without parsing the entries list.
-type ScorerDriftReport struct {
-	ScorerVersion string             `json:"scorer_version"` // current scorer's version
-	TotalChecked  int                `json:"total_checked"`
-	Matched       int                `json:"matched"`        // current == persisted
-	Drifted       int                `json:"drifted"`        // current != persisted
-	DriftRate     float64            `json:"drift_rate"`     // Drifted / TotalChecked
-	ShiftMatrix   []CategoryShift    `json:"shift_matrix,omitempty"`
-	Entries       []ScorerDriftEntry `json:"entries,omitempty"` // mismatches only
-}
-
-// ScorerDriftInput is one (record, persisted_category) pair to check.
-// Caller is responsible for materializing these from disk; this
-// package is pure compute.
-type ScorerDriftInput struct {
-	Record            distillation.EvidenceRecord
-	PersistedCategory distillation.ScoreCategory
-}
-
-// ComputeScorerDrift re-runs distillation.ScoreRecord over each
-// input and reports mismatches. Pure function — no I/O. The caller
-// supplies the inputs (typically by reading a directory of
-// scored-runs JSONL alongside the corresponding evidence JSONL).
-//
-// IncludeEntries controls whether the per-mismatch detail list is
-// populated. For large corpora (e.g. 4,701 fill events) the
-// summary numbers may be all the caller needs; setting this to
-// false avoids allocating the entries slice.
-func ComputeScorerDrift(inputs []ScorerDriftInput, includeEntries bool) ScorerDriftReport {
-	report := ScorerDriftReport{
-		ScorerVersion: distillation.ScorerVersion,
-		TotalChecked:  len(inputs),
-	}
-
-	shiftCounts := make(map[[2]distillation.ScoreCategory]int)
-
-	for _, in := range inputs {
-		out := distillation.ScoreRecord(in.Record)
-		if out.Category == in.PersistedCategory {
-			report.Matched++
-			continue
-		}
-		report.Drifted++
-		shiftCounts[[2]distillation.ScoreCategory{in.PersistedCategory, out.Category}]++
-		if includeEntries {
-			report.Entries = append(report.Entries, ScorerDriftEntry{
-				EvidenceRunID:     in.Record.RunID,
-				EvidenceTaskID:    in.Record.TaskID,
-				PersistedCategory: in.PersistedCategory,
-				CurrentCategory:   out.Category,
-				CurrentReasons:    out.Reasons,
-				SourceFile:        in.Record.Provenance.SourceFile,
-			})
-		}
-	}
-
-	if report.TotalChecked > 0 {
-		report.DriftRate = float64(report.Drifted) / float64(report.TotalChecked)
-	}
-
-	if len(shiftCounts) > 0 {
-		report.ShiftMatrix = make([]CategoryShift, 0, len(shiftCounts))
-		for k, v := range shiftCounts {
-			report.ShiftMatrix = append(report.ShiftMatrix, CategoryShift{
-				From: k[0], To: k[1], Count: v,
-			})
-		}
-		// Sort: largest shifts first, then alphabetical-ish for ties.
-		// Stable ordering matters for downstream display and JSON
-		// determinism in tests.
-		sort.Slice(report.ShiftMatrix, func(i, j int) bool {
-			a, b := report.ShiftMatrix[i], report.ShiftMatrix[j]
-			if a.Count != b.Count {
-				return a.Count > b.Count
-			}
-			if a.From != b.From {
-				return string(a.From) < string(b.From)
-			}
-			return string(a.To) < string(b.To)
-		})
-	}
-
-	return report
-}
--- a/internal/drift/drift_test.go
+++ b/internal/drift/drift_test.go
@ -1,155 +0,0 @@
-package drift
-
-import (
-	"testing"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
-)
-
-func mkInput(sourceFile string, persisted distillation.ScoreCategory, succ []string) ScorerDriftInput {
-	return ScorerDriftInput{
-		Record: distillation.EvidenceRecord{
-			RunID:         "run-x",
-			TaskID:        "task-x",
-			Timestamp:     "2026-01-01T00:00:00Z",
-			SchemaVersion: distillation.EvidenceSchemaVersion,
-			Provenance: distillation.Provenance{
-				SourceFile: sourceFile,
-				SigHash:    "abc",
-				RecordedAt: "2026-01-01T00:00:01Z",
-			},
-			SuccessMarkers: succ,
-		},
-		PersistedCategory: persisted,
-	}
-}
-
-func TestComputeScorerDrift_NoDrift(t *testing.T) {
-	// All inputs have persisted=accepted matching what the current
-	// scrum_review scorer produces on accepted_on_attempt_1.
-	inputs := []ScorerDriftInput{
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_1"}),
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_1"}),
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_1"}),
-	}
-	r := ComputeScorerDrift(inputs, true)
-	if r.TotalChecked != 3 || r.Matched != 3 || r.Drifted != 0 {
-		t.Errorf("no-drift case: total=%d matched=%d drifted=%d",
-			r.TotalChecked, r.Matched, r.Drifted)
-	}
-	if r.DriftRate != 0 {
-		t.Errorf("drift_rate: want 0, got %v", r.DriftRate)
-	}
-	if len(r.Entries) != 0 {
-		t.Errorf("entries: want 0, got %d", len(r.Entries))
-	}
-}
-
-func TestComputeScorerDrift_ShiftDetected(t *testing.T) {
-	// Simulate a historical labeling where the persisted scorer
-	// thought attempt-2 acceptances were "accepted" but the current
-	// scorer (this code) categorizes them as "partially_accepted".
-	// Drift should fire on those.
-	inputs := []ScorerDriftInput{
-		// Match: attempt 1 → accepted (still)
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_1"}),
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_1"}),
-		// Drift: persisted thought attempt-2 was accepted, today's scorer says partial
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_3"}),
-		// Drift: persisted thought attempt-5 was accepted, today's scorer says partial (high-cost)
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_5"}),
-	}
-	r := ComputeScorerDrift(inputs, true)
-	if r.TotalChecked != 5 {
-		t.Errorf("total: want 5, got %d", r.TotalChecked)
-	}
-	if r.Matched != 2 {
-		t.Errorf("matched: want 2, got %d", r.Matched)
-	}
-	if r.Drifted != 3 {
-		t.Errorf("drifted: want 3, got %d", r.Drifted)
-	}
-	wantRate := 3.0 / 5.0
-	if r.DriftRate < wantRate-1e-9 || r.DriftRate > wantRate+1e-9 {
-		t.Errorf("drift_rate: want %v, got %v", wantRate, r.DriftRate)
-	}
-	if len(r.Entries) != 3 {
-		t.Errorf("entries: want 3 mismatches, got %d", len(r.Entries))
-	}
-	// Shift matrix should show one shift: accepted → partially_accepted, count=3
-	if len(r.ShiftMatrix) != 1 {
-		t.Errorf("shift matrix: want 1 shift, got %d (%+v)", len(r.ShiftMatrix), r.ShiftMatrix)
-	} else {
-		s := r.ShiftMatrix[0]
-		if s.From != distillation.CategoryAccepted ||
-			s.To != distillation.CategoryPartiallyAccepted ||
-			s.Count != 3 {
-			t.Errorf("shift: got %+v", s)
-		}
-	}
-}
-
-func TestComputeScorerDrift_MultipleShiftsSortedByCount(t *testing.T) {
-	inputs := []ScorerDriftInput{
-		// 3× accepted→partial
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
-		// 1× rejected→needs_human (no marker)
-		{
-			Record: distillation.EvidenceRecord{
-				RunID: "r1", TaskID: "t1",
-				Timestamp:     "2026-01-01T00:00:00Z",
-				SchemaVersion: distillation.EvidenceSchemaVersion,
-				Provenance: distillation.Provenance{
-					SourceFile: "data/_kb/scrum_reviews.jsonl",
-					SigHash:    "x", RecordedAt: "2026-01-01T00:00:01Z",
-				},
-				// no markers → needs_human_review
-			},
-			PersistedCategory: distillation.CategoryRejected,
-		},
-	}
-	r := ComputeScorerDrift(inputs, false)
-	if r.Drifted != 4 {
-		t.Errorf("drifted: want 4, got %d", r.Drifted)
-	}
-	if len(r.ShiftMatrix) != 2 {
-		t.Errorf("shift matrix: want 2 distinct shifts, got %d", len(r.ShiftMatrix))
-	}
-	// Sorted by count desc, so accepted→partial (3) before rejected→needs_human (1)
-	if r.ShiftMatrix[0].Count != 3 || r.ShiftMatrix[1].Count != 1 {
-		t.Errorf("shift order wrong: got %+v", r.ShiftMatrix)
-	}
-}
-
-func TestComputeScorerDrift_IncludeEntriesFalse(t *testing.T) {
-	inputs := []ScorerDriftInput{
-		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
-	}
-	r := ComputeScorerDrift(inputs, false)
-	if r.Drifted != 1 {
-		t.Errorf("drifted: want 1, got %d", r.Drifted)
-	}
-	if len(r.Entries) != 0 {
-		t.Errorf("entries: want 0 when includeEntries=false, got %d", len(r.Entries))
-	}
-}
-
-func TestComputeScorerDrift_EmptyInput(t *testing.T) {
-	r := ComputeScorerDrift(nil, true)
-	if r.TotalChecked != 0 || r.Drifted != 0 || r.Matched != 0 {
-		t.Errorf("empty: want all-zero, got %+v", r)
-	}
-	if r.DriftRate != 0 {
-		t.Errorf("drift_rate on empty: want 0, got %v", r.DriftRate)
-	}
-}
-
-func TestComputeScorerDrift_ScorerVersionStamped(t *testing.T) {
-	r := ComputeScorerDrift(nil, false)
-	if r.ScorerVersion != distillation.ScorerVersion {
-		t.Errorf("scorer_version: want %q, got %q", distillation.ScorerVersion, r.ScorerVersion)
-	}
-}
--- a/internal/matrix/downgrade.go
+++ b/internal/matrix/downgrade.go
@ -1,137 +0,0 @@
-package matrix
-
-// Strong-model auto-downgrade gate. Port of mode.rs::execute's
-// downgrade block (Rust system, 2026-04-26 pass5).
-//
-// What it does: if the caller resolves `codereview_lakehouse` against
-// a strong model and didn't force the mode, flip to
-// `codereview_isolation` so we don't pollute the prompt with matrix
-// chunks the model demonstrably does better without.
-//
-// Why: pass5 variance test on x-ai/grok-4.1-fast — composing matrix
-// corpora into codereview_lakehouse LOST 5/5 head-to-head reps
-// against matrix-free codereview_isolation, p=0.031. Strong models
-// have enough native capacity that bug fingerprints + adversarial
-// framing + file content carry them; matrix chunks displace
-// depth-of-analysis.
-//
-// Defaults: assume "strong" (downgrade matrix off). The explicit
-// IsWeakModel predicate keeps the weak-list small — anything
-// matching `:free` (OpenRouter free tier) or the local last-resort
-// rungs (qwen3.5/qwen3) stays on the full lakehouse path where
-// matrix demonstrably helped during the 2026-04-26 free-tier
-// bake-off.
-
-import (
-	"os"
-	"strings"
-)
-
-// Mode constants — exported so callers don't string-literal them.
-const (
-	ModeCodeReviewLakehouse = "codereview_lakehouse"
-	ModeCodeReviewIsolation = "codereview_isolation"
-)
-
-// EnvForceFullEnrichment is the env var that bypasses the gate for
-// diagnostic runs ("LH_FORCE_FULL_ENRICHMENT=1" or "true").
-const EnvForceFullEnrichment = "LH_FORCE_FULL_ENRICHMENT"
-
-// IsWeakModel returns true for models matrix-corpus composition
-// demonstrably helped during the 2026-04-26 pass5 bake-off. Strong
-// models (default) get matrix dropped to avoid the "composed lost
-// 5/5 vs isolation" effect.
-//
-// Weak signals:
-//   - `:free` suffix (OpenRouter free tier, e.g. `gpt-oss-120b:free`)
-//   - `:free/` infix (handles routing-prefixed names like `or:free/x`)
-//   - `qwen3.5:latest` / `qwen3:latest` — local last-resort rung
-//
-// Add new weak models by extending this function alongside variance
-// data that justifies it.
-func IsWeakModel(model string) bool {
-	if strings.HasSuffix(model, ":free") || strings.Contains(model, ":free/") {
-		return true
-	}
-	switch model {
-	case "qwen3.5:latest", "qwen3:latest":
-		return true
-	}
-	return false
-}
-
-// DowngradeInput is what MaybeDowngrade evaluates.
-//
-// ForcedMode: caller explicitly set their mode (mirrors Rust's
-// req.force_mode.is_some()) — treated as opt-in to the chosen mode,
-// skips the downgrade. Experiments need exact-mode control.
-//
-// ForceFullOverride: the LH_FORCE_FULL_ENRICHMENT escape hatch —
-// usually populated from the env var via NewDowngradeInputFromEnv,
-// but the field is explicit so callers can pass it from a config or
-// test deterministically.
-type DowngradeInput struct {
-	Mode              string
-	Model             string
-	ForcedMode        bool
-	ForceFullOverride bool
-}
-
-// DowngradeDecision is the output. DowngradedFrom is non-empty
-// only when a downgrade fired — callers should record it for audit
-// (matches the Rust EnrichmentSources.downgraded_from field).
-//
-// Reason is a short human-readable string for logs/responses;
-// useful for debugging "why did/didn't the gate fire."
-type DowngradeDecision struct {
-	Mode           string `json:"mode"`
-	DowngradedFrom string `json:"downgraded_from,omitempty"`
-	Reason         string `json:"reason"`
-}
-
-// MaybeDowngrade applies the strong-model auto-downgrade gate.
-// Pure function; no env reads. For env-driven callers see
-// NewDowngradeInputFromEnv.
-func MaybeDowngrade(in DowngradeInput) DowngradeDecision {
-	out := DowngradeDecision{Mode: in.Mode}
-	if in.Mode != ModeCodeReviewLakehouse {
-		out.Reason = "mode is not " + ModeCodeReviewLakehouse + "; gate not applicable"
-		return out
-	}
-	if in.ForcedMode {
-		out.Reason = "caller forced mode; skip downgrade"
-		return out
-	}
-	if in.ForceFullOverride {
-		out.Reason = EnvForceFullEnrichment + " bypass"
-		return out
-	}
-	if IsWeakModel(in.Model) {
-		out.Reason = "weak model; matrix composition demonstrably helped (2026-04-26 free-tier bake-off)"
-		return out
-	}
-	// Downgrade fires.
-	out.Mode = ModeCodeReviewIsolation
-	out.DowngradedFrom = ModeCodeReviewLakehouse
-	out.Reason = "strong model; matrix composes anti-additively (pass5: composed lost 5/5 vs isolation on grok-4.1-fast, p=0.031)"
-	return out
-}
-
-// NewDowngradeInputFromEnv is a convenience that reads
-// LH_FORCE_FULL_ENRICHMENT from the process environment and returns
-// a populated DowngradeInput. Most production callers want this;
-// tests should construct DowngradeInput directly to avoid env
-// pollution.
-func NewDowngradeInputFromEnv(mode, model string, forcedMode bool) DowngradeInput {
-	return DowngradeInput{
-		Mode:              mode,
-		Model:             model,
-		ForcedMode:        forcedMode,
-		ForceFullOverride: envForceFullEnrichment(),
-	}
-}
-
-func envForceFullEnrichment() bool {
-	v := strings.ToLower(strings.TrimSpace(os.Getenv(EnvForceFullEnrichment)))
-	return v == "1" || v == "true"
-}
--- a/internal/matrix/downgrade_test.go
+++ b/internal/matrix/downgrade_test.go
@ -1,100 +0,0 @@
-package matrix
-
-import "testing"
-
-func TestIsWeakModel(t *testing.T) {
-	cases := []struct {
-		model string
-		weak  bool
-	}{
-		// :free suffix → weak
-		{"openai/gpt-4o:free", true},
-		{"meta-llama/llama-3-8b:free", true},
-		// :free/ infix (routing-prefixed names)
-		{"openrouter:free/anthropic/claude-3.5-sonnet", true},
-		// Local last-resort rungs
-		{"qwen3.5:latest", true},
-		{"qwen3:latest", true},
-		// Strong by default
-		{"x-ai/grok-4.1-fast", false},
-		{"opencode/claude-opus-4-7", false},
-		{"openai/gpt-5", false},
-		{"qwen3-coder:480b", false}, // not the :latest tag
-		{"", false},
-	}
-	for _, c := range cases {
-		got := IsWeakModel(c.model)
-		if got != c.weak {
-			t.Errorf("IsWeakModel(%q): want %v, got %v", c.model, c.weak, got)
-		}
-	}
-}
-
-func TestMaybeDowngrade_TruthTable(t *testing.T) {
-	cases := []struct {
-		name string
-		in   DowngradeInput
-		want DowngradeDecision
-	}{
-		{
-			name: "downgrade fires: lakehouse mode + strong model + no force",
-			in:   DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast"},
-			want: DowngradeDecision{
-				Mode:           ModeCodeReviewIsolation,
-				DowngradedFrom: ModeCodeReviewLakehouse,
-			},
-		},
-		{
-			name: "no downgrade: forced mode bypasses gate",
-			in:   DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast", ForcedMode: true},
-			want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
-		},
-		{
-			name: "no downgrade: env override bypasses gate",
-			in:   DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "x-ai/grok-4.1-fast", ForceFullOverride: true},
-			want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
-		},
-		{
-			name: "no downgrade: weak model keeps lakehouse",
-			in:   DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "openai/gpt-4o:free"},
-			want: DowngradeDecision{Mode: ModeCodeReviewLakehouse},
-		},
-		{
-			name: "no downgrade: non-lakehouse mode (gate not applicable)",
-			in:   DowngradeInput{Mode: "codereview_isolation", Model: "x-ai/grok-4.1-fast"},
-			want: DowngradeDecision{Mode: "codereview_isolation"},
-		},
-	}
-	for _, c := range cases {
-		got := MaybeDowngrade(c.in)
-		if got.Mode != c.want.Mode {
-			t.Errorf("%s: Mode want %q, got %q", c.name, c.want.Mode, got.Mode)
-		}
-		if got.DowngradedFrom != c.want.DowngradedFrom {
-			t.Errorf("%s: DowngradedFrom want %q, got %q", c.name, c.want.DowngradedFrom, got.DowngradedFrom)
-		}
-		if got.Reason == "" {
-			t.Errorf("%s: Reason should be non-empty", c.name)
-		}
-	}
-}
-
-// TestMaybeDowngrade_ForcedTrumpsOthers verifies precedence: when
-// multiple bypass conditions hit, ForcedMode wins (explicit caller
-// intent always overrides). Caught a subtle ordering bug in the
-// original Rust code where this was tested only by happy path.
-func TestMaybeDowngrade_ForcedTrumpsOthers(t *testing.T) {
-	in := DowngradeInput{
-		Mode:              ModeCodeReviewLakehouse,
-		Model:             "qwen3.5:latest", // weak — would otherwise hit weak-bypass
-		ForcedMode:        true,
-		ForceFullOverride: true,
-	}
-	got := MaybeDowngrade(in)
-	if got.Mode != ModeCodeReviewLakehouse {
-		t.Errorf("forced mode should keep mode: got %q", got.Mode)
-	}
-	if got.DowngradedFrom != "" {
-		t.Errorf("no downgrade expected; got DowngradedFrom=%q", got.DowngradedFrom)
-	}
-}
--- a/internal/matrix/filter_test.go
+++ b/internal/matrix/filter_test.go
@ -1,95 +0,0 @@
-package matrix
-
-import (
-	"encoding/json"
-	"testing"
-)
-
-func TestMatchesMetadataFilter_NoFilter_KeepsAll(t *testing.T) {
-	meta := json.RawMessage(`{"role":"Forklift Operator","state":"IL"}`)
-	if !matchesMetadataFilter(meta, nil) {
-		t.Error("nil filter should match everything")
-	}
-	if !matchesMetadataFilter(meta, map[string]any{}) {
-		t.Error("empty filter should match everything")
-	}
-}
-
-func TestMatchesMetadataFilter_NoMetadata_AlwaysFails(t *testing.T) {
-	if matchesMetadataFilter(nil, map[string]any{"x": "y"}) {
-		t.Error("missing metadata should fail any filter")
-	}
-}
-
-func TestMatchesMetadataFilter_SingleValueExactMatch(t *testing.T) {
-	meta := json.RawMessage(`{"state":"IL","status":"active","years":5}`)
-	cases := []struct {
-		filter map[string]any
-		want   bool
-	}{
-		{map[string]any{"state": "IL"}, true},
-		{map[string]any{"state": "TX"}, false},
-		{map[string]any{"status": "active"}, true},
-		{map[string]any{"status": "inactive"}, false},
-		// JSON normalizes both sides, so 5 matches 5.0
-		{map[string]any{"years": 5.0}, true},
-		{map[string]any{"years": 5}, true},
-		// Missing key = fail
-		{map[string]any{"city": "Chicago"}, false},
-	}
-	for _, c := range cases {
-		got := matchesMetadataFilter(meta, c.filter)
-		if got != c.want {
-			t.Errorf("filter %v on %s: want %v, got %v", c.filter, meta, c.want, got)
-		}
-	}
-}
-
-func TestMatchesMetadataFilter_AllKeysAND(t *testing.T) {
-	meta := json.RawMessage(`{"state":"IL","status":"active","role":"Forklift Operator"}`)
-	if !matchesMetadataFilter(meta, map[string]any{
-		"state":  "IL",
-		"status": "active",
-	}) {
-		t.Error("both keys match: should pass")
-	}
-	if matchesMetadataFilter(meta, map[string]any{
-		"state":  "IL",
-		"status": "inactive", // mismatch
-	}) {
-		t.Error("one key mismatches: should fail (AND across keys)")
-	}
-}
-
-func TestMatchesMetadataFilter_ListValueOR(t *testing.T) {
-	meta := json.RawMessage(`{"state":"IL"}`)
-	// state in {"IL","WI","IN"} → match
-	if !matchesMetadataFilter(meta, map[string]any{
-		"state": []any{"IL", "WI", "IN"},
-	}) {
-		t.Error("list with matching element: should pass")
-	}
-	// state in {"TX","CA"} → fail
-	if matchesMetadataFilter(meta, map[string]any{
-		"state": []any{"TX", "CA"},
-	}) {
-		t.Error("list with no matching element: should fail")
-	}
-}
-
-func TestMatchesMetadataFilter_BoolMatch(t *testing.T) {
-	meta := json.RawMessage(`{"available":true,"placed":false}`)
-	if !matchesMetadataFilter(meta, map[string]any{"available": true}) {
-		t.Error("bool true match")
-	}
-	if matchesMetadataFilter(meta, map[string]any{"available": false}) {
-		t.Error("bool true should not match false filter")
-	}
-}
-
-func TestMatchesMetadataFilter_MalformedMetadataFails(t *testing.T) {
-	meta := json.RawMessage(`{not valid json}`)
-	if matchesMetadataFilter(meta, map[string]any{"x": "y"}) {
-		t.Error("malformed metadata should fail")
-	}
-}
--- a/internal/matrix/playbook.go
+++ b/internal/matrix/playbook.go
@ -1,196 +0,0 @@
-package matrix
-
-// Playbook memory — SPEC §3.4 component 5 (learning-loop integration).
-//
-// Concept: every time an external system confirms "(query → answer_id)
-// was a successful match," record it. Future similar queries get that
-// answer's score boosted, so the matrix indexer learns from outcomes
-// rather than relying solely on the base embedder's geometry.
-//
-// Per feedback_meta_index_vision.md: this is the north star — a
-// meta-index that LEARNS from playbooks over time, not a static
-// hybrid search engine.
-//
-// Storage shape: a vectord index named DefaultPlaybookCorpus where:
-//   - The vector is embed(query_text)
-//   - The metadata is a serialized PlaybookEntry
-// Retrieval shape: at /matrix/search time, when use_playbook=true,
-// matrixd searches the playbook corpus with the same query vector,
-// looks up each hit's answer_id, and if that answer is in the current
-// matrix-search results, applies a boost to its distance.
-//
-// Composition: this layer is additive on top of the existing
-// retrieve+merge — when use_playbook=false, behavior is unchanged.
-// The boost only re-ranks results that ALREADY surfaced from the
-// regular retrieval. A v1 enhancement would inject playbook hits
-// directly even when they weren't in the top-K (Shape B from the
-// design conversation), but v0 keeps the safer "boost-only" stance.
-
-import (
-	"encoding/json"
-	"errors"
-	"sort"
-	"time"
-)
-
-// DefaultPlaybookCorpus is the vectord index name where playbook
-// entries land by default. Callers can override per-request, but
-// having one default makes the system observable from the outside
-// (operator hits /vectors/index and sees this corpus in the list).
-const DefaultPlaybookCorpus = "playbook_memory"
-
-// DefaultPlaybookTopK is how many similar past queries to consider
-// when applying boost. 3 keeps the influence focused — we want the
-// boost to reward consistent matches, not let one stale playbook
-// dominate. Caller can override.
-const DefaultPlaybookTopK = 3
-
-// DefaultPlaybookMaxDistance is the cosine ceiling for "this past
-// query is similar enough to count." 0.5 lets in genuinely related
-// queries while excluding pure-coincidence neighbors. Caller can
-// override per-request as we learn what works for staffing data.
-const DefaultPlaybookMaxDistance = 0.5
-
-// PlaybookEntry is what gets stored as metadata on each playbook
-// vector. RecordedAt is captured at write time; callers should not
-// set it (the recorder fills it in).
-type PlaybookEntry struct {
-	QueryText    string   `json:"query_text"`
-	AnswerID     string   `json:"answer_id"`
-	AnswerCorpus string   `json:"answer_corpus"`
-	Score        float64  `json:"score"`         // 0..1; higher = better outcome
-	RecordedAtNs int64    `json:"recorded_at_ns"`
-	Tags         []string `json:"tags,omitempty"`
-}
-
-// Validate returns an error if the entry is missing required fields.
-// Callers should validate before storage so bad data doesn't pollute
-// the corpus.
-func (p PlaybookEntry) Validate() error {
-	if p.QueryText == "" {
-		return errors.New("playbook: query_text required")
-	}
-	if p.AnswerID == "" {
-		return errors.New("playbook: answer_id required")
-	}
-	if p.AnswerCorpus == "" {
-		return errors.New("playbook: answer_corpus required")
-	}
-	if p.Score < 0 || p.Score > 1 {
-		return errors.New("playbook: score must be in [0, 1]")
-	}
-	return nil
-}
-
-// BoostFactor returns the multiplier applied to a result's distance
-// when this playbook entry matches it. Lower is better:
-//
-//	score = 0   → 1.0  (no boost)
-//	score = 0.5 → 0.75 (mild boost)
-//	score = 1.0 → 0.5  (halve the distance — strong boost)
-//
-// Math: 1 - 0.5*score. Capped to [0.5, 1.0] for safety.
-//
-// Why halving as the maximum boost: a perfect-confidence playbook
-// entry shouldn't completely override the base embedding (that
-// invites runaway feedback loops where one early playbook
-// dominates forever). Halving is enough to move a mid-rank result
-// to the top in most cases without erasing the base ranking
-// signal.
-func (p PlaybookEntry) BoostFactor() float64 {
-	score := p.Score
-	if score < 0 {
-		score = 0
-	}
-	if score > 1 {
-		score = 1
-	}
-	return 1.0 - 0.5*score
-}
-
-// MarshalMetadata serializes the entry as the JSON RawMessage that
-// vectord stores per item. Convenience for the recorder.
-func (p PlaybookEntry) MarshalMetadata() (json.RawMessage, error) {
-	return json.Marshal(p)
-}
-
-// UnmarshalPlaybookMetadata is the inverse — used when fetching
-// playbook hits to decode their metadata back into entries.
-func UnmarshalPlaybookMetadata(raw json.RawMessage) (PlaybookEntry, error) {
-	var e PlaybookEntry
-	if len(raw) == 0 {
-		return e, errors.New("playbook: empty metadata")
-	}
-	if err := json.Unmarshal(raw, &e); err != nil {
-		return e, err
-	}
-	return e, nil
-}
-
-// NewPlaybookEntry stamps RecordedAtNs to now and returns the entry.
-// Validation happens at storage; this is just construction.
-func NewPlaybookEntry(query, answerID, answerCorpus string, score float64, tags []string) PlaybookEntry {
-	return PlaybookEntry{
-		QueryText:    query,
-		AnswerID:     answerID,
-		AnswerCorpus: answerCorpus,
-		Score:        score,
-		RecordedAtNs: time.Now().UnixNano(),
-		Tags:         tags,
-	}
-}
-
-// PlaybookHit is one similarity-search result from the playbook
-// corpus, paired with its decoded entry. Distance is the cosine
-// distance between the current query and this past playbook's
-// query vector — used by the caller to filter out "too far"
-// matches via PlaybookMaxDistance.
-type PlaybookHit struct {
-	PlaybookID string        `json:"playbook_id"`
-	Distance   float32       `json:"distance"`
-	Entry      PlaybookEntry `json:"entry"`
-}
-
-// ApplyPlaybookBoost re-ranks results in place using matched
-// playbook hits. For each hit whose (AnswerID, AnswerCorpus)
-// matches a result, multiply that result's distance by the hit's
-// BoostFactor. If multiple hits match the same result, the highest-
-// score one wins (greatest reduction in distance).
-//
-// After applying boosts, results are re-sorted ascending by
-// distance.
-//
-// Returns the number of distinct results that received a boost.
-// Callers can log this as a signal of "how much the playbook
-// influenced this query."
-func ApplyPlaybookBoost(results []Result, hits []PlaybookHit) int {
-	if len(hits) == 0 || len(results) == 0 {
-		return 0
-	}
-
-	// For each result, find the hit with the lowest BoostFactor
-	// (= largest boost = highest score, since BoostFactor is
-	// 1-0.5*score and we minimize).
-	bestBoost := make(map[int]float64, len(results))
-	for i, r := range results {
-		for _, h := range hits {
-			if h.Entry.AnswerID != r.ID || h.Entry.AnswerCorpus != r.Corpus {
-				continue
-			}
-			bf := h.Entry.BoostFactor()
-			if cur, ok := bestBoost[i]; !ok || bf < cur {
-				bestBoost[i] = bf
-			}
-		}
-	}
-
-	for i, bf := range bestBoost {
-		results[i].Distance = float32(float64(results[i].Distance) * bf)
-	}
-
-	sort.SliceStable(results, func(i, j int) bool {
-		return results[i].Distance < results[j].Distance
-	})
-
-	return len(bestBoost)
-}
--- a/internal/matrix/playbook_test.go
+++ b/internal/matrix/playbook_test.go
@ -1,180 +0,0 @@
-package matrix
-
-import (
-	"encoding/json"
-	"testing"
-)
-
-func TestPlaybookEntry_Validate(t *testing.T) {
-	good := PlaybookEntry{
-		QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: 0.5,
-	}
-	if err := good.Validate(); err != nil {
-		t.Errorf("good entry should validate: %v", err)
-	}
-
-	cases := []struct {
-		name  string
-		entry PlaybookEntry
-	}{
-		{"empty query", PlaybookEntry{AnswerID: "y", AnswerCorpus: "z", Score: 0.5}},
-		{"empty answer id", PlaybookEntry{QueryText: "x", AnswerCorpus: "z", Score: 0.5}},
-		{"empty corpus", PlaybookEntry{QueryText: "x", AnswerID: "y", Score: 0.5}},
-		{"score too high", PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: 1.5}},
-		{"score negative", PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: -0.1}},
-	}
-	for _, c := range cases {
-		if err := c.entry.Validate(); err == nil {
-			t.Errorf("%s: expected validation error, got nil", c.name)
-		}
-	}
-}
-
-func TestPlaybookEntry_BoostFactor(t *testing.T) {
-	cases := []struct {
-		score float64
-		want  float64
-	}{
-		{0.0, 1.0},
-		{0.5, 0.75},
-		{1.0, 0.5},
-		{-0.1, 1.0}, // clamped
-		{1.5, 0.5},  // clamped
-	}
-	for _, c := range cases {
-		got := PlaybookEntry{Score: c.score}.BoostFactor()
-		if abs(got-c.want) > 1e-9 {
-			t.Errorf("BoostFactor(score=%.2f): want %.4f, got %.4f", c.score, c.want, got)
-		}
-	}
-}
-
-func TestApplyPlaybookBoost_NoHitsLeaveResultsAlone(t *testing.T) {
-	results := []Result{
-		{ID: "a", Distance: 0.1, Corpus: "x"},
-		{ID: "b", Distance: 0.2, Corpus: "x"},
-	}
-	n := ApplyPlaybookBoost(results, nil)
-	if n != 0 {
-		t.Errorf("expected 0 boosted, got %d", n)
-	}
-	if results[0].ID != "a" || results[1].ID != "b" {
-		t.Errorf("results reordered without hits: %v", results)
-	}
-}
-
-func TestApplyPlaybookBoost_BoostMovesResultUp(t *testing.T) {
-	// Initial: a (0.10) beats b (0.20) beats c (0.30).
-	// Playbook says (answer=c, score=1.0) should be boosted → c's
-	// distance becomes 0.30 * 0.5 = 0.15. New ordering: a, c, b.
-	results := []Result{
-		{ID: "a", Distance: 0.10, Corpus: "x"},
-		{ID: "b", Distance: 0.20, Corpus: "x"},
-		{ID: "c", Distance: 0.30, Corpus: "x"},
-	}
-	hits := []PlaybookHit{
-		{PlaybookID: "p1", Distance: 0.05, Entry: PlaybookEntry{
-			AnswerID: "c", AnswerCorpus: "x", Score: 1.0,
-		}},
-	}
-	n := ApplyPlaybookBoost(results, hits)
-	if n != 1 {
-		t.Errorf("expected 1 boosted, got %d", n)
-	}
-	if results[0].ID != "a" || results[1].ID != "c" || results[2].ID != "b" {
-		t.Errorf("expected order a,c,b after boost; got %v", idsOf(results))
-	}
-	if abs(float64(results[1].Distance)-0.15) > 1e-6 {
-		t.Errorf("expected c distance 0.15 after boost; got %.4f", results[1].Distance)
-	}
-}
-
-func TestApplyPlaybookBoost_HighestScoreWinsForSameAnswer(t *testing.T) {
-	results := []Result{
-		{ID: "a", Distance: 0.30, Corpus: "x"},
-	}
-	// Two playbook hits both pointing at "a". Score=0.4 (weak boost)
-	// + Score=0.9 (strong boost). Strong should win — distance gets
-	// multiplied by 1-0.5*0.9 = 0.55, not by 1-0.5*0.4 = 0.80.
-	hits := []PlaybookHit{
-		{PlaybookID: "p_weak", Distance: 0.05, Entry: PlaybookEntry{
-			AnswerID: "a", AnswerCorpus: "x", Score: 0.4,
-		}},
-		{PlaybookID: "p_strong", Distance: 0.05, Entry: PlaybookEntry{
-			AnswerID: "a", AnswerCorpus: "x", Score: 0.9,
-		}},
-	}
-	ApplyPlaybookBoost(results, hits)
-	wantDist := 0.30 * 0.55
-	if abs(float64(results[0].Distance)-wantDist) > 1e-6 {
-		t.Errorf("strong-score boost should win: want %.4f, got %.4f", wantDist, results[0].Distance)
-	}
-}
-
-func TestApplyPlaybookBoost_CorpusAttributionRespected(t *testing.T) {
-	// Playbook references answer_id="a" in corpus="x".
-	// Results have answer_id="a" in corpus="y" — DIFFERENT corpus.
-	// Boost should NOT apply; the (id, corpus) tuple is the join key,
-	// not just id (otherwise different-corpus collisions would create
-	// false positives).
-	results := []Result{
-		{ID: "a", Distance: 0.30, Corpus: "y"},
-	}
-	hits := []PlaybookHit{
-		{PlaybookID: "p1", Distance: 0.05, Entry: PlaybookEntry{
-			AnswerID: "a", AnswerCorpus: "x", Score: 1.0,
-		}},
-	}
-	n := ApplyPlaybookBoost(results, hits)
-	if n != 0 {
-		t.Errorf("cross-corpus collision should not boost: got %d", n)
-	}
-	if abs(float64(results[0].Distance)-0.30) > 1e-6 {
-		// 1e-6 tolerance accounts for float32→float64 conversion;
-		// the assertion that matters is "unchanged from input."
-		t.Errorf("distance should be unchanged: got %.6f", results[0].Distance)
-	}
-}
-
-func TestPlaybookEntry_RoundTripJSON(t *testing.T) {
-	e := NewPlaybookEntry("forklift query", "w-12345", "workers", 0.85, []string{"chicago", "verified"})
-	raw, err := e.MarshalMetadata()
-	if err != nil {
-		t.Fatalf("marshal: %v", err)
-	}
-	got, err := UnmarshalPlaybookMetadata(raw)
-	if err != nil {
-		t.Fatalf("unmarshal: %v", err)
-	}
-	if got.QueryText != e.QueryText || got.AnswerID != e.AnswerID ||
-		got.AnswerCorpus != e.AnswerCorpus || got.Score != e.Score {
-		t.Errorf("round-trip mismatch: want %+v, got %+v", e, got)
-	}
-	if len(got.Tags) != 2 || got.Tags[0] != "chicago" {
-		t.Errorf("tags lost in round-trip: %v", got.Tags)
-	}
-	if got.RecordedAtNs == 0 {
-		t.Error("RecordedAtNs not set by NewPlaybookEntry")
-	}
-}
-
-func TestUnmarshalPlaybookMetadata_RejectsEmpty(t *testing.T) {
-	if _, err := UnmarshalPlaybookMetadata(json.RawMessage{}); err == nil {
-		t.Error("empty metadata should error")
-	}
-}
-
-func abs(f float64) float64 {
-	if f < 0 {
-		return -f
-	}
-	return f
-}
-
-func idsOf(rs []Result) []string {
-	out := make([]string, len(rs))
-	for i, r := range rs {
-		out[i] = r.ID
-	}
-	return out
-}
--- a/internal/matrix/relevance.go
+++ b/internal/matrix/relevance.go
@ -1,376 +0,0 @@
-package matrix
-
-// Heuristic relevance filter for matrix-retrieved chunks. Port of
-// /home/profit/lakehouse/mcp-server/relevance.ts (Rust system).
-//
-// What it does: drops "adjacency pollution" — chunks that scored
-// well on cosine but are actually about code the focus file IMPORTS,
-// not the focus file itself. Without this, a reviewer LLM
-// hallucinates imported-crate internals as belonging to the focus
-// file ("I see main.rs does X" when X is in queryd::context that
-// main.rs only calls through).
-//
-// IMPORTANT: this filter is CODE-aware. The signals are pub fn,
-// struct, enum, use, import, file paths. It works for the eventual
-// lakehouse_arch_v1 / lakehouse_symbols_v1 / scrum_findings_v1
-// corpora ports. It will NOT meaningfully filter staffing data
-// (candidates, workers, placements) — those need a different
-// mechanism (structured constraints + status gates) that lives
-// outside this package. See the candidates reality test 2026-04-29
-// for the kind of staffing-side mismatch this filter doesn't fix.
-//
-// Scoring signals (all 0..1, additive then can sign-flip):
-//   path_match     +1.0  chunk.source/doc_id encodes focus.path
-//   filename_match +0.6  chunk text mentions focus's filename
-//   defined_match  +0.6  chunk text mentions focus.defined_symbols
-//   token_overlap  +0.4  jaccard of non-stopword tokens
-//   prefix_match   +0.3  chunk source shares first-2-segment prefix
-//   import_penalty -0.5  mentions ONLY imported symbols, no defined ones
-//
-// Threshold default 0.3 — same value the Rust observer ships.
-
-import (
-	"fmt"
-	"regexp"
-	"strings"
-)
-
-// DefaultRelevanceThreshold is the value the Rust observer ships.
-// Empirically tuned to keep direct hits and drop adjacency pollution.
-const DefaultRelevanceThreshold = 0.3
-
-// stopwords is the same list as relevance.ts. Includes English
-// articles + common Rust/TS keywords that would otherwise flood
-// jaccard scores between any two source files.
-var stopwords = func() map[string]struct{} {
-	list := []string{
-		"the", "a", "an", "and", "or", "but", "if", "then", "else", "is", "are", "was", "were",
-		"be", "been", "being", "of", "in", "on", "at", "to", "for", "with", "by", "from", "as",
-		"that", "this", "these", "those", "it", "its", "they", "them", "their", "we", "our",
-		"you", "your", "i", "me", "my", "not", "no", "so", "do", "does", "did", "done",
-		"will", "would", "could", "should", "can", "may", "might", "must", "shall",
-		"fn", "let", "mut", "pub", "use", "mod", "struct", "enum", "trait", "impl", "self",
-		"type", "const", "static", "async", "await", "return", "match", "ok", "err", "some",
-		"none", "into", "from", "ref", "box", "arc", "rc", "vec", "string", "str",
-	}
-	m := make(map[string]struct{}, len(list))
-	for _, s := range list {
-		m[s] = struct{}{}
-	}
-	return m
-}()
-
-// FocusFile is what we're filtering chunks against. Path is required
-// for path_match; Content lets the filter auto-extract Defined and
-// ImportedSymbols when callers haven't already done so.
-type FocusFile struct {
-	Path            string
-	Content         string
-	DefinedSymbols  []string
-	ImportedSymbols []string
-}
-
-// CandidateChunk is a single retrieved item to score. Source is the
-// corpus name; DocID is the chunk identifier; Score is the upstream
-// cosine signal (carried through but not used by this filter — the
-// matrix layer uses cosine for ranking, this filter for retention).
-type CandidateChunk struct {
-	Source string  `json:"source"`
-	DocID  string  `json:"doc_id"`
-	Text   string  `json:"text"`
-	Score  float64 `json:"score"`
-}
-
-// ScoredChunk wraps a chunk with its computed relevance + the list
-// of signals that fired. Reasons makes the filter auditable —
-// debugging "why did this chunk get kept/dropped" is the hard part.
-type ScoredChunk struct {
-	CandidateChunk
-	Relevance float64  `json:"relevance"`
-	Reasons   []string `json:"reasons"`
-}
-
-// FilterResult is the output of FilterChunks. Kept + Dropped are
-// disjoint and together cover the input. TotalIn is for sanity
-// checks; FocusPath echoes input for logging.
-type FilterResult struct {
-	Kept      []ScoredChunk `json:"kept"`
-	Dropped   []ScoredChunk `json:"dropped"`
-	Threshold float64       `json:"threshold"`
-	FocusPath string        `json:"focus_path"`
-	TotalIn   int           `json:"total_in"`
-}
-
-// Tokenize lowercases, splits on identifier boundaries (>=3 chars),
-// and drops stopwords. Used by Jaccard for token_overlap. Mirrors
-// the TS regex /[a-z_][a-z0-9_]{2,}/g — RE2-compatible as written.
-var tokenRe = regexp.MustCompile(`[a-z_][a-z0-9_]{2,}`)
-
-func Tokenize(text string) map[string]struct{} {
-	out := make(map[string]struct{})
-	if text == "" {
-		return out
-	}
-	for _, m := range tokenRe.FindAllString(strings.ToLower(text), -1) {
-		if _, skip := stopwords[m]; skip {
-			continue
-		}
-		out[m] = struct{}{}
-	}
-	return out
-}
-
-// Jaccard returns |A ∩ B| / |A ∪ B|. 0 when either set is empty
-// (matches the TS contract).
-func Jaccard(a, b map[string]struct{}) float64 {
-	if len(a) == 0 || len(b) == 0 {
-		return 0
-	}
-	var inter int
-	for k := range a {
-		if _, ok := b[k]; ok {
-			inter++
-		}
-	}
-	union := len(a) + len(b) - inter
-	if union == 0 {
-		return 0
-	}
-	return float64(inter) / float64(union)
-}
-
-// ExtractDefinedSymbols pulls pub-symbol names from Rust/TS source.
-// Conservative — would rather miss a symbol than over-match. Patterns
-// match exactly the TS impl; \b and (?:...) are RE2-supported. Case-
-// sensitivity matches TS: pub fn is lowercase, struct/enum/trait/etc
-// are PascalCase, const is SCREAMING_CASE. Only the "pub fn" match
-// uses (?i) because TS uses /gi explicitly there (the rest are /g).
-var definedPatterns = []*regexp.Regexp{
-	regexp.MustCompile(`(?i)\bpub\s+(?:async\s+)?fn\s+([a-z_][a-z0-9_]*)`),
-	regexp.MustCompile(`\bpub\s+struct\s+([A-Z][A-Za-z0-9_]*)`),
-	regexp.MustCompile(`\bpub\s+enum\s+([A-Z][A-Za-z0-9_]*)`),
-	regexp.MustCompile(`\bpub\s+trait\s+([A-Z][A-Za-z0-9_]*)`),
-	regexp.MustCompile(`\bpub\s+const\s+([A-Z_][A-Z0-9_]*)`),
-	regexp.MustCompile(`\bpub\s+type\s+([A-Z][A-Za-z0-9_]*)`),
-	regexp.MustCompile(`\bexport\s+(?:async\s+)?function\s+([a-z_][a-zA-Z0-9_]*)`),
-	regexp.MustCompile(`\bexport\s+class\s+([A-Z][A-Za-z0-9_]*)`),
-	regexp.MustCompile(`\bexport\s+interface\s+([A-Z][A-Za-z0-9_]*)`),
-	regexp.MustCompile(`\bexport\s+(?:const|let|var)\s+([a-zA-Z_][a-zA-Z0-9_]*)`),
-}
-
-func ExtractDefinedSymbols(content string) []string {
-	if content == "" {
-		return nil
-	}
-	seen := make(map[string]struct{})
-	var out []string
-	for _, re := range definedPatterns {
-		for _, m := range re.FindAllStringSubmatch(content, -1) {
-			if len(m) < 2 || m[1] == "" {
-				continue
-			}
-			if _, ok := seen[m[1]]; ok {
-				continue
-			}
-			seen[m[1]] = struct{}{}
-			out = append(out, m[1])
-		}
-	}
-	return out
-}
-
-// rustUseRe matches `use foo::bar::Baz;`, `use foo::{Bar, Baz};`,
-// `use foo::bar as alias;`. Lazy `*?` so we don't run into the next
-// `;` boundary too eagerly.
-var rustUseRe = regexp.MustCompile(`\buse\s+([A-Za-z_][A-Za-z0-9_:{}, \n]*?);`)
-
-// tsImportRe matches `import { X, Y } from "foo"` and `import X from "foo"`.
-var tsImportRe = regexp.MustCompile(`\bimport\s+(?:\{([^}]+)\}|([A-Za-z_][A-Za-z0-9_]*))\s+from`)
-
-// identRe extracts identifiers from a use/import block.
-var identRe = regexp.MustCompile(`[A-Za-z_][A-Za-z0-9_]*`)
-
-func ExtractImportedSymbols(content string) []string {
-	if content == "" {
-		return nil
-	}
-	ignore := map[string]bool{
-		"use": true, "as": true, "crate": true, "super": true, "self": true, "mod": true,
-	}
-	seen := make(map[string]struct{})
-	var out []string
-	add := func(tok string) {
-		if len(tok) <= 2 {
-			return
-		}
-		if ignore[tok] {
-			return
-		}
-		if _, ok := seen[tok]; ok {
-			return
-		}
-		seen[tok] = struct{}{}
-		out = append(out, tok)
-	}
-	for _, m := range rustUseRe.FindAllStringSubmatch(content, -1) {
-		if len(m) < 2 {
-			continue
-		}
-		for _, ident := range identRe.FindAllString(m[1], -1) {
-			add(ident)
-		}
-	}
-	for _, m := range tsImportRe.FindAllStringSubmatch(content, -1) {
-		if len(m) < 3 {
-			continue
-		}
-		block := m[1]
-		if block == "" {
-			block = m[2]
-		}
-		for _, ident := range identRe.FindAllString(block, -1) {
-			add(ident)
-		}
-	}
-	return out
-}
-
-// FilePrefix returns the first two path segments joined by "/" —
-// e.g. "crates/queryd/src/foo.rs" → "crates/queryd". Used for cheap
-// "same crate" comparisons; mirrors pathway_memory's notion.
-func FilePrefix(path string) string {
-	parts := strings.Split(path, "/")
-	if len(parts) > 2 {
-		parts = parts[:2]
-	}
-	return strings.Join(parts, "/")
-}
-
-// ScoreRelevance computes the additive 0..1-ish score plus the list
-// of signals that fired. Negative scores are possible (import_penalty
-// without compensating positive signal). Pure function — no side
-// effects, no I/O.
-func ScoreRelevance(focus FocusFile, chunk CandidateChunk) (float64, []string) {
-	var score float64
-	var reasons []string
-
-	focusPath := focus.Path
-	focusBase := ""
-	if focusPath != "" {
-		parts := strings.Split(focusPath, "/")
-		focusBase = parts[len(parts)-1]
-	}
-	chunkText := chunk.Text
-	chunkSource := chunk.Source
-	chunkDocID := chunk.DocID
-
-	// path_match: chunk's provenance encodes the focus path or filename.
-	if focusPath != "" && (strings.Contains(chunkSource, focusPath) ||
-		strings.Contains(chunkDocID, focusPath) ||
-		strings.Contains(chunkText, focusPath)) {
-		score += 1.0
-		reasons = append(reasons, "path_match")
-	} else if focusBase != "" && len(focusBase) > 4 &&
-		(strings.Contains(chunkText, focusBase) || strings.Contains(chunkDocID, focusBase)) {
-		score += 0.6
-		reasons = append(reasons, "filename_match")
-	}
-
-	// defined_match: chunk text mentions symbols this file actually defines.
-	defined := focus.DefinedSymbols
-	if len(defined) == 0 && focus.Content != "" {
-		defined = ExtractDefinedSymbols(focus.Content)
-	}
-	if len(defined) > 0 {
-		var hits int
-		for _, s := range defined {
-			if len(s) > 2 && strings.Contains(chunkText, s) {
-				hits++
-			}
-		}
-		if hits > 0 {
-			denom := len(defined)
-			if denom < 1 {
-				denom = 1
-			}
-			ratio := float64(hits) / float64(denom)
-			if ratio > 1 {
-				ratio = 1
-			}
-			score += 0.6 * ratio
-			reasons = append(reasons, fmt.Sprintf("defined_match(%d/%d)", hits, len(defined)))
-		}
-	}
-
-	// token_overlap: jaccard of non-stopword tokens.
-	if focus.Content != "" {
-		overlap := Jaccard(Tokenize(focus.Content), Tokenize(chunkText))
-		if overlap > 0.05 {
-			score += 0.4 * overlap
-			reasons = append(reasons, fmt.Sprintf("token_overlap(%.2f)", overlap))
-		}
-	}
-
-	// prefix_match: same first-2-segments (e.g. crates/queryd).
-	if focusPath != "" {
-		fp := FilePrefix(focusPath)
-		if fp != "" && (strings.Contains(chunkSource, fp) ||
-			strings.Contains(chunkDocID, fp) ||
-			strings.Contains(chunkText, fp)) {
-			score += 0.3
-			reasons = append(reasons, "prefix_match")
-		}
-	}
-
-	// import_penalty: chunk mentions only imported symbols, no defined
-	// ones. Strong signal of adjacency pollution — the chunk is about
-	// what we IMPORT, not what we ARE.
-	imported := focus.ImportedSymbols
-	if len(imported) == 0 && focus.Content != "" {
-		imported = ExtractImportedSymbols(focus.Content)
-	}
-	if len(imported) > 0 && len(defined) > 0 {
-		var importHits, definedHits int
-		for _, s := range imported {
-			if len(s) > 2 && strings.Contains(chunkText, s) {
-				importHits++
-			}
-		}
-		for _, s := range defined {
-			if len(s) > 2 && strings.Contains(chunkText, s) {
-				definedHits++
-			}
-		}
-		if importHits > 0 && definedHits == 0 {
-			score -= 0.5
-			reasons = append(reasons, fmt.Sprintf("import_only(%d)", importHits))
-		}
-	}
-
-	return score, reasons
-}
-
-// FilterChunks scores every chunk and partitions by threshold. The
-// caller picks the threshold; pass 0 to keep everything (caller-as-
-// intent contract — no auto-default substitution, since a literal 0
-// is meaningful as "keep everything I scored").
-func FilterChunks(focus FocusFile, chunks []CandidateChunk, threshold float64) FilterResult {
-	kept := make([]ScoredChunk, 0, len(chunks))
-	dropped := make([]ScoredChunk, 0)
-	for _, c := range chunks {
-		score, reasons := ScoreRelevance(focus, c)
-		sc := ScoredChunk{CandidateChunk: c, Relevance: score, Reasons: reasons}
-		if score >= threshold {
-			kept = append(kept, sc)
-		} else {
-			dropped = append(dropped, sc)
-		}
-	}
-	return FilterResult{
-		Kept:      kept,
-		Dropped:   dropped,
-		Threshold: threshold,
-		FocusPath: focus.Path,
-		TotalIn:   len(chunks),
-	}
-}
--- a/internal/matrix/relevance_test.go
+++ b/internal/matrix/relevance_test.go
@ -1,289 +0,0 @@
-package matrix
-
-import (
-	"strings"
-	"testing"
-)
-
-func TestTokenize(t *testing.T) {
-	cases := []struct {
-		text string
-		want []string // expected tokens (sorted check inside)
-	}{
-		{"", nil},
-		{"the quick brown fox", []string{"quick", "brown", "fox"}}, // stopwords dropped
-		{"hello WORLD", []string{"hello", "world"}},                // lowercase
-		{"a b c", nil},                                             // all under 3 chars
-		{"struct Foo", []string{"foo"}},                            // "struct" is a stopword, identifiers OK
-		{"crates/queryd/db.go", []string{"crates", "queryd"}},      // db.go: "db" is 2 chars, "go" is 2 chars
-	}
-	for _, c := range cases {
-		got := Tokenize(c.text)
-		if len(got) != len(c.want) {
-			t.Errorf("Tokenize(%q): want %d tokens %v, got %d %v", c.text, len(c.want), c.want, len(got), got)
-			continue
-		}
-		for _, w := range c.want {
-			if _, ok := got[w]; !ok {
-				t.Errorf("Tokenize(%q): missing token %q in %v", c.text, w, got)
-			}
-		}
-	}
-}
-
-func TestJaccard(t *testing.T) {
-	mk := func(tokens ...string) map[string]struct{} {
-		m := make(map[string]struct{})
-		for _, t := range tokens {
-			m[t] = struct{}{}
-		}
-		return m
-	}
-	cases := []struct {
-		name    string
-		a, b    map[string]struct{}
-		want    float64
-		epsilon float64
-	}{
-		{"both empty", mk(), mk(), 0, 0},
-		{"a empty", mk(), mk("x"), 0, 0},
-		{"identical", mk("x", "y"), mk("x", "y"), 1, 0},
-		{"disjoint", mk("a", "b"), mk("c", "d"), 0, 0},
-		{"half overlap", mk("a", "b"), mk("b", "c"), 1.0 / 3.0, 0.001},
-	}
-	for _, c := range cases {
-		got := Jaccard(c.a, c.b)
-		if got < c.want-c.epsilon || got > c.want+c.epsilon {
-			t.Errorf("%s: want %.3f, got %.3f", c.name, c.want, got)
-		}
-	}
-}
-
-func TestExtractDefinedSymbols(t *testing.T) {
-	rust := `
-pub fn search_chunks(query: &str) -> Vec<Chunk> { todo!() }
-pub async fn build_index() {}
-pub struct ChunkRegistry {}
-pub enum Distance { Cosine, Euclidean }
-pub trait Searcher {}
-pub const MAX_K: usize = 1000;
-pub type ChunkMap = HashMap<String, Chunk>;
-
-fn private_helper() {} // not pub, must NOT match
-struct PrivateOnly {}  // not pub, must NOT match
-`
-	got := ExtractDefinedSymbols(rust)
-	want := []string{"search_chunks", "build_index", "ChunkRegistry", "Distance", "Searcher", "MAX_K", "ChunkMap"}
-	if len(got) != len(want) {
-		t.Errorf("Rust extract: want %v, got %v", want, got)
-	}
-	for _, w := range want {
-		if !contains(got, w) {
-			t.Errorf("Rust: missing %q in %v", w, got)
-		}
-	}
-	// Negative cases — these should NOT match.
-	for _, neg := range []string{"private_helper", "PrivateOnly"} {
-		if contains(got, neg) {
-			t.Errorf("Rust: should not match %q in %v", neg, got)
-		}
-	}
-
-	ts := `
-export function tokenize(text: string) {}
-export async function loadCorpus() {}
-export class IndexRegistry {}
-export interface FocusFile {}
-export const STOPWORDS = new Set();
-export let counter = 0;
-
-function privateTs() {} // not export, must NOT match
-class Internal {}        // not export, must NOT match
-`
-	got = ExtractDefinedSymbols(ts)
-	want = []string{"tokenize", "loadCorpus", "IndexRegistry", "FocusFile", "STOPWORDS", "counter"}
-	for _, w := range want {
-		if !contains(got, w) {
-			t.Errorf("TS: missing %q in %v", w, got)
-		}
-	}
-	for _, neg := range []string{"privateTs", "Internal"} {
-		if contains(got, neg) {
-			t.Errorf("TS: should not match %q in %v", neg, got)
-		}
-	}
-}
-
-func TestExtractImportedSymbols(t *testing.T) {
-	rust := `
-use catalogd::Registry;
-use vectord::{Index, IndexParams};
-use std::collections::HashMap;
-`
-	got := ExtractImportedSymbols(rust)
-	for _, w := range []string{"catalogd", "Registry", "vectord", "Index", "IndexParams", "collections", "HashMap"} {
-		if !contains(got, w) {
-			t.Errorf("Rust use: missing %q in %v", w, got)
-		}
-	}
-	for _, neg := range []string{"use", "as"} {
-		if contains(got, neg) {
-			t.Errorf("Rust use: should not match keyword %q in %v", neg, got)
-		}
-	}
-
-	ts := `
-import { tokenize, jaccard } from "./relevance";
-import express from "express";
-`
-	got = ExtractImportedSymbols(ts)
-	for _, w := range []string{"tokenize", "jaccard", "express"} {
-		if !contains(got, w) {
-			t.Errorf("TS import: missing %q in %v", w, got)
-		}
-	}
-}
-
-func TestFilePrefix(t *testing.T) {
-	cases := []struct {
-		path, want string
-	}{
-		{"crates/queryd/src/foo.rs", "crates/queryd"},
-		{"top.rs", "top.rs"},
-		{"a/b/c/d", "a/b"},
-		{"", ""},
-	}
-	for _, c := range cases {
-		got := FilePrefix(c.path)
-		if got != c.want {
-			t.Errorf("FilePrefix(%q): want %q, got %q", c.path, c.want, got)
-		}
-	}
-}
-
-func TestScoreRelevance_PathMatch(t *testing.T) {
-	focus := FocusFile{Path: "crates/queryd/db.go"}
-	chunk := CandidateChunk{Source: "lakehouse_arch_v1", DocID: "phase:queryd", Text: "code at crates/queryd/db.go does X"}
-	score, reasons := ScoreRelevance(focus, chunk)
-	if score < 1.0 {
-		t.Errorf("path_match should give >=1.0; got %.2f reasons=%v", score, reasons)
-	}
-	if !contains(reasons, "path_match") {
-		t.Errorf("expected path_match in reasons: %v", reasons)
-	}
-}
-
-func TestScoreRelevance_ImportPenalty(t *testing.T) {
-	// Focus defines Foo; chunk only mentions Bar (imported). Should
-	// fire import_only penalty.
-	focus := FocusFile{
-		Path:           "crates/foo/main.go",
-		Content:        "pub fn run() {}\npub struct Foo {}\nuse barlib::Bar;\n",
-		DefinedSymbols: []string{"Foo"},
-		ImportedSymbols: []string{"Bar"},
-	}
-	chunk := CandidateChunk{
-		Source: "barlib_corpus", DocID: "barlib:Bar:42",
-		Text: "Bar handles the actual lookup logic and returns a Result.",
-	}
-	score, reasons := ScoreRelevance(focus, chunk)
-	if !contains(reasons, "import_only(1)") {
-		t.Errorf("expected import_only penalty: reasons=%v score=%.2f", reasons, score)
-	}
-	if score >= 0 {
-		// Without other positive signals, score should be net-negative.
-		t.Errorf("expected negative net score; got %.2f reasons=%v", score, reasons)
-	}
-}
-
-func TestFilterChunks_ThresholdSplitsKeptDropped(t *testing.T) {
-	focus := FocusFile{Path: "crates/queryd/db.go"}
-	chunks := []CandidateChunk{
-		{Source: "code", DocID: "queryd:db.go", Text: "crates/queryd/db.go is the focus"}, // path match → kept
-		{Source: "elsewhere", DocID: "phase:0", Text: "no match anywhere"},                  // dropped
-	}
-	res := FilterChunks(focus, chunks, DefaultRelevanceThreshold)
-	if len(res.Kept) != 1 || len(res.Dropped) != 1 {
-		t.Errorf("split: kept=%d dropped=%d (want 1/1)", len(res.Kept), len(res.Dropped))
-	}
-	if res.TotalIn != 2 {
-		t.Errorf("TotalIn: want 2, got %d", res.TotalIn)
-	}
-	if res.FocusPath != focus.Path {
-		t.Errorf("FocusPath echo: want %q, got %q", focus.Path, res.FocusPath)
-	}
-	// Sanity: everything in Kept has Relevance >= threshold.
-	for _, c := range res.Kept {
-		if c.Relevance < DefaultRelevanceThreshold {
-			t.Errorf("kept chunk below threshold: %v", c)
-		}
-	}
-	for _, c := range res.Dropped {
-		if c.Relevance >= DefaultRelevanceThreshold {
-			t.Errorf("dropped chunk at/above threshold: %v", c)
-		}
-	}
-}
-
-// TestFilterChunks_AdjacencyPollutionScenario is the headline test —
-// the exact case the filter exists to catch. Focus file is
-// crates/queryd/db.go which defines Connector and imports
-// catalogd::Registry. A chunk about catalogd::Registry should be
-// dropped (adjacency); a chunk about Connector should be kept.
-func TestFilterChunks_AdjacencyPollutionScenario(t *testing.T) {
-	focus := FocusFile{
-		Path: "crates/queryd/src/db.go",
-		Content: `
-package queryd
-
-import "catalogd"
-
-pub struct Connector {}
-pub fn open_connector() *Connector { return nil }
-use catalogd::Registry;
-`,
-	}
-	chunks := []CandidateChunk{
-		{
-			Source: "lakehouse_symbols_v1", DocID: "symbol:queryd::struct::Connector",
-			Text: "Connector wraps the DuckDB handle. open_connector creates one.",
-		},
-		{
-			Source: "lakehouse_symbols_v1", DocID: "symbol:catalogd::struct::Registry",
-			Text: "Registry stores manifests. Used by ingestd and queryd.",
-		},
-	}
-	res := FilterChunks(focus, chunks, DefaultRelevanceThreshold)
-	// Connector chunk should be kept (defined_match).
-	keptIDs := make([]string, len(res.Kept))
-	for i, c := range res.Kept {
-		keptIDs[i] = c.DocID
-	}
-	if !contains(keptIDs, "symbol:queryd::struct::Connector") {
-		t.Errorf("expected Connector chunk kept; got %v", keptIDs)
-	}
-	// The Registry chunk MIGHT pass threshold depending on token_overlap
-	// noise (queryd appears in its text too). The load-bearing assertion:
-	// Connector ranks ≥ Registry.
-	connectorRel, registryRel := -999.0, -999.0
-	for _, c := range append(res.Kept, res.Dropped...) {
-		if strings.Contains(c.DocID, "Connector") {
-			connectorRel = c.Relevance
-		}
-		if strings.Contains(c.DocID, "Registry") {
-			registryRel = c.Relevance
-		}
-	}
-	if connectorRel <= registryRel {
-		t.Errorf("Connector should outrank Registry: connector=%.2f registry=%.2f", connectorRel, registryRel)
-	}
-}
-
-func contains(haystack []string, needle string) bool {
-	for _, h := range haystack {
-		if h == needle {
-			return true
-		}
-	}
-	return false
-}
--- a/internal/matrix/retrieve.go
+++ b/internal/matrix/retrieve.go
@ -1,551 +0,0 @@
-// Package matrix is the multi-corpus retrieval layer above vectord.
-// Per docs/SPEC.md §3.4: the matrix indexer composes N single-corpus
-// vectord indexes into one retrieve+merge surface, with corpus
-// attribution preserved per result. Future work in the same package:
-// relevance filter, strong-model downgrade gate, learning-loop
-// integration. This file is component 2 of the dependency-ordered
-// port plan — multi-corpus retrieve+merge, no filter yet.
-//
-// Why corpus-as-shard rather than hash-shard a single index:
-// different corpora have distinct topology and distinct retrieval
-// intent (workers vs candidates vs scrum_findings vs lakehouse_arch).
-// Multi-corpus search merges across them by distance — that IS the
-// matrix indexer's whole purpose. See feedback_meta_index_vision.md
-// and project_small_model_pipeline_vision.md.
-package matrix
-
-import (
-	"bytes"
-	"context"
-	"crypto/sha256"
-	"encoding/hex"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"log/slog"
-	"net/http"
-	"sort"
-	"sync"
-	"time"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/vectord"
-)
-
-// Result is one merged hit with corpus attribution. The corpus field
-// is load-bearing — losing it would defeat the matrix's purpose
-// (knowing WHICH corpus contributed each hit is half the signal).
-type Result struct {
-	ID       string          `json:"id"`
-	Distance float32         `json:"distance"`
-	Corpus   string          `json:"corpus"`
-	Metadata json.RawMessage `json:"metadata,omitempty"`
-}
-
-// SearchRequest is the matrix search input. Either QueryText (matrix
-// embeds it via embedd) or QueryVector (already embedded by caller)
-// must be set; QueryVector takes precedence if both supplied.
-//
-// Playbook fields (component 5 — learning loop):
-//   UsePlaybook=true: after normal retrieve+merge, fetch top similar
-//     past queries from PlaybookCorpus and apply distance boost to
-//     any current results that match a recorded answer.
-//   PlaybookCorpus: index name; empty = DefaultPlaybookCorpus.
-//   PlaybookTopK: number of similar past queries to consider; 0 =
-//     DefaultPlaybookTopK.
-//   PlaybookMaxDistance: cosine ceiling for "similar enough"; 0 =
-//     DefaultPlaybookMaxDistance.
-//
-// Metadata filter (post-retrieval structured gate):
-//   MetadataFilter: map of metadata-field → expected value. Results
-//     whose metadata doesn't match every key are dropped. Addresses
-//     the reality-test gap surfaced in the candidates/workers
-//     experiments — pure semantic retrieval can't gate by status,
-//     state, etc. Caller can compensate for filter shrinkage by
-//     requesting larger PerCorpusK.
-//   Each filter value can be a single value (string|number|bool —
-//     whatever JSON unmarshals to `any`) or a []any meaning "any
-//     of these values" (OR semantics within one key, AND across keys).
-type SearchRequest struct {
-	QueryText           string         `json:"query_text,omitempty"`
-	QueryVector         []float32      `json:"query_vector,omitempty"`
-	Corpora             []string       `json:"corpora"`
-	K                   int            `json:"k"`
-	PerCorpusK          int            `json:"per_corpus_k,omitempty"`
-	Model               string         `json:"model,omitempty"`
-	UsePlaybook         bool           `json:"use_playbook,omitempty"`
-	PlaybookCorpus      string         `json:"playbook_corpus,omitempty"`
-	PlaybookTopK        int            `json:"playbook_top_k,omitempty"`
-	PlaybookMaxDistance float64        `json:"playbook_max_distance,omitempty"`
-	MetadataFilter      map[string]any `json:"metadata_filter,omitempty"`
-}
-
-// SearchResponse wraps the merged results plus per-corpus return
-// counts so callers can detect "this corpus returned nothing"
-// without re-querying. PlaybookBoosted is the count of results that
-// received a boost from playbook memory; useful for telemetry on
-// "how much the learning loop influenced this query."
-// MetadataFilterDropped is the count of results dropped by the
-// post-retrieval structured filter (when set in the request).
-type SearchResponse struct {
-	Results               []Result       `json:"results"`
-	PerCorpusCounts       map[string]int `json:"per_corpus_counts"`
-	PlaybookBoosted       int            `json:"playbook_boosted,omitempty"`
-	MetadataFilterDropped int            `json:"metadata_filter_dropped,omitempty"`
-}
-
-// Retriever holds the HTTP clients to embedd and vectord. Stateless
-// otherwise — safe to share across goroutines.
-type Retriever struct {
-	httpClient *http.Client
-	embeddURL  string
-	vectordURL string
-}
-
-// New returns a Retriever configured to call embedd at embeddURL
-// and vectord at vectordURL (both gateway-internal upstreams,
-// usually 127.0.0.1:3216 and :3215 respectively).
-func New(embeddURL, vectordURL string) *Retriever {
-	return &Retriever{
-		httpClient: &http.Client{Timeout: 30 * time.Second},
-		embeddURL:  embeddURL,
-		vectordURL: vectordURL,
-	}
-}
-
-// Errors surfaced to HTTP handlers.
-var (
-	ErrEmptyCorpora    = errors.New("matrix: corpora must be non-empty")
-	ErrEmptyQuery      = errors.New("matrix: query_text or query_vector required")
-	ErrCorpus          = errors.New("matrix: corpus search failed") // wraps vectord errors
-	ErrEmbed           = errors.New("matrix: embed failed")
-	ErrCorpusNotFound  = errors.New("matrix: corpus not found")     // distinct sentinel for vectord 404
-)
-
-// Search runs the matrix retrieve+merge.
-//
-// Error policy: fail-loud on any corpus error. Silent partial results
-// would lie about what was actually searched, which defeats the
-// indexer's coverage guarantee. Callers that want best-effort can
-// catch the error and re-issue with a smaller corpora list.
-func (r *Retriever) Search(ctx context.Context, req SearchRequest) (*SearchResponse, error) {
-	if len(req.Corpora) == 0 {
-		return nil, ErrEmptyCorpora
-	}
-	if req.K <= 0 {
-		return nil, errors.New("matrix: k must be > 0")
-	}
-	if req.PerCorpusK <= 0 {
-		req.PerCorpusK = req.K
-	}
-
-	// Resolve query → vector.
-	qvec := req.QueryVector
-	if len(qvec) == 0 {
-		if req.QueryText == "" {
-			return nil, ErrEmptyQuery
-		}
-		v, err := r.embed(ctx, req.QueryText, req.Model)
-		if err != nil {
-			return nil, fmt.Errorf("%w: %v", ErrEmbed, err)
-		}
-		qvec = v
-	}
-
-	// Parallel search across corpora. Each shard is independent;
-	// fan-out + collect with WaitGroup is cleaner than channels-only.
-	type shardResult struct {
-		corpus string
-		hits   []vectord.Result
-		err    error
-	}
-	results := make([]shardResult, len(req.Corpora))
-	var wg sync.WaitGroup
-	for i, c := range req.Corpora {
-		wg.Add(1)
-		go func(i int, corpus string) {
-			defer wg.Done()
-			hits, err := r.searchCorpus(ctx, corpus, qvec, req.PerCorpusK)
-			results[i] = shardResult{corpus: corpus, hits: hits, err: err}
-		}(i, c)
-	}
-	wg.Wait()
-
-	var allHits []Result
-	perCorpus := make(map[string]int, len(req.Corpora))
-	for _, s := range results {
-		if s.err != nil {
-			return nil, fmt.Errorf("%w: %s: %v", ErrCorpus, s.corpus, s.err)
-		}
-		perCorpus[s.corpus] = len(s.hits)
-		for _, h := range s.hits {
-			allHits = append(allHits, Result{
-				ID: h.ID, Distance: h.Distance, Corpus: s.corpus, Metadata: h.Metadata,
-			})
-		}
-	}
-
-	// Stable sort so equal-distance ties keep input order (which is
-	// per-corpus order from vectord's HNSW result heap). This matters
-	// for deterministic test assertions.
-	sort.SliceStable(allHits, func(i, j int) bool {
-		return allHits[i].Distance < allHits[j].Distance
-	})
-
-	// Metadata filter (component B — staffing-side structured gate).
-	// Applied BEFORE top-K truncation so the filter doesn't accidentally
-	// reduce coverage further. Caller can request larger PerCorpusK to
-	// compensate when filters are aggressive.
-	var dropped int
-	if len(req.MetadataFilter) > 0 {
-		filtered := make([]Result, 0, len(allHits))
-		for _, h := range allHits {
-			if matchesMetadataFilter(h.Metadata, req.MetadataFilter) {
-				filtered = append(filtered, h)
-			} else {
-				dropped++
-			}
-		}
-		allHits = filtered
-	}
-
-	if len(allHits) > req.K {
-		allHits = allHits[:req.K]
-	}
-	resp := &SearchResponse{
-		Results:               allHits,
-		PerCorpusCounts:       perCorpus,
-		MetadataFilterDropped: dropped,
-	}
-
-	// Playbook boost (component 5). Reuses the query vector — no
-	// extra embed call. If the playbook corpus doesn't exist (first
-	// search before any Record), the lookup gracefully no-ops.
-	if req.UsePlaybook {
-		hits, err := r.fetchPlaybookHits(ctx, qvec, req)
-		if err != nil {
-			// Don't fail the whole search on playbook errors — the
-			// boost is opportunistic. Log + continue.
-			slog.Warn("matrix: playbook lookup failed; skipping boost", "err", err)
-		} else if len(hits) > 0 {
-			resp.PlaybookBoosted = ApplyPlaybookBoost(resp.Results, hits)
-		}
-	}
-
-	return resp, nil
-}
-
-// fetchPlaybookHits queries the playbook corpus with the same query
-// vector and returns hits whose decoded entries are within
-// PlaybookMaxDistance. A missing playbook corpus returns nil + nil
-// (legitimate no-op state for a system before any Record call).
-func (r *Retriever) fetchPlaybookHits(ctx context.Context, qvec []float32, req SearchRequest) ([]PlaybookHit, error) {
-	corpus := req.PlaybookCorpus
-	if corpus == "" {
-		corpus = DefaultPlaybookCorpus
-	}
-	topK := req.PlaybookTopK
-	if topK <= 0 {
-		topK = DefaultPlaybookTopK
-	}
-	maxDist := req.PlaybookMaxDistance
-	if maxDist <= 0 {
-		maxDist = DefaultPlaybookMaxDistance
-	}
-
-	rawHits, err := r.searchCorpus(ctx, corpus, qvec, topK)
-	if errors.Is(err, ErrCorpusNotFound) {
-		// Cold-start state: no Record call has happened yet, so the
-		// playbook corpus doesn't exist. Legit no-op, not an error.
-		return nil, nil
-	}
-	if err != nil {
-		return nil, err
-	}
-
-	out := make([]PlaybookHit, 0, len(rawHits))
-	for _, h := range rawHits {
-		if float64(h.Distance) > maxDist {
-			continue
-		}
-		entry, err := UnmarshalPlaybookMetadata(h.Metadata)
-		if err != nil {
-			slog.Warn("matrix: skip malformed playbook entry", "id", h.ID, "err", err)
-			continue
-		}
-		out = append(out, PlaybookHit{
-			PlaybookID: h.ID,
-			Distance:   h.Distance,
-			Entry:      entry,
-		})
-	}
-	return out, nil
-}
-
-// Record stores a (query → answer_id) playbook entry in the
-// playbook corpus. Embeds the query via embedd, ensures the corpus
-// exists (idempotent create), and writes the entry as one vectord
-// item with the entry's JSON in metadata.
-//
-// Uses a deterministic ID derived from (query_text, answer_id,
-// answer_corpus) so re-recording the same triple upserts (last
-// score wins). Callers wanting to accumulate distinct samples can
-// vary one of the three.
-//
-// corpus="" defaults to DefaultPlaybookCorpus.
-func (r *Retriever) Record(ctx context.Context, entry PlaybookEntry, corpus string) (string, error) {
-	if err := entry.Validate(); err != nil {
-		return "", err
-	}
-	if corpus == "" {
-		corpus = DefaultPlaybookCorpus
-	}
-
-	qvec, err := r.embed(ctx, entry.QueryText, "")
-	if err != nil {
-		return "", fmt.Errorf("playbook record embed: %w", err)
-	}
-
-	if err := r.ensureCorpus(ctx, corpus, len(qvec)); err != nil {
-		return "", fmt.Errorf("playbook ensure corpus: %w", err)
-	}
-
-	if entry.RecordedAtNs == 0 {
-		entry.RecordedAtNs = time.Now().UnixNano()
-	}
-
-	pbID := playbookID(entry.QueryText, entry.AnswerID, entry.AnswerCorpus)
-
-	meta, err := entry.MarshalMetadata()
-	if err != nil {
-		return "", err
-	}
-
-	if err := r.addItem(ctx, corpus, pbID, qvec, meta); err != nil {
-		return "", fmt.Errorf("playbook add: %w", err)
-	}
-	return pbID, nil
-}
-
-// playbookID is sha256-truncated 8 bytes (16 hex chars) prefixed
-// with "pb-". Deterministic on (query, answer_id, answer_corpus).
-func playbookID(query, answerID, answerCorpus string) string {
-	h := sha256.Sum256([]byte(query + "|" + answerID + "|" + answerCorpus))
-	return "pb-" + hex.EncodeToString(h[:8])
-}
-
-// ensureCorpus creates a vectord index if it doesn't exist.
-// 201 = created; 409 = already exists; both fine for idempotent use.
-func (r *Retriever) ensureCorpus(ctx context.Context, name string, dim int) error {
-	body, err := json.Marshal(map[string]any{
-		"name": name, "dimension": dim, "distance": "cosine",
-	})
-	if err != nil {
-		return err
-	}
-	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost,
-		r.vectordURL+"/vectors/index", bytes.NewReader(body))
-	if err != nil {
-		return err
-	}
-	httpReq.Header.Set("Content-Type", "application/json")
-	resp, err := r.httpClient.Do(httpReq)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-	io.Copy(io.Discard, resp.Body)
-	if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusConflict {
-		return nil
-	}
-	return fmt.Errorf("ensure %q: status %d", name, resp.StatusCode)
-}
-
-// addItem POSTs a single-item batch to /vectors/index/{name}/add.
-func (r *Retriever) addItem(ctx context.Context, corpus, id string, vec []float32, meta json.RawMessage) error {
-	body, err := json.Marshal(map[string]any{
-		"items": []map[string]any{
-			{"id": id, "vector": vec, "metadata": meta},
-		},
-	})
-	if err != nil {
-		return err
-	}
-	url := r.vectordURL + "/vectors/index/" + corpus + "/add"
-	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
-	if err != nil {
-		return err
-	}
-	httpReq.Header.Set("Content-Type", "application/json")
-	resp, err := r.httpClient.Do(httpReq)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		b, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("add %q: status %d: %s", corpus, resp.StatusCode, b)
-	}
-	return nil
-}
-
-// matchesMetadataFilter reports whether a result's metadata satisfies
-// the filter. Each filter key must be present in the metadata; the
-// value must equal (or for a list filter, contain) the metadata
-// value. Missing key = drop. Type mismatches are JSON-equality
-// checked (e.g. filter wants 1 but metadata has 1.0 → match via
-// canonical JSON form).
-//
-// Filter value semantics:
-//   string|number|bool → exact equality (after JSON normalization)
-//   []any              → OR within key (any element matching wins)
-//
-// AND across keys: every filter key must match.
-func matchesMetadataFilter(rawMeta json.RawMessage, filter map[string]any) bool {
-	if len(filter) == 0 {
-		return true
-	}
-	if len(rawMeta) == 0 {
-		return false // no metadata can't satisfy any filter
-	}
-	var meta map[string]any
-	if err := json.Unmarshal(rawMeta, &meta); err != nil {
-		return false
-	}
-	for k, expected := range filter {
-		got, present := meta[k]
-		if !present {
-			return false
-		}
-		if !valueMatches(got, expected) {
-			return false
-		}
-	}
-	return true
-}
-
-// valueMatches handles single-value and list-value filter semantics.
-// JSON-canonical equality so 1 ≡ 1.0 and "true" != true.
-func valueMatches(got, expected any) bool {
-	if list, ok := expected.([]any); ok {
-		for _, e := range list {
-			if jsonEqual(got, e) {
-				return true
-			}
-		}
-		return false
-	}
-	return jsonEqual(got, expected)
-}
-
-// jsonEqual marshals both sides and compares the canonical forms.
-// Handles the float64-vs-int problem inherent to encoding/json
-// (which decodes all numbers as float64) — both sides go through
-// the same encoder so 1 == 1.0 if both came in as numbers.
-func jsonEqual(a, b any) bool {
-	ab, errA := json.Marshal(a)
-	bb, errB := json.Marshal(b)
-	if errA != nil || errB != nil {
-		return false
-	}
-	return string(ab) == string(bb)
-}
-
-// Corpora returns the list of vectord index names. Thin proxy to
-// GET /vectors/index — exposed at the matrix layer so callers don't
-// need direct vectord access.
-func (r *Retriever) Corpora(ctx context.Context) ([]string, error) {
-	url := r.vectordURL + "/vectors/index"
-	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
-	if err != nil {
-		return nil, err
-	}
-	resp, err := r.httpClient.Do(httpReq)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		b, _ := io.ReadAll(resp.Body)
-		return nil, fmt.Errorf("vectord index list: status %d: %s", resp.StatusCode, b)
-	}
-	var out struct {
-		Names []string `json:"names"`
-	}
-	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
-		return nil, err
-	}
-	return out.Names, nil
-}
-
-// embed POSTs a single-text /embed call. Reuses embedd's batched
-// /embed shape with len(texts)==1; embedd's LRU cache absorbs
-// repeat queries (commit 56844c3).
-func (r *Retriever) embed(ctx context.Context, text, model string) ([]float32, error) {
-	body, err := json.Marshal(map[string]any{"texts": []string{text}, "model": model})
-	if err != nil {
-		return nil, err
-	}
-	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, r.embeddURL+"/embed", bytes.NewReader(body))
-	if err != nil {
-		return nil, err
-	}
-	httpReq.Header.Set("Content-Type", "application/json")
-	resp, err := r.httpClient.Do(httpReq)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		b, _ := io.ReadAll(resp.Body)
-		return nil, fmt.Errorf("embed status %d: %s", resp.StatusCode, b)
-	}
-	var out struct {
-		Vectors [][]float32 `json:"vectors"`
-	}
-	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
-		return nil, err
-	}
-	if len(out.Vectors) == 0 {
-		return nil, errors.New("embed returned no vectors")
-	}
-	return out.Vectors[0], nil
-}
-
-// searchCorpus calls vectord /vectors/index/{name}/search. Returns
-// ErrCorpusNotFound (wrapped) on HTTP 404 so callers can distinguish
-// "this corpus doesn't exist" from "this corpus errored." Per
-// 2026-04-29 cross-lineage scrum (Opus + Kimi convergent): caught
-// the original strings.Contains "status 404" detection that would
-// silently break if the error format changed.
-func (r *Retriever) searchCorpus(ctx context.Context, corpus string, vec []float32, k int) ([]vectord.Result, error) {
-	body, err := json.Marshal(map[string]any{"vector": vec, "k": k})
-	if err != nil {
-		return nil, err
-	}
-	url := r.vectordURL + "/vectors/index/" + corpus + "/search"
-	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
-	if err != nil {
-		return nil, err
-	}
-	httpReq.Header.Set("Content-Type", "application/json")
-	resp, err := r.httpClient.Do(httpReq)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-	if resp.StatusCode == http.StatusNotFound {
-		return nil, fmt.Errorf("%w: %s", ErrCorpusNotFound, corpus)
-	}
-	if resp.StatusCode != http.StatusOK {
-		b, _ := io.ReadAll(resp.Body)
-		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, b)
-	}
-	var out struct {
-		Results []vectord.Result `json:"results"`
-	}
-	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
-		return nil, err
-	}
-	return out.Results, nil
-}
--- a/internal/observer/store.go
+++ b/internal/observer/store.go
@ -1,249 +0,0 @@
-package observer
-
-// Store: in-memory ring buffer + optional JSONL persistor. Same
-// shape as internal/pathway's persistor (afbb506) — opens the file
-// per Append rather than holding an fd, which is fine at the
-// observer's expected write rate (≤ a few hundred ops/min) and
-// keeps the substrate restartable mid-stream.
-
-import (
-	"bufio"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io/fs"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"sync"
-)
-
-// DefaultRingCap is the in-memory ring buffer cap. Mirrors the Rust
-// Phase 24 limit of 2000 (recordExternalOp shifts the head when
-// length > 2000).
-const DefaultRingCap = 2000
-
-// DefaultRecentScenariosCap is how many recent source=scenario ops
-// the Stats endpoint returns. Matches the TS hard-coded slice(-10).
-const DefaultRecentScenariosCap = 10
-
-// Store holds the ring buffer + the optional persistor. Thread-safe
-// via a single RWMutex (read-heavy via Stats; writes via Record).
-type Store struct {
-	mu        sync.RWMutex
-	ring      []ObservedOp
-	cap       int
-	persistor *Persistor
-}
-
-// NewStore returns an empty Store. Pass nil persistor for in-memory
-// only (unit tests, ephemeral runs); pass a real Persistor to enable
-// jsonl-append-on-record.
-func NewStore(persistor *Persistor) *Store {
-	return &Store{
-		ring:      make([]ObservedOp, 0, DefaultRingCap),
-		cap:       DefaultRingCap,
-		persistor: persistor,
-	}
-}
-
-// Record validates + persists + appends. Order matters: persist
-// first so a crash mid-record doesn't leave the ring ahead of the
-// log. Returns ErrInvalidOp on validation failure (no persist, no
-// append).
-func (s *Store) Record(op ObservedOp) error {
-	op.EnsureTimestamp()
-	op.DefaultSource()
-	if err := op.Validate(); err != nil {
-		return err
-	}
-	if s.persistor != nil {
-		if err := s.persistor.Append(op); err != nil {
-			// Best-effort persistence — log but don't fail the
-			// in-memory record. Mirrors the Rust catch{} in
-			// persistOp; the ring buffer is the source of truth in
-			// flight.
-			slog.Warn("observer: persist failed", "err", err)
-		}
-	}
-	s.mu.Lock()
-	defer s.mu.Unlock()
-	s.ring = append(s.ring, op)
-	if len(s.ring) > s.cap {
-		// Shift left by one (drop oldest). Avoids unbounded growth
-		// without a per-write reallocation.
-		copy(s.ring, s.ring[1:])
-		s.ring = s.ring[:len(s.ring)-1]
-	}
-	return nil
-}
-
-// Recent returns a copy of the ring buffer's current state. Most
-// recent entries are at the end (append-order).
-func (s *Store) Recent() []ObservedOp {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-	out := make([]ObservedOp, len(s.ring))
-	copy(out, s.ring)
-	return out
-}
-
-// Stats aggregates the ring buffer. Mirrors the Rust /stats
-// response shape exactly.
-func (s *Store) Stats() Stats {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-
-	stats := Stats{
-		Total:    len(s.ring),
-		BySource: make(map[string]int),
-	}
-	for _, op := range s.ring {
-		if op.Success {
-			stats.Successes++
-		} else {
-			stats.Failures++
-		}
-		src := string(op.Source)
-		if src == "" {
-			src = string(SourceMCP)
-		}
-		stats.BySource[src]++
-	}
-
-	// Last N scenario ops (most-recent-first → match Rust slice(-10)).
-	scenarios := make([]ScenarioOpDigest, 0, DefaultRecentScenariosCap)
-	for i := len(s.ring) - 1; i >= 0 && len(scenarios) < DefaultRecentScenariosCap; i-- {
-		op := s.ring[i]
-		if op.Source != SourceScenario {
-			continue
-		}
-		scenarios = append([]ScenarioOpDigest{{
-			TS:      op.Timestamp,
-			OK:      op.Success,
-			Staffer: op.StafferID,
-			Kind:    op.EventKind,
-			Role:    op.Role,
-		}}, scenarios...)
-	}
-	stats.RecentScenarios = scenarios
-
-	return stats
-}
-
-// Load replays the persistor's JSONL log into the ring buffer.
-// Resets the ring (current state is discarded) — same semantics as
-// pathway.Store.Load. Corruption-tolerant: malformed lines log
-// warnings and the load proceeds.
-//
-// Returns the number of ops successfully replayed.
-func (s *Store) Load() (int, error) {
-	if s.persistor == nil {
-		return 0, nil
-	}
-	s.mu.Lock()
-	defer s.mu.Unlock()
-	s.ring = s.ring[:0]
-	return s.persistor.Replay(func(op ObservedOp) error {
-		s.ring = append(s.ring, op)
-		if len(s.ring) > s.cap {
-			copy(s.ring, s.ring[1:])
-			s.ring = s.ring[:len(s.ring)-1]
-		}
-		return nil
-	})
-}
-
-// ─── Persistor ──────────────────────────────────────────────────
-
-// Persistor wraps a single JSONL file. Open-per-append — same
-// pattern as internal/pathway. Each line is one ObservedOp.
-type Persistor struct {
-	path string
-}
-
-// NewPersistor returns a Persistor for the given file path. Parent
-// directory is created on demand. Empty path is invalid (caller
-// passes nil to NewStore for the no-persist case).
-func NewPersistor(path string) (*Persistor, error) {
-	if path == "" {
-		return nil, errors.New("observer: persistor path is empty")
-	}
-	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
-		return nil, fmt.Errorf("observer: create dir: %w", err)
-	}
-	return &Persistor{path: path}, nil
-}
-
-// Path returns the file path the persistor writes to.
-func (p *Persistor) Path() string { return p.path }
-
-// Append writes one ObservedOp as a JSONL line.
-func (p *Persistor) Append(op ObservedOp) error {
-	line, err := json.Marshal(op)
-	if err != nil {
-		return fmt.Errorf("observer: marshal op: %w", err)
-	}
-	f, err := os.OpenFile(p.path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
-	if err != nil {
-		return fmt.Errorf("observer: open log: %w", err)
-	}
-	defer f.Close()
-	if _, err := f.Write(line); err != nil {
-		return fmt.Errorf("observer: write op: %w", err)
-	}
-	if _, err := f.Write([]byte{'\n'}); err != nil {
-		return fmt.Errorf("observer: write newline: %w", err)
-	}
-	return nil
-}
-
-// Replay reads the log line-by-line and invokes apply for each op.
-// Returns the count successfully applied. Missing file = 0 + nil
-// (legitimate cold-start state). Malformed lines log a warning and
-// the replay continues.
-func (p *Persistor) Replay(apply func(ObservedOp) error) (int, error) {
-	f, err := os.Open(p.path)
-	if errors.Is(err, fs.ErrNotExist) {
-		return 0, nil
-	}
-	if err != nil {
-		return 0, fmt.Errorf("observer: open log: %w", err)
-	}
-	defer f.Close()
-
-	scanner := bufio.NewScanner(f)
-	buf := make([]byte, 0, 64*1024)
-	scanner.Buffer(buf, 1<<20) // 1 MiB per line cap
-
-	applied, skipped, lineNo := 0, 0, 0
-	for scanner.Scan() {
-		lineNo++
-		raw := scanner.Bytes()
-		if len(raw) == 0 {
-			continue
-		}
-		var op ObservedOp
-		if err := json.Unmarshal(raw, &op); err != nil {
-			slog.Warn("observer: replay skipped malformed line",
-				"path", p.path, "line", lineNo, "err", err.Error())
-			skipped++
-			continue
-		}
-		if err := apply(op); err != nil {
-			slog.Warn("observer: replay apply failed",
-				"path", p.path, "line", lineNo, "err", err.Error())
-			skipped++
-			continue
-		}
-		applied++
-	}
-	if err := scanner.Err(); err != nil {
-		return applied, fmt.Errorf("observer: scan log: %w", err)
-	}
-	if skipped > 0 {
-		slog.Info("observer: replay completed with skips",
-			"path", p.path, "applied", applied, "skipped", skipped)
-	}
-	return applied, nil
-}
--- a/internal/observer/store_test.go
+++ b/internal/observer/store_test.go
@ -1,193 +0,0 @@
-package observer
-
-import (
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-	"time"
-)
-
-func mkOp(success bool, source Source) ObservedOp {
-	return ObservedOp{
-		Timestamp:     time.Now().UTC().Format(time.RFC3339),
-		Endpoint:      "/v1/test",
-		InputSummary:  "test op",
-		Success:       success,
-		DurationMs:    42,
-		OutputSummary: "ok",
-		Source:        source,
-	}
-}
-
-func TestRecord_RequiresEndpointAndTimestamp(t *testing.T) {
-	s := NewStore(nil)
-	bad := ObservedOp{Endpoint: ""} // EnsureTimestamp will fill, but Endpoint empty stays
-	if err := s.Record(bad); err == nil {
-		t.Error("expected error on empty endpoint")
-	}
-
-	good := mkOp(true, SourceMCP)
-	if err := s.Record(good); err != nil {
-		t.Errorf("good op: %v", err)
-	}
-}
-
-func TestRecord_DefaultsTimestampAndSource(t *testing.T) {
-	s := NewStore(nil)
-	op := ObservedOp{
-		Endpoint:     "/x",
-		InputSummary: "no ts no source",
-		Success:      true,
-	}
-	if err := s.Record(op); err != nil {
-		t.Fatal(err)
-	}
-	stored := s.Recent()[0]
-	if stored.Timestamp == "" {
-		t.Error("Timestamp should be defaulted")
-	}
-	if stored.Source != SourceMCP {
-		t.Errorf("Source: want %q, got %q", SourceMCP, stored.Source)
-	}
-}
-
-func TestStats_Aggregates(t *testing.T) {
-	s := NewStore(nil)
-	for i := 0; i < 5; i++ {
-		_ = s.Record(mkOp(true, SourceMCP))
-	}
-	for i := 0; i < 3; i++ {
-		_ = s.Record(mkOp(false, SourceScenario))
-	}
-	for i := 0; i < 2; i++ {
-		_ = s.Record(mkOp(true, SourceLangfuse))
-	}
-
-	st := s.Stats()
-	if st.Total != 10 {
-		t.Errorf("total: want 10, got %d", st.Total)
-	}
-	if st.Successes != 7 {
-		t.Errorf("successes: want 7, got %d", st.Successes)
-	}
-	if st.Failures != 3 {
-		t.Errorf("failures: want 3, got %d", st.Failures)
-	}
-	if st.BySource["mcp"] != 5 || st.BySource["scenario"] != 3 || st.BySource["langfuse"] != 2 {
-		t.Errorf("by_source mismatch: %+v", st.BySource)
-	}
-	if len(st.RecentScenarios) != 3 {
-		t.Errorf("recent scenarios: want 3, got %d", len(st.RecentScenarios))
-	}
-}
-
-func TestStats_RecentScenariosCappedAndOrdered(t *testing.T) {
-	s := NewStore(nil)
-	// Record 15 scenario ops; only the last 10 should appear.
-	for i := 0; i < 15; i++ {
-		op := mkOp(true, SourceScenario)
-		op.StafferID = "staffer-" + string(rune('a'+i))
-		_ = s.Record(op)
-		time.Sleep(time.Millisecond) // ensure timestamps order-distinguishable
-	}
-	st := s.Stats()
-	if len(st.RecentScenarios) != DefaultRecentScenariosCap {
-		t.Errorf("cap: want %d, got %d", DefaultRecentScenariosCap, len(st.RecentScenarios))
-	}
-	// Last entry should be the most recently added (staffer-o, the 15th).
-	last := st.RecentScenarios[len(st.RecentScenarios)-1]
-	if last.Staffer != "staffer-o" {
-		t.Errorf("most recent: want staffer-o, got %q", last.Staffer)
-	}
-}
-
-func TestRingBuffer_BoundedByDefaultCap(t *testing.T) {
-	s := NewStore(nil)
-	s.cap = 5 // shrink for testability
-	for i := 0; i < 12; i++ {
-		op := mkOp(true, SourceMCP)
-		op.InputSummary = string(rune('a' + i))
-		_ = s.Record(op)
-	}
-	r := s.Recent()
-	if len(r) != 5 {
-		t.Errorf("ring size: want 5, got %d", len(r))
-	}
-	// Oldest 7 dropped; first remaining should have InputSummary "h" (8th).
-	if r[0].InputSummary != "h" {
-		t.Errorf("oldest after rollover: want 'h', got %q", r[0].InputSummary)
-	}
-}
-
-func TestPersistor_RoundTrip(t *testing.T) {
-	dir := t.TempDir()
-	path := filepath.Join(dir, "ops.jsonl")
-	p, err := NewPersistor(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	s := NewStore(p)
-
-	for i := 0; i < 4; i++ {
-		op := mkOp(i%2 == 0, SourceMCP)
-		op.InputSummary = string(rune('a' + i))
-		if err := s.Record(op); err != nil {
-			t.Fatal(err)
-		}
-	}
-
-	// Sanity: file has 4 lines.
-	bs, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	lines := strings.Split(strings.TrimSuffix(string(bs), "\n"), "\n")
-	if len(lines) != 4 {
-		t.Errorf("file lines: want 4, got %d", len(lines))
-	}
-
-	// Rehydrate into a fresh Store.
-	s2 := NewStore(p)
-	n, err := s2.Load()
-	if err != nil {
-		t.Fatal(err)
-	}
-	if n != 4 {
-		t.Errorf("loaded: want 4, got %d", n)
-	}
-	r := s2.Recent()
-	if len(r) != 4 {
-		t.Errorf("rehydrated ring: want 4, got %d", len(r))
-	}
-	// Order preserved.
-	for i, want := range []string{"a", "b", "c", "d"} {
-		if r[i].InputSummary != want {
-			t.Errorf("op %d: want %q, got %q", i, want, r[i].InputSummary)
-		}
-	}
-}
-
-func TestPersistor_CorruptionTolerant(t *testing.T) {
-	dir := t.TempDir()
-	path := filepath.Join(dir, "ops.jsonl")
-	// Pre-seed with one valid + one corrupt + one valid line.
-	valid1 := `{"timestamp":"2026-04-29T12:00:00Z","endpoint":"/x","input_summary":"a","success":true,"duration_ms":1,"output_summary":"ok","source":"mcp"}`
-	corrupt := `{this is not json`
-	valid2 := `{"timestamp":"2026-04-29T12:00:01Z","endpoint":"/y","input_summary":"b","success":false,"duration_ms":2,"output_summary":"err","source":"scenario"}`
-	if err := os.WriteFile(path, []byte(valid1+"\n"+corrupt+"\n"+valid2+"\n"), 0o644); err != nil {
-		t.Fatal(err)
-	}
-	p, err := NewPersistor(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	s := NewStore(p)
-	n, err := s.Load()
-	if err != nil {
-		t.Fatal(err)
-	}
-	if n != 2 {
-		t.Errorf("applied: want 2 (valid pair), got %d (corrupt should skip)", n)
-	}
-}
--- a/internal/observer/types.go
+++ b/internal/observer/types.go
@ -1,131 +0,0 @@
-// Package observer is the Go port of mcp-server/observer.ts (Rust
-// system, 852 lines TS) — the "third-party witness" loop that records
-// every observed operation, surfaces failures, and feeds learnings
-// back into the substrate.
-//
-// What this package owns (this commit):
-//   - ObservedOp data model + ring buffer + JSONL persistence
-//   - Stats aggregation (total / successes / failures / by_source)
-//   - Source taxonomy (mcp / scenario / langfuse / overseer_correction)
-//
-// What's deferred to follow-up commits:
-//   - /review endpoint with cloud-LLM hand-review (the heuristic
-//     plus qwen3-coder fall-back path)
-//   - tailOverseerCorrections (background loop reading
-//     overseer_corrections.jsonl)
-//   - analyzeErrors / consolidatePlaybooks periodic loops
-//   - escalateFailureClusterToLLMTeam (failure clustering trigger)
-//
-// /relevance was already ported in 9588bd8 (component 3 of SPEC §3.4)
-// and lives in internal/matrix/relevance.go; the observer package
-// doesn't re-implement it.
-
-package observer
-
-import (
-	"errors"
-	"time"
-)
-
-// Source is the provenance of an observed op. Empty string defaults
-// to SourceMCP for back-compat with Phase 24 callers.
-type Source string
-
-const (
-	SourceMCP                 Source = "mcp"
-	SourceScenario            Source = "scenario"
-	SourceLangfuse            Source = "langfuse"
-	SourceOverseerCorrection  Source = "overseer_correction"
-)
-
-// ObservedOp is one entry in the observer's ring buffer (and JSONL
-// log when persistence is configured). Mirrors the Rust ObservedOp
-// shape exactly so the on-wire JSON round-trips between the two
-// implementations during the Rust→Go cutover.
-//
-// Optional fields use omitempty so absent values don't bloat the
-// JSONL file. Numeric zero values are intentionally treated as
-// "not set" by the JSON layer; if a real zero needs to be
-// persisted, future schema-version bump can switch to pointers.
-type ObservedOp struct {
-	Timestamp     string `json:"timestamp"`     // ISO 8601
-	Endpoint      string `json:"endpoint"`
-	InputSummary  string `json:"input_summary"`
-	Success       bool   `json:"success"`
-	DurationMs    int64  `json:"duration_ms"`
-	OutputSummary string `json:"output_summary"`
-	Error         string `json:"error,omitempty"`
-
-	Source    Source `json:"source,omitempty"`
-	StafferID string `json:"staffer_id,omitempty"`
-	SigHash   string `json:"sig_hash,omitempty"`
-	EventKind string `json:"event_kind,omitempty"`
-	Role      string `json:"role,omitempty"`
-	City      string `json:"city,omitempty"`
-	State     string `json:"state,omitempty"`
-	Count     int    `json:"count,omitempty"`
-
-	RescueAttempted bool `json:"rescue_attempted,omitempty"`
-	RescueSucceeded bool `json:"rescue_succeeded,omitempty"`
-
-	TaskClass     string `json:"task_class,omitempty"`
-	Correction    string `json:"correction,omitempty"`
-	AppliedAtTurn int    `json:"applied_at_turn,omitempty"`
-}
-
-// Stats is the aggregated view of the ring buffer — useful for
-// dashboards and the GET /stats endpoint. RecentScenarios holds the
-// most recent N source=scenario ops (default cap 10) so operators
-// can see what the staffing scenarios are emitting at a glance.
-type Stats struct {
-	Total           int                `json:"total"`
-	Successes       int                `json:"successes"`
-	Failures        int                `json:"failures"`
-	BySource        map[string]int     `json:"by_source"`
-	RecentScenarios []ScenarioOpDigest `json:"recent_scenario_ops"`
-}
-
-// ScenarioOpDigest is the slim per-op shape returned in
-// Stats.RecentScenarios — matches the TS digest exactly:
-// {ts, ok, staffer, kind, role}.
-type ScenarioOpDigest struct {
-	TS      string `json:"ts"`
-	OK      bool   `json:"ok"`
-	Staffer string `json:"staffer"`
-	Kind    string `json:"kind"`
-	Role    string `json:"role"`
-}
-
-// Errors surfaced to HTTP handlers.
-var (
-	ErrInvalidOp = errors.New("observer: invalid op (timestamp + endpoint required)")
-)
-
-// Validate returns an error if required fields are missing. Called
-// by Record before the op is added to the ring buffer.
-func (op ObservedOp) Validate() error {
-	if op.Timestamp == "" {
-		return ErrInvalidOp
-	}
-	if op.Endpoint == "" {
-		return ErrInvalidOp
-	}
-	return nil
-}
-
-// EnsureTimestamp populates Timestamp with the current UTC ISO 8601
-// time if it's empty. Useful for HTTP handlers that take the body
-// as authoritative but need to default the timestamp when absent.
-func (op *ObservedOp) EnsureTimestamp() {
-	if op.Timestamp == "" {
-		op.Timestamp = time.Now().UTC().Format(time.RFC3339)
-	}
-}
-
-// DefaultSource sets Source to SourceMCP if empty. Mirrors the Rust
-// `op.source ?? "mcp"` pattern in recordExternalOp.
-func (op *ObservedOp) DefaultSource() {
-	if op.Source == "" {
-		op.Source = SourceMCP
-	}
-}
--- a/internal/pathway/persistor.go
+++ b/internal/pathway/persistor.go
@ -1,130 +0,0 @@
-// persistor.go — JSONL append-only persistence for pathway memory.
-//
-// Each event is one JSON line. Append is O(1) (open append, write,
-// close — Go's *os.File default fsync policy is "rely on OS" which
-// is fine here; correctness on power-loss is best-effort, not
-// transactional). Replay reads the file once at startup.
-//
-// Corruption recovery: malformed lines log a warn (counted in
-// Replay's return) but do not stop the load. Partial state is
-// better than no state for an agent substrate.
-//
-// What's NOT here:
-//   - Compaction. JSONL grows linearly with mutations; below 100K
-//     traces this is fine. Compaction will land when needed and
-//     will emit a snapshot file + tail JSONL.
-//   - fsync per write. We rely on the OS's eventual fsync; trace
-//     loss on hard crash is acceptable for the substrate's
-//     "remember most things" guarantee.
-
-package pathway
-
-import (
-	"bufio"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io/fs"
-	"log/slog"
-	"os"
-	"path/filepath"
-)
-
-// Persistor wraps a single JSONL file. Construct with NewPersistor;
-// it does NOT load on construction — callers must call Store.Load()
-// to replay.
-type Persistor struct {
-	path string
-}
-
-// NewPersistor returns a persistor for the given file path. The
-// parent directory is created on demand. The file is created lazily
-// on first Append.
-func NewPersistor(path string) (*Persistor, error) {
-	if path == "" {
-		return nil, errors.New("pathway: persistor path is empty")
-	}
-	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
-		return nil, fmt.Errorf("pathway: create dir: %w", err)
-	}
-	return &Persistor{path: path}, nil
-}
-
-// Path returns the underlying file path. Useful for tests + logs.
-func (p *Persistor) Path() string { return p.path }
-
-// Append writes one event to the JSONL log. Each call opens the
-// file in append mode, writes one line, and closes — simple but
-// correct. A pooled persistent fd is a future optimization if
-// profiling shows append-rate matters.
-func (p *Persistor) Append(e event) error {
-	line, err := json.Marshal(e)
-	if err != nil {
-		return fmt.Errorf("pathway: marshal event: %w", err)
-	}
-	f, err := os.OpenFile(p.path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
-	if err != nil {
-		return fmt.Errorf("pathway: open log: %w", err)
-	}
-	defer f.Close()
-	if _, err := f.Write(line); err != nil {
-		return fmt.Errorf("pathway: write event: %w", err)
-	}
-	if _, err := f.Write([]byte{'\n'}); err != nil {
-		return fmt.Errorf("pathway: write newline: %w", err)
-	}
-	return nil
-}
-
-// Replay reads the log line-by-line and invokes apply for each
-// event. Returns the count of events successfully applied. A
-// missing file is NOT an error (means "no prior state"); a
-// partially-corrupt file logs warns and continues.
-func (p *Persistor) Replay(apply func(event) error) (int, error) {
-	f, err := os.Open(p.path)
-	if errors.Is(err, fs.ErrNotExist) {
-		return 0, nil
-	}
-	if err != nil {
-		return 0, fmt.Errorf("pathway: open log: %w", err)
-	}
-	defer f.Close()
-
-	scanner := bufio.NewScanner(f)
-	// Big buffer for unusually long content — 1 MiB per line cap.
-	buf := make([]byte, 0, 64*1024)
-	scanner.Buffer(buf, 1<<20)
-
-	applied := 0
-	skipped := 0
-	lineNo := 0
-	for scanner.Scan() {
-		lineNo++
-		raw := scanner.Bytes()
-		if len(raw) == 0 {
-			continue
-		}
-		var e event
-		if err := json.Unmarshal(raw, &e); err != nil {
-			slog.Warn("pathway: replay skipped malformed line",
-				"path", p.path, "line", lineNo, "err", err.Error())
-			skipped++
-			continue
-		}
-		if err := apply(e); err != nil {
-			slog.Warn("pathway: replay event apply failed",
-				"path", p.path, "line", lineNo, "op", e.Op, "err", err.Error())
-			skipped++
-			continue
-		}
-		applied++
-	}
-	if err := scanner.Err(); err != nil {
-		return applied, fmt.Errorf("pathway: scan log: %w", err)
-	}
-	if skipped > 0 {
-		slog.Info("pathway: replay completed with skips",
-			"path", p.path, "applied", applied, "skipped", skipped)
-	}
-	return applied, nil
-}
--- a/internal/pathway/persistor_test.go
+++ b/internal/pathway/persistor_test.go
@ -1,184 +0,0 @@
-package pathway
-
-import (
-	"encoding/json"
-	"errors"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-)
-
-// persistor_test covers the corruption-recovery contract per
-// Sprint 2 row 7: malformed JSONL lines must not halt replay.
-
-func TestPersistor_MissingFileIsNotError(t *testing.T) {
-	dir := t.TempDir()
-	path := filepath.Join(dir, "nonexistent.jsonl")
-	p, err := NewPersistor(path)
-	if err != nil {
-		t.Fatalf("NewPersistor on missing file should not error, got %v", err)
-	}
-	n, err := p.Replay(func(event) error { return nil })
-	if err != nil {
-		t.Errorf("Replay on missing file should be 0,nil; got %d, %v", n, err)
-	}
-	if n != 0 {
-		t.Errorf("Replay on missing file replayed %d events, want 0", n)
-	}
-}
-
-func TestPersistor_AppendThenReplay(t *testing.T) {
-	p := mustPersistor(t)
-
-	if err := p.Append(event{Op: opAdd, Trace: &Trace{UID: "A", Content: json.RawMessage(`{}`)}}); err != nil {
-		t.Fatalf("Append: %v", err)
-	}
-	if err := p.Append(event{Op: opAdd, Trace: &Trace{UID: "B", Content: json.RawMessage(`{}`)}}); err != nil {
-		t.Fatalf("Append: %v", err)
-	}
-
-	var seen []string
-	n, err := p.Replay(func(e event) error {
-		if e.Trace != nil {
-			seen = append(seen, e.Trace.UID)
-		}
-		return nil
-	})
-	if err != nil {
-		t.Fatalf("Replay: %v", err)
-	}
-	if n != 2 {
-		t.Errorf("Replay applied %d events, want 2", n)
-	}
-	if len(seen) != 2 || seen[0] != "A" || seen[1] != "B" {
-		t.Errorf("seen = %v, want [A B]", seen)
-	}
-}
-
-func TestPersistor_CorruptedLines_Skipped(t *testing.T) {
-	p := mustPersistor(t)
-
-	// Mix of valid and corrupted lines.
-	good1 := mustMarshal(t, event{Op: opAdd, Trace: &Trace{UID: "A", Content: json.RawMessage(`{}`)}})
-	bad := []byte(`{this is not json}`)
-	good2 := mustMarshal(t, event{Op: opAdd, Trace: &Trace{UID: "B", Content: json.RawMessage(`{}`)}})
-	emptyLine := []byte(``)
-	good3 := mustMarshal(t, event{Op: opAdd, Trace: &Trace{UID: "C", Content: json.RawMessage(`{}`)}})
-
-	contents := []byte{}
-	for _, line := range [][]byte{good1, bad, good2, emptyLine, good3} {
-		contents = append(contents, line...)
-		contents = append(contents, '\n')
-	}
-	if err := os.WriteFile(p.Path(), contents, 0o644); err != nil {
-		t.Fatalf("write file: %v", err)
-	}
-
-	var applied []string
-	n, err := p.Replay(func(e event) error {
-		if e.Trace != nil {
-			applied = append(applied, e.Trace.UID)
-		}
-		return nil
-	})
-	if err != nil {
-		t.Fatalf("Replay: %v", err)
-	}
-	// 3 valid + 1 bad + 1 empty (skipped silently) = 3 applied.
-	if n != 3 {
-		t.Errorf("Replay applied %d, want 3 (1 corrupt line skipped)", n)
-	}
-	if len(applied) != 3 || applied[0] != "A" || applied[1] != "B" || applied[2] != "C" {
-		t.Errorf("applied = %v, want [A B C]", applied)
-	}
-}
-
-func TestPersistor_ApplyError_Skipped(t *testing.T) {
-	// If the apply function returns error for an event, replay
-	// should keep going (the error is logged, not raised).
-	p := mustPersistor(t)
-	_ = p.Append(event{Op: opAdd, Trace: &Trace{UID: "A", Content: json.RawMessage(`{}`)}})
-	_ = p.Append(event{Op: opAdd, Trace: &Trace{UID: "B", Content: json.RawMessage(`{}`)}})
-	_ = p.Append(event{Op: opAdd, Trace: &Trace{UID: "C", Content: json.RawMessage(`{}`)}})
-
-	count := 0
-	n, err := p.Replay(func(e event) error {
-		if e.Trace != nil && e.Trace.UID == "B" {
-			return errors.New("simulated apply error on B")
-		}
-		count++
-		return nil
-	})
-	if err != nil {
-		t.Fatalf("Replay: %v", err)
-	}
-	if n != 2 || count != 2 {
-		t.Errorf("Replay applied %d (callback called %d), want 2 each (B's error skipped)", n, count)
-	}
-}
-
-func TestPersistor_NewPersistor_EmptyPath_Errors(t *testing.T) {
-	_, err := NewPersistor("")
-	if err == nil {
-		t.Error("NewPersistor with empty path should error")
-	}
-}
-
-func TestPersistor_CreatesParentDir(t *testing.T) {
-	dir := t.TempDir()
-	nested := filepath.Join(dir, "nested", "deep", "pathway.jsonl")
-	p, err := NewPersistor(nested)
-	if err != nil {
-		t.Fatalf("NewPersistor: %v", err)
-	}
-	if err := p.Append(event{Op: opAdd, Trace: &Trace{UID: "A", Content: json.RawMessage(`{}`)}}); err != nil {
-		t.Fatalf("Append after creating nested dir: %v", err)
-	}
-}
-
-func TestPersistor_LongLine_HandlesUpTo1MiB(t *testing.T) {
-	p := mustPersistor(t)
-
-	// Build a content blob ~750 KiB so the JSON line is ~800 KiB
-	// (under the 1 MiB scanner cap).
-	blob := strings.Repeat("x", 750*1024)
-	bigContent, _ := json.Marshal(map[string]string{"data": blob})
-	tr := &Trace{UID: "BIG", Content: bigContent}
-	if err := p.Append(event{Op: opAdd, Trace: tr}); err != nil {
-		t.Fatalf("Append big trace: %v", err)
-	}
-
-	count := 0
-	n, _ := p.Replay(func(e event) error {
-		if e.Trace != nil && e.Trace.UID == "BIG" {
-			count++
-		}
-		return nil
-	})
-	if n != 1 || count != 1 {
-		t.Errorf("big-line replay: got %d events / %d matches, want 1 each", n, count)
-	}
-}
-
-// ── helpers ──
-
-func mustPersistor(t *testing.T) *Persistor {
-	t.Helper()
-	dir := t.TempDir()
-	path := filepath.Join(dir, "test.jsonl")
-	p, err := NewPersistor(path)
-	if err != nil {
-		t.Fatalf("NewPersistor: %v", err)
-	}
-	return p
-}
-
-func mustMarshal(t *testing.T, e event) []byte {
-	t.Helper()
-	b, err := json.Marshal(e)
-	if err != nil {
-		t.Fatalf("marshal: %v", err)
-	}
-	return b
-}
--- a/internal/pathway/store.go
+++ b/internal/pathway/store.go
@ -1,381 +0,0 @@
-// store.go — the in-memory side of pathway memory. Persistence
-// (load/append-on-mutate) is in persistor.go; the Store can be
-// constructed without persistence for tests and ephemeral uses.
-
-package pathway
-
-import (
-	"bytes"
-	"encoding/json"
-	"errors"
-	"sync"
-	"time"
-
-	"github.com/google/uuid"
-)
-
-// Store is the in-memory pathway memory. Thread-safe via a single
-// RWMutex (read-heavy workloads are the norm; mutations are
-// individual operations not hot loops).
-type Store struct {
-	mu sync.RWMutex
-	// traces[uid] → *Trace. Single map covers both retired and
-	// active traces; Search filters retired by default.
-	traces map[string]*Trace
-
-	// persistor is optional — nil = in-memory only (test mode
-	// and ephemeral G2 uses).
-	persistor *Persistor
-
-	// nowFn returns "the current time in nanoseconds" — overridden
-	// in tests for deterministic timestamps.
-	nowFn func() int64
-
-	// uidFn generates new UIDs — overridden in tests for
-	// deterministic UID sequences.
-	uidFn func() string
-}
-
-// NewStore builds an empty Store. Pass nil persistor for in-memory
-// mode. The returned store is ready to receive operations; if
-// persistor is non-nil, call Load(ctx) before issuing operations to
-// rehydrate prior state.
-func NewStore(persistor *Persistor) *Store {
-	return &Store{
-		traces:    make(map[string]*Trace),
-		persistor: persistor,
-		nowFn:     func() int64 { return time.Now().UnixNano() },
-		uidFn:     func() string { return uuid.New().String() },
-	}
-}
-
-// Load replays the persistor's JSONL log and rebuilds in-memory
-// state. Safe to call multiple times — each call resets the in-
-// memory state to whatever the log says. Corruption (malformed
-// lines, broken events) is logged-not-fatal: the load proceeds
-// with the partial state it can recover.
-//
-// Returns the number of events successfully applied.
-func (s *Store) Load() (int, error) {
-	if s.persistor == nil {
-		return 0, nil
-	}
-	s.mu.Lock()
-	defer s.mu.Unlock()
-	s.traces = make(map[string]*Trace) // reset
-	return s.persistor.Replay(func(e event) error {
-		return s.applyEventLocked(e)
-	})
-}
-
-// applyEventLocked is the single point where events update the
-// in-memory map. Used by both Load (replaying log) and the
-// mutating methods (after appending to the log). Caller MUST hold
-// s.mu in write mode.
-func (s *Store) applyEventLocked(e event) error {
-	switch e.Op {
-	case opAdd, opRevise:
-		if e.Trace == nil || e.Trace.UID == "" {
-			return ErrInvalidContent
-		}
-		// Add semantics: if UID already exists, this should have been
-		// a replay — but be permissive on Replay to handle older logs.
-		s.traces[e.Trace.UID] = e.Trace
-		return nil
-	case opUpdate:
-		t, ok := s.traces[e.UID]
-		if !ok {
-			return ErrNotFound
-		}
-		t.Content = e.Content
-		t.UpdatedAtNs = s.nowFn()
-		return nil
-	case opRetire:
-		t, ok := s.traces[e.UID]
-		if !ok {
-			return ErrNotFound
-		}
-		t.Retired = true
-		t.UpdatedAtNs = s.nowFn()
-		return nil
-	case opReplay:
-		t, ok := s.traces[e.UID]
-		if !ok {
-			return ErrNotFound
-		}
-		t.ReplayCount++
-		return nil
-	default:
-		return errors.New("pathway: unknown op")
-	}
-}
-
-// Add stores a new trace with a fresh UID and replay_count=1.
-// Returns the stored trace (with UID + timestamps populated).
-func (s *Store) Add(content json.RawMessage, tags ...string) (*Trace, error) {
-	if !json.Valid(content) {
-		return nil, ErrInvalidContent
-	}
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
-	now := s.nowFn()
-	t := &Trace{
-		UID:         s.uidFn(),
-		Content:     content,
-		CreatedAtNs: now,
-		UpdatedAtNs: now,
-		ReplayCount: 1,
-		Tags:        copyTags(tags),
-	}
-	if err := s.appendAndApplyLocked(event{Op: opAdd, Trace: t}); err != nil {
-		return nil, err
-	}
-	// Clone before returning so the caller can't mutate the in-memory
-	// trace through the returned pointer (matches Get's contract).
-	return cloneTrace(t), nil
-}
-
-// AddIdempotent stores a trace under the given UID, OR — if the
-// UID already exists — increments its ReplayCount. Used by agent
-// loops that want to record "I tried this same thing again."
-func (s *Store) AddIdempotent(uid string, content json.RawMessage, tags ...string) (*Trace, error) {
-	if uid == "" {
-		return nil, ErrEmptyUID
-	}
-	if !json.Valid(content) {
-		return nil, ErrInvalidContent
-	}
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
-	if existing, ok := s.traces[uid]; ok {
-		// Replay: increment count, persist as opReplay event.
-		if err := s.appendAndApplyLocked(event{Op: opReplay, UID: uid}); err != nil {
-			return nil, err
-		}
-		// Return a copy to avoid the caller mutating the in-memory
-		// trace through the returned pointer.
-		return cloneTrace(existing), nil
-	}
-
-	now := s.nowFn()
-	t := &Trace{
-		UID:         uid,
-		Content:     content,
-		CreatedAtNs: now,
-		UpdatedAtNs: now,
-		ReplayCount: 1,
-		Tags:        copyTags(tags),
-	}
-	if err := s.appendAndApplyLocked(event{Op: opAdd, Trace: t}); err != nil {
-		return nil, err
-	}
-	return cloneTrace(t), nil
-}
-
-// Update replaces the content of an existing trace. Same UID, new
-// content. NOT a revision — use Revise when the new content
-// represents a change-of-belief that should preserve the old.
-func (s *Store) Update(uid string, content json.RawMessage) error {
-	if uid == "" {
-		return ErrEmptyUID
-	}
-	if !json.Valid(content) {
-		return ErrInvalidContent
-	}
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
-	if _, ok := s.traces[uid]; !ok {
-		return ErrNotFound
-	}
-	return s.appendAndApplyLocked(event{Op: opUpdate, UID: uid, Content: content})
-}
-
-// Revise creates a new trace whose PredecessorUID points at an
-// existing trace. Old trace stays accessible via Get and History.
-// Returns the new trace.
-func (s *Store) Revise(predecessorUID string, content json.RawMessage, tags ...string) (*Trace, error) {
-	if predecessorUID == "" {
-		return nil, ErrEmptyUID
-	}
-	if !json.Valid(content) {
-		return nil, ErrInvalidContent
-	}
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
-	if _, ok := s.traces[predecessorUID]; !ok {
-		return nil, ErrPredecessorMissing
-	}
-	now := s.nowFn()
-	t := &Trace{
-		UID:            s.uidFn(),
-		Content:        content,
-		PredecessorUID: predecessorUID,
-		CreatedAtNs:    now,
-		UpdatedAtNs:    now,
-		ReplayCount:    1,
-		Tags:           copyTags(tags),
-	}
-	if err := s.appendAndApplyLocked(event{Op: opRevise, Trace: t}); err != nil {
-		return nil, err
-	}
-	return cloneTrace(t), nil
-}
-
-// Retire marks a trace as retired. Retired traces are excluded
-// from Search by default but accessible via Get and History.
-func (s *Store) Retire(uid string) error {
-	if uid == "" {
-		return ErrEmptyUID
-	}
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
-	if _, ok := s.traces[uid]; !ok {
-		return ErrNotFound
-	}
-	return s.appendAndApplyLocked(event{Op: opRetire, UID: uid})
-}
-
-// Get returns a copy of the trace with the given UID. Includes
-// retired traces (caller decides what to do with them).
-func (s *Store) Get(uid string) (*Trace, error) {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-	t, ok := s.traces[uid]
-	if !ok {
-		return nil, ErrNotFound
-	}
-	return cloneTrace(t), nil
-}
-
-// History returns the chain of traces from this UID backward
-// through PredecessorUID links. Slot 0 is the queried trace; slot
-// 1 is its predecessor; and so on. Cycle-safe: a UID that appears
-// twice during the walk returns ErrCycle (only happens if the
-// persistence file was hand-edited or there's a bug elsewhere).
-func (s *Store) History(uid string) ([]*Trace, error) {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-
-	var chain []*Trace
-	visited := make(map[string]struct{})
-	cursor := uid
-	for cursor != "" {
-		if _, seen := visited[cursor]; seen {
-			return nil, ErrCycle
-		}
-		visited[cursor] = struct{}{}
-
-		t, ok := s.traces[cursor]
-		if !ok {
-			if len(chain) == 0 {
-				return nil, ErrNotFound
-			}
-			// Predecessor missing mid-chain — return what we have.
-			break
-		}
-		chain = append(chain, cloneTrace(t))
-		cursor = t.PredecessorUID
-	}
-	return chain, nil
-}
-
-// Search returns traces matching the filter. Excludes retired by
-// default; pass IncludeRetired: true to include them. Returns a
-// new slice of trace copies — caller can mutate freely.
-func (s *Store) Search(filter SearchFilter) []*Trace {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-
-	var out []*Trace
-	for _, t := range s.traces {
-		if t.Retired && !filter.IncludeRetired {
-			continue
-		}
-		if filter.Tag != "" && !containsTag(t.Tags, filter.Tag) {
-			continue
-		}
-		if filter.ContentContains != "" &&
-			!bytes.Contains(t.Content, []byte(filter.ContentContains)) {
-			continue
-		}
-		if filter.CreatedAfterNs > 0 && t.CreatedAtNs < filter.CreatedAfterNs {
-			continue
-		}
-		if filter.CreatedBeforeNs > 0 && t.CreatedAtNs > filter.CreatedBeforeNs {
-			continue
-		}
-		out = append(out, cloneTrace(t))
-	}
-	return out
-}
-
-// Stats returns lifetime counters useful for /stats endpoints and
-// operator dashboards.
-type Stats struct {
-	Total   int
-	Active  int
-	Retired int
-}
-
-func (s *Store) Stats() Stats {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-	st := Stats{Total: len(s.traces)}
-	for _, t := range s.traces {
-		if t.Retired {
-			st.Retired++
-		} else {
-			st.Active++
-		}
-	}
-	return st
-}
-
-// appendAndApplyLocked is the single-point write path: persist the
-// event first (so a crash mid-mutation doesn't leave in-memory
-// state ahead of the log), then apply it in memory. Caller holds
-// s.mu in write mode.
-func (s *Store) appendAndApplyLocked(e event) error {
-	if s.persistor != nil {
-		if err := s.persistor.Append(e); err != nil {
-			return err
-		}
-	}
-	return s.applyEventLocked(e)
-}
-
-// cloneTrace returns a deep copy so callers can't mutate the
-// in-memory trace through the returned pointer.
-func cloneTrace(t *Trace) *Trace {
-	c := *t
-	if t.Content != nil {
-		c.Content = append(json.RawMessage(nil), t.Content...)
-	}
-	if t.Tags != nil {
-		c.Tags = append([]string(nil), t.Tags...)
-	}
-	return &c
-}
-
-func copyTags(in []string) []string {
-	if len(in) == 0 {
-		return nil
-	}
-	out := make([]string, len(in))
-	copy(out, in)
-	return out
-}
-
-func containsTag(tags []string, want string) bool {
-	for _, t := range tags {
-		if t == want {
-			return true
-		}
-	}
-	return false
-}
--- a/internal/pathway/store_test.go
+++ b/internal/pathway/store_test.go
@ -1,398 +0,0 @@
-package pathway
-
-import (
-	"encoding/json"
-	"errors"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"testing"
-)
-
-// Closes Sprint 2 design-bar work from the audit. Tests cover all 7
-// claim rows from claim-coverage-table.md: ADD, UPDATE, REVISE,
-// RETIRE, HISTORY chain cycle-safe, replay-count duplicate ADD,
-// corrupted memory row recovery (corrupted_test.go).
-
-// newTestStore returns an in-memory Store with deterministic UID +
-// time generation for repeatable assertions.
-func newTestStore(t *testing.T) *Store {
-	t.Helper()
-	s := NewStore(nil)
-	var counter int
-	var clock int64
-	s.uidFn = func() string {
-		counter++
-		return "uid-" + strconv.Itoa(counter)
-	}
-	s.nowFn = func() int64 {
-		clock++
-		return clock
-	}
-	return s
-}
-
-func newPersistedStore(t *testing.T) (*Store, string) {
-	t.Helper()
-	dir := t.TempDir()
-	path := filepath.Join(dir, "pathway.jsonl")
-	p, err := NewPersistor(path)
-	if err != nil {
-		t.Fatalf("NewPersistor: %v", err)
-	}
-	s := NewStore(p)
-	var counter int
-	var clock int64
-	s.uidFn = func() string {
-		counter++
-		return "uid-" + strconv.Itoa(counter)
-	}
-	s.nowFn = func() int64 {
-		clock++
-		return clock
-	}
-	return s, path
-}
-
-// ── Sprint 2 row 1: ADD a new pathway trace ────────────────────
-
-func TestAdd_AssignsUIDAndTimestamps(t *testing.T) {
-	s := newTestStore(t)
-	tr, err := s.Add(json.RawMessage(`{"k":"v"}`), "tag-a")
-	if err != nil {
-		t.Fatalf("Add: %v", err)
-	}
-	if tr.UID != "uid-1" {
-		t.Errorf("UID = %q, want uid-1", tr.UID)
-	}
-	if tr.ReplayCount != 1 {
-		t.Errorf("ReplayCount = %d, want 1", tr.ReplayCount)
-	}
-	if tr.Retired {
-		t.Error("freshly-added trace should NOT be retired")
-	}
-	if tr.CreatedAtNs == 0 || tr.UpdatedAtNs == 0 {
-		t.Error("timestamps unset")
-	}
-	if len(tr.Tags) != 1 || tr.Tags[0] != "tag-a" {
-		t.Errorf("Tags = %v, want [tag-a]", tr.Tags)
-	}
-}
-
-func TestAdd_RejectsInvalidJSON(t *testing.T) {
-	s := newTestStore(t)
-	_, err := s.Add(json.RawMessage(`not json`))
-	if !errors.Is(err, ErrInvalidContent) {
-		t.Errorf("expected ErrInvalidContent, got %v", err)
-	}
-}
-
-// ── Sprint 2 row 2: UPDATE replaces existing trace by uid ──────
-
-func TestUpdate_ReplacesContentSameUID(t *testing.T) {
-	s := newTestStore(t)
-	tr, _ := s.Add(json.RawMessage(`{"v":1}`))
-
-	if err := s.Update(tr.UID, json.RawMessage(`{"v":2}`)); err != nil {
-		t.Fatalf("Update: %v", err)
-	}
-
-	got, _ := s.Get(tr.UID)
-	if string(got.Content) != `{"v":2}` {
-		t.Errorf("content = %s, want updated", got.Content)
-	}
-	if got.UpdatedAtNs == tr.UpdatedAtNs {
-		t.Error("UpdatedAtNs should bump on Update")
-	}
-}
-
-func TestUpdate_MissingUID_Errors(t *testing.T) {
-	s := newTestStore(t)
-	err := s.Update("nonexistent", json.RawMessage(`{}`))
-	if !errors.Is(err, ErrNotFound) {
-		t.Errorf("expected ErrNotFound, got %v", err)
-	}
-}
-
-// ── Sprint 2 row 3: REVISE creates a new revision linked via history ──
-
-func TestRevise_LinksToPredecessorViaHistory(t *testing.T) {
-	s := newTestStore(t)
-	root, _ := s.Add(json.RawMessage(`{"v":1}`))
-	rev, err := s.Revise(root.UID, json.RawMessage(`{"v":2}`))
-	if err != nil {
-		t.Fatalf("Revise: %v", err)
-	}
-	if rev.PredecessorUID != root.UID {
-		t.Errorf("PredecessorUID = %q, want %q", rev.PredecessorUID, root.UID)
-	}
-	if rev.UID == root.UID {
-		t.Error("Revise must produce a NEW UID")
-	}
-}
-
-func TestRevise_PredecessorMissing_Errors(t *testing.T) {
-	s := newTestStore(t)
-	_, err := s.Revise("ghost-uid", json.RawMessage(`{}`))
-	if !errors.Is(err, ErrPredecessorMissing) {
-		t.Errorf("expected ErrPredecessorMissing, got %v", err)
-	}
-}
-
-func TestRevise_ChainOfThree_BackwardWalk(t *testing.T) {
-	s := newTestStore(t)
-	a, _ := s.Add(json.RawMessage(`{"v":1}`))
-	b, _ := s.Revise(a.UID, json.RawMessage(`{"v":2}`))
-	c, _ := s.Revise(b.UID, json.RawMessage(`{"v":3}`))
-
-	chain, err := s.History(c.UID)
-	if err != nil {
-		t.Fatalf("History: %v", err)
-	}
-	want := []string{c.UID, b.UID, a.UID}
-	if len(chain) != 3 {
-		t.Fatalf("chain length = %d, want 3", len(chain))
-	}
-	for i, tr := range chain {
-		if tr.UID != want[i] {
-			t.Errorf("chain[%d].UID = %q, want %q", i, tr.UID, want[i])
-		}
-	}
-}
-
-// ── Sprint 2 row 4: RETIRE marks trace excluded from retrieval ──
-
-func TestRetire_ExcludedFromSearch(t *testing.T) {
-	s := newTestStore(t)
-	a, _ := s.Add(json.RawMessage(`{"v":1}`), "common")
-	b, _ := s.Add(json.RawMessage(`{"v":2}`), "common")
-	if err := s.Retire(a.UID); err != nil {
-		t.Fatalf("Retire: %v", err)
-	}
-
-	results := s.Search(SearchFilter{Tag: "common"})
-	if len(results) != 1 || results[0].UID != b.UID {
-		t.Errorf("Search excluded retired? got %d results, want 1 (active only)", len(results))
-	}
-
-	// IncludeRetired flag returns both.
-	withRetired := s.Search(SearchFilter{Tag: "common", IncludeRetired: true})
-	if len(withRetired) != 2 {
-		t.Errorf("IncludeRetired Search returned %d, want 2", len(withRetired))
-	}
-}
-
-func TestRetire_StillAccessibleViaGet(t *testing.T) {
-	// Per ADR-004: "Retired traces are excluded from Search by default
-	// but accessible via Get and History." Locks that contract.
-	s := newTestStore(t)
-	tr, _ := s.Add(json.RawMessage(`{"v":1}`))
-	s.Retire(tr.UID)
-
-	got, err := s.Get(tr.UID)
-	if err != nil {
-		t.Fatalf("retired trace Get: %v", err)
-	}
-	if !got.Retired {
-		t.Error("Get should preserve retired flag")
-	}
-}
-
-func TestRetire_StillAccessibleViaHistory(t *testing.T) {
-	s := newTestStore(t)
-	a, _ := s.Add(json.RawMessage(`{"v":1}`))
-	b, _ := s.Revise(a.UID, json.RawMessage(`{"v":2}`))
-	s.Retire(a.UID)
-
-	chain, err := s.History(b.UID)
-	if err != nil {
-		t.Fatalf("History: %v", err)
-	}
-	if len(chain) != 2 {
-		t.Errorf("chain length = %d, want 2 (revision + retired root)", len(chain))
-	}
-	if !chain[1].Retired {
-		t.Error("retired predecessor should still appear in History with Retired=true")
-	}
-}
-
-// ── Sprint 2 row 5: HISTORY chain is cycle-safe ────────────────
-
-func TestHistory_CycleDetected(t *testing.T) {
-	// Cycles can't form via the public API (new UIDs every Revise),
-	// but corruption could create one. Inject one directly into the
-	// internal map and verify History rejects it.
-	s := newTestStore(t)
-	s.traces["A"] = &Trace{UID: "A", PredecessorUID: "B"}
-	s.traces["B"] = &Trace{UID: "B", PredecessorUID: "A"}
-
-	_, err := s.History("A")
-	if !errors.Is(err, ErrCycle) {
-		t.Errorf("expected ErrCycle, got %v", err)
-	}
-}
-
-func TestHistory_PredecessorMissing_TruncatesChain(t *testing.T) {
-	s := newTestStore(t)
-	tr := &Trace{UID: "X", PredecessorUID: "ghost"}
-	s.traces["X"] = tr
-
-	chain, err := s.History("X")
-	if err != nil {
-		t.Fatalf("History on partial chain: %v", err)
-	}
-	if len(chain) != 1 {
-		t.Errorf("partial chain returned %d, want 1 (truncate at missing predecessor)", len(chain))
-	}
-}
-
-func TestHistory_UnknownUID_ErrorsClean(t *testing.T) {
-	s := newTestStore(t)
-	_, err := s.History("nope")
-	if !errors.Is(err, ErrNotFound) {
-		t.Errorf("expected ErrNotFound, got %v", err)
-	}
-}
-
-// ── Sprint 2 row 6: replay_count increments on duplicate ADD ───
-
-func TestAddIdempotent_IncrementsReplayCount(t *testing.T) {
-	s := newTestStore(t)
-
-	first, err := s.AddIdempotent("custom-uid", json.RawMessage(`{"v":1}`))
-	if err != nil {
-		t.Fatalf("first AddIdempotent: %v", err)
-	}
-	if first.ReplayCount != 1 {
-		t.Errorf("first ReplayCount = %d, want 1", first.ReplayCount)
-	}
-
-	second, err := s.AddIdempotent("custom-uid", json.RawMessage(`{"v":"different"}`))
-	if err != nil {
-		t.Fatalf("second AddIdempotent: %v", err)
-	}
-	if second.ReplayCount != 2 {
-		t.Errorf("after second add, ReplayCount = %d, want 2", second.ReplayCount)
-	}
-
-	// Original content preserved (replay does NOT overwrite).
-	if !strings.Contains(string(second.Content), "v") ||
-		!strings.Contains(string(second.Content), "1") {
-		t.Errorf("replay should preserve original content, got %s", second.Content)
-	}
-}
-
-func TestAddIdempotent_RejectsEmptyUID(t *testing.T) {
-	s := newTestStore(t)
-	_, err := s.AddIdempotent("", json.RawMessage(`{}`))
-	if !errors.Is(err, ErrEmptyUID) {
-		t.Errorf("expected ErrEmptyUID, got %v", err)
-	}
-}
-
-// ── Sprint 2 row 7: corrupted memory row recovery ─────────────
-
-func TestPersistor_RoundTrip(t *testing.T) {
-	s, path := newPersistedStore(t)
-
-	a, _ := s.Add(json.RawMessage(`{"v":1}`), "alpha")
-	b, _ := s.Revise(a.UID, json.RawMessage(`{"v":2}`), "alpha")
-	s.Retire(a.UID)
-	_ = b
-
-	// Open fresh store against same file, replay.
-	p, _ := NewPersistor(path)
-	s2 := NewStore(p)
-	n, err := s2.Load()
-	if err != nil {
-		t.Fatalf("Load: %v", err)
-	}
-	if n != 3 {
-		t.Errorf("replayed %d events, want 3", n)
-	}
-	stats := s2.Stats()
-	if stats.Total != 2 {
-		t.Errorf("Stats.Total = %d, want 2", stats.Total)
-	}
-	if stats.Retired != 1 {
-		t.Errorf("Stats.Retired = %d, want 1", stats.Retired)
-	}
-
-	got, _ := s2.Get(a.UID)
-	if !got.Retired {
-		t.Error("retired flag lost across persistence round-trip")
-	}
-}
-
-// ── Search filter coverage ─────────────────────────────────────
-
-func TestSearch_TagFilter(t *testing.T) {
-	s := newTestStore(t)
-	s.Add(json.RawMessage(`{"v":1}`), "production")
-	s.Add(json.RawMessage(`{"v":2}`), "test")
-	s.Add(json.RawMessage(`{"v":3}`), "production", "edge")
-
-	prodHits := s.Search(SearchFilter{Tag: "production"})
-	if len(prodHits) != 2 {
-		t.Errorf("tag=production returned %d, want 2", len(prodHits))
-	}
-
-	edgeHits := s.Search(SearchFilter{Tag: "edge"})
-	if len(edgeHits) != 1 {
-		t.Errorf("tag=edge returned %d, want 1", len(edgeHits))
-	}
-}
-
-func TestSearch_ContentContainsFilter(t *testing.T) {
-	s := newTestStore(t)
-	s.Add(json.RawMessage(`{"role":"welder","city":"Chicago"}`))
-	s.Add(json.RawMessage(`{"role":"electrician","city":"Detroit"}`))
-	s.Add(json.RawMessage(`{"role":"safety","city":"Chicago"}`))
-
-	chi := s.Search(SearchFilter{ContentContains: "Chicago"})
-	if len(chi) != 2 {
-		t.Errorf("ContentContains=Chicago returned %d, want 2", len(chi))
-	}
-}
-
-func TestStats_TracksAllStates(t *testing.T) {
-	s := newTestStore(t)
-	a, _ := s.Add(json.RawMessage(`{}`))
-	s.Add(json.RawMessage(`{}`))
-	s.Add(json.RawMessage(`{}`))
-	s.Retire(a.UID)
-
-	st := s.Stats()
-	if st.Total != 3 {
-		t.Errorf("Total = %d, want 3", st.Total)
-	}
-	if st.Active != 2 {
-		t.Errorf("Active = %d, want 2", st.Active)
-	}
-	if st.Retired != 1 {
-		t.Errorf("Retired = %d, want 1", st.Retired)
-	}
-}
-
-// ── Concurrency safety ────────────────────────────────────────
-
-func TestStore_ConcurrentAdd(t *testing.T) {
-	s := newTestStore(t)
-	const N = 100
-	done := make(chan bool, N)
-	for i := 0; i < N; i++ {
-		go func() {
-			_, err := s.Add(json.RawMessage(`{"x":1}`))
-			if err != nil {
-				t.Errorf("concurrent Add: %v", err)
-			}
-			done <- true
-		}()
-	}
-	for i := 0; i < N; i++ {
-		<-done
-	}
-	if s.Stats().Total != N {
-		t.Errorf("after %d concurrent Adds, Total = %d", N, s.Stats().Total)
-	}
-}
--- a/internal/pathway/types.go
+++ b/internal/pathway/types.go
@ -1,89 +0,0 @@
-// Package pathway implements Mem0-style versioned trace memory per
-// ADR-004. Pathway memory is an append-only event log of opaque
-// traces with Add / Update / Revise / Retire / History / Search
-// operations. Persisted via JSONL (one event per line) with
-// corruption recovery on load.
-//
-// Why this exists: agents need to remember what they tried and
-// what worked. Mem0 is the lowest-common-denominator memory
-// substrate; building on its surface means agent loops written
-// against any Mem0-aware library work here. See feedback_meta_
-// index_vision.md for the north-star learning-loop framing.
-package pathway
-
-import (
-	"encoding/json"
-	"errors"
-)
-
-// Trace is one entry in pathway memory. Content is opaque to the
-// substrate — callers store whatever JSON shape they want; this
-// layer just preserves and indexes it.
-type Trace struct {
-	UID            string          `json:"uid"`
-	Content        json.RawMessage `json:"content"`
-	PredecessorUID string          `json:"predecessor_uid,omitempty"`
-	CreatedAtNs    int64           `json:"created_at_ns"`
-	UpdatedAtNs    int64           `json:"updated_at_ns"`
-	Retired        bool            `json:"retired"`
-	ReplayCount    int             `json:"replay_count"`
-	Tags           []string        `json:"tags,omitempty"`
-}
-
-// op is the wire-format kind tag for JSONL persistence. Internal
-// to the package — operations exposed publicly are method calls
-// on Store; the JSONL form is its own concern.
-type op string
-
-const (
-	opAdd     op = "add"
-	opUpdate  op = "update"
-	opRevise  op = "revise"
-	opRetire  op = "retire"
-	opReplay  op = "replay"
-)
-
-// event is one line of the JSONL log. Trace is included for ops
-// that introduce or replace a trace; UID alone suffices for retire
-// and replay; Content alone suffices for update (reuses the
-// existing trace's UID via the UID field).
-type event struct {
-	Op      op              `json:"op"`
-	Trace   *Trace          `json:"trace,omitempty"`
-	UID     string          `json:"uid,omitempty"`
-	Content json.RawMessage `json:"content,omitempty"`
-}
-
-// Errors surfaced to callers. Sentinel-based so HTTP handlers (when
-// cmd/pathwayd lands) can map to status codes via errors.Is.
-var (
-	ErrNotFound        = errors.New("pathway: trace not found")
-	ErrAlreadyExists   = errors.New("pathway: trace already exists")
-	ErrPredecessorMissing = errors.New("pathway: predecessor trace missing")
-	ErrCycle           = errors.New("pathway: history cycle detected")
-	ErrEmptyUID        = errors.New("pathway: empty uid")
-	ErrInvalidContent  = errors.New("pathway: invalid content")
-)
-
-// SearchFilter narrows a Search to matching traces. Empty filter
-// returns everything (excluding retired; flip IncludeRetired to
-// override). All set fields are AND-combined.
-type SearchFilter struct {
-	// Tag returns traces whose Tags slice contains this string.
-	Tag string
-
-	// ContentContains returns traces whose Content contains this
-	// substring (treats Content as raw bytes; caller's contract
-	// for whether that's meaningful).
-	ContentContains string
-
-	// CreatedAfterNs returns traces with CreatedAtNs >= this value.
-	CreatedAfterNs int64
-
-	// CreatedBeforeNs returns traces with CreatedAtNs <= this value.
-	// Zero = no upper bound.
-	CreatedBeforeNs int64
-
-	// IncludeRetired flips the default "exclude retired" behavior.
-	IncludeRetired bool
-}
--- a/internal/shared/config.go
+++ b/internal/shared/config.go
@ -26,10 +26,7 @@ type Config struct {
 	Queryd   QuerydConfig  `toml:"queryd"`
 	Vectord  VectordConfig `toml:"vectord"`
 	Embedd   EmbeddConfig  `toml:"embedd"`
-	Pathwayd  PathwaydConfig  `toml:"pathwayd"`
-	Matrixd   MatrixdConfig   `toml:"matrixd"`
-	Observerd ObserverdConfig `toml:"observerd"`
-	S3        S3Config        `toml:"s3"`
+	S3       S3Config      `toml:"s3"`
 	Log      LogConfig     `toml:"log"`
 	Auth     AuthConfig    `toml:"auth"`
 }
@ -53,20 +50,17 @@ type IngestConfig struct {

 // GatewayConfig adds the upstream URLs the reverse proxy fronts.
 // Each route family (/v1/storage, /v1/catalog, /v1/ingest, /v1/sql,
-// /v1/vectors, /v1/embed, /v1/pathway, /v1/matrix, /v1/observer)
-// has its own upstream so we can scale services independently or
-// move them to different boxes without touching gateway code.
+// /v1/vectors, /v1/embed) has its own upstream so we can scale
+// services independently or move them to different boxes without
+// touching gateway code.
 type GatewayConfig struct {
-	Bind         string `toml:"bind"`
-	StoragedURL  string `toml:"storaged_url"`
-	CatalogdURL  string `toml:"catalogd_url"`
-	IngestdURL   string `toml:"ingestd_url"`
-	QuerydURL    string `toml:"queryd_url"`
-	VectordURL   string `toml:"vectord_url"`
-	EmbeddURL    string `toml:"embedd_url"`
-	PathwaydURL  string `toml:"pathwayd_url"`
-	MatrixdURL   string `toml:"matrixd_url"`
-	ObserverdURL string `toml:"observerd_url"`
+	Bind        string `toml:"bind"`
+	StoragedURL string `toml:"storaged_url"`
+	CatalogdURL string `toml:"catalogd_url"`
+	IngestdURL  string `toml:"ingestd_url"`
+	QuerydURL   string `toml:"queryd_url"`
+	VectordURL  string `toml:"vectord_url"`
+	EmbeddURL   string `toml:"embedd_url"`
 }

 // EmbeddConfig drives the embed service. ProviderURL points at the
@ -91,35 +85,6 @@ type VectordConfig struct {
 	StoragedURL string `toml:"storaged_url"`
 }

-// PathwaydConfig drives the pathway-memory service (cmd/pathwayd).
-// PersistPath: file path to the JSONL log; empty = in-memory only
-// (test/dev). Production sets a stable path under /var/lib/lakehouse
-// or similar so traces survive restart.
-type PathwaydConfig struct {
-	Bind        string `toml:"bind"`
-	PersistPath string `toml:"persist_path"`
-}
-
-// MatrixdConfig drives the matrix-indexer service (cmd/matrixd).
-// Per docs/SPEC.md §3.4: multi-corpus retrieve+merge over vectord
-// with embed-via-embedd for query text. Both upstream URLs are
-// required — matrixd has no in-process fallback.
-type MatrixdConfig struct {
-	Bind       string `toml:"bind"`
-	EmbeddURL  string `toml:"embedd_url"`
-	VectordURL string `toml:"vectord_url"`
-}
-
-// ObserverdConfig drives the observer service (cmd/observerd).
-// PersistPath: file path to the JSONL ops log; empty = in-memory
-// only (test/dev). Production sets a stable path under
-// /var/lib/lakehouse/observer/ops.jsonl so ops survive restart.
-// Mirrors the PathwaydConfig pattern.
-type ObserverdConfig struct {
-	Bind        string `toml:"bind"`
-	PersistPath string `toml:"persist_path"`
-}
-
 // QuerydConfig adds queryd-specific knobs. queryd talks DuckDB
 // directly to MinIO via DuckDB's httpfs extension (so no storaged
 // URL needed), and reads the catalog over HTTP for view registration.
@ -196,9 +161,6 @@ func DefaultConfig() Config {
 			QuerydURL:   "http://127.0.0.1:3214",
 			VectordURL:  "http://127.0.0.1:3215",
 			EmbeddURL:   "http://127.0.0.1:3216",
-			PathwaydURL: "http://127.0.0.1:3217",
-			MatrixdURL:   "http://127.0.0.1:3218",
-			ObserverdURL: "http://127.0.0.1:3219",
 		},
 		Storaged: ServiceConfig{Bind: "127.0.0.1:3211"},
 		Catalogd: CatalogConfig{Bind: "127.0.0.1:3212", StoragedURL: "http://127.0.0.1:3211"},
@ -218,20 +180,6 @@ func DefaultConfig() Config {
 			DefaultModel: "nomic-embed-text",
 			CacheSize:    10_000, // ~30 MiB at d=768; set to 0 to disable
 		},
-		Pathwayd: PathwaydConfig{
-			Bind: "127.0.0.1:3217",
-			// PersistPath empty by default = in-memory only. Production
-			// sets to e.g. /var/lib/lakehouse/pathway/state.jsonl.
-		},
-		Matrixd: MatrixdConfig{
-			Bind:       "127.0.0.1:3218",
-			EmbeddURL:  "http://127.0.0.1:3216",
-			VectordURL: "http://127.0.0.1:3215",
-		},
-		Observerd: ObserverdConfig{
-			Bind: "127.0.0.1:3219",
-			// PersistPath empty by default = in-memory only.
-		},
 		Queryd: QuerydConfig{
 			Bind:         "127.0.0.1:3214",
 			CatalogdURL:  "http://127.0.0.1:3212",
--- a/internal/vectord/batch_bench_test.go
+++ b/internal/vectord/batch_bench_test.go
@ -1,104 +0,0 @@
-package vectord
-
-import (
-	"fmt"
-	"math/rand"
-	"testing"
-)
-
-// BenchmarkSingleAdd vs BenchmarkBatchAdd quantifies the lock-amortization
-// win for the HTTP-batch shape. Same N items, same vectors; one path
-// takes the lock N times, the other takes it once. Run with:
-//   go test ./internal/vectord/ -bench=. -benchmem -benchtime=1x
-func BenchmarkSingleAdd(b *testing.B) {
-	for _, n := range []int{16, 128, 1024} {
-		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
-			items := makeBatch(n, 768)
-			for i := 0; i < b.N; i++ {
-				idx := mustIndex(b)
-				for _, it := range items {
-					if err := idx.Add(it.ID, it.Vector, it.Metadata); err != nil {
-						b.Fatalf("Add: %v", err)
-					}
-				}
-			}
-		})
-	}
-}
-
-func BenchmarkBatchAdd(b *testing.B) {
-	for _, n := range []int{16, 128, 1024} {
-		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
-			items := makeBatch(n, 768)
-			for i := 0; i < b.N; i++ {
-				idx := mustIndex(b)
-				if err := idx.BatchAdd(items); err != nil {
-					b.Fatalf("BatchAdd: %v", err)
-				}
-			}
-		})
-	}
-}
-
-// TestBatchAdd_IntraBatchDedup guards the 2026-04-29 scrum BLOCK:
-// without dedup, coder/hnsw's "node not added" length-invariant
-// panics when the same ID appears twice in one batch. Last-write-
-// wins semantics; the second vector for a duplicate ID replaces the
-// first.
-func TestBatchAdd_IntraBatchDedup(t *testing.T) {
-	idx := mustIndex(t)
-	items := []BatchItem{
-		{ID: "a", Vector: makeVec(768, 1)},
-		{ID: "b", Vector: makeVec(768, 2)},
-		{ID: "a", Vector: makeVec(768, 99)}, // duplicate — should win
-	}
-	if err := idx.BatchAdd(items); err != nil {
-		t.Fatalf("BatchAdd: %v", err)
-	}
-	if idx.Len() != 2 {
-		t.Errorf("Len: want 2, got %d", idx.Len())
-	}
-	// "a" should hold the LATER vector (the 99 one), not the first.
-	v, _, ok := idx.Lookup("a")
-	if !ok {
-		t.Fatal("a not found")
-	}
-	if v[0] != 99 {
-		t.Errorf("last-write-wins: want vec[0]=99, got %v", v[0])
-	}
-}
-
-func makeVec(dim int, val float32) []float32 {
-	v := make([]float32, dim)
-	v[0] = val
-	v[1] = 1 // non-zero-norm under cosine
-	return v
-}
-
-func mustIndex(tb testing.TB) *Index {
-	tb.Helper()
-	idx, err := NewIndex(IndexParams{
-		Name:      "bench",
-		Dimension: 768,
-		M:         DefaultM,
-		EfSearch:  DefaultEfSearch,
-		Distance:  DistanceCosine,
-	})
-	if err != nil {
-		tb.Fatalf("NewIndex: %v", err)
-	}
-	return idx
-}
-
-func makeBatch(n, dim int) []BatchItem {
-	rng := rand.New(rand.NewSource(int64(n)))
-	out := make([]BatchItem, n)
-	for i := range out {
-		v := make([]float32, dim)
-		for j := range v {
-			v[j] = rng.Float32()*2 - 1
-		}
-		out[i] = BatchItem{ID: fmt.Sprintf("k-%06d", i), Vector: v}
-	}
-	return out
-}
--- a/internal/vectord/index.go
+++ b/internal/vectord/index.go
@ -225,106 +225,6 @@ func validateVector(vec []float32, distance string) error {
 	return nil
 }

-// BatchItem is one entry in a BatchAdd call. Same per-field
-// contract as Add: ID + Vector required, Metadata follows
-// upsert-style semantics (nil = leave existing alone).
-type BatchItem struct {
-	ID       string
-	Vector   []float32
-	Metadata json.RawMessage
-}
-
-// BatchAdd inserts a slice of items under a single write-lock, with
-// one variadic call into coder/hnsw's Graph.Add. Net win vs. a loop
-// of single Add calls: N→1 lock acquisitions per HTTP batch and one
-// variadic library call instead of N.
-//
-// Contract: items MUST be pre-validated by the caller (id non-empty,
-// vector dimension matches, vector finite + non-zero-norm under
-// cosine). Pre-validation lives in the HTTP handler so per-item
-// error messages stay precise; reproducing it here would force
-// position-encoded errors on every consumer.
-//
-// Intra-batch duplicate IDs: dedup'd internally with last-write-wins
-// semantics (matches map-style behavior — second occurrence of an
-// ID replaces the first). Without dedup, coder/hnsw's "node not
-// added" length-invariant panics on the second occurrence. Caught
-// by 2026-04-29 cross-lineage scrum (Opus BLOCK).
-func (i *Index) BatchAdd(items []BatchItem) error {
-	if len(items) == 0 {
-		return nil
-	}
-
-	// Intra-batch dedup, last-write-wins. Walk forward, record the
-	// LAST index for each ID, then keep only items whose index is
-	// the recorded last. Preserves order of last occurrences in the
-	// original positions.
-	if hasDup := containsDuplicateID(items); hasDup {
-		items = dedupBatchLastWins(items)
-	}
-
-	i.mu.Lock()
-	defer i.mu.Unlock()
-
-	// Pre-pass: drop any existing IDs so coder/hnsw's variadic Add
-	// never sees a re-add. Same library-quirk handling as single
-	// Add — Len()==1 needs a full graph reset because Delete of the
-	// last node leaves layers[0] entryless.
-	for _, it := range items {
-		if _, exists := i.g.Lookup(it.ID); exists {
-			if i.g.Len() == 1 {
-				i.resetGraphLocked()
-			} else {
-				i.g.Delete(it.ID)
-			}
-		}
-	}
-
-	nodes := make([]hnsw.Node[string], len(items))
-	for j, it := range items {
-		nodes[j] = hnsw.MakeNode(it.ID, it.Vector)
-	}
-	i.g.Add(nodes...)
-
-	for _, it := range items {
-		if it.Metadata != nil {
-			i.meta[it.ID] = it.Metadata
-		}
-	}
-	return nil
-}
-
-// containsDuplicateID is a fast pre-check — if no dups, skip the
-// dedup allocation. Most batches won't have dups so this is a hot
-// path.
-func containsDuplicateID(items []BatchItem) bool {
-	seen := make(map[string]struct{}, len(items))
-	for _, it := range items {
-		if _, ok := seen[it.ID]; ok {
-			return true
-		}
-		seen[it.ID] = struct{}{}
-	}
-	return false
-}
-
-// dedupBatchLastWins keeps only the last occurrence of each ID,
-// preserving the relative order of those last occurrences. This
-// matches map-style "set X to A then to B" semantics: B wins.
-func dedupBatchLastWins(items []BatchItem) []BatchItem {
-	lastIdx := make(map[string]int, len(items))
-	for j, it := range items {
-		lastIdx[it.ID] = j
-	}
-	out := make([]BatchItem, 0, len(lastIdx))
-	for j, it := range items {
-		if lastIdx[it.ID] == j {
-			out = append(out, it)
-		}
-	}
-	return out
-}
-
 // Delete removes id from the index. Returns true if present.
 func (i *Index) Delete(id string) bool {
 	i.mu.Lock()
--- a/internal/workflow/modes.go
+++ b/internal/workflow/modes.go
@ -1,214 +0,0 @@
-package workflow
-
-// modes.go — adapters that wrap §3.4 capabilities + §3.5 drift +
-// distillation scorer as workflow.Mode functions. Each mode follows
-// the same glue pattern: marshal the generic input map through a
-// typed struct (so workflow YAML schemas are self-documenting and
-// validation errors are clear), call the underlying capability,
-// return a generic output map.
-//
-// Pure modes (no I/O): MatrixRelevance, MatrixDowngrade,
-// DistillationScore, DriftScorer.
-//
-// HTTP modes: MatrixSearch + PlaybookRecord — observerd talks to
-// matrixd over HTTP since the search/record paths need vectord
-// access. Constructed via factory funcs that take the matrixd base
-// URL + an http.Client.
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/drift"
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/matrix"
-)
-
-// ─── Pure-function wrappers ─────────────────────────────────────
-
-// MatrixRelevance wraps matrix.FilterChunks. Input shape:
-//
-//	{
-//	  "focus":     {"Path":"...", "Content":"...", ...},
-//	  "chunks":    [{"source":"...", "doc_id":"...", "text":"...", "score":0.8}, ...],
-//	  "threshold": 0.3   # optional; default = matrix.DefaultRelevanceThreshold
-//	}
-//
-// Output: {"kept":[...], "dropped":[...], "threshold":N, "total_in":N}.
-func MatrixRelevance(_ Context, input map[string]any) (map[string]any, error) {
-	var req struct {
-		Focus     matrix.FocusFile        `json:"focus"`
-		Chunks    []matrix.CandidateChunk `json:"chunks"`
-		Threshold float64                 `json:"threshold"`
-	}
-	if err := remarshalInput(input, &req); err != nil {
-		return nil, fmt.Errorf("matrix.relevance: %w", err)
-	}
-	threshold := req.Threshold
-	if threshold == 0 {
-		threshold = matrix.DefaultRelevanceThreshold
-	}
-	res := matrix.FilterChunks(req.Focus, req.Chunks, threshold)
-	return map[string]any{
-		"kept":      res.Kept,
-		"dropped":   res.Dropped,
-		"threshold": res.Threshold,
-		"total_in":  res.TotalIn,
-	}, nil
-}
-
-// MatrixDowngrade wraps matrix.MaybeDowngrade. Input shape:
-//
-//	{
-//	  "mode":  "codereview_lakehouse",
-//	  "model": "x-ai/grok-4.1-fast",
-//	  "forced_mode":          false,         # optional
-//	  "force_full_override":  false          # optional
-//	}
-//
-// Output: matrix.DowngradeDecision JSON.
-func MatrixDowngrade(_ Context, input map[string]any) (map[string]any, error) {
-	var req struct {
-		Mode              string `json:"mode"`
-		Model             string `json:"model"`
-		ForcedMode        bool   `json:"forced_mode"`
-		ForceFullOverride bool   `json:"force_full_override"`
-	}
-	if err := remarshalInput(input, &req); err != nil {
-		return nil, fmt.Errorf("matrix.downgrade: %w", err)
-	}
-	if req.Mode == "" || req.Model == "" {
-		return nil, fmt.Errorf("matrix.downgrade: mode and model are required")
-	}
-	dec := matrix.MaybeDowngrade(matrix.DowngradeInput{
-		Mode:              req.Mode,
-		Model:             req.Model,
-		ForcedMode:        req.ForcedMode,
-		ForceFullOverride: req.ForceFullOverride,
-	})
-	return map[string]any{
-		"mode":            dec.Mode,
-		"downgraded_from": dec.DowngradedFrom,
-		"reason":          dec.Reason,
-	}, nil
-}
-
-// DistillationScore wraps distillation.ScoreRecord — re-runs the
-// scorer over a single EvidenceRecord. Useful as a workflow node
-// that grades a freshly-produced evidence row.
-//
-// Input: a JSON EvidenceRecord under the key "record":
-//
-//	{"record": {"run_id":"...", "task_id":"...", ...}}
-//
-// Output: ScoreOutput-ish map with category, reasons, sub_scores.
-func DistillationScore(_ Context, input map[string]any) (map[string]any, error) {
-	var req struct {
-		Record distillation.EvidenceRecord `json:"record"`
-	}
-	if err := remarshalInput(input, &req); err != nil {
-		return nil, fmt.Errorf("distillation.score: %w", err)
-	}
-	if req.Record.RunID == "" {
-		return nil, fmt.Errorf("distillation.score: record.run_id required")
-	}
-	out := distillation.ScoreRecord(req.Record)
-	return map[string]any{
-		"category":   string(out.Category),
-		"reasons":    out.Reasons,
-		"sub_scores": out.SubScores,
-	}, nil
-}
-
-// DriftScorer wraps drift.ComputeScorerDrift. Input shape:
-//
-//	{
-//	  "inputs": [
-//	    {"record": {...EvidenceRecord...}, "persisted_category": "accepted"},
-//	    ...
-//	  ],
-//	  "include_entries": false  # optional, default false
-//	}
-//
-// Output: ScorerDriftReport JSON.
-func DriftScorer(_ Context, input map[string]any) (map[string]any, error) {
-	var req struct {
-		Inputs []drift.ScorerDriftInput `json:"inputs"`
-		IncludeEntries bool              `json:"include_entries"`
-	}
-	if err := remarshalInput(input, &req); err != nil {
-		return nil, fmt.Errorf("drift.scorer: %w", err)
-	}
-	if len(req.Inputs) == 0 {
-		return nil, fmt.Errorf("drift.scorer: inputs must be non-empty")
-	}
-	report := drift.ComputeScorerDrift(req.Inputs, req.IncludeEntries)
-	bs, err := json.Marshal(report)
-	if err != nil {
-		return nil, err
-	}
-	var asMap map[string]any
-	if err := json.Unmarshal(bs, &asMap); err != nil {
-		return nil, err
-	}
-	return asMap, nil
-}
-
-// ─── HTTP-backed modes ──────────────────────────────────────────
-
-// MatrixSearch returns a workflow.Mode bound to a matrixd base URL
-// and HTTP client. The mode posts to /v1/matrix/search via the
-// gateway-internal upstream (caller passes the URL).
-//
-// Input shape mirrors matrix.SearchRequest (see retrieve.go).
-// Output is the matrix.SearchResponse JSON.
-func MatrixSearch(matrixdURL string, hc *http.Client) Mode {
-	return func(ctx Context, input map[string]any) (map[string]any, error) {
-		bs, err := json.Marshal(input)
-		if err != nil {
-			return nil, fmt.Errorf("matrix.search: marshal: %w", err)
-		}
-		req, err := http.NewRequestWithContext(ctx.Ctx, http.MethodPost,
-			matrixdURL+"/matrix/search", bytes.NewReader(bs))
-		if err != nil {
-			return nil, err
-		}
-		req.Header.Set("Content-Type", "application/json")
-		resp, err := hc.Do(req)
-		if err != nil {
-			return nil, fmt.Errorf("matrix.search: %w", err)
-		}
-		defer resp.Body.Close()
-		if resp.StatusCode != http.StatusOK {
-			body, _ := io.ReadAll(resp.Body)
-			return nil, fmt.Errorf("matrix.search: status %d: %s", resp.StatusCode, body)
-		}
-		var out map[string]any
-		if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
-			return nil, fmt.Errorf("matrix.search: decode: %w", err)
-		}
-		return out, nil
-	}
-}
-
-// ─── Helpers ─────────────────────────────────────────────────────
-
-// remarshalInput round-trips a generic input map through JSON into
-// the typed target struct. Same trick as the matrixd handlers — gives
-// us schema validation for free without writing custom field-by-field
-// coercion.
-func remarshalInput(input map[string]any, target any) error {
-	bs, err := json.Marshal(input)
-	if err != nil {
-		return err
-	}
-	return json.Unmarshal(bs, target)
-}
-
-// silence "imported and not used" if context isn't referenced after
-// the MatrixSearch factory is used. Compiler will catch the real case.
-var _ = context.Background
--- a/internal/workflow/modes_test.go
+++ b/internal/workflow/modes_test.go
@ -1,211 +0,0 @@
-package workflow
-
-import (
-	"context"
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"testing"
-)
-
-func TestMatrixRelevance_FiltersAdjacencyPollution(t *testing.T) {
-	input := map[string]any{
-		"focus": map[string]any{
-			"Path": "crates/queryd/src/db.go",
-			"Content": "pub struct Connector {}\nuse catalogd::Registry;",
-		},
-		"chunks": []any{
-			map[string]any{
-				"source":  "lakehouse_symbols_v1",
-				"doc_id":  "symbol:queryd::struct::Connector",
-				"text":    "Connector wraps the DuckDB handle.",
-				"score":   0.9,
-			},
-			map[string]any{
-				"source":  "lakehouse_symbols_v1",
-				"doc_id":  "symbol:catalogd::struct::Registry",
-				"text":    "Registry stores manifests. Used by ingestd.",
-				"score":   0.85,
-			},
-		},
-		"threshold": 0.3,
-	}
-	out, err := MatrixRelevance(Context{}, input)
-	if err != nil {
-		t.Fatalf("MatrixRelevance: %v", err)
-	}
-	if out["total_in"].(int) != 2 {
-		t.Errorf("total_in: want 2, got %v", out["total_in"])
-	}
-	// Connector should be in kept (path/symbol match), Registry in dropped (import-only).
-	keptStr, _ := json.Marshal(out["kept"])
-	if !strings.Contains(string(keptStr), "Connector") {
-		t.Errorf("expected Connector in kept; kept=%s", keptStr)
-	}
-}
-
-func TestMatrixDowngrade_StrongModelDowngrades(t *testing.T) {
-	out, err := MatrixDowngrade(Context{}, map[string]any{
-		"mode":  "codereview_lakehouse",
-		"model": "x-ai/grok-4.1-fast",
-	})
-	if err != nil {
-		t.Fatalf("MatrixDowngrade: %v", err)
-	}
-	if out["mode"] != "codereview_isolation" {
-		t.Errorf("strong model should downgrade; got mode=%v", out["mode"])
-	}
-	if out["downgraded_from"] != "codereview_lakehouse" {
-		t.Errorf("downgraded_from: %v", out["downgraded_from"])
-	}
-}
-
-func TestMatrixDowngrade_WeakModelKept(t *testing.T) {
-	out, err := MatrixDowngrade(Context{}, map[string]any{
-		"mode":  "codereview_lakehouse",
-		"model": "qwen3.5:latest",
-	})
-	if err != nil {
-		t.Fatal(err)
-	}
-	if out["mode"] != "codereview_lakehouse" {
-		t.Errorf("weak model should keep lakehouse; got %v", out["mode"])
-	}
-}
-
-func TestMatrixDowngrade_MissingFieldsError(t *testing.T) {
-	_, err := MatrixDowngrade(Context{}, map[string]any{"mode": "codereview_lakehouse"})
-	if err == nil {
-		t.Error("missing model should error")
-	}
-}
-
-func TestDistillationScore_ScrumReviewAccepted(t *testing.T) {
-	out, err := DistillationScore(Context{}, map[string]any{
-		"record": map[string]any{
-			"run_id":         "r-1",
-			"task_id":        "t-1",
-			"timestamp":      "2026-04-29T12:00:00Z",
-			"schema_version": 1,
-			"provenance": map[string]any{
-				"source_file":  "data/_kb/scrum_reviews.jsonl",
-				"sig_hash":     "abc",
-				"recorded_at":  "2026-04-29T12:00:01Z",
-			},
-			"success_markers": []any{"accepted_on_attempt_1"},
-		},
-	})
-	if err != nil {
-		t.Fatal(err)
-	}
-	if out["category"] != "accepted" {
-		t.Errorf("scrum_review attempt_1: want accepted, got %v", out["category"])
-	}
-	reasons, _ := out["reasons"].([]string)
-	if len(reasons) == 0 || !strings.Contains(reasons[0], "first attempt") {
-		t.Errorf("reasons missing 'first attempt': %v", reasons)
-	}
-}
-
-func TestDistillationScore_RejectsEmptyRecord(t *testing.T) {
-	_, err := DistillationScore(Context{}, map[string]any{
-		"record": map[string]any{},
-	})
-	if err == nil {
-		t.Error("empty record should error")
-	}
-}
-
-func TestDriftScorer_AllMatchedReturnsZeroDrift(t *testing.T) {
-	out, err := DriftScorer(Context{}, map[string]any{
-		"inputs": []any{
-			map[string]any{
-				"Record": map[string]any{
-					"run_id": "r-1", "task_id": "t-1",
-					"timestamp": "2026-04-29T12:00:00Z", "schema_version": 1,
-					"provenance": map[string]any{
-						"source_file": "data/_kb/scrum_reviews.jsonl",
-						"sig_hash": "x", "recorded_at": "2026-04-29T12:00:01Z",
-					},
-					"success_markers": []any{"accepted_on_attempt_1"},
-				},
-				"PersistedCategory": "accepted",
-			},
-		},
-	})
-	if err != nil {
-		t.Fatal(err)
-	}
-	if out["drifted"].(float64) != 0 {
-		t.Errorf("no-drift case: drifted=%v", out["drifted"])
-	}
-	if out["matched"].(float64) != 1 {
-		t.Errorf("matched: want 1, got %v", out["matched"])
-	}
-}
-
-func TestDriftScorer_RequiresInputs(t *testing.T) {
-	_, err := DriftScorer(Context{}, map[string]any{"inputs": []any{}})
-	if err == nil {
-		t.Error("empty inputs should error")
-	}
-}
-
-func TestMatrixSearch_HTTPFlow(t *testing.T) {
-	// Fake matrixd that echoes a canned SearchResponse.
-	mux := http.NewServeMux()
-	mux.HandleFunc("/matrix/search", func(w http.ResponseWriter, r *http.Request) {
-		var body map[string]any
-		_ = json.NewDecoder(r.Body).Decode(&body)
-		w.Header().Set("Content-Type", "application/json")
-		// Echo back deterministically with a synthesized result list.
-		_ = json.NewEncoder(w).Encode(map[string]any{
-			"results": []any{
-				map[string]any{"id": "w-1", "distance": 0.1, "corpus": "workers"},
-			},
-			"per_corpus_counts": map[string]any{"workers": 1},
-			"received_corpora":  body["corpora"], // for round-trip verification
-		})
-	})
-	srv := httptest.NewServer(mux)
-	defer srv.Close()
-
-	mode := MatrixSearch(srv.URL, srv.Client())
-	out, err := mode(
-		Context{Ctx: context.Background()},
-		map[string]any{
-			"query_text": "forklift",
-			"corpora":    []any{"workers"},
-			"k":          5,
-		},
-	)
-	if err != nil {
-		t.Fatalf("MatrixSearch: %v", err)
-	}
-	results, ok := out["results"].([]any)
-	if !ok || len(results) != 1 {
-		t.Errorf("results: %v", out["results"])
-	}
-	if first, ok := results[0].(map[string]any); ok {
-		if first["id"] != "w-1" {
-			t.Errorf("id: %v", first["id"])
-		}
-	}
-}
-
-func TestMatrixSearch_NonOKStatusErrors(t *testing.T) {
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		http.Error(w, "matrixd is down", http.StatusBadGateway)
-	}))
-	defer srv.Close()
-
-	mode := MatrixSearch(srv.URL, srv.Client())
-	_, err := mode(Context{Ctx: context.Background()}, map[string]any{})
-	if err == nil {
-		t.Error("502 should error")
-	}
-	if !strings.Contains(err.Error(), "502") {
-		t.Errorf("error should mention 502: %v", err)
-	}
-}
--- a/internal/workflow/runner.go
+++ b/internal/workflow/runner.go
@ -1,389 +0,0 @@
-package workflow
-
-import (
-	"context"
-	"fmt"
-	"regexp"
-	"strings"
-	"time"
-)
-
-// Runner executes Workflows. Modes are registered up-front; the
-// catalog is immutable after Build (callers compose by registering
-// at startup, then Run() the catalog repeatedly).
-type Runner struct {
-	modes map[string]Mode
-}
-
-// NewRunner returns an empty Runner. Use RegisterMode to populate.
-func NewRunner() *Runner {
-	return &Runner{modes: make(map[string]Mode)}
-}
-
-// RegisterMode adds a capability under the given name. Re-registering
-// the same name overwrites — useful for tests that want to replace a
-// mode with a stub. In production, register-once-at-startup is the
-// expected pattern.
-func (r *Runner) RegisterMode(name string, mode Mode) {
-	r.modes[name] = mode
-}
-
-// Modes returns the currently-registered mode names. Useful for
-// /v1/observer/workflow/modes-style discovery endpoints.
-func (r *Runner) Modes() []string {
-	out := make([]string, 0, len(r.modes))
-	for name := range r.modes {
-		out = append(out, name)
-	}
-	return out
-}
-
-// Run executes a workflow. Validates structure, resolves nodes
-// topologically, executes each node with $-reference substitution,
-// records per-node results in RunResult.
-//
-// Aborting errors (cycle, missing dep, unknown mode) return early
-// with StatusAborted — no nodes execute. Per-node mode errors are
-// recorded in NodeResult.Error and execution continues with
-// independent nodes; downstream nodes that depended on the failing
-// one are SKIPPED with an explanatory error so the cascade is
-// visible in the result rather than silent.
-func (r *Runner) Run(ctx context.Context, w Workflow) (RunResult, error) {
-	if err := w.Validate(); err != nil {
-		return RunResult{
-			Workflow: w.Name, Status: StatusAborted,
-			StartedAt: time.Now(),
-		}, err
-	}
-
-	order, err := topoSort(w.Nodes)
-	if err != nil {
-		return RunResult{
-			Workflow: w.Name, Status: StatusAborted,
-			StartedAt: time.Now(),
-		}, err
-	}
-
-	// Verify every node's mode is registered before starting — fail
-	// loud if someone references a typo'd mode name. Catches the bug
-	// in 5ms instead of after 6 nodes have already run.
-	for _, node := range w.Nodes {
-		modeName := effectiveMode(node)
-		if _, ok := r.modes[modeName]; !ok {
-			return RunResult{
-				Workflow: w.Name, Status: StatusAborted,
-				StartedAt: time.Now(),
-			}, fmt.Errorf("%w: %q (node %q)", ErrUnknownMode, modeName, node.ID)
-		}
-	}
-
-	t0 := time.Now()
-	results := make(map[string]NodeResult, len(w.Nodes))
-	resultsList := make([]NodeResult, 0, len(w.Nodes))
-	failedNodes := make(map[string]bool) // node IDs whose result was Error
-	skippedNodes := make(map[string]bool)
-
-	for _, nodeID := range order {
-		node := findNode(w.Nodes, nodeID)
-		modeName := effectiveMode(node)
-
-		// Skip if any dependency failed or was skipped — cascades
-		// failure visibly so callers can see the chain.
-		var skipReason string
-		for _, dep := range node.DependsOn {
-			if failedNodes[dep] {
-				skipReason = fmt.Sprintf("upstream node %q failed", dep)
-				break
-			}
-			if skippedNodes[dep] {
-				skipReason = fmt.Sprintf("upstream node %q was skipped", dep)
-				break
-			}
-		}
-		if skipReason != "" {
-			res := NodeResult{
-				NodeID: node.ID, Mode: modeName,
-				Error:     skipReason,
-				StartedAt: time.Now(),
-			}
-			results[node.ID] = res
-			resultsList = append(resultsList, res)
-			skippedNodes[node.ID] = true
-			continue
-		}
-
-		nodeStart := time.Now()
-		mode := r.modes[modeName] // pre-validated above; safe lookup
-
-		// Build the mode's input map with $-references resolved.
-		input, refErr := buildInput(node, results)
-		if refErr != nil {
-			res := NodeResult{
-				NodeID: node.ID, Mode: modeName,
-				Error:      refErr.Error(),
-				StartedAt:  nodeStart,
-				DurationMs: time.Since(nodeStart).Milliseconds(),
-			}
-			results[node.ID] = res
-			resultsList = append(resultsList, res)
-			failedNodes[node.ID] = true
-			continue
-		}
-
-		modeCtx := Context{
-			Ctx:          ctx,
-			WorkflowName: w.Name,
-			NodeID:       node.ID,
-			Provider:     w.Provider,
-			Model:        w.Model,
-		}
-
-		output, err := mode(modeCtx, input)
-		res := NodeResult{
-			NodeID:     node.ID,
-			Mode:       modeName,
-			Output:     output,
-			StartedAt:  nodeStart,
-			DurationMs: time.Since(nodeStart).Milliseconds(),
-		}
-		if err != nil {
-			res.Error = err.Error()
-			failedNodes[node.ID] = true
-		}
-		results[node.ID] = res
-		resultsList = append(resultsList, res)
-	}
-
-	status := StatusSucceeded
-	if len(failedNodes) > 0 || len(skippedNodes) > 0 {
-		status = StatusPartial
-	}
-	return RunResult{
-		Workflow:   w.Name,
-		Status:     status,
-		Nodes:      resultsList,
-		StartedAt:  t0,
-		DurationMs: time.Since(t0).Milliseconds(),
-	}, nil
-}
-
-// effectiveMode returns the node's explicit mode if set, else
-// "llm.chat" (the implicit Archon convention).
-func effectiveMode(n Node) string {
-	if n.Mode != "" {
-		return n.Mode
-	}
-	return "llm.chat"
-}
-
-// findNode is O(n) but called once per execution step on already-
-// validated workflows; n is small (typical workflow ≤10 nodes).
-func findNode(nodes []Node, id string) Node {
-	for _, n := range nodes {
-		if n.ID == id {
-			return n
-		}
-	}
-	return Node{} // never reached on a Validated workflow
-}
-
-// ─── Input building + reference substitution ────────────────────
-
-// buildInput composes the input map a mode receives. Builds from
-// node.Inputs (deep-copy with $-refs substituted) plus injects the
-// "prompt" key from node.Prompt with $-refs substituted.
-//
-// $-reference syntax: $node_id.output.key — resolves to that key
-// in the prior node's output map. $node_id.output (no .key)
-// resolves to the whole output map. JSON-stringified inline.
-func buildInput(node Node, results map[string]NodeResult) (map[string]any, error) {
-	out := make(map[string]any, len(node.Inputs)+1)
-	for k, v := range node.Inputs {
-		resolved, err := resolveRefs(v, results)
-		if err != nil {
-			return nil, err
-		}
-		out[k] = resolved
-	}
-	if node.Prompt != "" {
-		resolvedPrompt, err := substituteStringRefs(node.Prompt, results)
-		if err != nil {
-			return nil, err
-		}
-		out["prompt"] = resolvedPrompt
-	}
-	return out, nil
-}
-
-// resolveRefs walks any value (string, map, slice, scalar) and
-// substitutes $-references in any string elements.
-func resolveRefs(v any, results map[string]NodeResult) (any, error) {
-	switch x := v.(type) {
-	case string:
-		return substituteStringRefs(x, results)
-	case map[string]any:
-		out := make(map[string]any, len(x))
-		for k, vv := range x {
-			r, err := resolveRefs(vv, results)
-			if err != nil {
-				return nil, err
-			}
-			out[k] = r
-		}
-		return out, nil
-	case []any:
-		out := make([]any, len(x))
-		for i, vv := range x {
-			r, err := resolveRefs(vv, results)
-			if err != nil {
-				return nil, err
-			}
-			out[i] = r
-		}
-		return out, nil
-	default:
-		return v, nil // numbers, bools, nil — pass through
-	}
-}
-
-// refRe matches $node_id or $node_id.output.key (where key is
-// dotted-path). Captures: 1=node_id, 2=optional ".output[.key]"
-// suffix.
-var refRe = regexp.MustCompile(`\$([a-zA-Z_][a-zA-Z0-9_]*)((?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)`)
-
-// substituteStringRefs replaces $node.output.key references in a
-// string with the resolved value (JSON-stringified for non-string
-// targets so the result is always a string).
-func substituteStringRefs(s string, results map[string]NodeResult) (string, error) {
-	var firstErr error
-	out := refRe.ReplaceAllStringFunc(s, func(match string) string {
-		if firstErr != nil {
-			return match
-		}
-		// Re-parse the match because ReplaceAllStringFunc gives the
-		// whole match without submatches.
-		m := refRe.FindStringSubmatch(match)
-		nodeID := m[1]
-		path := strings.TrimPrefix(m[2], ".")
-		nodeRes, ok := results[nodeID]
-		if !ok {
-			firstErr = fmt.Errorf("%w: $%s (no such node, or node not yet run)", ErrUnresolvedRef, nodeID)
-			return match
-		}
-		// path "output" or "output.X.Y" walks into nodeRes.Output
-		val, err := walkPath(nodeRes.Output, path)
-		if err != nil {
-			firstErr = fmt.Errorf("%w: $%s — %v", ErrUnresolvedRef, nodeID+m[2], err)
-			return match
-		}
-		return stringifyValue(val)
-	})
-	return out, firstErr
-}
-
-// walkPath resolves a dotted path against a nested map. Empty path
-// returns the whole map. The first segment must be "output" — a
-// convention that matches the SPEC §3.8 reference shape and prevents
-// accidental access to other NodeResult fields.
-func walkPath(output map[string]any, path string) (any, error) {
-	if path == "" {
-		return output, nil // bare $node — entire NodeResult.Output
-	}
-	parts := strings.Split(path, ".")
-	if parts[0] != "output" {
-		return nil, fmt.Errorf("path must start with .output (got %q)", parts[0])
-	}
-	parts = parts[1:]
-	var cur any = output
-	for _, p := range parts {
-		m, ok := cur.(map[string]any)
-		if !ok {
-			return nil, fmt.Errorf("cannot traverse into %T at segment %q", cur, p)
-		}
-		cur, ok = m[p]
-		if !ok {
-			return nil, fmt.Errorf("key %q not found in output", p)
-		}
-	}
-	return cur, nil
-}
-
-// stringifyValue renders a value as a string. For JSON-shaped values
-// (maps, slices, complex types), uses fmt.Sprintf %v which is
-// adequate for prompt-substitution. JSON marshaling would be cleaner
-// for complex types but adds a dep cycle for v0.
-func stringifyValue(v any) string {
-	switch x := v.(type) {
-	case string:
-		return x
-	case nil:
-		return ""
-	default:
-		return fmt.Sprint(x)
-	}
-}
-
-// ─── DAG resolution ──────────────────────────────────────────────
-
-// topoSort returns node IDs in a topologically-sorted order such
-// that every dependency precedes its dependent. Cycles return an
-// error (Validate catches them first; this is defense in depth).
-func topoSort(nodes []Node) ([]string, error) {
-	indeg := make(map[string]int, len(nodes))
-	graph := make(map[string][]string, len(nodes))
-	for _, n := range nodes {
-		if _, ok := indeg[n.ID]; !ok {
-			indeg[n.ID] = 0
-		}
-		for _, dep := range n.DependsOn {
-			graph[dep] = append(graph[dep], n.ID)
-			indeg[n.ID]++
-		}
-	}
-	// Kahn's algorithm — preserve original order for ties so output
-	// is deterministic across runs.
-	queue := make([]string, 0, len(nodes))
-	for _, n := range nodes {
-		if indeg[n.ID] == 0 {
-			queue = append(queue, n.ID)
-		}
-	}
-	out := make([]string, 0, len(nodes))
-	for len(queue) > 0 {
-		cur := queue[0]
-		queue = queue[1:]
-		out = append(out, cur)
-		for _, child := range graph[cur] {
-			indeg[child]--
-			if indeg[child] == 0 {
-				queue = append(queue, child)
-			}
-		}
-	}
-	if len(out) != len(nodes) {
-		// Find a node still with non-zero indeg — that's where the
-		// cycle is reachable from.
-		for id, deg := range indeg {
-			if deg > 0 {
-				return nil, fmt.Errorf("%w: starting at node %q", ErrCycle, id)
-			}
-		}
-		return nil, ErrCycle
-	}
-	return out, nil
-}
-
-// detectCycle is the predicate-only variant called from Validate;
-// returns the offending node ID + true if a cycle exists.
-func detectCycle(nodes []Node) (string, bool) {
-	_, err := topoSort(nodes)
-	if err == nil {
-		return "", false
-	}
-	// Best-effort extract — topoSort wraps the cycle-starting ID in
-	// the error message; for v0 just signal "yes, somewhere."
-	for _, n := range nodes {
-		_ = n
-	}
-	return "(see runner error for details)", true
-}
--- a/internal/workflow/runner_test.go
+++ b/internal/workflow/runner_test.go
@ -1,284 +0,0 @@
-package workflow
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"strings"
-	"testing"
-)
-
-// fixtureEcho returns the input map verbatim. Useful for testing
-// runner mechanics without external dependencies.
-func fixtureEcho(_ Context, input map[string]any) (map[string]any, error) {
-	out := make(map[string]any, len(input))
-	for k, v := range input {
-		out[k] = v
-	}
-	return out, nil
-}
-
-// fixtureFail always errors. Useful for testing skip-on-failed-dep.
-func fixtureFail(_ Context, _ map[string]any) (map[string]any, error) {
-	return nil, fmt.Errorf("fixture: intentional failure")
-}
-
-// fixtureUpper returns {"upper": strings.ToUpper(input["prompt"])}.
-func fixtureUpper(_ Context, input map[string]any) (map[string]any, error) {
-	prompt, _ := input["prompt"].(string)
-	return map[string]any{"upper": strings.ToUpper(prompt)}, nil
-}
-
-func newTestRunner() *Runner {
-	r := NewRunner()
-	r.RegisterMode("fixture.echo", fixtureEcho)
-	r.RegisterMode("fixture.fail", fixtureFail)
-	r.RegisterMode("fixture.upper", fixtureUpper)
-	return r
-}
-
-func TestValidate_RequiresName(t *testing.T) {
-	w := Workflow{Name: "", Nodes: []Node{{ID: "a", Mode: "fixture.echo"}}}
-	if err := w.Validate(); err == nil {
-		t.Error("empty name should fail validation")
-	}
-}
-
-func TestValidate_RequiresNodes(t *testing.T) {
-	w := Workflow{Name: "x"}
-	if err := w.Validate(); err == nil {
-		t.Error("empty nodes should fail validation")
-	}
-}
-
-func TestValidate_DuplicateNodeID(t *testing.T) {
-	w := Workflow{Name: "x", Nodes: []Node{
-		{ID: "a", Mode: "fixture.echo"},
-		{ID: "a", Mode: "fixture.echo"},
-	}}
-	if err := w.Validate(); !errors.Is(err, ErrDuplicateNodeID) {
-		t.Errorf("want ErrDuplicateNodeID, got %v", err)
-	}
-}
-
-func TestValidate_MissingDep(t *testing.T) {
-	w := Workflow{Name: "x", Nodes: []Node{
-		{ID: "a", Mode: "fixture.echo", DependsOn: []string{"ghost"}},
-	}}
-	if err := w.Validate(); !errors.Is(err, ErrMissingDep) {
-		t.Errorf("want ErrMissingDep, got %v", err)
-	}
-}
-
-func TestValidate_DetectsCycle(t *testing.T) {
-	w := Workflow{Name: "x", Nodes: []Node{
-		{ID: "a", Mode: "fixture.echo", DependsOn: []string{"b"}},
-		{ID: "b", Mode: "fixture.echo", DependsOn: []string{"a"}},
-	}}
-	if err := w.Validate(); !errors.Is(err, ErrCycle) {
-		t.Errorf("want ErrCycle, got %v", err)
-	}
-}
-
-func TestRun_SingleNode(t *testing.T) {
-	r := newTestRunner()
-	w := Workflow{Name: "single", Nodes: []Node{
-		{ID: "a", Mode: "fixture.echo", Prompt: "hello"},
-	}}
-	res, err := r.Run(context.Background(), w)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if res.Status != StatusSucceeded {
-		t.Errorf("status: want succeeded, got %q", res.Status)
-	}
-	if len(res.Nodes) != 1 {
-		t.Fatalf("nodes: want 1, got %d", len(res.Nodes))
-	}
-	if res.Nodes[0].Output["prompt"] != "hello" {
-		t.Errorf("echo round-trip: %+v", res.Nodes[0].Output)
-	}
-}
-
-func TestRun_DAG_RefSubstitution(t *testing.T) {
-	r := newTestRunner()
-	w := Workflow{Name: "chain", Nodes: []Node{
-		{ID: "shape", Mode: "fixture.upper", Prompt: "hello world"},
-		{ID: "weakness", Mode: "fixture.echo",
-			Prompt: "Given $shape.output.upper find issue",
-			DependsOn: []string{"shape"}},
-		{ID: "improvement", Mode: "fixture.echo",
-			Prompt: "Based on $weakness.output.prompt do better",
-			DependsOn: []string{"weakness"}},
-	}}
-	res, err := r.Run(context.Background(), w)
-	if err != nil {
-		t.Fatalf("Run: %v", err)
-	}
-	if res.Status != StatusSucceeded {
-		t.Errorf("status: %q", res.Status)
-	}
-	// Order check: shape → weakness → improvement
-	wantOrder := []string{"shape", "weakness", "improvement"}
-	for i, want := range wantOrder {
-		if res.Nodes[i].NodeID != want {
-			t.Errorf("execution order %d: want %q, got %q", i, want, res.Nodes[i].NodeID)
-		}
-	}
-	// shape uppercases "hello world" → "HELLO WORLD"
-	if up := res.Nodes[0].Output["upper"]; up != "HELLO WORLD" {
-		t.Errorf("shape.upper: %q", up)
-	}
-	// weakness sees "Given HELLO WORLD find issue" in its prompt
-	wp, _ := res.Nodes[1].Output["prompt"].(string)
-	if !strings.Contains(wp, "HELLO WORLD") {
-		t.Errorf("weakness ref-substitution failed: %q", wp)
-	}
-	// improvement sees the SUBSTITUTED weakness prompt
-	ip, _ := res.Nodes[2].Output["prompt"].(string)
-	if !strings.Contains(ip, "HELLO WORLD") {
-		t.Errorf("improvement chain-substitution failed: %q", ip)
-	}
-}
-
-func TestRun_FailedNodeSkipsDownstream(t *testing.T) {
-	r := newTestRunner()
-	w := Workflow{Name: "skipchain", Nodes: []Node{
-		{ID: "a", Mode: "fixture.fail"},
-		{ID: "b", Mode: "fixture.echo", DependsOn: []string{"a"}},
-		{ID: "c", Mode: "fixture.echo"}, // independent of a — should still run
-	}}
-	res, err := r.Run(context.Background(), w)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if res.Status != StatusPartial {
-		t.Errorf("status: want partial, got %q", res.Status)
-	}
-	byID := make(map[string]NodeResult)
-	for _, n := range res.Nodes {
-		byID[n.NodeID] = n
-	}
-	if byID["a"].Error == "" {
-		t.Error("a should have errored")
-	}
-	if byID["b"].Error == "" || !strings.Contains(byID["b"].Error, "upstream") {
-		t.Errorf("b should be skipped with upstream-failure reason; got %q", byID["b"].Error)
-	}
-	if byID["c"].Error != "" {
-		t.Errorf("c is independent; should run successfully; got error: %q", byID["c"].Error)
-	}
-}
-
-func TestRun_UnknownModeAborts(t *testing.T) {
-	r := newTestRunner()
-	w := Workflow{Name: "bad", Nodes: []Node{
-		{ID: "a", Mode: "fixture.does_not_exist"},
-	}}
-	res, err := r.Run(context.Background(), w)
-	if !errors.Is(err, ErrUnknownMode) {
-		t.Errorf("want ErrUnknownMode, got %v", err)
-	}
-	if res.Status != StatusAborted {
-		t.Errorf("status: want aborted, got %q", res.Status)
-	}
-}
-
-func TestRun_UnresolvedReferenceErrors(t *testing.T) {
-	r := newTestRunner()
-	w := Workflow{Name: "badref", Nodes: []Node{
-		{ID: "a", Mode: "fixture.echo",
-			Prompt: "references $ghost.output but ghost doesn't exist"},
-	}}
-	res, err := r.Run(context.Background(), w)
-	if err != nil {
-		t.Fatalf("Run: %v", err)
-	}
-	if res.Nodes[0].Error == "" {
-		t.Error("unresolved $ghost should error the node")
-	}
-	if !strings.Contains(res.Nodes[0].Error, "no such node") {
-		t.Errorf("error should explain no-such-node; got %q", res.Nodes[0].Error)
-	}
-}
-
-func TestRun_ImplicitLLMChatFallback(t *testing.T) {
-	r := NewRunner()
-	r.RegisterMode("llm.chat", fixtureEcho) // pretend llm.chat exists
-	w := Workflow{Name: "implicit", Nodes: []Node{
-		{ID: "a", Prompt: "no Mode field — should default to llm.chat"},
-	}}
-	res, err := r.Run(context.Background(), w)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if res.Status != StatusSucceeded {
-		t.Errorf("implicit llm.chat: status %q", res.Status)
-	}
-	if res.Nodes[0].Mode != "llm.chat" {
-		t.Errorf("effective mode: want llm.chat, got %q", res.Nodes[0].Mode)
-	}
-}
-
-func TestRun_ProvenanceRecording(t *testing.T) {
-	r := newTestRunner()
-	w := Workflow{Name: "trace", Nodes: []Node{
-		{ID: "x", Mode: "fixture.echo", Prompt: "trace me"},
-	}}
-	res, err := r.Run(context.Background(), w)
-	if err != nil {
-		t.Fatal(err)
-	}
-	n := res.Nodes[0]
-	if n.NodeID != "x" || n.Mode != "fixture.echo" {
-		t.Errorf("provenance: node=%q mode=%q", n.NodeID, n.Mode)
-	}
-	if n.StartedAt.IsZero() {
-		t.Error("started_at should be set")
-	}
-	if n.DurationMs < 0 {
-		t.Errorf("duration_ms: %d", n.DurationMs)
-	}
-}
-
-func TestRun_InputsResolveRefs(t *testing.T) {
-	// Verify that node.Inputs (not just Prompt) honors $-substitution.
-	r := newTestRunner()
-	w := Workflow{Name: "inputs", Nodes: []Node{
-		{ID: "a", Mode: "fixture.echo", Prompt: "first"},
-		{ID: "b", Mode: "fixture.echo",
-			Inputs: map[string]any{
-				"copied": "$a.output.prompt",
-				"static": "literal",
-			},
-			DependsOn: []string{"a"}},
-	}}
-	res, err := r.Run(context.Background(), w)
-	if err != nil {
-		t.Fatal(err)
-	}
-	bOut := res.Nodes[1].Output
-	if bOut["copied"] != "first" {
-		t.Errorf("inputs ref: want 'first', got %q", bOut["copied"])
-	}
-	if bOut["static"] != "literal" {
-		t.Errorf("inputs static: want 'literal', got %q", bOut["static"])
-	}
-}
-
-func TestTopoSort_Stable(t *testing.T) {
-	// Independent nodes preserve their declaration order.
-	nodes := []Node{
-		{ID: "z"}, {ID: "y"}, {ID: "x"},
-	}
-	got, err := topoSort(nodes)
-	if err != nil {
-		t.Fatal(err)
-	}
-	want := []string{"z", "y", "x"}
-	for i := range want {
-		if got[i] != want[i] {
-			t.Errorf("position %d: want %q, got %q", i, want[i], got[i])
-		}
-	}
-}
--- a/internal/workflow/types.go
+++ b/internal/workflow/types.go
@ -1,172 +0,0 @@
-// Package workflow is the Observer-KB workflow runner per SPEC §3.8 —
-// the orchestrator that chains §3.4 modes (matrix.search, relevance,
-// downgrade, distillation.score, drift.scorer) plus free-form llm.chat
-// into multi-pass measurement pipelines.
-//
-// The architectural intent is documented in PRD's "Observer as system
-// resource" section: workflows ARE observation patterns whose every
-// step is recorded as an ObservedOp via observerd. The mode catalog
-// is the registry of capabilities; the runner is the engine that
-// composes them.
-//
-// First slice (this commit): types + DAG runner + reference
-// substitution + a fixture.echo mode for testing the mechanics.
-// Real-mode integrations (matrix.search, distillation.score, etc.)
-// land in follow-up commits.
-//
-// YAML shape mirrors /home/profit/lakehouse/.archon/workflows/
-// lakehouse-architect-review.yaml so existing Archon workflows load
-// directly, with one Go-side addition: an optional `mode` field on
-// each node so the runner can dispatch to non-LLM modes.
-
-package workflow
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"time"
-)
-
-// Workflow is one loadable workflow definition. Matches Archon's
-// YAML shape; Provider + Model are informational in v0 (only used
-// by llm.chat-style modes that need a backend) and ignored by other
-// modes.
-type Workflow struct {
-	Name        string `yaml:"name" json:"name"`
-	Description string `yaml:"description" json:"description"`
-	Provider    string `yaml:"provider" json:"provider,omitempty"`
-	Model       string `yaml:"model" json:"model,omitempty"`
-	Nodes       []Node `yaml:"nodes" json:"nodes"`
-}
-
-// Node is one step in the workflow DAG. ID must be unique within a
-// workflow; DependsOn lists the IDs of nodes that must complete
-// before this one runs.
-//
-// Mode is the registered capability the node dispatches to. When
-// omitted, the runner assumes "llm.chat" using the workflow's
-// Provider+Model (matching Archon's implicit-LLM convention).
-//
-// Inputs is a free-form map passed to the mode after $-reference
-// substitution. The Prompt field is a convenience — it's added to
-// the input map under the key "prompt" before mode dispatch, so
-// llm.chat-style modes get free-form text without a wrapping object.
-type Node struct {
-	ID           string         `yaml:"id" json:"id"`
-	Mode         string         `yaml:"mode" json:"mode,omitempty"`
-	Prompt       string         `yaml:"prompt" json:"prompt,omitempty"`
-	Inputs       map[string]any `yaml:"inputs" json:"inputs,omitempty"`
-	AllowedTools []string       `yaml:"allowed_tools" json:"allowed_tools,omitempty"`
-	Effort       string         `yaml:"effort" json:"effort,omitempty"`
-	IdleTimeoutMs int           `yaml:"idle_timeout" json:"idle_timeout,omitempty"`
-	DependsOn    []string       `yaml:"depends_on" json:"depends_on,omitempty"`
-}
-
-// NodeResult captures one node's execution outcome. Output is the
-// mode's return map; Error is non-nil iff the mode returned an
-// error. StartedAt + DurationMs feed observerd's provenance recording.
-type NodeResult struct {
-	NodeID     string         `json:"node_id"`
-	Mode       string         `json:"mode"`
-	Output     map[string]any `json:"output,omitempty"`
-	Error      string         `json:"error,omitempty"`
-	StartedAt  time.Time      `json:"started_at"`
-	DurationMs int64          `json:"duration_ms"`
-}
-
-// RunResult is the full workflow execution outcome — every node's
-// result in execution order, plus the workflow name and a summary
-// status (succeeded if every node ran without error, partial if any
-// errored).
-type RunResult struct {
-	Workflow string       `json:"workflow"`
-	Status   RunStatus    `json:"status"`
-	Nodes    []NodeResult `json:"nodes"`
-	StartedAt  time.Time  `json:"started_at"`
-	DurationMs int64      `json:"duration_ms"`
-}
-
-// RunStatus tags the overall workflow outcome.
-type RunStatus string
-
-const (
-	StatusSucceeded RunStatus = "succeeded"
-	StatusPartial   RunStatus = "partial"  // some nodes errored, others succeeded
-	StatusAborted   RunStatus = "aborted"  // hard error halted execution (cycle, missing dep, unknown mode)
-)
-
-// Mode is the function signature every registered capability honors.
-// Input + output are generic maps so workflows compose freely; the
-// mode function is responsible for shape-checking its own inputs.
-//
-// Returning an error doesn't abort the whole workflow — the runner
-// records the error in NodeResult and continues with downstream
-// nodes that don't depend on this one. That mirrors observerd's
-// "log + continue" partial-failure semantics so a single mode bug
-// doesn't kill a 7-node measurement chain.
-type Mode func(ctx Context, input map[string]any) (map[string]any, error)
-
-// Context is what a Mode receives. Carries the standard Go
-// context.Context (for cancellation) plus a workflow-scoped
-// metadata bag for cross-mode coordination (e.g. a workflow's
-// model hint that llm.chat-style modes consume).
-type Context struct {
-	Ctx context.Context
-	// WorkflowName is the parent workflow.Name — useful when a mode
-	// records ObservedOps so the source can be traced back to the
-	// workflow that triggered it.
-	WorkflowName string
-	// NodeID is the currently-executing node — paired with
-	// WorkflowName forms a unique provenance key.
-	NodeID string
-	// Provider + Model carry the workflow's defaults; modes that
-	// need them (llm.chat) pull from here, others ignore.
-	Provider string
-	Model    string
-}
-
-// Errors surfaced to callers. Cycle / missing-dependency / unknown-
-// mode are *aborting* errors — the runner can't proceed. Per-node
-// mode errors are recorded but don't abort.
-var (
-	ErrCycle           = errors.New("workflow: dependency cycle detected")
-	ErrMissingDep      = errors.New("workflow: node depends on unknown id")
-	ErrUnknownMode     = errors.New("workflow: unknown mode")
-	ErrDuplicateNodeID = errors.New("workflow: duplicate node id")
-	ErrUnresolvedRef   = errors.New("workflow: unresolved $node.output reference")
-)
-
-// Validate checks structural invariants on a Workflow before
-// execution: unique node IDs, every depends_on points to a known
-// id, no cycles. Returns nil on success or a wrapped sentinel.
-func (w Workflow) Validate() error {
-	if w.Name == "" {
-		return fmt.Errorf("workflow: name is required")
-	}
-	if len(w.Nodes) == 0 {
-		return fmt.Errorf("workflow: at least one node required")
-	}
-	seen := make(map[string]struct{}, len(w.Nodes))
-	for _, n := range w.Nodes {
-		if n.ID == "" {
-			return fmt.Errorf("workflow: node id must be non-empty")
-		}
-		if _, dup := seen[n.ID]; dup {
-			return fmt.Errorf("%w: %q", ErrDuplicateNodeID, n.ID)
-		}
-		seen[n.ID] = struct{}{}
-	}
-	for _, n := range w.Nodes {
-		for _, dep := range n.DependsOn {
-			if _, ok := seen[dep]; !ok {
-				return fmt.Errorf("%w: node %q depends on %q (no such node)",
-					ErrMissingDep, n.ID, dep)
-			}
-		}
-	}
-	if cyclicID, ok := detectCycle(w.Nodes); ok {
-		return fmt.Errorf("%w: starting at node %q", ErrCycle, cyclicID)
-	}
-	return nil
-}
--- a/lakehouse.toml
+++ b/lakehouse.toml
@ -12,9 +12,6 @@ ingestd_url  = "http://127.0.0.1:3213"
 queryd_url   = "http://127.0.0.1:3214"
 vectord_url  = "http://127.0.0.1:3215"
 embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-observerd_url = "http://127.0.0.1:3219"

 [storaged]
 bind = "127.0.0.1:3211"
@ -50,26 +47,6 @@ catalogd_url = "http://127.0.0.1:3212"
 secrets_path = "/etc/lakehouse/secrets-go.toml"
 refresh_every = "30s"

-[pathwayd]
-bind = "127.0.0.1:3217"
-# Empty = in-memory only (dev/test). Production sets a path under
-# /var/lib/lakehouse/pathway/state.jsonl so traces survive restart.
-persist_path = ""
-
-[matrixd]
-bind = "127.0.0.1:3218"
-# matrixd calls embedd (query-text → vector) and vectord (per-corpus
-# search) directly. Localhost defaults; in distributed deployments
-# these point at the gateway's upstream addresses.
-embedd_url  = "http://127.0.0.1:3216"
-vectord_url = "http://127.0.0.1:3215"
-
-[observerd]
-bind = "127.0.0.1:3219"
-# Empty = in-memory only (dev/test). Production sets a path under
-# /var/lib/lakehouse/observer/ops.jsonl so ops survive restart.
-persist_path = ""
-
 [s3]
 endpoint        = "http://localhost:9000"
 region          = "us-east-1"
--- a/reports/scrum/rerun-2-2026-04-29.md
+++ b/reports/scrum/rerun-2-2026-04-29.md
@ -1,217 +0,0 @@
-# Audit Re-run #2 — 2026-04-29 (after Phases A–H + matrix §3.4 + workflow §3.8)
-
-**Baseline audit:** `reports/scrum/golang-lakehouse-scrum-test.md` at commit `91edd43` — composite **35 / 60**.
-**Rerun-1 head:** `4840c10` — composite **43 / 60** (Δ baseline = +8).
-**Rerun-2 head:** `c7e3124` — **30 commits past rerun-1**. Composite **50 / 60. Δ rerun-1 = +7. Δ baseline = +15.**
-
-This is the second delta document. Both prior reports remain immutable history. Working tree was dirty on entry (5 in-flight files under `cmd/observerd/` + `internal/{observer,workflow}/`); audit ran on stashed-clean `c7e3124` so the score reflects shipped state, not WIP.
-
---
-
-## What landed since rerun-1
-
-| Commit | What |
-|---|---|
-| `4840c10` | (rerun-1 baseline — 04_query refresh-tick race fix) |
-| `125e1c8` | tests close R-002 / R-003 / R-008 — `internal/{shared,storeclient,queryd/db}` Go tests |
-| `6af0520` | A: fail-loud on non-loopback bind — closes worst case of R-001 |
-| `423a381` | D: storaged per-prefix PUT cap — vectord `_vectors/` → 4 GiB (ADR-002) |
-| `0d18ffa` | ADR-003: inter-service auth posture — Bearer + IP allowlist |
-| `1ec85b0` | Batch 2: perf baseline — multi-sample + warmup + MAD threshold |
-| `0f79bce` | Batch 3: `cmd/<bin>/main_test.go × 6` — closes R-005 |
-| `fb08232` | Batch 4: embed fixture-mode — partial R-006 closure |
-| `56844c3` | embed cache — LRU at `/v1/embed` for repeat-query elimination |
-| `8f4c16f` | mcpd: Go MCP SDK port — replaces Bun mcp-server tool surface |
-| `fa56134` | ADR-003 wiring: Bearer token + IP allowlist middleware |
-| `ad1670d` | storaged cap smoke — verifies ADR-002 at 300 MiB |
-| `2a6234f` | ADR-004 + `internal/pathway`: Mem0 versioned trace substrate |
-| `afbb506` | pathwayd: HTTP service over `internal/pathway` · 11/11 smoke gate |
-| `f1c1883` | vectord BatchAdd — single-lock variadic batch |
-| `71b35fb` | SPEC §1 + §3.4: name matrix indexer as a port target |
-| `a7620c8` | PRD: name the product vision — small-model pipeline + 5-loop substrate |
-| `c1d96b7` | matrixd: multi-corpus retrieve+merge — SPEC §3.4 component 2 of 5 |
-| `166470f` | corpusingest: extract reusable text→vector ingest pipeline |
-| `0d1553c` | candidates corpus: first deep-field reality test on real staffing data |
-| `9588bd8` | matrix relevance filter — SPEC §3.4 component 3 of 5 |
-| `3968ec8` | matrix strong-model downgrade gate — SPEC §3.4 component 4 of 5 |
-| `a97881d` | workers corpus + multi-corpus reality test — matrix indexer end-to-end |
-| `31b4088` | multi_corpus_e2e WORKERS_LIMIT knob + embed-text-not-sample-size finding |
-| `06e7152` | matrix playbook memory + boost — SPEC §3.4 component 5 of 5 (LEARNING LOOP) |
-| `a730fc2` | scrum fixes: 4 real findings landed, 4 false positives dismissed |
-| `7f42089` | D: embed-text iteration — clean negative finding (3 variants tested) |
-| `57d0df1` | E (partial): distillation port — scorer + contamination firewall |
-| `be65f85` | F: drift quantification — scorer drift first |
-| `b199093` | B: matrix metadata filter — post-retrieval structured gate |
-| `6392772` | C: bulk playbook record — operational rating wiring |
-| `bc9ab93` | H: observerd — autonomous-iteration witness loop (SPEC §2 port) |
-| `97dd3f8` | SPEC §3.5/§3.6/§3.7/§3.8 — name F/B/C as port targets + Archon-style workflow runner |
-| `e30da6e` | §3.8 first slice: workflow runner skeleton + DAG executor + observerd integration |
-| `c7e3124` | §3.8 second slice: real modes wired (matrix.relevance/downgrade/search, distillation.score, drift.scorer) |
-
-This is the wave that took the system from "G0+G2 substrate plus 500K validation" to **"all five small-model-pipeline loops have at least a first port"** (per `project_small_model_pipeline_vision.md`).
-
---
-
-## Score delta — double column
-
-Same 6 dimensions, scored 0–10 with citations. `Δ R1` = vs rerun-1 (`4840c10`); `Δ Base` = vs original audit (`91edd43`).
-
-| Dimension | Base | R1 | **R2** | Δ R1 | Δ Base | Evidence for the move |
-|---|---:|---:|---:|---:|---:|---|
-| **Reproducibility** | 7 | 9 | **9** | 0 | +2 | `just verify` PASS in 31s wall (`_evidence/rerun2/just_verify.log`) — vet + 30 packages of `go test -short` + 9 core smokes. `just doctor` all-green for go/gcc/minio/ollama/secrets. **8 additional domain smokes also PASS** (pathway, matrix, relevance, downgrade, observer, playbook, workflow, storaged_cap → `_evidence/rerun2/smoke_*.log`). New recipes: `smoke-g2-fixtures` (R-006 partial close) + `smoke-storaged-cap`. **Still −1**: no `.github/workflows/`; no fixture-mode for storage (only embed). |
-| **Test Coverage** | 6 | 8 | **9** | +1 | +3 | **321 Go test functions** across 40 test files (was 13 at baseline, ~77 at R1 — **3× the test surface**). `internal/shared` has 4 test files (`auth_test.go`, `bind_test.go`, `config_test.go`, `server_test.go`); `internal/storeclient/client_test.go` exists; `internal/queryd/db_test.go` + `registrar_test.go` exist — **R-002 / R-003 / R-008 all closed**. Six original cmd binaries now have `main_test.go` (catalogd/embedd/ingestd/queryd/storaged/vectord) — **R-005 mostly closed**. **Still −1**: `cmd/{matrixd,observerd,pathwayd,fake_ollama}/main_test.go` absent — three of those are new daemons that need wiring tests. |
-| **Trust Boundary Safety** | 7 | 7 | **9** | +2 | +2 | **ADR-003 shipped** (`docs/DECISIONS.md` §3): `internal/shared/auth.go` 64-line Bearer middleware with constant-time compare via `crypto/subtle` + IP allowlist (`internal/shared/auth.go:62-64`). 4 auth tests in `auth_test.go` cover wrong-token, raw-token-without-prefix, IP-only, both-required (`internal/shared/auth_test.go:77,86,108,162`). `redactCreds` still scrubs S3 keys from queryd error chain (`internal/queryd/db.go`). One `fmt.Sprintf` SQL site remains (`internal/queryd/registrar.go:153`) — properly escaped via `quoteIdent` + `sqlEscape`. 13 `MaxBytesReader` sites in cmd/, 5 loopback bindings. **Still −1**: auth is opt-in (empty token = G0 dev mode); no CORS posture (R-010); 2 `/home/profit/lakehouse/...` paths in `scripts/staffing_*/main.go` flag-defaults. |
-| **Agent Memory Correctness** | 3 | 4 | **9** | +5 | +6 | **All five SPEC §3.4 components shipped**: corpus builders (`internal/corpusingest`), retrieve+merge (`matrixd /matrix/search`), relevance filter (`internal/matrix/relevance.go` 376 LoC + 289 LoC test), strong-model downgrade gate (`internal/matrix/downgrade.go` 137 LoC + 100 LoC test), playbook memory + boost (`internal/matrix/playbook.go` 196 LoC + 180 LoC test) — including the **learning loop**. Pathway substrate ratified (ADR-004, `internal/pathway/store.go` 381 LoC + 398 LoC test). **Mem0-style ops all proven**: `TestAdd_AssignsUIDAndTimestamps`, `TestUpdate_ReplacesContentSameUID`, `TestRevise_LinksToPredecessorViaHistory`, `TestRevise_ChainOfThree_BackwardWalk`, `TestRetire_ExcludedFromSearch`, `TestRetire_StillAccessibleViaGet`, `TestHistory_CycleDetected`, `TestHistory_PredecessorMissing_TruncatesChain`, `TestAddIdempotent_RejectsEmptyUID` — **every Sprint 2 design-bar acceptance has a test**. Observer ported (`internal/observer/store.go` 249 LoC + 193 LoC test). pathway smoke 11/11. **Still −1**: distillation port partial (scorer + firewall only — `57d0df1` "E (partial)"); drift is "scorer drift first" (`be65f85`) not full quantification. |
-| **Deployment Readiness** | 4 | 5 | **5** | 0 | +1 | `just doctor` actionable per-dep install (`scripts/doctor.sh`); `just install-hooks` documented; pre-push hook still installed. **Still −5**: no `REPLICATION.md`, no `secrets-go.toml.example`, no `deploy/systemd/*.service`, no `Dockerfile`, no readiness vs. liveness split. Sprint 4 stories all open. |
-| **Maintainability** | 8 | 8 | **9** | +1 | +1 | **4 ADRs ratified** (was 1 at R1): ADR-001 foundational, ADR-002 storaged per-prefix cap, ADR-003 auth posture, ADR-004 pathway data model — **the auth + cap + memory-model decisions are locked before downstream code retrofits them**. Every binary still 100–400 LoC (no god-files). Per-package test files: every `internal/` package has ≥1 test file (was: 5 packages had zero at baseline). `CLAUDE_REFACTOR_GUARDRAILS.md` codifies the maintenance discipline. `tests/proof/FINAL_REPORT.md` answers the 9 mandated questions. **Still −1**: no `CONTRIBUTING.md`; the proof harness adds 24-claim maintenance surface that needs keeping current. |
-
-**Composite: 35 → 43 → 50. 83% of max.**
-
---
-
-## Code surface delta
-
-| Metric | Baseline (`91edd43`) | R1 (`4840c10`) | **R2 (`c7e3124`)** | Δ R1 |
-|---|---:|---:|---:|---:|
-| Total Go LoC | ~6,587 | ~7,800 (est) | **19,381** | ~2.5× |
-| Go files | ~50 | ~62 | **93** | +31 |
-| Test files | 13 | ~22 | **40** | +18 |
-| Go test functions | ~77 | ~109 | **321** | +212 |
-| `cmd/<bin>/` | 7 | 7 | **12** | +5 |
-| `internal/<pkg>/` | 11 | 11 | **18** | +7 |
-| Smoke scripts | 9 | 9 | **21** | +12 |
-| ADRs ratified | 0 | 1 | **4** | +3 |
-| Routes (cmd-level) | ~22 | ~22 | **37** | +15 |
-| Untested cmd binaries | 6 / 7 | 6 / 7 | **4 / 12** | −2 abs, −1/3 ratio |
-
-The wave is **substrate-bearing**, not throughput-bearing. Every internal package has tests; the gap is now the **wiring layer** for the 3 new daemons.
-
---
-
-## Risk register status updates
-
-12 risks in `reports/scrum/risk-register.md`. Status table at `c7e3124`:
-
-| Risk | Severity | Before R2 | After R2 | Evidence |
-|---|---|---|---|---|
-| R-001 queryd /sql RCE-eq off-loopback | HIGH | open | **partial** | `6af0520` fail-loud on non-loopback bind (closes worst case); ADR-003 + `internal/shared/auth.go` available to wrap; **but auth is opt-in** — needs deploy story decision before fully closing |
-| R-002 internal/shared zero tests | HIGH | open | **CLOSED** | 4 test files (`auth_test.go` + `bind_test.go` + `config_test.go` + `server_test.go`), all PASS in `just verify` |
-| R-003 internal/storeclient zero tests | HIGH | open | **CLOSED** | `internal/storeclient/client_test.go`, PASS |
-| R-004 smokes not gated | MED | closed (R1) | **CLOSED** | unchanged from R1 |
-| R-005 6/7 cmd/main.go untested | MED | partial | **partial** | 6 of original 7 closed (`0f79bce` Batch 3); 4 new daemons (`fake_ollama`/`matrixd`/`observerd`/`pathwayd`) reopen the gap on different surface |
-| R-006 no fixture-only smokes | MED | open | **partial** | `scripts/g2_smoke_fixtures.sh` (`fb08232`) closes embed half via fake_ollama; storage half deferred |
-| R-007 zero auth middleware | MED | open | **partial** | `internal/shared/auth.go` shipped with 4 tests (`fa56134`); opt-in by default until deploy posture decision |
-| R-008 queryd/db.go untested | MED | open | **CLOSED** | `internal/queryd/db_test.go` + `registrar_test.go` (`125e1c8`) |
-| R-009 registrar.go fmt.Sprintf SQL | LOW | open | open | unchanged — escaping via `quoteIdent`+`sqlEscape` is correct, regression test still missing |
-| R-010 no CORS posture | LOW | open | open | unchanged — no `Access-Control-*` headers anywhere |
-| R-011 g2 smoke model assertion | LOW | note | note | unchanged |
-| R-012 empty tests/ dir | LOW | closed (R1) | **CLOSED** | unchanged from R1 |
-
-**Net since R1: 3 closed (R-002, R-003, R-008), 3 advanced to partial (R-001, R-006, R-007), R-005 stays partial on different surface, 3 unchanged.**
-
---
-
-## Sprint backlog progress
-
-### Sprint 0 — Reproducibility Gate
-| Story | R1 | R2 |
-|---|---|---|
-| S0.1 `just doctor` | DONE | DONE |
-| S0.2 `just smoke-fixtures` | open | **partial** (`smoke-g2-fixtures`) |
-| S0.3 `just verify` + pre-push | DONE | DONE |
-| S0.4 `cmd/<bin>/main_test.go` × 6 | partial | **partial → mostly DONE** (6 of original 7; 3 new daemons absent) |
-| S0.5 internal/shared, storeclient, queryd/db tests | open | **DONE** |
-| S0.6 `tests/` dir cleanup | DONE | DONE |
-
-**4 of 6 done, 2 partial.** Highest-leverage open work: tests for the 3 new daemons + storage-half of fixture mode.
-
-### Sprint 1 — Trust Boundary Gate
- Replace SQL string interp with parameterized: still 1 site, properly escaped (R-009 LOW)
- Observer fail-open → `degraded`/`cycle`: not yet codified — observer is ported but ADR-002-style fail-safe ADR not written
- Auth/localhost-only guardrails: **shipped** (ADR-003 + auth.go), opt-in posture
- Schema validation per public endpoint: per-handler validation exists (validateKey etc.); not framework-level
-
-**Status: ~60% of Sprint 1 closed, observer fail-safe semantics ADR is the outstanding doc-only piece.**
-
-### Sprint 2 — Memory Correctness Gate
-| Story | R1 | R2 |
-|---|---|---|
-| ADD/UPDATE/REVISE/RETIRE/HISTORY tests | design-bar | **DONE** (`internal/pathway/store_test.go`) |
-| Cycle detection tests | design-bar | **DONE** (`TestHistory_CycleDetected`) |
-| Retired-trace exclusion tests | design-bar | **DONE** (`TestRetire_ExcludedFromSearch`) |
-| Duplicate trace replay_count tests | design-bar | partial (`TestAddIdempotent_RejectsEmptyUID`; replay_count semantics) |
-| Corrupted memory row recovery test | design-bar | open |
-
-**Status: Sprint 2 acceptance criteria mostly green — the core invariants are tested. Audit/event receipt on every memory mutation is the missing piece.**
-
-### Sprint 3 — Agent Loop Reality Gate
- Deterministic mini corpus: `tests/proof/fixtures/` exists
- search → verify → observer review → playbook seal → second-run retrieval: `scripts/multi_corpus_e2e.sh` + `scripts/playbook_smoke.sh` exercise this; full chain via `scripts/workflow_smoke.sh`
- Negative case observer rejects hallucinated claim: covered by observer_smoke (semantics open for review)
- Health endpoint content-type regression: covered by proof harness `00_health`
-
-**Status: Sprint 3 has working substrate; explicit "single command proves the full loop" with input/output/verdict/receipt evidence is partial.**
-
-### Sprint 4 — Deployment Gate
-**Status: unchanged from R1.** No `REPLICATION.md`, no `.env.example`, no `*.service` units, no `Dockerfile`. `just doctor` is the closest piece. This is the largest open Sprint.
-
---
-
-## New findings from this rerun
-
-Two real findings worth recording.
-
-### F1 — 3 new daemons lack `cmd/<bin>/main_test.go`
- **Where:** `cmd/matrixd/`, `cmd/observerd/`, `cmd/pathwayd/`
- **What:** Same gap-class as R-005 was, just on net-new code. Each daemon mounts ≥4 routes (matrixd: 6, observerd: 4, pathwayd: 9 → 19 routes total) with no wiring test.
- **Severity:** MEDIUM. The internal packages backing each daemon (`internal/matrix`, `internal/observer`, `internal/pathway`) have full unit tests — but no test proves `cmd/pathwayd/main.go` actually wires `/pathway/revise` to `(*pathway.Store).Revise`. A handler-rename refactor would silently break the route surface.
- **Action:** Re-open R-005 against the new daemons. ~1 hr to add three `main_test.go` files patterned on `cmd/storaged/main_test.go`.
-
-### F2 — `scripts/staffing_*/main.go` has hardcoded data paths in flag defaults
- **Where:** `scripts/staffing_candidates/main.go:217` and `scripts/staffing_workers/main.go:269` reference `/home/profit/lakehouse/data/datasets/{candidates,workers_500k}.parquet`.
- **What:** Flag defaults reach into the Rust legacy tree at `/home/profit/lakehouse/...`. Throwaway driver scripts (not services), and the values are flag-overridable, but they couple the Go repo to the Rust filesystem layout.
- **Severity:** LOW. Doesn't affect any service. Worth noting because audit Sprint 4 explicitly calls out "no hardcoded `/home/profit` paths" as an acceptance criterion.
- **Action:** Either move the parquet under `golangLAKEHOUSE/data/` (preferred for self-containment) or document the cross-tree dependency in `RESEARCH_LOG_2026-04-28.md` and accept it.
-
---
-
-## What this rerun does NOT change
-
- **Sprint 4 (deployment) remains the largest open gap.** R-1 said this; R-2 says this; without `REPLICATION.md` + systemd units, the cutover from Rust at `devop.live/lakehouse/` (G5) cannot be operator-validated.
- **Auth is opt-in.** Empty-token default is fine for G0 development but means the moment any Go binary binds non-loopback in prod, a posture decision is required. R-001 + R-007 cannot fully close until that decision is recorded.
- **CORS posture (R-010) is still unspecified.** The Bun-served Rust UI handles browser CORS today; if a Go service ever fronts a browser, this needs a decision.
- **Distillation and drift are first-port-only.** `57d0df1` ships scorer + contamination firewall (E partial); `be65f85` ships scorer-drift only (F first slice). The full distillation pipeline (sample export, audit_baselines lineage) and full drift signal are not yet ported.
-
---
-
-## Recommended next moves (ordered by leverage / cost)
-
-1. **Three `main_test.go` files for `matrixd` + `observerd` + `pathwayd`** (~1 hr). Closes the regenerated R-005, ratchets every future route addition through `just verify`.
-2. **ADR-005: observer fail-safe semantics** (~30 min, doc-only). The observer is ported (`internal/observer/store.go`), but the upstream "verdict:accept on crash" anti-pattern still has no Go-side decision locked. Doing this now is half the cost of doing it after a regression.
-3. **Auth posture decision for non-loopback deploy** (~1 hr, ADR or annotated decision in `RESEARCH_LOG`). Locks R-001 + R-007 from "opt-in middleware exists" to "wired-by-default for X, opt-in for Y". Required input for any G5 cutover plan.
-4. **Sprint 4 minimal first slice** (~3 hr): `secrets-go.toml.example` + `deploy/systemd/<bin>.service.tmpl` × 12 binaries + `REPLICATION.md` skeleton. Highest-leverage Sprint 4 starter; the systemd units mostly mirror Rust's layout.
-5. **Storage-half of fixture mode** (~3 hr): `MockS3Storage` interface satisfying `internal/storaged.Bucket`, smoke variant that points storaged at it. Closes R-006 fully and decouples CI from MinIO.
-
-The remaining items (full drift port, full distillation port, observer audit-event receipt, corrupted-memory recovery test) are real engineering — Sprint 2/3 followups, not Sprint-0 polish.
-
---
-
-## Methodology note — same as prior reports
-
-All claims cite a file, line, or command. Evidence captured under `reports/scrum/_evidence/rerun2/`:
-
- `just_verify.log` — full vet + 30 packages × `go test -short` + 9 core smokes, exit 0, 31s wall
- `just_doctor.log` — 5 dependency probes, all green
- `govet.log` — `go vet ./...` exit 0
- `gotest_short.log` — full short-test pass
- `just_list.log` — recipe inventory
- `smoke_{pathway,matrix,relevance,downgrade,observer,playbook,workflow,storaged_cap}.log` — 8 additional domain smokes, all PASS
-
-What was NOT inspected this round (deferred):
- Cross-binary failure cascades (kill matrixd mid-search, observe observerd state) — Sprint 1 follow-up
- Supply-chain audit of go.sum diffs since R1
- Performance regression vs the perf baseline shipped in `1ec85b0` — `just proof performance` exists, not run here
-
---
-
-_Rerun-2 produced under the same "no vibes" rule as the original audit. The 50/60 reflects what's verifiably shipped at `c7e3124`, not what's planned. Working tree restored from stash after audit completion._
--- a/scripts/candidates_e2e.sh
+++ b/scripts/candidates_e2e.sh
@ -1,98 +0,0 @@
-#!/usr/bin/env bash
-# Candidates end-to-end — first deep-field reality test.
-#
-# Spins up storaged + embedd + vectord + matrixd + gateway, ingests
-# the 1000-candidate corpus from
-# /home/profit/lakehouse/data/datasets/candidates.parquet via the
-# corpusingest substrate, then runs a real staffing query through
-# /v1/matrix/search and prints the top 5 hits.
-#
-# Requires: Ollama on :11434 with nomic-embed-text loaded. If absent,
-# this script exits 0 with a "skipped" message — same contract as
-# g2_smoke.
-#
-# Usage: ./scripts/candidates_e2e.sh
-#        ./scripts/candidates_e2e.sh "your custom query here"
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-QUERY="${1:-Python AWS Docker engineer in Chicago available now}"
-
-if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
-  echo "[candidates-e2e] Ollama not reachable on :11434 — skipping (matches g2_smoke contract)"
-  exit 0
-fi
-
-echo "[candidates-e2e] building binaries..."
-go build -o bin/ ./cmd/storaged ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway ./scripts/staffing_candidates
-
-pkill -f "bin/(storaged|embedd|vectord|matrixd|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-CFG="$TMP/e2e.toml"
-cleanup() {
-  echo "[candidates-e2e] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-# Custom toml: vectord persistence disabled so the candidates index
-# doesn't survive the run. Without this, re-running pollutes the
-# shared MinIO `_vectors/` prefix and breaks g1p_smoke's "this is
-# the only persisted index" assertion (caught 2026-04-29).
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-
-[vectord]
-bind = "127.0.0.1:3215"
-storaged_url = ""
-
-[matrixd]
-bind = "127.0.0.1:3218"
-embedd_url  = "http://127.0.0.1:3216"
-vectord_url = "http://127.0.0.1:3215"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-echo "[candidates-e2e] launching stack..."
-./bin/storaged -config "$CFG" > /tmp/storaged.log 2>&1 & PIDS+=($!)
-poll_health 3211 || { echo "storaged failed"; tail /tmp/storaged.log; exit 1; }
-
-./bin/embedd -config "$CFG" > /tmp/embedd.log 2>&1 & PIDS+=($!)
-poll_health 3216 || { echo "embedd failed"; tail /tmp/embedd.log; exit 1; }
-
-./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!)
-poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
-
-./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!)
-poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
-
-./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!)
-poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
-
-echo "[candidates-e2e] stack up; running ingest + reality test query..."
-echo
-./bin/staffing_candidates -query "$QUERY"
--- a/scripts/downgrade_smoke.sh
+++ b/scripts/downgrade_smoke.sh
@ -1,159 +0,0 @@
-#!/usr/bin/env bash
-# Downgrade smoke — strong-model auto-downgrade gate via matrixd.
-# All assertions go through gateway :3110 → /v1/matrix/downgrade.
-#
-# Validates the 5-row truth table from mode.rs::execute pass5:
-#   1. Lakehouse + strong + no force                → DOWNGRADE
-#   2. Lakehouse + strong + forced_mode=true        → keep
-#   3. Lakehouse + strong + force_full_override     → keep
-#   4. Lakehouse + weak (qwen3.5:latest)            → keep
-#   5. Non-lakehouse mode                           → gate not applicable
-#   6. Negative path: empty mode → 400
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-echo "[downgrade-smoke] building matrixd + vectord + gateway..."
-go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway
-
-pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-CFG="$TMP/downgrade.toml"
-
-cleanup() {
-  echo "[downgrade-smoke] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-
-[vectord]
-bind = "127.0.0.1:3215"
-storaged_url = ""
-
-[matrixd]
-bind = "127.0.0.1:3218"
-embedd_url  = "http://127.0.0.1:3216"
-vectord_url = "http://127.0.0.1:3215"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-echo "[downgrade-smoke] launching vectord → matrixd → gateway..."
-./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 &
-PIDS+=($!)
-poll_health 3215 || { echo "vectord failed"; exit 1; }
-
-./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 &
-PIDS+=($!)
-poll_health 3218 || { echo "matrixd failed"; exit 1; }
-
-./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
-PIDS+=($!)
-poll_health 3110 || { echo "gateway failed"; exit 1; }
-
-FAILED=0
-URL=http://127.0.0.1:3110/v1/matrix/downgrade
-
-# Helper for body→{mode, downgraded_from} extraction.
-post() {
-  curl -sS -X POST "$URL" -H 'Content-Type: application/json' -d "$1"
-}
-
-# ── 1. Downgrade fires ───────────────────────────────────────────
-echo "[downgrade-smoke] strong model + no force → downgrade fires:"
-RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast"}')"
-M="$(echo "$RESP" | jq -r '.mode')"
-D="$(echo "$RESP" | jq -r '.downgraded_from')"
-if [ "$M" = "codereview_isolation" ] && [ "$D" = "codereview_lakehouse" ]; then
-  echo "  ✓ codereview_lakehouse → codereview_isolation (downgraded_from=lakehouse)"
-else
-  echo "  ✗ mode=$M downgraded_from=$D"; FAILED=1
-fi
-
-# ── 2. Forced mode bypasses ──────────────────────────────────────
-echo "[downgrade-smoke] forced_mode=true bypasses:"
-RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast","forced_mode":true}')"
-M="$(echo "$RESP" | jq -r '.mode')"
-D="$(echo "$RESP" | jq -r '.downgraded_from // ""')"
-if [ "$M" = "codereview_lakehouse" ] && [ "$D" = "" ]; then
-  echo "  ✓ caller-forced mode preserved, no downgrade"
-else
-  echo "  ✗ mode=$M downgraded_from=$D"; FAILED=1
-fi
-
-# ── 3. force_full_override bypasses ──────────────────────────────
-echo "[downgrade-smoke] force_full_override=true bypasses:"
-RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast","force_full_override":true}')"
-M="$(echo "$RESP" | jq -r '.mode')"
-D="$(echo "$RESP" | jq -r '.downgraded_from // ""')"
-if [ "$M" = "codereview_lakehouse" ] && [ "$D" = "" ]; then
-  echo "  ✓ env-override bypass, no downgrade"
-else
-  echo "  ✗ mode=$M downgraded_from=$D"; FAILED=1
-fi
-
-# ── 4. Weak model bypasses ───────────────────────────────────────
-echo "[downgrade-smoke] weak model (qwen3.5:latest) bypasses:"
-RESP="$(post '{"mode":"codereview_lakehouse","model":"qwen3.5:latest"}')"
-M="$(echo "$RESP" | jq -r '.mode')"
-D="$(echo "$RESP" | jq -r '.downgraded_from // ""')"
-if [ "$M" = "codereview_lakehouse" ] && [ "$D" = "" ]; then
-  echo "  ✓ weak model keeps lakehouse"
-else
-  echo "  ✗ mode=$M downgraded_from=$D"; FAILED=1
-fi
-
-# ── 5. Non-lakehouse mode → gate not applicable ──────────────────
-echo "[downgrade-smoke] non-lakehouse mode → gate not applicable:"
-RESP="$(post '{"mode":"codereview_isolation","model":"x-ai/grok-4.1-fast"}')"
-M="$(echo "$RESP" | jq -r '.mode')"
-D="$(echo "$RESP" | jq -r '.downgraded_from // ""')"
-R="$(echo "$RESP" | jq -r '.reason')"
-if [ "$M" = "codereview_isolation" ] && [ "$D" = "" ] && echo "$R" | grep -q "not applicable"; then
-  echo "  ✓ codereview_isolation passes through unchanged"
-else
-  echo "  ✗ mode=$M downgraded_from=$D reason='$R'"; FAILED=1
-fi
-
-# ── 6. Negative: empty mode → 400 ────────────────────────────────
-echo "[downgrade-smoke] empty mode → 400:"
-HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST "$URL" \
-  -H 'Content-Type: application/json' -d '{"mode":"","model":"x"}')"
-if [ "$HTTP" = "400" ]; then
-  echo "  ✓ empty mode → 400"
-else
-  echo "  ✗ got $HTTP"; FAILED=1
-fi
-
-if [ "$FAILED" -eq 0 ]; then
-  echo "[downgrade-smoke] Downgrade gate acceptance: PASSED"
-  exit 0
-else
-  echo "[downgrade-smoke] Downgrade gate acceptance: FAILED"
-  exit 1
-fi
--- a/scripts/matrix_smoke.sh
+++ b/scripts/matrix_smoke.sh
@ -1,230 +0,0 @@
-#!/usr/bin/env bash
-# Matrix smoke — multi-corpus retrieve+merge via matrixd (SPEC §3.4).
-# All assertions go through gateway :3110.
-#
-# Validates:
-#   - Multi-corpus search returns hits from BOTH corpora
-#   - Each result carries its corpus attribution (load-bearing — losing
-#     it defeats the matrix's purpose)
-#   - Merged top-k is ordered by distance across corpora
-#   - /matrix/corpora lists known indexes
-#   - Empty corpora list → 400
-#   - Bad corpus name → 502 (matrix bubbles vectord's 404 as upstream error)
-#
-# Uses query_vector (not query_text) to skip the embedd dependency so
-# this smoke runs without Ollama. End-to-end embed→matrix→search has
-# its own integration test (next commit).
-#
-# Usage: ./scripts/matrix_smoke.sh
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-echo "[matrix-smoke] building matrixd + vectord + gateway..."
-go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway
-
-pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-CFG="$TMP/matrix.toml"
-
-cleanup() {
-  echo "[matrix-smoke] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-# Custom toml: vectord persistence disabled (don't pollute storaged
-# state with the test corpora).
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-
-[vectord]
-bind = "127.0.0.1:3215"
-storaged_url = ""
-
-[matrixd]
-bind = "127.0.0.1:3218"
-embedd_url  = "http://127.0.0.1:3216"
-vectord_url = "http://127.0.0.1:3215"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-echo "[matrix-smoke] launching vectord → matrixd → gateway..."
-./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 &
-PIDS+=($!)
-poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
-
-./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 &
-PIDS+=($!)
-poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
-
-./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
-PIDS+=($!)
-poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
-
-FAILED=0
-DIM=4
-
-# Create two corpora — corpus_a and corpus_b — each with a few
-# vectors at known distances from a chosen query vector.
-echo "[matrix-smoke] create two corpora:"
-for c in corpus_a corpus_b; do
-  HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/vectors/index \
-    -H 'Content-Type: application/json' \
-    -d "{\"name\":\"$c\",\"dimension\":$DIM,\"distance\":\"euclidean\"}")"
-  if [ "$HTTP" != "201" ]; then echo "  ✗ create $c → $HTTP"; FAILED=1; fi
-done
-echo "  ✓ corpus_a and corpus_b created"
-
-# Add vectors. Use euclidean distance for predictable arithmetic.
-# Query vector will be [1,0,0,0]. Distances from it:
-#   corpus_a/a-near : [1.1, 0, 0, 0]   ≈ 0.1
-#   corpus_a/a-mid  : [1, 0.5, 0, 0]   ≈ 0.5
-#   corpus_a/a-far  : [3, 0, 0, 0]     ≈ 2.0
-#   corpus_b/b-near : [1.05, 0, 0, 0]  ≈ 0.05  (closest globally)
-#   corpus_b/b-mid  : [1, 0.7, 0, 0]   ≈ 0.7
-#   corpus_b/b-far  : [4, 0, 0, 0]     ≈ 3.0
-echo "[matrix-smoke] add vectors to both corpora:"
-curl -sS -o /dev/null -X POST "http://127.0.0.1:3110/v1/vectors/index/corpus_a/add" \
-  -H 'Content-Type: application/json' \
-  -d '{"items":[
-    {"id":"a-near","vector":[1.1,0,0,0],"metadata":{"label":"a near"}},
-    {"id":"a-mid","vector":[1,0.5,0,0],"metadata":{"label":"a mid"}},
-    {"id":"a-far","vector":[3,0,0,0],"metadata":{"label":"a far"}}
-  ]}'
-curl -sS -o /dev/null -X POST "http://127.0.0.1:3110/v1/vectors/index/corpus_b/add" \
-  -H 'Content-Type: application/json' \
-  -d '{"items":[
-    {"id":"b-near","vector":[1.05,0,0,0],"metadata":{"label":"b near"}},
-    {"id":"b-mid","vector":[1,0.7,0,0],"metadata":{"label":"b mid"}},
-    {"id":"b-far","vector":[4,0,0,0],"metadata":{"label":"b far"}}
-  ]}'
-echo "  ✓ 3 + 3 vectors loaded"
-
-# ── 1. /matrix/corpora lists both ─────────────────────────────────
-echo "[matrix-smoke] /matrix/corpora lists both:"
-RESP="$(curl -sS http://127.0.0.1:3110/v1/matrix/corpora)"
-COUNT="$(echo "$RESP" | jq -r '.count')"
-HAS_A="$(echo "$RESP" | jq -r '.corpora | index("corpus_a") != null')"
-HAS_B="$(echo "$RESP" | jq -r '.corpora | index("corpus_b") != null')"
-if [ "$COUNT" = "2" ] && [ "$HAS_A" = "true" ] && [ "$HAS_B" = "true" ]; then
-  echo "  ✓ count=2, both corpora listed"
-else
-  echo "  ✗ resp: $RESP"; FAILED=1
-fi
-
-# ── 2. multi-corpus search returns hits from BOTH ─────────────────
-echo "[matrix-smoke] /matrix/search multi-corpus retrieve+merge:"
-RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-  -H 'Content-Type: application/json' \
-  -d '{"query_vector":[1,0,0,0],"corpora":["corpus_a","corpus_b"],"k":4,"per_corpus_k":3}')"
-RESULTS_LEN="$(echo "$RESP" | jq -r '.results | length')"
-A_COUNT="$(echo "$RESP" | jq -r '.per_corpus_counts.corpus_a')"
-B_COUNT="$(echo "$RESP" | jq -r '.per_corpus_counts.corpus_b')"
-HAS_A_RESULT="$(echo "$RESP" | jq -r '[.results[] | select(.corpus=="corpus_a")] | length > 0')"
-HAS_B_RESULT="$(echo "$RESP" | jq -r '[.results[] | select(.corpus=="corpus_b")] | length > 0')"
-if [ "$RESULTS_LEN" = "4" ] && [ "$A_COUNT" = "3" ] && [ "$B_COUNT" = "3" ] && [ "$HAS_A_RESULT" = "true" ] && [ "$HAS_B_RESULT" = "true" ]; then
-  echo "  ✓ 4 merged results · 3+3 per-corpus · both corpora represented"
-else
-  echo "  ✗ len=$RESULTS_LEN per_corpus={a:$A_COUNT b:$B_COUNT} a_hit=$HAS_A_RESULT b_hit=$HAS_B_RESULT"
-  echo "    full: $RESP"
-  FAILED=1
-fi
-
-# ── 3. distance-merged top-k correct across corpora ───────────────
-echo "[matrix-smoke] top hit comes from corpus_b (b-near is globally closest):"
-TOP_ID="$(echo "$RESP" | jq -r '.results[0].id')"
-TOP_CORPUS="$(echo "$RESP" | jq -r '.results[0].corpus')"
-if [ "$TOP_ID" = "b-near" ] && [ "$TOP_CORPUS" = "corpus_b" ]; then
-  echo "  ✓ top hit: id=b-near corpus=corpus_b (closer than corpus_a's a-near)"
-else
-  echo "  ✗ top: id=$TOP_ID corpus=$TOP_CORPUS (expected b-near/corpus_b)"
-  FAILED=1
-fi
-
-# ── 4. corpus attribution preserved in metadata ───────────────────
-echo "[matrix-smoke] metadata preserved on merged results:"
-TOP_LABEL="$(echo "$RESP" | jq -r '.results[0].metadata.label')"
-if [ "$TOP_LABEL" = "b near" ]; then
-  echo "  ✓ metadata.label round-trips through matrix"
-else
-  echo "  ✗ label=$TOP_LABEL"; FAILED=1
-fi
-
-# ── 5. distances ascending in result list ─────────────────────────
-echo "[matrix-smoke] results sorted by distance ascending:"
-ASCENDING="$(echo "$RESP" | jq -r '[.results[].distance] | . == (sort)')"
-if [ "$ASCENDING" = "true" ]; then
-  echo "  ✓ distances ascending"
-else
-  echo "  ✗ distances not sorted: $(echo "$RESP" | jq -c '[.results[].distance]')"
-  FAILED=1
-fi
-
-# ── 6. negative paths ─────────────────────────────────────────────
-echo "[matrix-smoke] empty corpora → 400:"
-HTTP_400="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/search \
-  -H 'Content-Type: application/json' \
-  -d '{"query_vector":[1,0,0,0],"corpora":[],"k":4}')"
-echo "[matrix-smoke] missing corpus name → 502:"
-HTTP_502="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/search \
-  -H 'Content-Type: application/json' \
-  -d '{"query_vector":[1,0,0,0],"corpora":["does_not_exist"],"k":4}')"
-echo "[matrix-smoke] no query (empty text and vector) → 400:"
-HTTP_400b="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/search \
-  -H 'Content-Type: application/json' \
-  -d '{"corpora":["corpus_a"],"k":4}')"
-if [ "$HTTP_400" = "400" ] && [ "$HTTP_502" = "502" ] && [ "$HTTP_400b" = "400" ]; then
-  echo "  ✓ empty=400, missing-corpus=502, no-query=400"
-else
-  echo "  ✗ empty=$HTTP_400 missing=$HTTP_502 noquery=$HTTP_400b"
-  FAILED=1
-fi
-
-# ── 7. metadata filter (component B — staffing-side structured gate)
-echo "[matrix-smoke] metadata_filter drops non-matching results:"
-RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-  -H 'Content-Type: application/json' \
-  -d '{"query_vector":[1,0,0,0],"corpora":["corpus_a","corpus_b"],"k":4,"per_corpus_k":3,
-       "metadata_filter":{"label":["a near","b near"]}}')"
-RESULTS_LEN="$(echo "$RESP" | jq -r '.results | length')"
-DROPPED="$(echo "$RESP" | jq -r '.metadata_filter_dropped')"
-KEPT_LABELS="$(echo "$RESP" | jq -r '[.results[].metadata.label] | sort | join(",")')"
-if [ "$RESULTS_LEN" = "2" ] && [ "$DROPPED" = "4" ] && [ "$KEPT_LABELS" = "a near,b near" ]; then
-  echo "  ✓ filter kept 2 ('a near' + 'b near'), dropped 4 mid/far entries"
-else
-  echo "  ✗ len=$RESULTS_LEN dropped=$DROPPED labels=$KEPT_LABELS"
-  echo "    full: $RESP"
-  FAILED=1
-fi
-
-if [ "$FAILED" -eq 0 ]; then
-  echo "[matrix-smoke] Matrix acceptance gate: PASSED"
-  exit 0
-else
-  echo "[matrix-smoke] Matrix acceptance gate: FAILED"
-  exit 1
-fi
--- a/scripts/multi_corpus_e2e.sh
+++ b/scripts/multi_corpus_e2e.sh
@ -1,132 +0,0 @@
-#!/usr/bin/env bash
-# Multi-corpus reality test — first deep-field test with TWO real
-# staffing corpora composed via /v1/matrix/search.
-#
-# Pipeline:
-#   - Bring up the Go stack (storaged, embedd, vectord, matrixd, gateway)
-#   - Ingest workers (5000 rows from workers_500k.parquet)
-#   - Ingest candidates (1000 rows from candidates.parquet)
-#   - Run a real query through /v1/matrix/search with both corpora
-#   - Print the merged top-k with corpus attribution
-#
-# Headline assertion: results include hits from BOTH corpora (the
-# whole point of multi-corpus matrix retrieval).
-#
-# Requires: Ollama on :11434 with nomic-embed-text loaded. Skips
-# (exit 0) when Ollama is absent.
-#
-# Usage: ./scripts/multi_corpus_e2e.sh
-#        ./scripts/multi_corpus_e2e.sh "your custom query"
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-QUERY="${1:-Forklift operator with OSHA-30 certification, warehouse experience}"
-WORKERS_LIMIT="${WORKERS_LIMIT:-5000}"
-
-if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
-  echo "[multi-corpus-e2e] Ollama not reachable on :11434 — skipping"
-  exit 0
-fi
-
-echo "[multi-corpus-e2e] building binaries..."
-go build -o bin/ ./cmd/storaged ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway \
-                  ./scripts/staffing_workers ./scripts/staffing_candidates
-
-pkill -f "bin/(storaged|embedd|vectord|matrixd|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-CFG="$TMP/e2e.toml"
-
-cleanup() {
-  echo "[multi-corpus-e2e] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-# Ephemeral mode (vectord storaged_url=""); same rationale as
-# candidates_e2e — don't pollute MinIO _vectors/ between runs.
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-
-[vectord]
-bind = "127.0.0.1:3215"
-storaged_url = ""
-
-[matrixd]
-bind = "127.0.0.1:3218"
-embedd_url  = "http://127.0.0.1:3216"
-vectord_url = "http://127.0.0.1:3215"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-echo "[multi-corpus-e2e] launching stack..."
-./bin/storaged -config "$CFG" > /tmp/storaged.log 2>&1 & PIDS+=($!)
-poll_health 3211 || { echo "storaged failed"; exit 1; }
-./bin/embedd -config "$CFG" > /tmp/embedd.log 2>&1 & PIDS+=($!)
-poll_health 3216 || { echo "embedd failed"; exit 1; }
-./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!)
-poll_health 3215 || { echo "vectord failed"; exit 1; }
-./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!)
-poll_health 3218 || { echo "matrixd failed"; exit 1; }
-./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!)
-poll_health 3110 || { echo "gateway failed"; exit 1; }
-
-echo
-echo "[multi-corpus-e2e] ingest workers (limit=$WORKERS_LIMIT)..."
-./bin/staffing_workers -limit "$WORKERS_LIMIT"
-
-echo
-echo "[multi-corpus-e2e] ingest candidates..."
-./bin/staffing_candidates -skip-populate=false -query "$QUERY" 2>&1 | grep -v "^\[candidates\]\(matrix\|reality\)" || true
-
-echo
-echo "[multi-corpus-e2e] /matrix/corpora — confirm both registered:"
-curl -sS http://127.0.0.1:3110/v1/matrix/corpora | jq -c
-
-echo
-echo "[multi-corpus-e2e] multi-corpus query: $QUERY"
-RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-  -H 'Content-Type: application/json' \
-  -d "{\"query_text\":\"$QUERY\",\"corpora\":[\"workers\",\"candidates\"],\"k\":8,\"per_corpus_k\":6}")"
-
-# Sanity / headline assertions
-WORKER_HITS="$(echo "$RESP" | jq -r '[.results[] | select(.corpus=="workers")] | length')"
-CAND_HITS="$(echo "$RESP" | jq -r   '[.results[] | select(.corpus=="candidates")] | length')"
-TOTAL="$(echo "$RESP" | jq -r '.results | length')"
-
-echo
-echo "[multi-corpus-e2e] merged top-$TOTAL: workers=$WORKER_HITS candidates=$CAND_HITS"
-echo "$RESP" | jq -r '.results[] | "  \(.corpus | .[0:1])  d=\(.distance | tostring | .[0:6])  \(.id)  \(.metadata.role // .metadata.skills // "n/a")"'
-
-if [ "$WORKER_HITS" -gt 0 ] && [ "$CAND_HITS" -gt 0 ]; then
-  echo
-  echo "[multi-corpus-e2e] PASS: both corpora represented in merged top-$TOTAL"
-  exit 0
-else
-  echo
-  echo "[multi-corpus-e2e] FAIL: corpus mix was workers=$WORKER_HITS candidates=$CAND_HITS"
-  exit 1
-fi
--- a/scripts/observer_smoke.sh
+++ b/scripts/observer_smoke.sh
@ -1,142 +0,0 @@
-#!/usr/bin/env bash
-# Observer smoke — autonomous-iteration witness service end-to-end.
-# All assertions go through gateway :3110.
-#
-# Validates:
-#   - POST /observer/event records an op (success path + scenario source)
-#   - GET /observer/stats aggregates by source + counts successes/failures
-#   - Stats.recent_scenario_ops surfaces scenario digests
-#   - Validation: empty endpoint → 400
-#   - Persistence: kill+restart observerd preserves ops via JSONL replay
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-echo "[observer-smoke] building observerd + gateway..."
-go build -o bin/ ./cmd/observerd ./cmd/gateway
-
-pkill -f "bin/(observerd|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-PERSIST="$TMP/ops.jsonl"
-CFG="$TMP/observer.toml"
-
-cleanup() {
-  echo "[observer-smoke] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-observerd_url = "http://127.0.0.1:3219"
-
-[observerd]
-bind = "127.0.0.1:3219"
-persist_path = "$PERSIST"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-launch_observerd() {
-  ./bin/observerd -config "$CFG" > /tmp/observerd.log 2>&1 &
-  OBSERVERD_PID=$!
-  PIDS+=($OBSERVERD_PID)
-  poll_health 3219 || { echo "observerd failed"; tail /tmp/observerd.log; return 1; }
-}
-
-echo "[observer-smoke] launching observerd → gateway..."
-launch_observerd
-./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
-PIDS+=($!)
-poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
-
-FAILED=0
-
-# ── 1. Record 5 ops: 3 success + 2 fail across 2 sources ─────────
-echo "[observer-smoke] record 5 ops:"
-for i in 1 2 3; do
-  curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/observer/event \
-    -H 'Content-Type: application/json' \
-    -d "{\"endpoint\":\"/v1/test\",\"input_summary\":\"ok-$i\",\"success\":true,\"duration_ms\":10,\"output_summary\":\"ok\",\"source\":\"mcp\"}"
-done
-for i in 1 2; do
-  curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/observer/event \
-    -H 'Content-Type: application/json' \
-    -d "{\"endpoint\":\"/v1/test\",\"input_summary\":\"fail-$i\",\"success\":false,\"duration_ms\":10,\"output_summary\":\"err\",\"error\":\"boom\",\"source\":\"scenario\",\"staffer_id\":\"st-$i\",\"event_kind\":\"fill\",\"role\":\"Forklift\"}"
-done
-echo "  ✓ 5 events posted"
-
-# ── 2. Stats aggregation ─────────────────────────────────────────
-echo "[observer-smoke] /observer/stats aggregates correctly:"
-STATS="$(curl -sS http://127.0.0.1:3110/v1/observer/stats)"
-TOT="$(echo "$STATS" | jq -r '.total')"
-OK="$(echo "$STATS" | jq -r '.successes')"
-ERR="$(echo "$STATS" | jq -r '.failures')"
-MCP="$(echo "$STATS" | jq -r '.by_source.mcp')"
-SCEN="$(echo "$STATS" | jq -r '.by_source.scenario')"
-RECENT_LEN="$(echo "$STATS" | jq -r '.recent_scenario_ops | length')"
-if [ "$TOT" = "5" ] && [ "$OK" = "3" ] && [ "$ERR" = "2" ] && [ "$MCP" = "3" ] && [ "$SCEN" = "2" ] && [ "$RECENT_LEN" = "2" ]; then
-  echo "  ✓ total=5 (3 ok + 2 fail) · by_source: mcp=3 scenario=2 · 2 scenario digests"
-else
-  echo "  ✗ total=$TOT ok=$OK err=$ERR mcp=$MCP scen=$SCEN recent=$RECENT_LEN"
-  echo "    full: $STATS"
-  FAILED=1
-fi
-
-# ── 3. Validation: empty endpoint → 400 ──────────────────────────
-echo "[observer-smoke] empty endpoint → 400:"
-HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/observer/event \
-  -H 'Content-Type: application/json' \
-  -d '{"endpoint":"","input_summary":"x","success":true,"duration_ms":1,"output_summary":"x"}')"
-if [ "$HTTP" = "400" ]; then
-  echo "  ✓ empty endpoint rejected"
-else
-  echo "  ✗ got $HTTP"; FAILED=1
-fi
-
-# ── 4. Persistence: kill + restart preserves ops ─────────────────
-echo "[observer-smoke] kill + restart observerd → ops survive:"
-kill $OBSERVERD_PID 2>/dev/null || true
-wait $OBSERVERD_PID 2>/dev/null || true
-sleep 0.3
-launch_observerd
-sleep 0.2
-STATS2="$(curl -sS http://127.0.0.1:3110/v1/observer/stats)"
-TOT2="$(echo "$STATS2" | jq -r '.total')"
-OK2="$(echo "$STATS2" | jq -r '.successes')"
-ERR2="$(echo "$STATS2" | jq -r '.failures')"
-if [ "$TOT2" = "5" ] && [ "$OK2" = "3" ] && [ "$ERR2" = "2" ]; then
-  echo "  ✓ total=5 ok=3 err=2 preserved through restart"
-else
-  echo "  ✗ post-restart total=$TOT2 ok=$OK2 err=$ERR2"; FAILED=1
-fi
-
-if [ "$FAILED" -eq 0 ]; then
-  echo "[observer-smoke] Observer acceptance gate: PASSED"
-  exit 0
-else
-  echo "[observer-smoke] Observer acceptance gate: FAILED"
-  exit 1
-fi
--- a/scripts/pathway_smoke.sh
+++ b/scripts/pathway_smoke.sh
@ -1,248 +0,0 @@
-#!/usr/bin/env bash
-# Pathway smoke — pathwayd Mem0-style versioned trace memory (ADR-004).
-# All assertions go through gateway :3110.
-#
-# Validates:
-#   - All 9 HTTP routes (add, add_idempotent, update, revise, retire,
-#     get, history, search, stats)
-#   - Revise creates a predecessor link; History walks the chain
-#     backward (the audit-trail property pathway memory exists for)
-#   - Retire excludes from Search default; still accessible via Get
-#   - AddIdempotent on existing UID bumps replay_count, doesn't replace
-#   - Negative paths: 404 on unknown UIDs, 404 on missing predecessor,
-#     400 on invalid content
-#   - Persistence: kill + restart pathwayd → all traces survive
-#
-# Usage: ./scripts/pathway_smoke.sh
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-echo "[pathway-smoke] building pathwayd + gateway..."
-go build -o bin/ ./cmd/pathwayd ./cmd/gateway
-
-pkill -f "bin/(pathwayd|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-PERSIST="$TMP/pathway.jsonl"
-CFG="$TMP/pathwayd.toml"
-
-cleanup() {
-  echo "[pathway-smoke] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-# Custom toml — same defaults as lakehouse.toml but with persist_path
-# pointing at the temp file so kill+restart actually rehydrates.
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-
-[pathwayd]
-bind = "127.0.0.1:3217"
-persist_path = "$PERSIST"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-launch_pathwayd() {
-  ./bin/pathwayd -config "$CFG" > /tmp/pathwayd.log 2>&1 &
-  PATHWAYD_PID=$!
-  PIDS+=($PATHWAYD_PID)
-  poll_health 3217 || { echo "pathwayd failed"; tail /tmp/pathwayd.log; return 1; }
-}
-
-launch_gateway() {
-  ./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
-  PIDS+=($!)
-  poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; return 1; }
-}
-
-echo "[pathway-smoke] launching pathwayd → gateway..."
-launch_pathwayd
-launch_gateway
-
-FAILED=0
-
-# ── 1. Add ────────────────────────────────────────────────────────
-echo "[pathway-smoke] Add → fresh UID + replay_count=1:"
-RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/add \
-  -H 'Content-Type: application/json' \
-  -d '{"content":{"approach":"forklift-OSHA-30","outcome":"hired"},"tags":["staffing","fill"]}')"
-UID_A="$(echo "$RESP" | jq -r '.uid')"
-RC_A="$(echo "$RESP" | jq -r '.replay_count')"
-if [ -n "$UID_A" ] && [ "$UID_A" != "null" ] && [ "$RC_A" = "1" ]; then
-  echo "  ✓ uid=$UID_A replay_count=1"
-else
-  echo "  ✗ resp: $RESP"; FAILED=1
-fi
-
-# ── 2. Get ────────────────────────────────────────────────────────
-echo "[pathway-smoke] Get → returns same trace:"
-RESP="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_A")"
-APPROACH="$(echo "$RESP" | jq -r '.content.approach')"
-if [ "$APPROACH" = "forklift-OSHA-30" ]; then
-  echo "  ✓ content.approach round-trips"
-else
-  echo "  ✗ resp: $RESP"; FAILED=1
-fi
-
-# ── 3. AddIdempotent (replay) ─────────────────────────────────────
-echo "[pathway-smoke] AddIdempotent same UID → replay_count++:"
-RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/add_idempotent \
-  -H 'Content-Type: application/json' \
-  -d "{\"uid\":\"$UID_A\",\"content\":{\"approach\":\"forklift-OSHA-30\",\"outcome\":\"hired\"}}")"
-RC_REPLAY="$(echo "$RESP" | jq -r '.replay_count')"
-if [ "$RC_REPLAY" = "2" ]; then
-  echo "  ✓ replay_count bumped to 2"
-else
-  echo "  ✗ replay_count=$RC_REPLAY"; FAILED=1
-fi
-
-# ── 4. Update ─────────────────────────────────────────────────────
-echo "[pathway-smoke] Update → in-place content replace:"
-HTTP="$(curl -sS -o "$TMP/upd.json" -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/update \
-  -H 'Content-Type: application/json' \
-  -d "{\"uid\":\"$UID_A\",\"content\":{\"approach\":\"forklift-OSHA-30\",\"outcome\":\"hired\",\"note\":\"cert verified\"}}")"
-if [ "$HTTP" = "200" ]; then
-  NOTE="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_A" | jq -r '.content.note')"
-  if [ "$NOTE" = "cert verified" ]; then
-    echo "  ✓ Update applied and persisted"
-  else
-    echo "  ✗ note=$NOTE after update"; FAILED=1
-  fi
-else
-  echo "  ✗ Update HTTP=$HTTP"; FAILED=1
-fi
-
-# ── 5. Revise → predecessor link ──────────────────────────────────
-echo "[pathway-smoke] Revise → new UID with predecessor link:"
-RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/revise \
-  -H 'Content-Type: application/json' \
-  -d "{\"predecessor_uid\":\"$UID_A\",\"content\":{\"approach\":\"forklift-OSHA-30+CDL\",\"outcome\":\"upgraded\"},\"tags\":[\"staffing\",\"revision\"]}")"
-UID_B="$(echo "$RESP" | jq -r '.uid')"
-PRED="$(echo "$RESP" | jq -r '.predecessor_uid')"
-if [ "$UID_B" != "$UID_A" ] && [ "$PRED" = "$UID_A" ]; then
-  echo "  ✓ revision uid=$UID_B predecessor=$UID_A"
-else
-  echo "  ✗ uid=$UID_B pred=$PRED"; FAILED=1
-fi
-
-# ── 6. History → 2-trace chain ────────────────────────────────────
-echo "[pathway-smoke] History → walks chain backward:"
-RESP="$(curl -sS "http://127.0.0.1:3110/v1/pathway/history/$UID_B")"
-LEN="$(echo "$RESP" | jq -r '.length')"
-HEAD="$(echo "$RESP" | jq -r '.chain[0].uid')"
-TAIL="$(echo "$RESP" | jq -r '.chain[1].uid')"
-if [ "$LEN" = "2" ] && [ "$HEAD" = "$UID_B" ] && [ "$TAIL" = "$UID_A" ]; then
-  echo "  ✓ chain length=2, [0]=$UID_B [1]=$UID_A"
-else
-  echo "  ✗ len=$LEN head=$HEAD tail=$TAIL"; FAILED=1
-fi
-
-# ── 7. Search by tag ──────────────────────────────────────────────
-echo "[pathway-smoke] Search tag=staffing → finds both traces:"
-COUNT="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/search \
-  -H 'Content-Type: application/json' -d '{"tag":"staffing"}' | jq -r '.count')"
-if [ "$COUNT" = "2" ]; then
-  echo "  ✓ tag search count=2"
-else
-  echo "  ✗ count=$COUNT"; FAILED=1
-fi
-
-# ── 8. Retire → excluded from search default, still in Get ────────
-echo "[pathway-smoke] Retire → excluded from Search but Get-able:"
-HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/retire \
-  -H 'Content-Type: application/json' -d "{\"uid\":\"$UID_A\"}")"
-if [ "$HTTP" != "204" ]; then echo "  ✗ retire HTTP=$HTTP"; FAILED=1; fi
-
-# Default search excludes retired → only revision (UID_B) remains
-COUNT_DEFAULT="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/search \
-  -H 'Content-Type: application/json' -d '{"tag":"staffing"}' | jq -r '.count')"
-# IncludeRetired=true brings UID_A back
-COUNT_ALL="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/search \
-  -H 'Content-Type: application/json' -d '{"tag":"staffing","include_retired":true}' | jq -r '.count')"
-# Get on retired UID still returns the trace (audit trail intact)
-RETIRED_FLAG="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_A" | jq -r '.retired')"
-if [ "$COUNT_DEFAULT" = "1" ] && [ "$COUNT_ALL" = "2" ] && [ "$RETIRED_FLAG" = "true" ]; then
-  echo "  ✓ retired excluded from default Search, included with flag, still Get-able"
-else
-  echo "  ✗ default=$COUNT_DEFAULT all=$COUNT_ALL retired=$RETIRED_FLAG"; FAILED=1
-fi
-
-# ── 9. Stats ──────────────────────────────────────────────────────
-echo "[pathway-smoke] Stats → total/active/retired counters:"
-STATS="$(curl -sS http://127.0.0.1:3110/v1/pathway/stats)"
-T="$(echo "$STATS" | jq -r '.Total')"
-A="$(echo "$STATS" | jq -r '.Active')"
-R="$(echo "$STATS" | jq -r '.Retired')"
-if [ "$T" = "2" ] && [ "$A" = "1" ] && [ "$R" = "1" ]; then
-  echo "  ✓ total=2 active=1 retired=1"
-else
-  echo "  ✗ total=$T active=$A retired=$R"; FAILED=1
-fi
-
-# ── 10. Negative paths ────────────────────────────────────────────
-echo "[pathway-smoke] Negative paths → 4xx semantics:"
-GET_404="$(curl -sS -o /dev/null -w '%{http_code}' http://127.0.0.1:3110/v1/pathway/get/no-such-uid)"
-UPD_404="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/update \
-  -H 'Content-Type: application/json' -d '{"uid":"no-such-uid","content":{}}')"
-REV_404="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/revise \
-  -H 'Content-Type: application/json' -d '{"predecessor_uid":"no-such-uid","content":{}}')"
-RET_404="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/retire \
-  -H 'Content-Type: application/json' -d '{"uid":"no-such-uid"}')"
-ADD_400="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/add \
-  -H 'Content-Type: application/json' -d '{"content":not-json}')"
-if [ "$GET_404" = "404" ] && [ "$UPD_404" = "404" ] && [ "$REV_404" = "404" ] && [ "$RET_404" = "404" ] && [ "$ADD_400" = "400" ]; then
-  echo "  ✓ get/update/revise/retire on unknown → 404; bad content → 400"
-else
-  echo "  ✗ get=$GET_404 upd=$UPD_404 rev=$REV_404 ret=$RET_404 add=$ADD_400"; FAILED=1
-fi
-
-# ── 11. Persistence → kill + restart preserves all traces ─────────
-echo "[pathway-smoke] kill + restart pathwayd → state survives:"
-kill $PATHWAYD_PID 2>/dev/null || true
-wait $PATHWAYD_PID 2>/dev/null || true
-sleep 0.3
-launch_pathwayd
-sleep 0.2
-
-# Both traces should reappear, retired flag preserved, replay_count preserved
-RESP_A="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_A")"
-RESP_B="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_B")"
-RC_AFTER="$(echo "$RESP_A" | jq -r '.replay_count')"
-RETIRED_AFTER="$(echo "$RESP_A" | jq -r '.retired')"
-PRED_AFTER="$(echo "$RESP_B" | jq -r '.predecessor_uid')"
-if [ "$RC_AFTER" = "2" ] && [ "$RETIRED_AFTER" = "true" ] && [ "$PRED_AFTER" = "$UID_A" ]; then
-  echo "  ✓ replay_count, retired flag, predecessor link all preserved"
-else
-  echo "  ✗ replay_count=$RC_AFTER retired=$RETIRED_AFTER pred=$PRED_AFTER"; FAILED=1
-fi
-
-if [ "$FAILED" -eq 0 ]; then
-  echo "[pathway-smoke] Pathway acceptance gate: PASSED"
-  exit 0
-else
-  echo "[pathway-smoke] Pathway acceptance gate: FAILED"
-  exit 1
-fi
--- a/scripts/playbook_smoke.sh
+++ b/scripts/playbook_smoke.sh
@ -1,198 +0,0 @@
-#!/usr/bin/env bash
-# Playbook smoke — learning-loop integration end-to-end.
-# All assertions go through gateway :3110.
-#
-# Validates the full boost cycle:
-#   1. Build a test corpus with 3 items
-#   2. Query → get baseline ranking
-#   3. Record a playbook: query → bottom-ranked answer with score=1.0
-#   4. Re-query with use_playbook=true
-#   5. Assert: the recorded answer's distance ≈ 0.5 × baseline (boost
-#      math: distance' = distance × (1 - 0.5×score))
-#   6. Assert: PlaybookBoosted >= 1 in the response
-#
-# Requires Ollama on :11434 with nomic-embed-text loaded — Record
-# embeds the query_text. Skips (exit 0) when Ollama is absent.
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
-  echo "[playbook-smoke] Ollama not reachable on :11434 — skipping"
-  exit 0
-fi
-
-echo "[playbook-smoke] building stack..."
-go build -o bin/ ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway
-
-pkill -f "bin/(embedd|vectord|matrixd|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-CFG="$TMP/playbook.toml"
-
-cleanup() {
-  echo "[playbook-smoke] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-
-[vectord]
-bind = "127.0.0.1:3215"
-storaged_url = ""
-
-[matrixd]
-bind = "127.0.0.1:3218"
-embedd_url  = "http://127.0.0.1:3216"
-vectord_url = "http://127.0.0.1:3215"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-echo "[playbook-smoke] launching embedd → vectord → matrixd → gateway..."
-./bin/embedd  -config "$CFG" > /tmp/embedd.log  2>&1 & PIDS+=($!)
-poll_health 3216 || { echo "embedd failed"; tail /tmp/embedd.log; exit 1; }
-./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!)
-poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
-./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!)
-poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
-./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!)
-poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
-
-FAILED=0
-
-# Embed three corpus items + the query, all via /v1/embed.
-echo "[playbook-smoke] embedding 3 corpus items + query..."
-EMBEDS="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
-  -H 'Content-Type: application/json' \
-  -d '{"texts":["alpha staffing query test","bravo distinct content","charlie unrelated topic","alpha staffing query test full prompt"]}')"
-V_A="$(echo "$EMBEDS" | jq -c '.vectors[0]')"
-V_B="$(echo "$EMBEDS" | jq -c '.vectors[1]')"
-V_C="$(echo "$EMBEDS" | jq -c '.vectors[2]')"
-V_Q="$(echo "$EMBEDS" | jq -c '.vectors[3]')"
-
-# Build corpus
-echo "[playbook-smoke] create corpus widgets + add 3 items..."
-curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index \
-  -H 'Content-Type: application/json' \
-  -d '{"name":"widgets","dimension":768,"distance":"cosine"}'
-curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index/widgets/add \
-  -H 'Content-Type: application/json' \
-  -d "$(jq -n --argjson va "$V_A" --argjson vb "$V_B" --argjson vc "$V_C" \
-    '{items:[
-      {id:"widget-a", vector:$va, metadata:{label:"a"}},
-      {id:"widget-b", vector:$vb, metadata:{label:"b"}},
-      {id:"widget-c", vector:$vc, metadata:{label:"c"}}
-    ]}')"
-
-# Baseline matrix search (no playbook) — using query_vector to skip
-# embedd round-trip and keep the test deterministic on the geometry
-# we know.
-echo "[playbook-smoke] baseline search (no playbook):"
-BASELINE="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-  -H 'Content-Type: application/json' \
-  -d "$(jq -n --argjson v "$V_Q" '{query_vector:$v, corpora:["widgets"], k:3}')")"
-BASE_ORDER="$(echo "$BASELINE" | jq -r '[.results[].id] | join(",")')"
-BASE_C_DIST="$(echo "$BASELINE" | jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1')"
-echo "  baseline order: $BASE_ORDER  widget-c distance=$BASE_C_DIST"
-
-# Record a playbook entry for the query → widget-c (use the same
-# query_text that the playbook will be re-queried by, exact match).
-QUERY_TEXT="alpha staffing query test full prompt"
-echo "[playbook-smoke] record playbook: ($QUERY_TEXT) → widget-c score=1.0"
-RECORD_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/record \
-  -H 'Content-Type: application/json' \
-  -d "$(jq -n --arg q "$QUERY_TEXT" \
-    '{query_text:$q, answer_id:"widget-c", answer_corpus:"widgets", score:1.0, tags:["smoke"]}')")"
-PB_ID="$(echo "$RECORD_RESP" | jq -r '.playbook_id // empty')"
-if [ -z "$PB_ID" ]; then
-  echo "  ✗ no playbook_id in response: $RECORD_RESP"; FAILED=1
-else
-  echo "  ✓ playbook_id=$PB_ID"
-fi
-
-# Re-search with use_playbook=true. Use query_text so matrixd embeds
-# it again (proves end-to-end). The newly-recorded playbook entry has
-# the SAME query_text → cosine distance ~0 → boost applies to widget-c.
-echo "[playbook-smoke] boosted search (use_playbook=true):"
-BOOSTED="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-  -H 'Content-Type: application/json' \
-  -d "$(jq -n --arg q "$QUERY_TEXT" \
-    '{query_text:$q, corpora:["widgets"], k:3, use_playbook:true, playbook_max_distance:0.5}')")"
-BOOST_ORDER="$(echo "$BOOSTED" | jq -r '[.results[].id] | join(",")')"
-BOOST_C_DIST="$(echo "$BOOSTED" | jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1')"
-PB_BOOSTED="$(echo "$BOOSTED" | jq -r '.playbook_boosted // 0')"
-echo "  boosted order: $BOOST_ORDER  widget-c distance=$BOOST_C_DIST  playbook_boosted=$PB_BOOSTED"
-
-# ── Assertion 1: PlaybookBoosted >= 1 ────────────────────────────
-if [ "$PB_BOOSTED" -ge 1 ]; then
-  echo "  ✓ playbook_boosted=$PB_BOOSTED ≥ 1"
-else
-  echo "  ✗ playbook_boosted=$PB_BOOSTED (expected ≥ 1)"; FAILED=1
-fi
-
-# ── Assertion 2: widget-c distance halved (score=1.0 → 0.5× factor)
-# Allow some tolerance because the query and recorded query may not
-# be byte-identical depending on Ollama's tokenization stability.
-RATIO="$(awk -v b="$BASE_C_DIST" -v c="$BOOST_C_DIST" 'BEGIN{ if (b<=0) print -1; else print c/b }')"
-echo "  widget-c distance ratio (boosted/baseline) = $RATIO (expect ≈ 0.5)"
-WITHIN="$(awk -v r="$RATIO" 'BEGIN{ print (r>=0.40 && r<=0.60) ? "true" : "false" }')"
-if [ "$WITHIN" = "true" ]; then
-  echo "  ✓ ratio in [0.40, 0.60] — boost applied correctly"
-else
-  echo "  ✗ ratio out of band: $RATIO"; FAILED=1
-fi
-
-# ── 4. /matrix/playbooks/bulk — component C (operational rating wiring)
-echo "[playbook-smoke] bulk record 3 entries:"
-BULK_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/bulk \
-  -H 'Content-Type: application/json' \
-  -d "$(jq -n '{
-    entries: [
-      {query_text: "alpha test query",   answer_id: "widget-a", answer_corpus: "widgets", score: 0.9},
-      {query_text: "bravo test query",   answer_id: "widget-b", answer_corpus: "widgets", score: 0.8},
-      {query_text: "",                   answer_id: "x",        answer_corpus: "widgets", score: 0.5}
-    ]
-  }')")"
-RECORDED="$(echo "$BULK_RESP" | jq -r '.recorded')"
-FAIL="$(echo "$BULK_RESP" | jq -r '.failed')"
-GOT_PB_A="$(echo "$BULK_RESP" | jq -r '.results[0].playbook_id // empty')"
-ERR_BAD="$(echo "$BULK_RESP" | jq -r '.results[2].error // empty')"
-if [ "$RECORDED" = "2" ] && [ "$FAIL" = "1" ] && [ -n "$GOT_PB_A" ] && [ -n "$ERR_BAD" ]; then
-  echo "  ✓ 2 recorded, 1 failed (empty query_text caught), per-entry IDs/errors returned"
-else
-  echo "  ✗ recorded=$RECORDED failed=$FAIL pb_a=$GOT_PB_A err=$ERR_BAD"
-  echo "    full: $BULK_RESP"
-  FAILED=1
-fi
-
-if [ "$FAILED" -eq 0 ]; then
-  echo "[playbook-smoke] Playbook acceptance gate: PASSED"
-  exit 0
-else
-  echo "[playbook-smoke] Playbook acceptance gate: FAILED"
-  exit 1
-fi
--- a/scripts/relevance_smoke.sh
+++ b/scripts/relevance_smoke.sh
@ -1,156 +0,0 @@
-#!/usr/bin/env bash
-# Relevance smoke — code-relevance filter via matrixd /matrix/relevance.
-# All assertions go through gateway :3110.
-#
-# Validates the headline adjacency-pollution scenario:
-#   Focus: crates/queryd/src/db.go which defines Connector.
-#   Chunk A is about Connector → kept (defined_match).
-#   Chunk B is about catalogd::Registry which db.go imports → outranked
-#     by Chunk A.
-#   Chunk C is unrelated → dropped (no signals fire).
-#
-# Plus negative paths:
-#   - Empty chunks → 400
-#   - Threshold honored when set explicitly
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-echo "[relevance-smoke] building matrixd + vectord + gateway..."
-go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway
-
-pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-CFG="$TMP/relevance.toml"
-
-cleanup() {
-  echo "[relevance-smoke] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-# Custom toml: vectord persistence disabled. /matrix/relevance doesn't
-# touch vectord at all, but matrixd config requires the URL anyway.
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-
-[vectord]
-bind = "127.0.0.1:3215"
-storaged_url = ""
-
-[matrixd]
-bind = "127.0.0.1:3218"
-embedd_url  = "http://127.0.0.1:3216"
-vectord_url = "http://127.0.0.1:3215"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-echo "[relevance-smoke] launching vectord → matrixd → gateway..."
-./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 &
-PIDS+=($!)
-poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
-
-./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 &
-PIDS+=($!)
-poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
-
-./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
-PIDS+=($!)
-poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
-
-FAILED=0
-
-# ── 1. Adjacency-pollution scenario ──────────────────────────────
-echo "[relevance-smoke] adjacency-pollution: Connector outranks Registry, junk dropped:"
-PAYLOAD='{
-  "focus": {
-    "Path": "crates/queryd/src/db.go",
-    "Content": "pub struct Connector {}\npub fn open_connector() *Connector { return nil }\nuse catalogd::Registry;"
-  },
-  "chunks": [
-    {"source":"lakehouse_symbols_v1","doc_id":"symbol:queryd::struct::Connector","text":"Connector wraps the DuckDB handle. open_connector creates one.","score":0.9},
-    {"source":"lakehouse_symbols_v1","doc_id":"symbol:catalogd::struct::Registry","text":"Registry stores manifests. Used by ingestd.","score":0.85},
-    {"source":"lakehouse_symbols_v1","doc_id":"symbol:totally_other::Thing","text":"completely unrelated text about something else entirely","score":0.7}
-  ],
-  "threshold": 0.3
-}'
-RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/relevance -H 'Content-Type: application/json' -d "$PAYLOAD")"
-
-# Connector chunk should be in kept
-CONNECTOR_KEPT="$(echo "$RESP" | jq -r '[.kept[] | select(.doc_id | contains("Connector"))] | length')"
-# The unrelated junk chunk should be in dropped
-JUNK_DROPPED="$(echo "$RESP" | jq -r '[.dropped[] | select(.doc_id | contains("Thing"))] | length')"
-# Connector should outrank Registry (whichever bucket they end up in)
-CONN_REL="$(echo "$RESP" | jq -r '[.kept[], .dropped[] | select(.doc_id | contains("Connector"))] | .[0].relevance // -999')"
-REG_REL="$(echo "$RESP"  | jq -r '[.kept[], .dropped[] | select(.doc_id | contains("Registry"))]  | .[0].relevance // -999')"
-TOTAL_IN="$(echo "$RESP" | jq -r '.total_in')"
-
-CONN_OUTRANKS_REG="$(awk -v a="$CONN_REL" -v b="$REG_REL" 'BEGIN{print (a>b)?"true":"false"}')"
-
-if [ "$CONNECTOR_KEPT" = "1" ] && [ "$JUNK_DROPPED" = "1" ] && [ "$CONN_OUTRANKS_REG" = "true" ] && [ "$TOTAL_IN" = "3" ]; then
-  echo "  ✓ Connector kept, junk dropped, Connector ($CONN_REL) > Registry ($REG_REL)"
-else
-  echo "  ✗ kept_connector=$CONNECTOR_KEPT dropped_junk=$JUNK_DROPPED conn=$CONN_REL reg=$REG_REL total=$TOTAL_IN"
-  echo "    full: $RESP"
-  FAILED=1
-fi
-
-# ── 2. Empty chunks → 400 ────────────────────────────────────────
-echo "[relevance-smoke] empty chunks → 400:"
-HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/relevance \
-  -H 'Content-Type: application/json' \
-  -d '{"focus":{"Path":"x"},"chunks":[]}')"
-if [ "$HTTP" = "400" ]; then
-  echo "  ✓ 400 on empty chunks"
-else
-  echo "  ✗ got $HTTP"; FAILED=1
-fi
-
-# ── 3. Threshold honored ─────────────────────────────────────────
-echo "[relevance-smoke] threshold=10 (impossibly high) drops everything:"
-PAYLOAD2='{
-  "focus": {"Path": "x.go", "Content": "pub fn known() {}", "DefinedSymbols": ["known"]},
-  "chunks": [
-    {"source":"s","doc_id":"d1","text":"known appears here","score":0.9}
-  ],
-  "threshold": 10
-}'
-RESP2="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/relevance -H 'Content-Type: application/json' -d "$PAYLOAD2")"
-KEPT_COUNT="$(echo "$RESP2" | jq -r '.kept | length')"
-DROP_COUNT="$(echo "$RESP2" | jq -r '.dropped | length')"
-if [ "$KEPT_COUNT" = "0" ] && [ "$DROP_COUNT" = "1" ]; then
-  echo "  ✓ threshold=10 drops everything (0 kept / 1 dropped)"
-else
-  echo "  ✗ kept=$KEPT_COUNT dropped=$DROP_COUNT"; FAILED=1
-fi
-
-if [ "$FAILED" -eq 0 ]; then
-  echo "[relevance-smoke] Relevance acceptance gate: PASSED"
-  exit 0
-else
-  echo "[relevance-smoke] Relevance acceptance gate: FAILED"
-  exit 1
-fi
--- a/scripts/staffing_500k/main.go
+++ b/scripts/staffing_500k/main.go
@ -1,14 +1,13 @@
-// Staffing co-pilot scale test driver — workers_500k corpus.
+// Staffing co-pilot scale test driver.
 //
-// Pipeline: workers_500k.csv → /v1/embed → /v1/vectors/index/workers_500k/add.
-// The pipeline itself lives in internal/corpusingest; this driver
-// provides the CSV → Row mapping and the post-ingest semantic queries
-// that are the human-readable check ("does forklift OSHA-30 actually
-// retrieve forklift workers?").
+// Pipeline: workers_500k.csv → /v1/embed (batched, parallel) →
+// /v1/vectors/index/workers_500k/add (batched). Then runs a handful
+// of semantic queries against the populated index and prints the
+// top hits — the human-readable check that "find workers like X"
+// actually returns relevant workers.
 //
-// Designed to be re-run safely; index gets DELETEd at the start
-// when -drop is set so leftover state doesn't bias recall.
-
+// Designed to be re-run; index gets DELETEd at the start so leftover
+// state from prior runs doesn't bias recall.
 package main

 import (
@ -16,138 +15,69 @@ import (
 	"context"
 	"encoding/csv"
 	"encoding/json"
-	"errors"
 	"flag"
 	"fmt"
+	"io"
 	"log"
 	"net/http"
 	"os"
 	"strings"
+	"sync"
+	"sync/atomic"
 	"time"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/corpusingest"
 )

 const (
 	indexName = "workers_500k"
 	dim       = 768

-	// Column indexes in workers_500k.csv. Stable contract; if the CSV
-	// schema changes these need updating.
-	colWorkerID = 0
-	colName     = 1
-	colRole     = 2
-	colCity     = 5
-	colState    = 6
-	colSkills   = 8
-	colCerts    = 9
-	colResume   = 17
+	embedConcurrency = 8    // matches Ollama-on-A4000 sweet spot
+	embedBatchSize   = 16   // texts per /v1/embed call
+	addBatchSize     = 1000 // items per /v1/vectors/index/add call
+
+	maxColPhone   = 4
+	maxColCity    = 5
+	maxColState   = 6
+	maxColRole    = 2
+	maxColSkills  = 8
+	maxColCerts   = 9
+	maxColResume  = 17
+	colWorkerID   = 0
+	colName       = 1
 )

-// workersCSV implements corpusingest.Source. CSV reader state +
-// row → Row mapping live here; the embed/add pipeline is generic.
-type workersCSV struct {
-	cr *csv.Reader
-}
-
-func (s *workersCSV) Next() (corpusingest.Row, error) {
-	for {
-		row, err := s.cr.Read()
-		if err != nil {
-			return corpusingest.Row{}, err
-		}
-		if len(row) <= colResume {
-			continue // skip malformed rows; matches prior behavior
-		}
-		id := strings.TrimSpace(row[colWorkerID])
-		return corpusingest.Row{
-			ID:   "w-" + id,
-			Text: buildWorkerText(row),
-			Metadata: map[string]any{
-				"name":  row[colName],
-				"role":  row[colRole],
-				"city":  row[colCity],
-				"state": row[colState],
-			},
-		}, nil
-	}
-}
-
-// buildWorkerText concatenates staffing-relevant columns into the
-// embed-text. Order: role first (most semantically dense), then
-// location, skills, certs, prose resume. Embedding models weight
-// earlier tokens slightly more, so the front matter matters.
-func buildWorkerText(row []string) string {
-	var b strings.Builder
-	b.WriteString(row[colRole])
-	b.WriteString(" in ")
-	b.WriteString(row[colCity])
-	b.WriteString(", ")
-	b.WriteString(row[colState])
-	b.WriteString(". Skills: ")
-	b.WriteString(row[colSkills])
-	b.WriteString(". Certifications: ")
-	b.WriteString(row[colCerts])
-	b.WriteString(". ")
-	b.WriteString(row[colResume])
-	return b.String()
-}
-
 func main() {
 	var (
-		gateway = flag.String("gateway", "http://127.0.0.1:3110", "gateway base URL")
-		csvPath = flag.String("csv", "/tmp/rs/workers_500k.csv", "path to workers CSV")
-		limit   = flag.Int("limit", 0, "limit rows (0 = all)")
-		queries = flag.String("queries", "default", "default | <semicolon-separated query strings>")
-		skipPop = flag.Bool("skip-populate", false, "skip embed+add, only run queries")
-		drop    = flag.Bool("drop", true, "DELETE index before populate (default true for clean recall)")
+		gateway  = flag.String("gateway", "http://127.0.0.1:3110", "gateway base URL")
+		csvPath  = flag.String("csv", "/tmp/rs/workers_500k.csv", "path to workers CSV")
+		limit    = flag.Int("limit", 0, "limit rows (0 = all)")
+		queries  = flag.String("queries", "default", "default | <semicolon-separated query strings>")
+		skipPop  = flag.Bool("skip-populate", false, "skip embed+add, only run queries")
 	)
 	flag.Parse()

 	hc := &http.Client{Timeout: 5 * time.Minute}
-	ctx := context.Background()

 	if !*skipPop {
-		f, err := os.Open(*csvPath)
-		if err != nil {
-			log.Fatalf("open csv: %v", err)
-		}
-		defer f.Close()
-		cr := csv.NewReader(f)
-		cr.FieldsPerRecord = -1
-		if _, err := cr.Read(); err != nil { // skip header
-			log.Fatalf("read header: %v", err)
-		}
+		// Tear down any prior index so recall is on a fresh build.
+		fmt.Printf("[sc] DELETE %s/v1/vectors/index/%s (idempotent cleanup)\n", *gateway, indexName)
+		_ = httpDelete(hc, *gateway+"/v1/vectors/index/"+indexName)

-		stats, err := corpusingest.Run(ctx, corpusingest.Config{
-			GatewayURL:   *gateway,
-			IndexName:    indexName,
-			Dimension:    dim,
-			Distance:     "cosine",
-			EmbedBatch:   16,   // matches Ollama-on-A4000 sweet spot
-			EmbedWorkers: 8,    // matches Ollama-on-A4000 sweet spot
-			AddBatch:     1000, // empirically fine; vectord BatchAdd lock-amortized at f1c1883
-			Limit:        *limit,
-			DropExisting: *drop,
-			HTTPClient:   hc,
-			LogProgress:  10 * time.Second,
-		}, &workersCSV{cr: cr})
-		if err != nil {
-			// ErrPartialFailure means SOME batches failed but we still
-			// have a corpus to query. Report and continue rather than
-			// nuking the run for transient Ollama hiccups.
-			if errors.Is(err, corpusingest.ErrPartialFailure) {
-				fmt.Printf("[sc] WARN partial failure: %v\n", err)
-			} else {
-				log.Fatalf("ingest: %v", err)
-			}
+		// Create the index.
+		body := map[string]any{"name": indexName, "dimension": dim, "distance": "cosine"}
+		if code, msg := httpPostJSON(hc, *gateway+"/v1/vectors/index", body); code != 201 {
+			log.Fatalf("create index: %d %s", code, msg)
 		}
-		fmt.Printf("[sc] populate done: scanned=%d embedded=%d added=%d failed=%d wall=%v\n",
-			stats.Scanned, stats.Embedded, stats.Added, stats.FailedBatches,
-			stats.Wall.Round(time.Millisecond))
+		fmt.Println("[sc] created index workers_500k dim=768 cosine")
+
+		t0 := time.Now()
+		if err := populate(hc, *gateway, *csvPath, *limit); err != nil {
+			log.Fatal(err)
+		}
+		fmt.Printf("[sc] populate complete in %v\n", time.Since(t0))
 	}

-	// Validate semantic queries against the populated index.
+	// Validate semantic queries.
 	qs := defaultQueries()
 	if *queries != "default" {
 		qs = strings.Split(*queries, ";")
@ -167,35 +97,196 @@ func defaultQueries() []string {
 	}
 }

-// runQuery embeds a query, searches the index, prints top hits.
-// Stays in this driver (not corpusingest) — query validation is
-// per-corpus concern, not part of the ingest pipeline.
-func runQuery(hc *http.Client, gateway, q string) {
-	t0 := time.Now()
-	body, _ := json.Marshal(map[string]any{"texts": []string{q}})
-	req, _ := http.NewRequest(http.MethodPost, gateway+"/v1/embed", bytes.NewReader(body))
+// populate streams the CSV at csvPath into the vector index behind gateway.
+//
+// This function reads rows, groups them into batches of embedBatchSize and
+// queues them for embedConcurrency worker goroutines; each worker embeds a
+// batch with embedBatch and then stores it with addBatch. A batch that fails
+// either step is logged and skipped, so a nil return means the CSV was read
+// to the end (or to limit) — not that every row was indexed. Compare the
+// embedded/added counters in the final line to see what actually landed.
+//
+// limit caps the number of accepted rows; limit <= 0 means no cap. Rows with
+// len(row) <= maxColResume are skipped and do not count towards limit.
+//
+// An error is returned only for failures opening or parsing the CSV (or
+// encoding a row's metadata). On every path the job queue is closed and the
+// workers are waited for, so no goroutine started here outlives the call.
+func populate(hc *http.Client, gateway, csvPath string, limit int) error {
+	f, err := os.Open(csvPath)
+	if err != nil {
+		return fmt.Errorf("open csv: %w", err)
+	}
+	defer f.Close()
+	cr := csv.NewReader(f)
+	cr.FieldsPerRecord = -1 // accept ragged rows; short ones are filtered below
+	if _, err := cr.Read(); err != nil { // header
+		return fmt.Errorf("read header: %w", err)
+	}
+
+	// job is one batch; the three slices are parallel (same length, same order).
+	type job struct {
+		ids   []string
+		texts []string
+		metas []json.RawMessage
+	}
+
+	jobs := make(chan job, embedConcurrency*2)
+	var wg sync.WaitGroup
+	var (
+		totalEmbedded int64
+		totalAdded    int64
+	)
+
+	for i := 0; i < embedConcurrency; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for j := range jobs {
+				vecs, err := embedBatch(hc, gateway, j.texts)
+				if err != nil {
+					log.Printf("embed batch (%d items): %v", len(j.texts), err)
+					continue
+				}
+				atomic.AddInt64(&totalEmbedded, int64(len(vecs)))
+				if err := addBatch(hc, gateway, j.ids, vecs, j.metas); err != nil {
+					log.Printf("add batch (%d items): %v", len(j.ids), err)
+					continue
+				}
+				atomic.AddInt64(&totalAdded, int64(len(j.ids)))
+			}
+		}()
+	}
+
+	// Progress reporter. Ticker.Stop does not close the ticker's channel, so a
+	// plain `for range ticker.C` would block forever after Stop; the goroutine
+	// needs its own stop signal to exit.
+	progressTicker := time.NewTicker(10 * time.Second)
+	progressDone := make(chan struct{})
+	go func() {
+		for {
+			select {
+			case <-progressDone:
+				return
+			case <-progressTicker.C:
+				fmt.Printf("[sc] progress: embedded=%d added=%d\n",
+					atomic.LoadInt64(&totalEmbedded), atomic.LoadInt64(&totalAdded))
+			}
+		}
+	}()
+	defer func() {
+		progressTicker.Stop()
+		close(progressDone)
+	}()
+
+	rows := 0
+	// feed reads the CSV and enqueues batches. It is a closure only so that
+	// every exit path, including errors, falls through to the single
+	// close(jobs)/wg.Wait() below instead of stranding the workers.
+	feed := func() error {
+		curIDs := make([]string, 0, embedBatchSize)
+		curTexts := make([]string, 0, embedBatchSize)
+		curMetas := make([]json.RawMessage, 0, embedBatchSize)
+		for {
+			row, err := cr.Read()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				return fmt.Errorf("csv read row %d: %w", rows, err)
+			}
+			// NOTE(review): this guard only proves row[maxColResume] exists; it
+			// assumes maxColResume is the largest column index used — confirm.
+			if len(row) <= maxColResume {
+				continue
+			}
+			id := strings.TrimSpace(row[colWorkerID])
+			meta, err := json.Marshal(map[string]any{
+				"name":  row[colName],
+				"role":  row[maxColRole],
+				"city":  row[maxColCity],
+				"state": row[maxColState],
+			})
+			if err != nil {
+				return fmt.Errorf("marshal metadata row %d: %w", rows, err)
+			}
+			curIDs = append(curIDs, "w-"+id)
+			curTexts = append(curTexts, buildSearchText(row))
+			curMetas = append(curMetas, meta)
+
+			if len(curIDs) >= embedBatchSize {
+				jobs <- job{ids: curIDs, texts: curTexts, metas: curMetas}
+				// Allocate fresh slices: the worker now owns the queued ones.
+				curIDs = make([]string, 0, embedBatchSize)
+				curTexts = make([]string, 0, embedBatchSize)
+				curMetas = make([]json.RawMessage, 0, embedBatchSize)
+			}
+			rows++
+			if limit > 0 && rows >= limit {
+				break
+			}
+		}
+		if len(curIDs) > 0 {
+			jobs <- job{ids: curIDs, texts: curTexts, metas: curMetas}
+		}
+		return nil
+	}
+
+	feedErr := feed()
+	close(jobs)
+	wg.Wait()
+	if feedErr != nil {
+		return feedErr
+	}
+
+	fmt.Printf("[sc] final: scanned=%d embedded=%d added=%d\n",
+		rows, atomic.LoadInt64(&totalEmbedded), atomic.LoadInt64(&totalAdded))
+	return nil
+}
+
+// buildSearchText concatenates the staffing-relevant columns of one CSV row
+// into the text that gets embedded. The emitted order is: role, city and
+// state, skills, certifications, then the prose resume text:
+//
+//	<role> in <city>, <state>. Skills: <skills>. Certifications: <certs>. <resume>
+//
+// Changing this order changes the embedded vectors, so treat it as part of
+// the index's contents rather than as formatting.
+//
+// The row is indexed without bounds checks; the caller must have verified
+// that it is long enough for every maxCol* index used here.
+func buildSearchText(row []string) string {
+	return row[maxColRole] + " in " + row[maxColCity] + ", " + row[maxColState] +
+		". Skills: " + row[maxColSkills] +
+		". Certifications: " + row[maxColCerts] +
+		". " + row[maxColResume]
+}
+
+// embedBatch POSTs texts as {"texts": [...]} to gateway+"/v1/embed" and
+// returns the "vectors" array from the JSON response.
+//
+// It returns an error when the request cannot be built or sent, when the
+// response status is not 200 (the first 256 bytes of the body are included in
+// the message), when the body does not decode, or when the number of vectors
+// differs from len(texts). The last check matters because callers pair
+// vectors with ids by position.
+func embedBatch(hc *http.Client, gateway string, texts []string) ([][]float32, error) {
+	bs, err := json.Marshal(map[string]any{"texts": texts})
+	if err != nil {
+		return nil, fmt.Errorf("marshal embed request: %w", err)
+	}
+	req, err := http.NewRequest(http.MethodPost, gateway+"/v1/embed", bytes.NewReader(bs))
+	if err != nil {
+		// For example an unparsable gateway URL. Without this check req is
+		// nil and the Header.Set call below panics.
+		return nil, fmt.Errorf("build embed request: %w", err)
+	}
 	req.Header.Set("Content-Type", "application/json")
 	resp, err := hc.Do(req)
 	if err != nil {
-		fmt.Printf("[sc] query %q: embed err: %v\n", q, err)
-		return
+		return nil, err
 	}
 	defer resp.Body.Close()
+	if resp.StatusCode != 200 {
+		preview, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
+		return nil, fmt.Errorf("embed status %d: %s", resp.StatusCode, string(preview))
+	}
 	var er struct {
 		Vectors [][]float32 `json:"vectors"`
 	}
-	if err := json.NewDecoder(resp.Body).Decode(&er); err != nil || len(er.Vectors) == 0 {
-		fmt.Printf("[sc] query %q: embed decode err: %v\n", q, err)
+	if err := json.NewDecoder(resp.Body).Decode(&er); err != nil {
+		return nil, fmt.Errorf("decode embed response: %w", err)
+	}
+	if len(er.Vectors) != len(texts) {
+		return nil, fmt.Errorf("embed returned %d vectors for %d texts", len(er.Vectors), len(texts))
+	}
+	return er.Vectors, nil
+}
+
+// addItem is one element of the "items" array that addBatch sends to the
+// index's /add endpoint.
+type addItem struct {
+	ID       string          `json:"id"`
+	Vector   []float32       `json:"vector"`
+	Metadata json.RawMessage `json:"metadata"` // already-encoded JSON, embedded as-is
+}
+
+// addBatch POSTs one batch of vectors to
+// gateway+"/v1/vectors/index/"+indexName+"/add" as {"items": [...]}.
+//
+// ids, vecs and metas are parallel slices: element i of each describes the
+// same item. They must have equal length; a mismatch is reported as an error
+// rather than indexed past, because this runs inside worker goroutines where
+// an out-of-range panic would take down the whole process.
+//
+// Any response status other than 200 is returned as an error carrying the
+// first 256 bytes of the response body.
+func addBatch(hc *http.Client, gateway string, ids []string, vecs [][]float32, metas []json.RawMessage) error {
+	if len(vecs) != len(ids) || len(metas) != len(ids) {
+		return fmt.Errorf("add batch length mismatch: ids=%d vecs=%d metas=%d",
+			len(ids), len(vecs), len(metas))
+	}
+	items := make([]addItem, len(ids))
+	for i := range ids {
+		items[i] = addItem{ID: ids[i], Vector: vecs[i], Metadata: metas[i]}
+	}
+	bs, err := json.Marshal(map[string]any{"items": items})
+	if err != nil {
+		return fmt.Errorf("marshal add request: %w", err)
+	}
+	req, err := http.NewRequest(http.MethodPost,
+		gateway+"/v1/vectors/index/"+indexName+"/add", bytes.NewReader(bs))
+	if err != nil {
+		return fmt.Errorf("build add request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	resp, err := hc.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != 200 {
+		preview, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
+		return fmt.Errorf("add status %d: %s", resp.StatusCode, string(preview))
+	}
+	return nil
+}
+
+func runQuery(hc *http.Client, gateway, q string) {
+	t0 := time.Now()
+	// 1. Embed the query.
+	vecs, err := embedBatch(hc, gateway, []string{q})
+	if err != nil || len(vecs) == 0 {
+		fmt.Printf("[sc] query %q: embed err: %v\n", q, err)
 		return
 	}
 	embedDur := time.Since(t0)
-
 	t1 := time.Now()
-	body, _ = json.Marshal(map[string]any{"vector": er.Vectors[0], "k": 5})
-	req, _ = http.NewRequest(http.MethodPost,
-		gateway+"/v1/vectors/index/"+indexName+"/search", bytes.NewReader(body))
+	// 2. Search.
+	body := map[string]any{"vector": vecs[0], "k": 5}
+	bs, _ := json.Marshal(body)
+	req, _ := http.NewRequest(http.MethodPost,
+		gateway+"/v1/vectors/index/"+indexName+"/search", bytes.NewReader(bs))
 	req.Header.Set("Content-Type", "application/json")
-	resp, err = hc.Do(req)
+	resp, err := hc.Do(req)
 	if err != nil {
 		fmt.Printf("[sc] query %q: search err: %v\n", q, err)
 		return
@ -219,3 +310,29 @@ func runQuery(hc *http.Client, gateway, q string) {
 	}
 }

+// httpPostJSON JSON-encodes body, POSTs it to url and returns the response
+// status code together with at most the first 256 bytes of the response body.
+//
+// If no response is obtained — the body cannot be encoded, the request
+// cannot be built (for example a malformed url), or the transport fails — it
+// returns status 0 and the error text in place of the body.
+func httpPostJSON(hc *http.Client, url string, body any) (int, string) {
+	bs, err := json.Marshal(body)
+	if err != nil {
+		return 0, err.Error()
+	}
+	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(bs))
+	if err != nil {
+		// Without this check req is nil and Header.Set below panics.
+		return 0, err.Error()
+	}
+	req.Header.Set("Content-Type", "application/json")
+	resp, err := hc.Do(req)
+	if err != nil {
+		return 0, err.Error()
+	}
+	defer resp.Body.Close()
+	// The preview is best-effort: a read error just yields a shorter message.
+	preview, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
+	return resp.StatusCode, string(preview)
+}
+
+// httpDelete sends a DELETE request to url and discards the response body.
+//
+// The response status is not inspected: any response at all counts as
+// success. An error is returned only when the request cannot be built or
+// sent.
+func httpDelete(hc *http.Client, url string) error {
+	req, err := http.NewRequest(http.MethodDelete, url, nil)
+	if err != nil {
+		// Without this check a malformed url hands a nil request to hc.Do.
+		return err
+	}
+	resp, err := hc.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	// Drain the body so the transport can reuse the connection; the content
+	// and any read error are irrelevant to the caller.
+	_, _ = io.Copy(io.Discard, resp.Body)
+	return nil
+}
+
+// Blank reference keeps the "context" import compiling; presumably nothing else here uses it (TODO confirm, then drop both).
+var _ = context.Background
--- a/scripts/staffing_candidates/main.go
+++ b/scripts/staffing_candidates/main.go
@ -1,303 +0,0 @@
-// Staffing candidates corpus driver — second corpus on the Go side
-// after workers_500k. Validates the corpusingest substrate against
-// real production-shape parquet data and gives the matrix indexer a
-// second corpus to compose against.
-//
-// Source: /home/profit/lakehouse/data/datasets/candidates.parquet
-// (1000 candidates, 11 columns including skills + status + years).
-//
-// IDs are prefixed "c-" so merged matrix results across corpora
-// stay unambiguous (workers use "w-").
-//
-// Post-ingest: runs a real staffing query through /v1/matrix/search
-// against just the candidates corpus — first deep-field reality test
-// using the new pipeline.
-
-package main
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"errors"
-	"flag"
-	"fmt"
-	"io"
-	"log"
-	"net/http"
-	"os"
-	"strings"
-	"time"
-
-	"github.com/apache/arrow-go/v18/arrow/array"
-	"github.com/apache/arrow-go/v18/arrow/memory"
-	"github.com/apache/arrow-go/v18/parquet/file"
-	"github.com/apache/arrow-go/v18/parquet/pqarrow"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/corpusingest"
-)
-
-const (
-	indexName = "candidates"
-	dim       = 768
-)
-
-// candidatesSource implements corpusingest.Source over an in-memory
-// arrow.Table loaded from candidates.parquet. 1000 rows fits
-// comfortably in RAM; a chunked-record-batch reader is the next
-// abstraction when a multi-million-row parquet shows up.
-type candidatesSource struct {
-	cols struct {
-		id, firstName, lastName, email, phone, city, state, skills, status *array.String
-		years, rate                                                        *array.Int64
-	}
-	n   int
-	cur int
-}
-
-func newCandidatesSource(path string) (*candidatesSource, func(), error) {
-	f, err := os.Open(path)
-	if err != nil {
-		return nil, nil, fmt.Errorf("open parquet: %w", err)
-	}
-	pf, err := file.NewParquetReader(f)
-	if err != nil {
-		f.Close()
-		return nil, nil, fmt.Errorf("parquet reader: %w", err)
-	}
-	fr, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
-	if err != nil {
-		pf.Close()
-		f.Close()
-		return nil, nil, fmt.Errorf("arrow reader: %w", err)
-	}
-	table, err := fr.ReadTable(context.Background())
-	if err != nil {
-		pf.Close()
-		f.Close()
-		return nil, nil, fmt.Errorf("read table: %w", err)
-	}
-
-	src := &candidatesSource{n: int(table.NumRows())}
-	schema := table.Schema()
-
-	stringColByName := func(name string) (*array.String, error) {
-		idx := schema.FieldIndices(name)
-		if len(idx) == 0 {
-			return nil, fmt.Errorf("column %q not found", name)
-		}
-		ch := table.Column(idx[0]).Data()
-		if ch.Len() == 0 {
-			return nil, fmt.Errorf("column %q empty", name)
-		}
-		// Single-chunk assumption — ReadTable on a single-row-group
-		// 1000-row parquet returns one chunk. If parquets get larger,
-		// switch to RecordReader and iterate chunks.
-		if n := len(ch.Chunks()); n != 1 {
-			return nil, fmt.Errorf("column %q has %d chunks; only 1 supported here", name, n)
-		}
-		s, ok := ch.Chunk(0).(*array.String)
-		if !ok {
-			return nil, fmt.Errorf("column %q is %T, want *array.String", name, ch.Chunk(0))
-		}
-		return s, nil
-	}
-	int64ColByName := func(name string) (*array.Int64, error) {
-		idx := schema.FieldIndices(name)
-		if len(idx) == 0 {
-			return nil, fmt.Errorf("column %q not found", name)
-		}
-		ch := table.Column(idx[0]).Data()
-		i, ok := ch.Chunk(0).(*array.Int64)
-		if !ok {
-			return nil, fmt.Errorf("column %q is %T, want *array.Int64", name, ch.Chunk(0))
-		}
-		return i, nil
-	}
-
-	cleanup := func() {
-		table.Release()
-		pf.Close()
-		f.Close()
-	}
-	for _, t := range []struct {
-		name string
-		dst  **array.String
-	}{
-		{"candidate_id", &src.cols.id},
-		{"first_name", &src.cols.firstName},
-		{"last_name", &src.cols.lastName},
-		{"email", &src.cols.email},
-		{"phone", &src.cols.phone},
-		{"city", &src.cols.city},
-		{"state", &src.cols.state},
-		{"skills", &src.cols.skills},
-		{"status", &src.cols.status},
-	} {
-		col, err := stringColByName(t.name)
-		if err != nil {
-			cleanup()
-			return nil, nil, err
-		}
-		*t.dst = col
-	}
-	for _, t := range []struct {
-		name string
-		dst  **array.Int64
-	}{
-		{"years_experience", &src.cols.years},
-		{"hourly_rate_usd", &src.cols.rate},
-	} {
-		col, err := int64ColByName(t.name)
-		if err != nil {
-			cleanup()
-			return nil, nil, err
-		}
-		*t.dst = col
-	}
-	return src, cleanup, nil
-}
-
-func (s *candidatesSource) Next() (corpusingest.Row, error) {
-	if s.cur >= s.n {
-		return corpusingest.Row{}, io.EOF
-	}
-	i := s.cur
-	s.cur++
-
-	candidateID := s.cols.id.Value(i)
-	firstName := s.cols.firstName.Value(i)
-	lastName := s.cols.lastName.Value(i)
-	city := s.cols.city.Value(i)
-	state := s.cols.state.Value(i)
-	skills := s.cols.skills.Value(i)
-	status := s.cols.status.Value(i)
-	years := s.cols.years.Value(i)
-	rate := s.cols.rate.Value(i)
-
-	// Embed text: name + role-shape from skills + location + experience
-	// + status. Order matters — embedding models weight earlier tokens
-	// slightly more, so role-relevant signal (skills) goes first.
-	var b strings.Builder
-	b.WriteString("Candidate skills: ")
-	b.WriteString(skills)
-	b.WriteString(". Based in ")
-	b.WriteString(city)
-	b.WriteString(", ")
-	b.WriteString(state)
-	b.WriteString(". ")
-	fmt.Fprintf(&b, "%d years experience. Status: %s. ", years, status)
-	b.WriteString(firstName)
-	b.WriteString(" ")
-	b.WriteString(lastName)
-	b.WriteString(".")
-
-	return corpusingest.Row{
-		ID:   "c-" + candidateID,
-		Text: b.String(),
-		Metadata: map[string]any{
-			"candidate_id":     candidateID,
-			"first_name":       firstName,
-			"last_name":        lastName,
-			"email":            s.cols.email.Value(i),
-			"phone":            s.cols.phone.Value(i),
-			"city":             city,
-			"state":            state,
-			"skills":           skills,
-			"status":           status,
-			"years_experience": years,
-			"hourly_rate_usd":  rate,
-		},
-	}, nil
-}
-
-func main() {
-	var (
-		gateway     = flag.String("gateway", "http://127.0.0.1:3110", "gateway base URL")
-		parquetPath = flag.String("parquet", "/home/profit/lakehouse/data/datasets/candidates.parquet", "candidates parquet")
-		limit       = flag.Int("limit", 0, "limit rows (0 = all 1000)")
-		query       = flag.String("query", "Python AWS Docker engineer in Chicago available now", "post-ingest reality-test query")
-		drop        = flag.Bool("drop", true, "DELETE candidates index before populate")
-		skipPop     = flag.Bool("skip-populate", false, "skip ingest, only run query")
-	)
-	flag.Parse()
-
-	hc := &http.Client{Timeout: 5 * time.Minute}
-	ctx := context.Background()
-
-	if !*skipPop {
-		src, cleanup, err := newCandidatesSource(*parquetPath)
-		if err != nil {
-			log.Fatalf("open candidates source: %v", err)
-		}
-		defer cleanup()
-
-		stats, err := corpusingest.Run(ctx, corpusingest.Config{
-			GatewayURL:   *gateway,
-			IndexName:    indexName,
-			Dimension:    dim,
-			Distance:     "cosine",
-			EmbedBatch:   16,
-			EmbedWorkers: 8,
-			AddBatch:     500, // 1000 candidates → 2 add calls; small batches keep memory bounded
-			Limit:        *limit,
-			DropExisting: *drop,
-			HTTPClient:   hc,
-			LogProgress:  5 * time.Second,
-		}, src)
-		if err != nil {
-			if errors.Is(err, corpusingest.ErrPartialFailure) {
-				fmt.Printf("[candidates] WARN partial failure: %v\n", err)
-			} else {
-				log.Fatalf("ingest: %v", err)
-			}
-		}
-		fmt.Printf("[candidates] populate: scanned=%d embedded=%d added=%d failed=%d wall=%v\n",
-			stats.Scanned, stats.Embedded, stats.Added, stats.FailedBatches,
-			stats.Wall.Round(time.Millisecond))
-	}
-
-	// Reality test — run a real staffing query through /v1/matrix/search
-	// against just the candidates corpus. Multi-corpus retrieval against
-	// workers + candidates is the next step.
-	fmt.Printf("\n[candidates] reality test query: %q\n", *query)
-	runMatrixQuery(hc, *gateway, *query)
-}
-
-func runMatrixQuery(hc *http.Client, gateway, query string) {
-	body, _ := json.Marshal(map[string]any{
-		"query_text":   query,
-		"corpora":      []string{indexName},
-		"k":            5,
-		"per_corpus_k": 10,
-	})
-	req, _ := http.NewRequest(http.MethodPost, gateway+"/v1/matrix/search", bytes.NewReader(body))
-	req.Header.Set("Content-Type", "application/json")
-	t0 := time.Now()
-	resp, err := hc.Do(req)
-	if err != nil {
-		log.Fatalf("matrix search: %v", err)
-	}
-	defer resp.Body.Close()
-	dur := time.Since(t0)
-	if resp.StatusCode != 200 {
-		preview, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
-		log.Fatalf("matrix search %d: %s", resp.StatusCode, preview)
-	}
-	var sr struct {
-		Results []struct {
-			ID       string          `json:"id"`
-			Distance float32         `json:"distance"`
-			Corpus   string          `json:"corpus"`
-			Metadata json.RawMessage `json:"metadata"`
-		} `json:"results"`
-	}
-	if err := json.NewDecoder(resp.Body).Decode(&sr); err != nil {
-		log.Fatalf("decode: %v", err)
-	}
-	fmt.Printf("[candidates] matrix returned %d hits in %v:\n", len(sr.Results), dur.Round(time.Millisecond))
-	for i, r := range sr.Results {
-		fmt.Printf("  %d. %s d=%.4f corpus=%s\n     %s\n",
-			i+1, r.ID, r.Distance, r.Corpus, string(r.Metadata))
-	}
-}
--- a/scripts/staffing_workers/main.go
+++ b/scripts/staffing_workers/main.go
@ -1,308 +0,0 @@
-// Staffing workers corpus driver — second-of-two corpora that proves
-// the multi-corpus matrix indexer end-to-end. Mirrors the candidates
-// driver's parquet pattern but handles multi-chunk arrow tables
-// (workers_500k.parquet has multiple row groups, candidates fits in
-// one).
-//
-// Source: /home/profit/lakehouse/data/datasets/workers_500k.parquet
-// (500000 rows, 18 cols including role + skills + certifications +
-// archetype + reliability scores + resume_text).
-//
-// IDs prefixed "w-" so multi-corpus matrix queries returning workers
-// alongside candidates ("c-") stay unambiguous in merged results.
-//
-// Default -limit 5000 because the goal of this driver is multi-corpus
-// reality testing, not the 500K stress test (separate concern, see
-// project_golang_lakehouse.md scale framing).
-
-package main
-
-import (
-	"context"
-	"errors"
-	"flag"
-	"fmt"
-	"io"
-	"log"
-	"net/http"
-	"os"
-	"strings"
-	"time"
-
-	"github.com/apache/arrow-go/v18/arrow"
-	"github.com/apache/arrow-go/v18/arrow/array"
-	"github.com/apache/arrow-go/v18/arrow/memory"
-	"github.com/apache/arrow-go/v18/parquet/file"
-	"github.com/apache/arrow-go/v18/parquet/pqarrow"
-
-	"git.agentview.dev/profit/golangLAKEHOUSE/internal/corpusingest"
-)
-
-const (
-	indexName = "workers"
-	dim       = 768
-)
-
-// workersSource implements corpusingest.Source over an in-memory
-// arrow.Table loaded from workers_500k.parquet. Unlike the candidates
-// driver, this MUST handle multi-chunk arrow columns — a 500K-row
-// parquet has ≥1 row group, each becoming its own chunk after read.
-type workersSource struct {
-	cols struct {
-		workerID                                                            *chunkedInt64
-		name, role, city, state, skills, certs, archetype, resume, comm     *chunkedString
-	}
-	n   int64
-	cur int64
-}
-
-// chunkedString lets per-row access work whether the table came back
-// with one chunk or many. Forward-only iteration; not safe to seek.
-type chunkedString struct {
-	chunks []*array.String
-	sizes  []int64
-}
-
-func newChunkedString(col *arrow.Chunked) (*chunkedString, error) {
-	cs := &chunkedString{}
-	for i, ch := range col.Chunks() {
-		s, ok := ch.(*array.String)
-		if !ok {
-			return nil, fmt.Errorf("chunk %d is %T, want *array.String", i, ch)
-		}
-		cs.chunks = append(cs.chunks, s)
-		cs.sizes = append(cs.sizes, int64(s.Len()))
-	}
-	return cs, nil
-}
-
-// At returns the value at the global row index. O(chunks) per call;
-// fine for our scale (≤5000 rows × ~5 chunks).
-func (c *chunkedString) At(row int64) string {
-	var offset int64
-	for i, s := range c.chunks {
-		n := c.sizes[i]
-		if row < offset+n {
-			return s.Value(int(row - offset))
-		}
-		offset += n
-	}
-	return ""
-}
-
-type chunkedInt64 struct {
-	chunks []*array.Int64
-	sizes  []int64
-}
-
-func newChunkedInt64(col *arrow.Chunked) (*chunkedInt64, error) {
-	ci := &chunkedInt64{}
-	for i, ch := range col.Chunks() {
-		s, ok := ch.(*array.Int64)
-		if !ok {
-			return nil, fmt.Errorf("chunk %d is %T, want *array.Int64", i, ch)
-		}
-		ci.chunks = append(ci.chunks, s)
-		ci.sizes = append(ci.sizes, int64(s.Len()))
-	}
-	return ci, nil
-}
-
-func (c *chunkedInt64) At(row int64) int64 {
-	var offset int64
-	for i, s := range c.chunks {
-		n := c.sizes[i]
-		if row < offset+n {
-			return s.Value(int(row - offset))
-		}
-		offset += n
-	}
-	return 0
-}
-
-func newWorkersSource(path string) (*workersSource, func(), error) {
-	f, err := os.Open(path)
-	if err != nil {
-		return nil, nil, fmt.Errorf("open parquet: %w", err)
-	}
-	pf, err := file.NewParquetReader(f)
-	if err != nil {
-		f.Close()
-		return nil, nil, fmt.Errorf("parquet reader: %w", err)
-	}
-	fr, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
-	if err != nil {
-		pf.Close()
-		f.Close()
-		return nil, nil, fmt.Errorf("arrow reader: %w", err)
-	}
-	table, err := fr.ReadTable(context.Background())
-	if err != nil {
-		pf.Close()
-		f.Close()
-		return nil, nil, fmt.Errorf("read table: %w", err)
-	}
-
-	src := &workersSource{n: table.NumRows()}
-	schema := table.Schema()
-
-	stringCol := func(name string) (*chunkedString, error) {
-		idx := schema.FieldIndices(name)
-		if len(idx) == 0 {
-			return nil, fmt.Errorf("column %q not found", name)
-		}
-		return newChunkedString(table.Column(idx[0]).Data())
-	}
-	int64Col := func(name string) (*chunkedInt64, error) {
-		idx := schema.FieldIndices(name)
-		if len(idx) == 0 {
-			return nil, fmt.Errorf("column %q not found", name)
-		}
-		return newChunkedInt64(table.Column(idx[0]).Data())
-	}
-
-	cleanup := func() {
-		table.Release()
-		pf.Close()
-		f.Close()
-	}
-
-	wid, err := int64Col("worker_id")
-	if err != nil {
-		cleanup()
-		return nil, nil, err
-	}
-	src.cols.workerID = wid
-
-	for _, t := range []struct {
-		name string
-		dst  **chunkedString
-	}{
-		{"name", &src.cols.name},
-		{"role", &src.cols.role},
-		{"city", &src.cols.city},
-		{"state", &src.cols.state},
-		{"skills", &src.cols.skills},
-		{"certifications", &src.cols.certs},
-		{"archetype", &src.cols.archetype},
-		{"resume_text", &src.cols.resume},
-		{"communications", &src.cols.comm},
-	} {
-		col, err := stringCol(t.name)
-		if err != nil {
-			cleanup()
-			return nil, nil, err
-		}
-		*t.dst = col
-	}
-	return src, cleanup, nil
-}
-
-func (s *workersSource) Next() (corpusingest.Row, error) {
-	if s.cur >= s.n {
-		return corpusingest.Row{}, io.EOF
-	}
-	i := s.cur
-	s.cur++
-
-	workerID := s.cols.workerID.At(i)
-	name := s.cols.name.At(i)
-	role := s.cols.role.At(i)
-	city := s.cols.city.At(i)
-	state := s.cols.state.At(i)
-	skills := s.cols.skills.At(i)
-	certs := s.cols.certs.At(i)
-	archetype := s.cols.archetype.At(i)
-	resume := s.cols.resume.At(i)
-
-	// Embed text — restored to V0 after 2026-04-29 D experiment.
-	// Three variants tested on a query of "Forklift operator with
-	// OSHA-30 certification, warehouse experience":
-	//   V0 (this):   structured "Worker role: ... Skills: ... <resume_text>"
-	//                → 6 workers in top-8, 0 Forklift, top dist 0.327
-	//   V4a (drop):  drop labels + resume + archetype, double the role
-	//                → 6 workers in top-8, 0 Forklift, top dist 0.254
-	//   V4b (resume only): just resume_text, no structured prefix
-	//                → 4 workers in top-8 (worse mix), 0 Forklift, top 0.379
-	// All three surfaced Production Workers / Machine Operators /
-	// Line Leads above actual Forklift Operators. Conclusion: the
-	// bottleneck is nomic-embed-text 137M's geometry, not text
-	// design. Real fixes belong elsewhere — hybrid SQL+semantic
-	// (B in next-step menu) or playbook boost (component 5,
-	// already shipped). V0 keeps the best worker/candidate mix.
-	var b strings.Builder
-	b.WriteString("Worker role: ")
-	b.WriteString(role)
-	b.WriteString(". Skills: ")
-	b.WriteString(skills)
-	b.WriteString(". Certifications: ")
-	b.WriteString(certs)
-	b.WriteString(". Based in ")
-	b.WriteString(city)
-	b.WriteString(", ")
-	b.WriteString(state)
-	b.WriteString(". Archetype: ")
-	b.WriteString(archetype)
-	b.WriteString(". ")
-	b.WriteString(resume)
-	text := b.String()
-
-	return corpusingest.Row{
-		ID:   fmt.Sprintf("w-%d", workerID),
-		Text: text,
-		Metadata: map[string]any{
-			"worker_id":      workerID,
-			"name":           name,
-			"role":           role,
-			"city":           city,
-			"state":          state,
-			"skills":         skills,
-			"certifications": certs,
-			"archetype":      archetype,
-		},
-	}, nil
-}
-
-func main() {
-	var (
-		gateway     = flag.String("gateway", "http://127.0.0.1:3110", "gateway base URL")
-		parquetPath = flag.String("parquet", "/home/profit/lakehouse/data/datasets/workers_500k.parquet", "workers parquet")
-		limit       = flag.Int("limit", 5000, "limit rows (0 = all 500K — usually not what you want here)")
-		drop        = flag.Bool("drop", true, "DELETE workers index before populate")
-	)
-	flag.Parse()
-
-	hc := &http.Client{Timeout: 5 * time.Minute}
-	ctx := context.Background()
-
-	src, cleanup, err := newWorkersSource(*parquetPath)
-	if err != nil {
-		log.Fatalf("open workers source: %v", err)
-	}
-	defer cleanup()
-
-	stats, err := corpusingest.Run(ctx, corpusingest.Config{
-		GatewayURL:   *gateway,
-		IndexName:    indexName,
-		Dimension:    dim,
-		Distance:     "cosine",
-		EmbedBatch:   16,
-		EmbedWorkers: 8,
-		AddBatch:     500,
-		Limit:        *limit,
-		DropExisting: *drop,
-		HTTPClient:   hc,
-		LogProgress:  10 * time.Second,
-	}, src)
-	if err != nil {
-		if errors.Is(err, corpusingest.ErrPartialFailure) {
-			fmt.Printf("[workers] WARN partial failure: %v\n", err)
-		} else {
-			log.Fatalf("ingest: %v", err)
-		}
-	}
-	fmt.Printf("[workers] populate: scanned=%d embedded=%d added=%d failed=%d wall=%v\n",
-		stats.Scanned, stats.Embedded, stats.Added, stats.FailedBatches,
-		stats.Wall.Round(time.Millisecond))
-}
-
--- a/scripts/workflow_smoke.sh
+++ b/scripts/workflow_smoke.sh
@ -1,193 +0,0 @@
-#!/usr/bin/env bash
-# Workflow smoke — Observer-KB workflow runner end-to-end (SPEC §3.8
-# first slice). All assertions go through gateway :3110.
-#
-# Validates:
-#   - GET /observer/workflow/modes lists fixture.echo + fixture.upper
-#   - POST /observer/workflow/run executes a 3-node DAG with $-ref
-#     substitution: shape (uppercase) → weakness → improvement
-#   - Each node's execution lands an ObservedOp via the observer
-#     ring (visible in /observer/stats with source="workflow")
-#   - Aborting case: unknown mode → 400 with helpful error
-#   - Skip cascade: node with failed dep gets skipped, independent
-#     siblings still run
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-export PATH="$PATH:/usr/local/go/bin"
-
-echo "[workflow-smoke] building observerd + gateway..."
-go build -o bin/ ./cmd/observerd ./cmd/gateway
-
-pkill -f "bin/(observerd|gateway)" 2>/dev/null || true
-sleep 0.3
-
-PIDS=()
-TMP="$(mktemp -d)"
-CFG="$TMP/workflow.toml"
-
-cleanup() {
-  echo "[workflow-smoke] cleanup"
-  for p in "${PIDS[@]}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
-  rm -rf "$TMP"
-}
-trap cleanup EXIT INT TERM
-
-cat > "$CFG" <<EOF
-[gateway]
-bind = "127.0.0.1:3110"
-storaged_url = "http://127.0.0.1:3211"
-catalogd_url = "http://127.0.0.1:3212"
-ingestd_url  = "http://127.0.0.1:3213"
-queryd_url   = "http://127.0.0.1:3214"
-vectord_url  = "http://127.0.0.1:3215"
-embedd_url   = "http://127.0.0.1:3216"
-pathwayd_url = "http://127.0.0.1:3217"
-matrixd_url  = "http://127.0.0.1:3218"
-observerd_url = "http://127.0.0.1:3219"
-
-[observerd]
-bind = "127.0.0.1:3219"
-EOF
-
-poll_health() {
-  local port="$1" deadline=$(($(date +%s) + 5))
-  while [ "$(date +%s)" -lt "$deadline" ]; do
-    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then return 0; fi
-    sleep 0.05
-  done
-  return 1
-}
-
-echo "[workflow-smoke] launching observerd → gateway..."
-./bin/observerd -config "$CFG" > /tmp/observerd.log 2>&1 &
-PIDS+=($!)
-poll_health 3219 || { echo "observerd failed"; tail /tmp/observerd.log; exit 1; }
-
-./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
-PIDS+=($!)
-poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
-
-FAILED=0
-
-# ── 1. /observer/workflow/modes lists registered modes ────────────
-echo "[workflow-smoke] /observer/workflow/modes lists fixtures + real modes:"
-RESP="$(curl -sS http://127.0.0.1:3110/v1/observer/workflow/modes)"
-EXPECTED=("fixture.echo" "fixture.upper" "matrix.relevance" "matrix.downgrade" "distillation.score" "drift.scorer" "matrix.search")
-MISSING=""
-for m in "${EXPECTED[@]}"; do
-  if [ "$(echo "$RESP" | jq -r --arg m "$m" '.modes | index($m) != null')" != "true" ]; then
-    MISSING="$MISSING $m"
-  fi
-done
-if [ -z "$MISSING" ]; then
-  echo "  ✓ all 7 expected modes registered (fixtures + 4 pure + matrix.search HTTP)"
-else
-  echo "  ✗ missing modes:$MISSING"; FAILED=1
-fi
-
-# ── 2. 3-node DAG with $-ref substitution ─────────────────────────
-echo "[workflow-smoke] 3-node DAG: shape (upper) → weakness → improvement"
-WORKFLOW='{
-  "workflow": {
-    "name": "smoke-chain",
-    "description": "DAG ref substitution test",
-    "nodes": [
-      {"id":"shape", "mode":"fixture.upper", "prompt":"hello world"},
-      {"id":"weakness", "mode":"fixture.echo",
-       "prompt":"observed shape: $shape.output.upper",
-       "depends_on":["shape"]},
-      {"id":"improvement", "mode":"fixture.echo",
-       "prompt":"based on $weakness.output.prompt do better",
-       "depends_on":["weakness"]}
-    ]
-  }
-}'
-RUN="$(curl -sS -X POST http://127.0.0.1:3110/v1/observer/workflow/run \
-  -H 'Content-Type: application/json' -d "$WORKFLOW")"
-STATUS="$(echo "$RUN" | jq -r '.status')"
-SHAPE_UPPER="$(echo "$RUN" | jq -r '.nodes[0].output.upper')"
-WEAK_PROMPT="$(echo "$RUN" | jq -r '.nodes[1].output.prompt')"
-IMP_PROMPT="$(echo "$RUN" | jq -r '.nodes[2].output.prompt')"
-
-if [ "$STATUS" = "succeeded" ] && [ "$SHAPE_UPPER" = "HELLO WORLD" ] \
-    && [[ "$WEAK_PROMPT" == *"HELLO WORLD"* ]] \
-    && [[ "$IMP_PROMPT" == *"HELLO WORLD"* ]]; then
-  echo "  ✓ status=succeeded · shape=HELLO WORLD · refs propagated through 3-node chain"
-else
-  echo "  ✗ status=$STATUS shape=$SHAPE_UPPER weak=$WEAK_PROMPT imp=$IMP_PROMPT"
-  echo "    full: $RUN"
-  FAILED=1
-fi
-
-# ── 3. Per-node provenance recorded as ObservedOps ────────────────
-echo "[workflow-smoke] /observer/stats reflects workflow ops:"
-STATS="$(curl -sS http://127.0.0.1:3110/v1/observer/stats)"
-WORKFLOW_OPS="$(echo "$STATS" | jq -r '.by_source.workflow // 0')"
-TOTAL="$(echo "$STATS" | jq -r '.total')"
-if [ "$WORKFLOW_OPS" = "3" ] && [ "$TOTAL" = "3" ]; then
-  echo "  ✓ 3 workflow ops recorded (one per node), total=3"
-else
-  echo "  ✗ workflow=$WORKFLOW_OPS total=$TOTAL"
-  echo "    full: $STATS"; FAILED=1
-fi
-
-# ── 4. Unknown mode → 400 ─────────────────────────────────────────
-echo "[workflow-smoke] unknown mode → 400:"
-HTTP="$(curl -sS -o /tmp/wf_bad.json -w '%{http_code}' -X POST \
-  http://127.0.0.1:3110/v1/observer/workflow/run \
-  -H 'Content-Type: application/json' \
-  -d '{"workflow":{"name":"bad","nodes":[{"id":"a","mode":"does.not.exist"}]}}')"
-ERR="$(jq -r '.error' < /tmp/wf_bad.json 2>/dev/null)"
-if [ "$HTTP" = "400" ] && echo "$ERR" | grep -qi "unknown mode"; then
-  echo "  ✓ unknown mode aborts with 400 + helpful error"
-else
-  echo "  ✗ http=$HTTP err=$ERR"; FAILED=1
-fi
-
-# ── 5. Real-mode chain: matrix.downgrade → distillation.score ─────
-# This proves the §3.4 components compose through the workflow runner.
-# Two pure modes, no external service deps, deterministic input/output.
-echo "[workflow-smoke] real-mode chain: downgrade → distillation.score"
-REAL_WORKFLOW='{
-  "workflow": {
-    "name": "real-mode-chain",
-    "nodes": [
-      {"id":"gate", "mode":"matrix.downgrade",
-       "inputs":{"mode":"codereview_lakehouse", "model":"x-ai/grok-4.1-fast"}},
-      {"id":"score", "mode":"distillation.score",
-       "inputs":{"record":{
-         "run_id":"r-1", "task_id":"t-1",
-         "timestamp":"2026-04-29T12:00:00Z", "schema_version":1,
-         "provenance":{"source_file":"data/_kb/scrum_reviews.jsonl",
-                       "sig_hash":"x", "recorded_at":"2026-04-29T12:00:01Z"},
-         "success_markers":["accepted_on_attempt_1"]
-       }}}
-    ]
-  }
-}'
-RUN="$(curl -sS -X POST http://127.0.0.1:3110/v1/observer/workflow/run \
-  -H 'Content-Type: application/json' -d "$REAL_WORKFLOW")"
-STATUS="$(echo "$RUN" | jq -r '.status')"
-GATE_MODE="$(echo "$RUN" | jq -r '.nodes[0].output.mode')"
-GATE_FROM="$(echo "$RUN" | jq -r '.nodes[0].output.downgraded_from')"
-SCORE_CAT="$(echo "$RUN" | jq -r '.nodes[1].output.category')"
-if [ "$STATUS" = "succeeded" ] \
-    && [ "$GATE_MODE" = "codereview_isolation" ] \
-    && [ "$GATE_FROM" = "codereview_lakehouse" ] \
-    && [ "$SCORE_CAT" = "accepted" ]; then
-  echo "  ✓ downgrade flipped lakehouse→isolation; scorer rated scrum_review attempt_1=accepted"
-else
-  echo "  ✗ status=$STATUS gate=$GATE_MODE from=$GATE_FROM score=$SCORE_CAT"
-  echo "    full: $RUN"
-  FAILED=1
-fi
-
-if [ "$FAILED" -eq 0 ]; then
-  echo "[workflow-smoke] Workflow runner acceptance: PASSED"
-  exit 0
-else
-  echo "[workflow-smoke] Workflow runner acceptance: FAILED"
-  exit 1
-fi