Compare commits

..

No commits in common. "c41698acaef4745776656ee96ffd93ecd6803e6b" and "ad1670d36a7dca80165930bc9cf61379e2930d0c" have entirely different histories.

53 changed files with 275 additions and 11539 deletions

View File

@ -44,9 +44,6 @@ func main() {
"queryd_url": cfg.Gateway.QuerydURL,
"vectord_url": cfg.Gateway.VectordURL,
"embedd_url": cfg.Gateway.EmbeddURL,
"pathwayd_url": cfg.Gateway.PathwaydURL,
"matrixd_url": cfg.Gateway.MatrixdURL,
"observerd_url": cfg.Gateway.ObserverdURL,
}
for k, v := range upstreams {
if v == "" {
@ -66,9 +63,6 @@ func main() {
querydURL := mustParseUpstream("queryd_url", cfg.Gateway.QuerydURL)
vectordURL := mustParseUpstream("vectord_url", cfg.Gateway.VectordURL)
embeddURL := mustParseUpstream("embedd_url", cfg.Gateway.EmbeddURL)
pathwaydURL := mustParseUpstream("pathwayd_url", cfg.Gateway.PathwaydURL)
matrixdURL := mustParseUpstream("matrixd_url", cfg.Gateway.MatrixdURL)
observerdURL := mustParseUpstream("observerd_url", cfg.Gateway.ObserverdURL)
storagedProxy := gateway.NewProxyHandler(storagedURL)
catalogdProxy := gateway.NewProxyHandler(catalogdURL)
@ -76,9 +70,6 @@ func main() {
querydProxy := gateway.NewProxyHandler(querydURL)
vectordProxy := gateway.NewProxyHandler(vectordURL)
embeddProxy := gateway.NewProxyHandler(embeddURL)
pathwaydProxy := gateway.NewProxyHandler(pathwaydURL)
matrixdProxy := gateway.NewProxyHandler(matrixdURL)
observerdProxy := gateway.NewProxyHandler(observerdURL)
if err := shared.Run("gateway", cfg.Gateway.Bind, func(r chi.Router) {
@ -97,12 +88,6 @@ func main() {
r.Handle("/v1/vectors/*", vectordProxy)
// Embedding service — /v1/embed
r.Handle("/v1/embed", embeddProxy)
// Pathway memory — /v1/pathway/*
r.Handle("/v1/pathway/*", pathwaydProxy)
// Matrix indexer — /v1/matrix/* (multi-corpus retrieve+merge per SPEC §3.4)
r.Handle("/v1/matrix/*", matrixdProxy)
// Observer — /v1/observer/* (autonomous-iteration witness loop)
r.Handle("/v1/observer/*", observerdProxy)
}, cfg.Auth); err != nil {
slog.Error("server", "err", err)
os.Exit(1)

View File

@ -1,295 +0,0 @@
// matrixd is the matrix indexer service. Wraps internal/matrix's
// Retriever with HTTP routes per docs/SPEC.md §3.4.
//
// Routes:
// POST /matrix/search — multi-corpus retrieve+merge,
// with optional playbook boost
// GET /matrix/corpora — list known vectord indexes
// POST /matrix/relevance — adjacency-pollution filter
// POST /matrix/downgrade — strong-model downgrade gate
// POST /matrix/playbooks/record — record a single (query → answer)
// success for the learning loop
// POST /matrix/playbooks/bulk — bulk-record N successes; useful
// for backfilling historical
// placement data into the
// playbook substrate
//
// matrixd talks to embedd (for query-text embedding) and vectord
// (for per-corpus search) via HTTP. Both URLs come from
// [matrixd] config; gateway sets them to its own upstream URLs so
// matrixd inherits the same provider topology.
package main
import (
"encoding/json"
"errors"
"flag"
"log/slog"
"net/http"
"os"
"strings"
"github.com/go-chi/chi/v5"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/matrix"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
)
// maxRequestBytes is the upper bound, in bytes, on a request body that
// decodeJSON will read: 4 MiB.
const maxRequestBytes = 4 * 1024 * 1024
// main loads the TOML config named by -config, requires both upstream
// URLs in the [matrixd] section, builds the retriever from them, and
// serves the matrix routes through shared.Run. Every failure is logged
// and terminates the process with exit status 1.
func main() {
	cfgFile := flag.String("config", "lakehouse.toml", "path to TOML config")
	flag.Parse()

	// die logs err under msg and exits; it never returns.
	die := func(msg string, err error) {
		slog.Error(msg, "err", err)
		os.Exit(1)
	}

	cfg, err := shared.LoadConfig(*cfgFile)
	if err != nil {
		die("config", err)
	}

	embeddURL, vectordURL := cfg.Matrixd.EmbeddURL, cfg.Matrixd.VectordURL
	if embeddURL == "" || vectordURL == "" {
		slog.Error("matrixd: embedd_url and vectord_url required in [matrixd]")
		os.Exit(1)
	}

	h := &handlers{r: matrix.New(embeddURL, vectordURL)}
	if err := shared.Run("matrixd", cfg.Matrixd.Bind, h.register, cfg.Auth); err != nil {
		die("server", err)
	}
}
// handlers holds the dependency shared by matrixd's HTTP handlers.
type handlers struct {
	// r is the retriever the search, corpora and playbook handlers call.
	r *matrix.Retriever
}
// register mounts every matrixd route on r. It is the callback handed
// to shared.Run by main.
func (h *handlers) register(r chi.Router) {
	routes := []struct {
		method, pattern string
		handler         http.HandlerFunc
	}{
		{http.MethodPost, "/matrix/search", h.handleSearch},
		{http.MethodGet, "/matrix/corpora", h.handleCorpora},
		{http.MethodPost, "/matrix/relevance", h.handleRelevance},
		{http.MethodPost, "/matrix/downgrade", h.handleDowngrade},
		{http.MethodPost, "/matrix/playbooks/record", h.handlePlaybookRecord},
		{http.MethodPost, "/matrix/playbooks/bulk", h.handlePlaybookBulk},
	}
	for _, rt := range routes {
		r.MethodFunc(rt.method, rt.pattern, rt.handler)
	}
}
// handleSearch serves POST /matrix/search. It decodes a
// matrix.SearchRequest, runs it through the retriever, and writes the
// result as JSON with 200. Retriever errors are translated to a status
// by writeMatrixError.
func (h *handlers) handleSearch(w http.ResponseWriter, r *http.Request) {
	var query matrix.SearchRequest
	if ok := decodeJSON(w, r, &query); !ok {
		return // decodeJSON already wrote the error response
	}
	result, err := h.r.Search(r.Context(), query)
	if err == nil {
		writeJSON(w, http.StatusOK, result)
		return
	}
	writeMatrixError(w, err)
}
// relevanceRequest is the POST /matrix/relevance body. Threshold
// defaults to matrix.DefaultRelevanceThreshold when zero.
type relevanceRequest struct {
	// Focus is passed to matrix.FilterChunks as its first argument.
	Focus matrix.FocusFile `json:"focus"`
	// Chunks are the candidates to filter; the handler rejects an empty list.
	Chunks []matrix.CandidateChunk `json:"chunks"`
	// Threshold is the filter cutoff; zero or omitted selects the default.
	Threshold float64 `json:"threshold,omitempty"`
}
// handleRelevance serves POST /matrix/relevance. It rejects a body with
// no chunks (400), substitutes matrix.DefaultRelevanceThreshold when the
// caller sent a zero threshold, and writes the matrix.FilterChunks
// result as JSON with 200.
func (h *handlers) handleRelevance(w http.ResponseWriter, r *http.Request) {
	var body relevanceRequest
	if !decodeJSON(w, r, &body) {
		return
	}
	if len(body.Chunks) < 1 {
		http.Error(w, "chunks must be non-empty", http.StatusBadRequest)
		return
	}

	var cutoff float64 = matrix.DefaultRelevanceThreshold
	if body.Threshold != 0 {
		cutoff = body.Threshold
	}

	writeJSON(w, http.StatusOK, matrix.FilterChunks(body.Focus, body.Chunks, cutoff))
}
// playbookRecordRequest is the POST /matrix/playbooks/record body.
// Corpus is optional; defaults to matrix.DefaultPlaybookCorpus.
// The same shape is reused for each element of a bulk request.
type playbookRecordRequest struct {
	// QueryText, AnswerID, AnswerCorpus, Score and Tags are forwarded
	// unchanged to matrix.NewPlaybookEntry; the resulting entry's
	// Validate method decides whether they are acceptable.
	QueryText    string   `json:"query_text"`
	AnswerID     string   `json:"answer_id"`
	AnswerCorpus string   `json:"answer_corpus"`
	Score        float64  `json:"score"`
	Tags         []string `json:"tags,omitempty"`
	// Corpus is passed as the last argument of Retriever.Record.
	Corpus string `json:"corpus,omitempty"`
}
// handlePlaybookRecord serves POST /matrix/playbooks/record. It builds
// a playbook entry from the body, answers 400 if the entry fails
// validation, records it through the retriever (502 on failure), and
// otherwise echoes the recorded entry plus its playbook ID with 200.
func (h *handlers) handlePlaybookRecord(w http.ResponseWriter, r *http.Request) {
	var body playbookRecordRequest
	if !decodeJSON(w, r, &body) {
		return
	}

	entry := matrix.NewPlaybookEntry(body.QueryText, body.AnswerID, body.AnswerCorpus, body.Score, body.Tags)
	if invalid := entry.Validate(); invalid != nil {
		http.Error(w, invalid.Error(), http.StatusBadRequest)
		return
	}

	playbookID, recErr := h.r.Record(r.Context(), entry, body.Corpus)
	if recErr != nil {
		slog.Warn("playbook record", "err", recErr)
		http.Error(w, recErr.Error(), http.StatusBadGateway)
		return
	}

	payload := map[string]any{
		"playbook_id":   playbookID,
		"query_text":    entry.QueryText,
		"answer_id":     entry.AnswerID,
		"answer_corpus": entry.AnswerCorpus,
		"score":         entry.Score,
	}
	writeJSON(w, http.StatusOK, payload)
}
// playbookBulkRequest is the POST /matrix/playbooks/bulk body —
// component C (operational rating wiring). Used to backfill
// historical placement data, or batch-record a session's worth of
// coordinator click-tracking. Each Entry is recorded independently;
// failures are reported per-entry without aborting the batch.
type playbookBulkRequest struct {
	// Entries are the records to store; the handler rejects an empty list.
	Entries []playbookRecordRequest `json:"entries"`
	// Corpus is the batch-wide fallback used for any entry whose own
	// Corpus is empty.
	Corpus string `json:"corpus,omitempty"` // applies to all if entry-level not set
}
type (
	// playbookBulkResult is the POST /matrix/playbooks/bulk response:
	// aggregate success/failure counts plus one result per submitted
	// entry, in submission order.
	playbookBulkResult struct {
		Recorded int                      `json:"recorded"`
		Failed   int                      `json:"failed"`
		Results  []playbookBulkItemResult `json:"results"`
	}

	// playbookBulkItemResult is the outcome for a single entry. Index
	// is the entry's position in the request so a caller can locate a
	// failing record directly; the handler sets PlaybookID on success
	// and Error on failure.
	playbookBulkItemResult struct {
		Index      int    `json:"index"`
		PlaybookID string `json:"playbook_id,omitempty"`
		Error      string `json:"error,omitempty"`
	}
)
// handlePlaybookBulk serves POST /matrix/playbooks/bulk. An empty
// entries list is a 400. Otherwise every entry is validated and
// recorded independently: a failing entry is reported at its own index
// and the loop moves on. The response is always 200 with per-entry
// outcomes and the recorded/failed totals.
func (h *handlers) handlePlaybookBulk(w http.ResponseWriter, r *http.Request) {
	var body playbookBulkRequest
	if !decodeJSON(w, r, &body) {
		return
	}
	total := len(body.Entries)
	if total == 0 {
		http.Error(w, "entries must be non-empty", http.StatusBadRequest)
		return
	}

	summary := playbookBulkResult{Results: make([]playbookBulkItemResult, total)}

	// fail stores err as the outcome of entry idx and bumps the failure count.
	fail := func(idx int, err error) {
		summary.Results[idx] = playbookBulkItemResult{Index: idx, Error: err.Error()}
		summary.Failed++
	}

	for idx := range body.Entries {
		item := &body.Entries[idx]

		// An entry-level corpus wins; otherwise use the batch-level one.
		target := item.Corpus
		if target == "" {
			target = body.Corpus
		}

		entry := matrix.NewPlaybookEntry(item.QueryText, item.AnswerID, item.AnswerCorpus, item.Score, item.Tags)
		if err := entry.Validate(); err != nil {
			fail(idx, err)
			continue
		}
		id, err := h.r.Record(r.Context(), entry, target)
		if err != nil {
			fail(idx, err)
			continue
		}
		summary.Results[idx] = playbookBulkItemResult{Index: idx, PlaybookID: id}
		summary.Recorded++
	}

	writeJSON(w, http.StatusOK, summary)
}
// downgradeRequest is the POST /matrix/downgrade body. Mirrors
// matrix.DowngradeInput. When ForceFullOverride is omitted from
// the body, the value falls back to matrixd's process env
// (LH_FORCE_FULL_ENRICHMENT) — an opinionated default that lets
// operators set the env var on the matrixd unit and have every
// gate decision honor it without per-request changes. Per
// 2026-04-29 cross-lineage scrum (Opus WARN): callers that want
// deterministic gate behavior independent of matrixd's env should
// pass ForceFullOverride explicitly in the body.
type downgradeRequest struct {
	// Mode and Model are both required; the handler answers 400 when
	// either is empty.
	Mode  string `json:"mode"`
	Model string `json:"model"`
	// ForcedMode is forwarded to matrix.NewDowngradeInputFromEnv.
	ForcedMode bool `json:"forced_mode,omitempty"`
	// ForceFullOverride is a pointer so an omitted field (nil) can be
	// told apart from an explicit false.
	ForceFullOverride *bool `json:"force_full_override,omitempty"`
}
// handleDowngrade serves POST /matrix/downgrade. Mode and model are
// mandatory (400 otherwise). The gate input is seeded by
// matrix.NewDowngradeInputFromEnv; an explicit force_full_override in
// the body then replaces that seeded value. The matrix.MaybeDowngrade
// result is written as JSON with 200.
func (h *handlers) handleDowngrade(w http.ResponseWriter, r *http.Request) {
	var body downgradeRequest
	if !decodeJSON(w, r, &body) {
		return
	}

	missing := body.Mode == "" || body.Model == ""
	if missing {
		http.Error(w, "mode and model are required", http.StatusBadRequest)
		return
	}

	input := matrix.NewDowngradeInputFromEnv(body.Mode, body.Model, body.ForcedMode)
	// A value supplied in the body beats the env-derived one, so tooling
	// can ask "what would the gate do under these conditions" without
	// touching the service environment.
	if override := body.ForceFullOverride; override != nil {
		input.ForceFullOverride = *override
	}

	decision := matrix.MaybeDowngrade(input)
	writeJSON(w, http.StatusOK, decision)
}
// handleCorpora serves GET /matrix/corpora. It asks the retriever for
// the corpus names and returns them with their count. A retriever
// failure is logged and reported as 502 "vectord unavailable".
func (h *handlers) handleCorpora(w http.ResponseWriter, r *http.Request) {
	corpora, err := h.r.Corpora(r.Context())
	if err == nil {
		writeJSON(w, http.StatusOK, map[string]any{
			"corpora": corpora,
			"count":   len(corpora),
		})
		return
	}
	slog.Error("matrix corpora", "err", err)
	http.Error(w, "vectord unavailable", http.StatusBadGateway)
}
// decodeJSON reads one JSON value from the request body into v, with
// the body capped at maxRequestBytes. It returns true on success. On
// failure it writes the error response itself — 413 when the cap was
// exceeded, 400 for any other decode error — and returns false, so the
// caller only has to return.
func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
	original := r.Body
	defer original.Close()
	r.Body = http.MaxBytesReader(w, original, maxRequestBytes)

	err := json.NewDecoder(r.Body).Decode(v)
	if err == nil {
		return true
	}

	var tooLarge *http.MaxBytesError
	oversized := errors.As(err, &tooLarge) ||
		strings.Contains(err.Error(), "http: request body too large")
	switch {
	case oversized:
		http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
	default:
		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
	}
	return false
}
// writeJSON sends v as a JSON response with the given status code.
// The status line is written before encoding starts, so an encode
// failure cannot change it; such a failure is only logged.
func writeJSON(w http.ResponseWriter, code int, v any) {
	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	w.WriteHeader(code)

	encodeErr := json.NewEncoder(w).Encode(v)
	if encodeErr == nil {
		return
	}
	slog.Warn("matrix write json", "err", encodeErr)
}
// writeMatrixError turns an error from internal/matrix into an HTTP
// response. Empty-corpora and empty-query errors are the caller's
// fault (400). Corpus and embed errors come from an upstream service
// (502, logged at warn). Anything else is logged at error level and
// answered with an opaque 500.
func writeMatrixError(w http.ResponseWriter, err error) {
	// matches reports whether err wraps any of the given sentinels.
	matches := func(sentinels ...error) bool {
		for _, s := range sentinels {
			if errors.Is(err, s) {
				return true
			}
		}
		return false
	}

	if matches(matrix.ErrEmptyCorpora, matrix.ErrEmptyQuery) {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	if matches(matrix.ErrCorpus, matrix.ErrEmbed) {
		slog.Warn("matrix upstream", "err", err)
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}
	slog.Error("matrix", "err", err)
	http.Error(w, "internal", http.StatusInternalServerError)
}

View File

@ -1,263 +0,0 @@
// observerd is the autonomous-iteration witness service. Port of
// the load-bearing pieces of mcp-server/observer.ts (Rust system).
//
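// Routes (all under /observer):
// GET /observer/stats — aggregate counters + recent scenarios
// POST /observer/event — record one observed op
// POST /observer/workflow/run — run a workflow, recording per-node provenance
// GET /observer/workflow/modes — list the registered workflow modes
// GET /observer/health — service liveness + ring size (not registered
// in this file; presumably supplied by shared.Run — confirm)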
//
// Deferred to follow-up commits (see internal/observer doc):
// - POST /observer/review (cloud-LLM hand review fall-back)
// - background loops (analyzeErrors, consolidatePlaybooks,
// tailOverseerCorrections)
// - failure-cluster escalation to LLM Team
//
// /relevance was already ported to internal/matrix in 9588bd8 and is
// not duplicated here.
package main
import (
	"context"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"log/slog"
	"net/http"
	"os"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/go-chi/chi/v5"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/observer"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/workflow"
)
// maxRequestBytes is the upper bound, in bytes, on a request body that
// decodeJSON will read: 4 MiB.
const maxRequestBytes = 4 * 1024 * 1024
// main loads the TOML config named by -config, optionally attaches a
// persistor to the observer store and replays it, registers the
// built-in workflow modes, and serves the observer routes through
// shared.Run. Config, persistor and server failures are logged and end
// the process with exit status 1; a failed replay is only warned about.
func main() {
	cfgFile := flag.String("config", "lakehouse.toml", "path to TOML config")
	flag.Parse()

	// die logs err under msg and exits; it never returns.
	die := func(msg string, err error) {
		slog.Error(msg, "err", err)
		os.Exit(1)
	}

	cfg, err := shared.LoadConfig(*cfgFile)
	if err != nil {
		die("config", err)
	}

	// Persistence is opt-in: with an empty persist path the persistor
	// stays nil and nothing is loaded from disk.
	persistPath := cfg.Observerd.PersistPath
	var persistor *observer.Persistor
	if persistPath != "" {
		if persistor, err = observer.NewPersistor(persistPath); err != nil {
			die("observer persistor", err)
		}
	}

	store := observer.NewStore(persistor)
	if persistor != nil {
		loaded, loadErr := store.Load()
		if loadErr != nil {
			slog.Warn("observer load", "err", loadErr, "loaded", loaded)
		} else {
			slog.Info("observer loaded", "ops", loaded, "path", persistPath)
		}
	}

	runner := workflow.NewRunner()
	// The matrixd address is taken from the gateway section of the
	// config; this function reads no observerd-specific matrixd setting.
	registerBuiltinModes(runner, store, cfg.Gateway.MatrixdURL)

	h := &handlers{store: store, runner: runner}
	if err := shared.Run("observerd", cfg.Observerd.Bind, h.register, cfg.Auth); err != nil {
		die("server", err)
	}
}
// handlers holds the dependencies shared by observerd's HTTP handlers.
type handlers struct {
	// store receives recorded ops and serves the stats snapshot.
	store *observer.Store
	// runner executes workflows and lists the registered modes.
	runner *workflow.Runner
}
// register mounts every observerd route on r. It is the callback handed
// to shared.Run by main.
func (h *handlers) register(r chi.Router) {
	routes := []struct {
		method, pattern string
		handler         http.HandlerFunc
	}{
		{http.MethodGet, "/observer/stats", h.handleStats},
		{http.MethodPost, "/observer/event", h.handleEvent},
		{http.MethodPost, "/observer/workflow/run", h.handleWorkflowRun},
		{http.MethodGet, "/observer/workflow/modes", h.handleWorkflowModes},
	}
	for _, rt := range routes {
		r.MethodFunc(rt.method, rt.pattern, rt.handler)
	}
}
// handleStats serves GET /observer/stats by writing the store's current
// stats as JSON with 200. The request itself is not inspected.
func (h *handlers) handleStats(w http.ResponseWriter, _ *http.Request) {
	snapshot := h.store.Stats()
	writeJSON(w, http.StatusOK, snapshot)
}
// handleEvent serves POST /observer/event. It decodes one ObservedOp
// and records it in the store. observer.ErrInvalidOp is reported as
// 400; any other record failure is logged and reported as an opaque
// 500. On success it answers 200 with the store's total after the
// insert.
func (h *handlers) handleEvent(w http.ResponseWriter, r *http.Request) {
	var op observer.ObservedOp
	if !decodeJSON(w, r, &op) {
		return
	}

	switch err := h.store.Record(op); {
	case err == nil:
		// recorded; fall through to the success response below
	case errors.Is(err, observer.ErrInvalidOp):
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	default:
		slog.Error("observer record", "err", err)
		http.Error(w, "internal", http.StatusInternalServerError)
		return
	}

	writeJSON(w, http.StatusOK, map[string]any{
		"accepted":  true,
		"ring_size": h.store.Stats().Total,
	})
}
// workflowRunRequest is the POST /observer/workflow/run body — a
// Workflow definition in JSON form (matches Archon's YAML shape but
// JSON-serialized for the HTTP path).
type workflowRunRequest struct {
	// Workflow is passed as-is to the runner's Run method.
	Workflow workflow.Workflow `json:"workflow"`
}
// handleWorkflowRun serves POST /observer/workflow/run. It decodes a
// workflow definition, runs it, and records one ObservedOp per node in
// the result before looking at the run error, so the nodes reported by
// an aborted run are recorded as well.
//
// Responses:
//   - 200 with the run result when the runner reports no error.
//   - 400 with {"error", "result"} when the runner returns an error.
//
// A failure to record a node's provenance is logged and does not
// affect the response.
func (h *handlers) handleWorkflowRun(w http.ResponseWriter, r *http.Request) {
	var body workflowRunRequest
	if !decodeJSON(w, r, &body) {
		return
	}
	res, err := h.runner.Run(r.Context(), body.Workflow)

	// Record per-node provenance into the observer ring — same shape
	// as any other ObservedOp so the existing /observer/stats
	// aggregation surfaces workflow ops alongside scenario ops without
	// a schema change.
	// NOTE(review): res.Nodes is read before err is checked, so this
	// assumes Run returns a usable result even on failure — confirm.
	name := body.Workflow.Name
	for _, n := range res.Nodes {
		op := observer.ObservedOp{
			Endpoint:      "/observer/workflow/run/" + name + "/" + n.NodeID,
			InputSummary:  fmt.Sprintf("workflow=%s node=%s mode=%s", name, n.NodeID, n.Mode),
			Success:       n.Error == "",
			DurationMs:    n.DurationMs,
			OutputSummary: summarizeOutput(n.Output),
			Source:        observer.Source("workflow"),
			Error:         n.Error,
			Timestamp:     n.StartedAt.UTC().Format(time.RFC3339Nano),
		}
		if recErr := h.store.Record(op); recErr != nil {
			slog.Warn("workflow run: provenance record failed", "err", recErr)
		}
	}

	if err != nil {
		// Aborting errors (cycle, missing dep, unknown mode) — surface
		// as 4xx because the workflow definition itself is wrong.
		slog.Warn("workflow run aborted", "err", err)
		writeJSON(w, http.StatusBadRequest, map[string]any{
			"error":  err.Error(),
			"result": res,
		})
		return
	}
	writeJSON(w, http.StatusOK, res)
}
// handleWorkflowModes serves GET /observer/workflow/modes. It returns
// the runner's registered modes together with their count. The request
// itself is not inspected.
func (h *handlers) handleWorkflowModes(w http.ResponseWriter, _ *http.Request) {
	registered := h.runner.Modes()
	payload := map[string]any{
		"modes": registered,
		"count": len(registered),
	}
	writeJSON(w, http.StatusOK, payload)
}
// summarizeOutput renders a workflow node's output map as the string
// stored in ObservedOp.OutputSummary.
//
// A nil map yields "(nil)". A map that cannot be marshalled yields
// "(marshal err: …)". Otherwise the result is the map's JSON encoding,
// cut to at most 256 bytes and suffixed with "...(truncated)" when it
// was longer. The cut never lands inside a multi-byte UTF-8 sequence,
// so the summary is always valid UTF-8.
func summarizeOutput(output map[string]any) string {
	const maxSummaryBytes = 256

	if output == nil {
		return "(nil)"
	}
	bs, err := json.Marshal(output)
	if err != nil {
		return fmt.Sprintf("(marshal err: %v)", err)
	}
	if len(bs) <= maxSummaryBytes {
		return string(bs)
	}

	// json.Marshal leaves non-ASCII characters unescaped, so the byte at
	// the limit may be a continuation byte. Back up to the start of that
	// rune and drop it whole rather than emit half of it.
	cut := maxSummaryBytes
	for cut > 0 && !utf8.RuneStart(bs[cut]) {
		cut--
	}
	return string(bs[:cut]) + "...(truncated)"
}
// registerBuiltinModes registers every mode the runner ships with, in
// a fixed order:
//
//   - fixture.echo and fixture.upper — trivial modes that let a smoke
//     test exercise the runner without any real backend;
//   - matrix.relevance, matrix.downgrade, distillation.score and
//     drift.scorer — wrappers exported by the workflow package;
//   - matrix.search — HTTP-backed, registered only when matrixdURL is
//     non-empty, using a client with a 30-second timeout.
//
// Real-mode follow-ups still pending:
//   - playbook.record (HTTP to matrixd)
//   - playbook.lookup (HTTP to matrixd)
//   - llm.chat (HTTP to gateway /v1/chat)
//
// store is accepted but not used yet.
func registerBuiltinModes(r *workflow.Runner, store *observer.Store, matrixdURL string) {
	_ = store // reserved for future modes that need self-provenance

	// echo returns a shallow copy of its input.
	echo := func(_ workflow.Context, input map[string]any) (map[string]any, error) {
		copied := make(map[string]any, len(input))
		for key, val := range input {
			copied[key] = val
		}
		return copied, nil
	}
	// upper upper-cases input["prompt"]; a missing or non-string prompt
	// is treated as the empty string.
	upper := func(_ workflow.Context, input map[string]any) (map[string]any, error) {
		text, _ := input["prompt"].(string)
		return map[string]any{"upper": strings.ToUpper(text)}, nil
	}

	r.RegisterMode("fixture.echo", echo)
	r.RegisterMode("fixture.upper", upper)

	r.RegisterMode("matrix.relevance", workflow.MatrixRelevance)
	r.RegisterMode("matrix.downgrade", workflow.MatrixDowngrade)
	r.RegisterMode("distillation.score", workflow.DistillationScore)
	r.RegisterMode("drift.scorer", workflow.DriftScorer)

	// Without a matrixd address there is nothing for matrix.search to call.
	if matrixdURL == "" {
		return
	}
	client := &http.Client{Timeout: 30 * time.Second}
	r.RegisterMode("matrix.search", workflow.MatrixSearch(matrixdURL, client))
}
// Blank reference that keeps the "context" import in use: nothing else
// in this file names the package directly (handlers obtain a context
// through the http.Request.Context method instead).
var _ = context.Background
// decodeJSON reads one JSON value from the request body into v, with
// the body capped at maxRequestBytes. It returns true on success. On
// failure it writes the error response itself — 413 when the cap was
// exceeded, 400 for any other decode error — and returns false, so the
// caller only has to return.
func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
	original := r.Body
	defer original.Close()
	r.Body = http.MaxBytesReader(w, original, maxRequestBytes)

	err := json.NewDecoder(r.Body).Decode(v)
	if err == nil {
		return true
	}

	var tooLarge *http.MaxBytesError
	oversized := errors.As(err, &tooLarge) ||
		strings.Contains(err.Error(), "http: request body too large")
	switch {
	case oversized:
		http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
	default:
		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
	}
	return false
}
// writeJSON sends v as a JSON response with the given status code.
// The status line is written before encoding starts, so an encode
// failure cannot change it; such a failure is only logged.
func writeJSON(w http.ResponseWriter, code int, v any) {
	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	w.WriteHeader(code)

	encodeErr := json.NewEncoder(w).Encode(v)
	if encodeErr == nil {
		return
	}
	slog.Warn("observer write json", "err", encodeErr)
}

View File

@ -1,278 +0,0 @@
// pathwayd is the pathway memory service. Wraps internal/pathway's
// Store with HTTP routes for the Mem0-style operations defined in
// ADR-004.
//
// Routes (all under /pathway):
// POST /pathway/add — new trace with fresh UID
// POST /pathway/add_idempotent — UID-keyed add or replay-bump
// POST /pathway/update — replace content for an existing UID
// POST /pathway/revise — new revision linked to predecessor
// POST /pathway/retire — mark trace retired (excluded from search)
// GET /pathway/get/{uid} — fetch one trace (incl. retired)
// GET /pathway/history/{uid} — backward chain via predecessor links
// POST /pathway/search — filter-based listing
// GET /pathway/stats — total/active/retired counters
//
// Persistence: optional. Empty [pathwayd].persist_path = in-memory
// only (matches vectord G1's pattern). Set a path for durable
// per-trace JSONL append.
package main
import (
"encoding/json"
"errors"
"flag"
"log/slog"
"net/http"
"os"
"strings"
"github.com/go-chi/chi/v5"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/pathway"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
)
// maxRequestBytes is the upper bound, in bytes, on a request body that
// decodeJSON will read: 4 MiB.
const maxRequestBytes = 4 * 1024 * 1024
// main loads the TOML config named by -config, optionally attaches a
// persistor to the pathway store and replays it, and serves the pathway
// routes through shared.Run. Config, persistor and server failures are
// logged and end the process with exit status 1; a failed replay is
// only warned about.
func main() {
	cfgFile := flag.String("config", "lakehouse.toml", "path to TOML config")
	flag.Parse()

	// die logs err under msg and exits; it never returns.
	die := func(msg string, err error) {
		slog.Error(msg, "err", err)
		os.Exit(1)
	}

	cfg, err := shared.LoadConfig(*cfgFile)
	if err != nil {
		die("config", err)
	}

	// Persistence is opt-in: with an empty persist path the persistor
	// stays nil and the store is in-memory only.
	persistPath := cfg.Pathwayd.PersistPath
	var persistor *pathway.Persistor
	if persistPath != "" {
		if persistor, err = pathway.NewPersistor(persistPath); err != nil {
			die("pathway persistor", err)
		}
	}

	store := pathway.NewStore(persistor)
	if persistor != nil {
		loaded, loadErr := store.Load()
		if loadErr != nil {
			slog.Warn("pathway load", "err", loadErr, "loaded", loaded)
		} else {
			slog.Info("pathway loaded", "events", loaded, "path", persistPath)
		}
	}

	h := &handlers{store: store}
	if err := shared.Run("pathwayd", cfg.Pathwayd.Bind, h.register, cfg.Auth); err != nil {
		die("server", err)
	}
}
// handlers holds the dependency shared by pathwayd's HTTP handlers.
type handlers struct {
	// store is the pathway trace store every handler operates on.
	store *pathway.Store
}
// register mounts every pathwayd route on r. It is the callback handed
// to shared.Run by main.
func (h *handlers) register(r chi.Router) {
	routes := []struct {
		method, pattern string
		handler         http.HandlerFunc
	}{
		{http.MethodPost, "/pathway/add", h.handleAdd},
		{http.MethodPost, "/pathway/add_idempotent", h.handleAddIdempotent},
		{http.MethodPost, "/pathway/update", h.handleUpdate},
		{http.MethodPost, "/pathway/revise", h.handleRevise},
		{http.MethodPost, "/pathway/retire", h.handleRetire},
		{http.MethodGet, "/pathway/get/{uid}", h.handleGet},
		{http.MethodGet, "/pathway/history/{uid}", h.handleHistory},
		{http.MethodPost, "/pathway/search", h.handleSearch},
		{http.MethodGet, "/pathway/stats", h.handleStats},
	}
	for _, rt := range routes {
		r.MethodFunc(rt.method, rt.pattern, rt.handler)
	}
}
// ── request shapes ───────────────────────────────────────────────
// Request bodies for the POST routes. Content is kept as raw JSON in
// every shape that carries it, so these types impose no schema on it.
type (
	// addRequest is the POST /pathway/add body.
	addRequest struct {
		Content json.RawMessage `json:"content"`
		Tags    []string        `json:"tags,omitempty"`
	}

	// addIdempotentRequest is the POST /pathway/add_idempotent body;
	// the caller supplies the UID.
	addIdempotentRequest struct {
		UID     string          `json:"uid"`
		Content json.RawMessage `json:"content"`
		Tags    []string        `json:"tags,omitempty"`
	}

	// updateRequest is the POST /pathway/update body.
	updateRequest struct {
		UID     string          `json:"uid"`
		Content json.RawMessage `json:"content"`
	}

	// reviseRequest is the POST /pathway/revise body; PredecessorUID
	// names the trace being revised.
	reviseRequest struct {
		PredecessorUID string          `json:"predecessor_uid"`
		Content        json.RawMessage `json:"content"`
		Tags           []string        `json:"tags,omitempty"`
	}

	// retireRequest is the POST /pathway/retire body.
	retireRequest struct {
		UID string `json:"uid"`
	}

	// searchRequest is the POST /pathway/search body. Every field is
	// optional and is copied one-to-one into a pathway.SearchFilter.
	searchRequest struct {
		Tag             string `json:"tag,omitempty"`
		ContentContains string `json:"content_contains,omitempty"`
		CreatedAfterNs  int64  `json:"created_after_ns,omitempty"`
		CreatedBeforeNs int64  `json:"created_before_ns,omitempty"`
		IncludeRetired  bool   `json:"include_retired,omitempty"`
	}
)
// ── handlers ────────────────────────────────────────────────────
// handleAdd serves POST /pathway/add. It stores the body's content and
// tags as a new trace and answers 201 with the stored trace. Store
// errors are translated by writeStoreError.
func (h *handlers) handleAdd(w http.ResponseWriter, r *http.Request) {
	var body addRequest
	if ok := decodeJSON(w, r, &body); !ok {
		return
	}
	trace, err := h.store.Add(body.Content, body.Tags...)
	if failed := writeStoreError(w, err); failed {
		return
	}
	writeJSON(w, http.StatusCreated, trace)
}
// handleAddIdempotent serves POST /pathway/add_idempotent. It passes
// the caller-supplied UID, content and tags to the store's
// AddIdempotent and answers 200 with the trace it returns. Store errors
// are translated by writeStoreError.
func (h *handlers) handleAddIdempotent(w http.ResponseWriter, r *http.Request) {
	var body addIdempotentRequest
	if ok := decodeJSON(w, r, &body); !ok {
		return
	}
	trace, err := h.store.AddIdempotent(body.UID, body.Content, body.Tags...)
	if failed := writeStoreError(w, err); failed {
		return
	}
	writeJSON(w, http.StatusOK, trace)
}
// handleUpdate serves POST /pathway/update. It replaces the content of
// the trace named by uid and answers 200 {"status":"updated"}. Store
// errors are translated by writeStoreError.
func (h *handlers) handleUpdate(w http.ResponseWriter, r *http.Request) {
	var body updateRequest
	if !decodeJSON(w, r, &body) {
		return
	}
	updateErr := h.store.Update(body.UID, body.Content)
	if writeStoreError(w, updateErr) {
		return
	}
	ack := map[string]any{"status": "updated"}
	writeJSON(w, http.StatusOK, ack)
}
// handleRevise serves POST /pathway/revise. It asks the store for a
// revision of predecessor_uid carrying the new content and tags, and
// answers 201 with the resulting trace. Store errors are translated by
// writeStoreError.
func (h *handlers) handleRevise(w http.ResponseWriter, r *http.Request) {
	var body reviseRequest
	if ok := decodeJSON(w, r, &body); !ok {
		return
	}
	revision, err := h.store.Revise(body.PredecessorUID, body.Content, body.Tags...)
	if failed := writeStoreError(w, err); failed {
		return
	}
	writeJSON(w, http.StatusCreated, revision)
}
// handleRetire serves POST /pathway/retire. It retires the trace named
// by uid and answers 204 with no body. Store errors are translated by
// writeStoreError.
func (h *handlers) handleRetire(w http.ResponseWriter, r *http.Request) {
	var body retireRequest
	if !decodeJSON(w, r, &body) {
		return
	}
	retireErr := h.store.Retire(body.UID)
	if writeStoreError(w, retireErr) {
		return
	}
	w.WriteHeader(http.StatusNoContent)
}
// handleGet serves GET /pathway/get/{uid}. It looks up the trace named
// by the uid path parameter and answers 200 with it. Store errors are
// translated by writeStoreError.
func (h *handlers) handleGet(w http.ResponseWriter, r *http.Request) {
	trace, err := h.store.Get(chi.URLParam(r, "uid"))
	if failed := writeStoreError(w, err); failed {
		return
	}
	writeJSON(w, http.StatusOK, trace)
}
// handleHistory serves GET /pathway/history/{uid}. It answers 200 with
// the chain the store's History returns for the uid path parameter,
// plus the chain's length. Store errors are translated by
// writeStoreError.
func (h *handlers) handleHistory(w http.ResponseWriter, r *http.Request) {
	lineage, err := h.store.History(chi.URLParam(r, "uid"))
	if failed := writeStoreError(w, err); failed {
		return
	}
	payload := map[string]any{
		"chain":  lineage,
		"length": len(lineage),
	}
	writeJSON(w, http.StatusOK, payload)
}
// handleSearch serves POST /pathway/search. The body's fields are
// copied one-to-one into a pathway.SearchFilter; the matching traces
// are returned with their count and status 200.
func (h *handlers) handleSearch(w http.ResponseWriter, r *http.Request) {
	var body searchRequest
	if !decodeJSON(w, r, &body) {
		return
	}

	filter := pathway.SearchFilter{
		Tag:             body.Tag,
		ContentContains: body.ContentContains,
		CreatedAfterNs:  body.CreatedAfterNs,
		CreatedBeforeNs: body.CreatedBeforeNs,
		IncludeRetired:  body.IncludeRetired,
	}
	matches := h.store.Search(filter)

	payload := map[string]any{
		"results": matches,
		"count":   len(matches),
	}
	writeJSON(w, http.StatusOK, payload)
}
// handleStats serves GET /pathway/stats by writing the store's current
// stats as JSON with 200. The request itself is not inspected.
func (h *handlers) handleStats(w http.ResponseWriter, _ *http.Request) {
	snapshot := h.store.Stats()
	writeJSON(w, http.StatusOK, snapshot)
}
// ── helpers ────────────────────────────────────────────────────
// decodeJSON reads one JSON value from the request body into v, with
// the body capped at maxRequestBytes. It returns true on success. On
// failure it writes the error response itself — 413 when the cap was
// exceeded, 400 for any other decode error — and returns false, so the
// caller only has to return.
func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
	original := r.Body
	defer original.Close()
	r.Body = http.MaxBytesReader(w, original, maxRequestBytes)

	err := json.NewDecoder(r.Body).Decode(v)
	if err == nil {
		return true
	}

	var tooLarge *http.MaxBytesError
	oversized := errors.As(err, &tooLarge) ||
		strings.Contains(err.Error(), "http: request body too large")
	switch {
	case oversized:
		http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
	default:
		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
	}
	return false
}
// writeJSON sends v as a JSON response with the given status code.
// The status line is written before encoding starts, so an encode
// failure cannot change it; such a failure is only logged.
func writeJSON(w http.ResponseWriter, code int, v any) {
	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	w.WriteHeader(code)

	encodeErr := json.NewEncoder(w).Encode(v)
	if encodeErr == nil {
		return
	}
	slog.Warn("pathway write json", "err", encodeErr)
}
// writeStoreError translates an error from internal/pathway into an
// HTTP response and reports whether it wrote one. A nil err writes
// nothing and returns false, so the caller carries on. Any non-nil err
// produces a response and returns true, so the caller should return.
//
// Mapping: not-found and predecessor-missing → 404; empty UID and
// invalid content → 400; cycle → 409. Any other error is logged and
// answered with an opaque 500.
func writeStoreError(w http.ResponseWriter, err error) bool {
	if err == nil {
		return false
	}

	status := http.StatusInternalServerError
	switch {
	case errors.Is(err, pathway.ErrNotFound),
		errors.Is(err, pathway.ErrPredecessorMissing):
		status = http.StatusNotFound
	case errors.Is(err, pathway.ErrEmptyUID),
		errors.Is(err, pathway.ErrInvalidContent):
		status = http.StatusBadRequest
	case errors.Is(err, pathway.ErrCycle):
		status = http.StatusConflict
	}

	if status == http.StatusInternalServerError {
		// Unrecognised error: keep the detail in the log, not the response.
		slog.Error("pathway store", "err", err)
		http.Error(w, "internal", status)
		return true
	}
	http.Error(w, err.Error(), status)
	return true
}

View File

@ -274,18 +274,21 @@ func (h *handlers) handleAdd(w http.ResponseWriter, r *http.Request) {
return
}
}
// Pre-validation above is exhaustive (id, dim, finite, zero-norm),
// so BatchAdd takes the write-lock once and pushes the whole batch
// into coder/hnsw via one variadic Graph.Add. Saves N-1 lock
// acquisitions per HTTP batch.
batch := make([]vectord.BatchItem, len(req.Items))
for j, it := range req.Items {
batch[j] = vectord.BatchItem{ID: it.ID, Vector: it.Vector, Metadata: it.Metadata}
}
if err := idx.BatchAdd(batch); err != nil {
slog.Error("batch add", "name", name, "err", err)
http.Error(w, "internal", http.StatusInternalServerError)
return
if err := idx.Add(it.ID, it.Vector, it.Metadata); err != nil {
// Vector-validation errors (NaN/Inf, zero-norm under
// cosine) only surface here; pre-validation is intentional
// minimal scope (id + dim only).
if errors.Is(err, vectord.ErrDimensionMismatch) ||
strings.Contains(err.Error(), "non-finite") ||
strings.Contains(err.Error(), "zero-norm") {
http.Error(w, "items["+strconv.Itoa(j)+"]: "+err.Error(), http.StatusBadRequest)
return
}
slog.Error("add", "name", name, "id", it.ID, "err", err)
http.Error(w, "internal", http.StatusInternalServerError)
return
}
}
// One save per batch (post-loop), not per item. Per scrum
// O-W4-style discipline: HTTP-batch boundary is the natural unit.

View File

@ -242,123 +242,6 @@ need rotate-without-restart.
---
## ADR-004: Pathway memory data model — Mem0-style versioned traces
**Date:** 2026-04-29
**Decided by:** J + Claude
**Status:** Decided — substrate landing in `internal/pathway/`
**Decision:** Pathway memory is an append-only event log of opaque
traces with Mem0-style semantics: Add / Update / Revise / Retire /
History / Search. Each trace has a UID; revisions chain backward
via `predecessor_uid` so the full history is reconstructible.
Persistence is JSONL append-only with full-replay on load;
corruption recovery skips bad lines without halting startup.
### Operations
| Op | Effect |
|---|---|
| `Add(content, tags...)` | New UID, stored fresh, replay_count=1. |
| `AddIdempotent(uid, content, tags...)` | If UID exists → replay_count++. Else → Add with that UID. |
| `Update(uid, content)` | In-place content replacement (same UID). Bumps `updated_at_ns`. NOT a revision — same trace, new content. |
| `Revise(predecessorUID, content, tags...)` | New UID with `predecessor_uid` set. Old trace stays accessible via History. Failure modes: predecessor missing → error; predecessor retired → still allowed (revisions of retired traces are valid). |
| `Retire(uid)` | Sets `retired=true`. Excluded from `Search` by default; still accessible via `Get` and `History`. |
| `Get(uid)` | Returns the trace (including if retired); error on missing. |
| `History(uid)` | Walks `predecessor_uid` chain backward, returns slice [self, parent, grandparent, ...]. Cycle-detected via visited-set; returns error on cycle (which only happens if persistence file was hand-edited). |
| `Search(filter)` | Returns matching traces. Default excludes retired; opt in via `IncludeRetired: true`. Filters: tag-match, content-substring, time range. |
### Why Mem0-style + Why these specific ops
- **Mem0** (memory pattern from the OpenAI Memories paper / Mem0 lib)
is the canonical "agent memory" interface for the same reason
Markdown is the canonical text format: it's the
lowest-common-denominator that the entire ecosystem assumes.
Adopting it lets agent loops written against any Mem0-aware
substrate work here.
- Update vs Revise are deliberately separate. Update is "I noticed
a typo in my note." Revise is "I now believe something different
than I did when I wrote this; preserve the old belief for audit."
Conflating them loses the audit trail.
- Retire vs Delete is deliberate. Retire stops a trace from
surfacing in search but preserves it for history reconstruction.
Delete (which we don't expose) would break references.
### Trace data shape
```go
type Trace struct {
UID string // UUID v4 unless caller provides one
Content json.RawMessage // opaque, schema is caller's contract
PredecessorUID string // empty if root revision
CreatedAtNs int64
UpdatedAtNs int64
Retired bool
ReplayCount int // ≥1 for any stored trace
Tags []string // for Search
}
```
`Content` is opaque JSON (not a struct) so callers can store any
shape — the data model doesn't constrain semantics. Callers add
their own validators on top.
### Persistence
JSONL append-only log under `_pathway/<store_name>.jsonl`. Each
mutation appends one JSON line:
```
{"op":"add", "trace":{...}}
{"op":"update", "uid":"…", "content":"…"}
{"op":"revise", "trace":{…}} # trace.PredecessorUID is set
{"op":"retire", "uid":"…"}
{"op":"replay", "uid":"…"} # idempotent re-add hit
```
On startup, replay every line in order, building in-memory state.
A malformed line logs a warn and is skipped; load continues.
Corruption tolerance is non-optional — partial state is better
than no state for an agent substrate.
Compaction is a future concern. A 100K-trace log replays in
seconds; below that scale, JSONL append is the simplest correct
choice. When compaction lands, the format will be: snapshot file
(full state JSON) + tail JSONL since snapshot. Detect snapshot,
load it, then replay tail.
### Cycle safety
UIDs are generated server-side via `uuid.New()` (existing dep —
catalogd uses it). New UID for every Add and Revise. The data
model itself can't form cycles — every Revise points at an
EXISTING uid, and the new uid didn't exist a moment ago.
History walks defensively anyway: visited-set tracks UIDs seen
this walk; if we encounter a duplicate, return error. Protects
against corruption (manual edit, bug in a future op) without
constraining the happy path.
### Storage location
JSONL file path is configurable per store. Default:
`/var/lib/lakehouse/pathway/<name>.jsonl` for prod; tests use
`t.TempDir()`. Persistence is OPTIONAL — empty path means
in-memory only (matches vectord G1's pattern).
### What this ADR does NOT do
- **No HTTP surface decision.** Whether `cmd/pathwayd` is its own
binary or routes get added to `cmd/vectord` is the next ADR's
concern. The substrate is a pure library either way.
- **No vector index integration.** Pathway traces can carry a
vector embedding in `Content` (caller decides), but this ADR
doesn't define how the substrate integrates with `vectord`'s
HNSW indexes. That's the staffing co-pilot's design problem
when those layers compose.
- **No agent-loop semantics.** "When does an agent ADD vs
REVISE?" is a workflow decision, not a substrate decision.
---
(Future ADRs from ADR-005 onward will be added as the Go
implementation accrues design decisions — e.g. observer fail-safe
semantics, distillation rebuild, gRPC adapter wire format, etc.)
(Future ADRs from ADR-004 onward will be added as the Go
implementation accrues design decisions — e.g. HNSW parameter
choices, pathway-memory hash function, auditor model rotation, etc.)

View File

@ -9,61 +9,6 @@ estimates, library choices, and acceptance gates.
---
## Product vision — what we're actually building
**The Go refactor isn't the goal. The goal is a small-model-driven autonomous pipeline that gets better with each run, with frontier models in audit/oversight and humans triaged in only for the genuinely abstract cases.**
The Rust Lakehouse already has most of the pieces:
- **Pathway memory** (`internal/pathway` in Go, 88 Rust traces preserved) — what we tried, what worked
- **Matrix indexer** (SPEC §3.4) — multi-corpus retrieve+merge that gives the small model the right knowledge slice for *this* task
- **Observer** — watches runs, refines configs, escalates
- **Distillation v1.0.0** (`e7636f2`) — turns successful runs into denser playbooks
- **Auditor cross-lineage fabric** — Kimi/Haiku/Opus oversight on small-model outputs
What the Go refactor is FOR: a second-language pass surfaces architectural weaknesses that Rust hid. The pipeline has to pull together cleanly *as a pipeline* — not as 15 crates that happen to interact.
### The five-loop substrate
1. **Knowledge pathway loop** — pathway memory + matrix indexer give the small model context for the task. Pathway answers "what worked last time?"; matrix answers "what's relevant now?"
2. **Execution loop** — small model runs on focused context. Frontier API calls are reserved for audit/escalation, not the inner loop. Cost + rate limits stay sane.
3. **Observer loop** — watches each run, refines the configs (matrix corpus picks, downgrade gate, prompt mold) that got the model to a good pathway. Outputs new config, not new prompt.
4. **Rating + distillation loop** — successful outcomes get scored and folded back into the playbook substrate. The playbook gets denser; the next run starts smarter.
5. **Drift loop** — quantify when the distilled playbook stops matching reality (codebase changed, contracts shifted, profiles updated). Drift is a *measured* signal, not "hope nothing broke."
### The gate
**The playbook + matrix indexer must produce the results we're looking for.** That's the single load-bearing acceptance criterion. Throughput, scaling, code elegance — all secondary. If a deep-field reality test on the 500K corpus surfaces wrong answers, the loop isn't working and we fix that before adding anything else.
### Observer as system resource (clarified 2026-04-29)
The observer is not a service among services — it's a *system
resource*. Its job is to be objective about the process: watch
everything, record measurements, surface what worked vs what
didn't, feed the KB so the playbook substrate can decide the
right pathway to the correct outcome.
The bare-bones observerd shipped in `bc9ab93` (event ingest +
stats) is the substrate for this. The architectural pattern
that grows it into the full "objective measurement engine" is
the **multi-pass workflow runner** documented in SPEC §3.8 —
inspired by Archon (`/home/profit/external/Archon`) and proven
in the Rust `observer-kb` branch's Python prototypes (`deep_analysis.py`,
`extract_knowledge.py`, `process_knowledge.py`).
The pipeline mode-chain (extract → validator → hallucination →
consensus → redteam → pipeline → render) IS how the observer
makes actionable decisions: each mode pass is a deterministic
measurement; what survives the gauntlet is what feeds the KB.
### Triage / human-in-loop
Most cases are concrete enough that small-model + pathway + matrix can complete them. Some are too abstract for that — they need a human. The system's job is to **identify which is which** and only escalate the second class. Frontier models partially solve this internally with their thinking loops; we're externalizing it so:
- Small models are swappable (vendor independence)
- Drift is measurable (quantitative signal, not vibes)
- Each loop iteration is auditable (the pathway memory IS the audit trail)
This is what the auditor cross-lineage fabric proves out in Rust — Opus auto-promote on diffs >100k chars is the same pattern: triage by signal, not by guesswork.
## Direction pivot — why this PRD exists
The Rust-first Lakehouse (15 crates, ~24 unmerged commits past PR #11,

View File

@ -28,7 +28,6 @@ Effort scale (one engineer-week = ~40h focused work):
| `queryd` | datafusion, arrow | `cmd/queryd` | **`duckdb/duckdb-go/v2`** (cgo, official) | **HARD** | high — see §3 |
| `ingestd` | csv, json, lopdf, postgres | `cmd/ingestd` | stdlib `encoding/csv`, `encoding/json`, `pdfcpu/pdfcpu`, `jackc/pgx/v5` | **L** | low |
| `vectord` | hora, arrow, hnsw | `cmd/vectord` | `coder/hnsw`, `apache/arrow-go/v18` | **L** | medium — re-validate HNSW recall |
| **matrix indexer** (emergent in Rust — `mode.rs` + `build_*_corpus.ts` + observer `/relevance`) | scripts/build_*_corpus.ts, crates/gateway/src/v1/mode.rs, mcp-server/observer.ts | `internal/matrix/` + gateway routes (`/v1/matrix/*`) | stdlib + vectord client | **L** | medium — see §3.4. Corpus-as-shard composer; relevance filter; strong-model downgrade gate; multi-corpus retrieve+merge. The learning-loop layer that lifts vectord from "static index" to "meta-index that learns from playbooks." |
| `vectord-lance` | lance | **DROPPED** | n/a | n/a | n/a — Parquet+HNSW only |
| `journald` | parquet, arrow | `cmd/journald` | `apache/arrow-go/v18` | **M** | low |
| `aibridge` | reqwest | library | `net/http` + connection pool · `anthropics/anthropic-sdk-go` available for direct Claude calls (currently routed via opencode) | **S** | low |
@ -117,287 +116,6 @@ needs revisiting in Go to confirm the sidecar format we ship.
- G3.2.C — Recall@10 within 2% of Rust baseline on
`lakehouse_arch_v1`
### §3.4 — Matrix indexer (corpus-as-shard composer)
**What it is.** The matrix indexer is the layer above `vectord` that
turns a fleet of single-corpus HNSW indexes into a learning meta-index.
In the Rust system this is emergent — split between corpus builders
(`scripts/build_*_corpus.ts`), the mode runner (`crates/gateway/src/v1/mode.rs`),
the observer relevance endpoint (`mcp-server/observer.ts`), and the
strong-model downgrade gate (`mode.rs::execute`). In Go we name it
explicitly so future sessions don't reduce it to "vectord."
**Why corpus-as-shard, not shard-by-id.** Sharding a single index by
hash(id) is a pure throughput hack with a recall tax. Sharding by
corpus is the existing retrieval shape — `lakehouse_arch_v1`,
`lakehouse_symbols_v1`, `scrum_findings_v1`, `lakehouse_answers_v1`,
`kb_team_runs_v1`, `successful_playbooks_live`, etc. — each with
distinct topology and a distinct retrieval intent. Concurrent Adds
parallelize naturally because they go to different corpora; the
matrix layer's job is to retrieve+merge across them, filter for
relevance, and downgrade composition when strong models prove the
matrix is anti-additive.
**Components to port (in dependency order):**
1. **Corpus builders** — Go equivalents of `scripts/build_*_corpus.ts`.
For each named corpus, a builder that reads source, splits into
chunks per the corpus's schema, embeds via `/v1/embed`, and adds
to a vectord index of the same name. Effort: **M** for the first
builder, **S** for each subsequent.
2. **Multi-corpus retrieve+merge** (`internal/matrix/retrieve.go`) —
given a query and a list of corpus names, search each at top_k=K,
merge by score, return top N globally. Match Rust's pattern:
top_k=6 per corpus, top 8 globally before relevance filter.
3. **Relevance filter** (`internal/matrix/relevance.go`) — port the
threshold-based filter from `mcp-server/observer.ts:/relevance`.
Drops adjacency-pollution chunks that share a corpus with the hit
but aren't actually about the query. `LH_RELEVANCE_FILTER` /
`LH_RELEVANCE_THRESHOLD` env knobs preserved.
4. **Strong-model downgrade gate** (`internal/matrix/downgrade.go`) —
port `is_weak_model` + the `codereview_lakehouse → codereview_isolation`
flip from `mode.rs::execute`. Pass5 proved composed corpora lose
5/5 vs isolation on grok-4.1-fast (p=0.031); the gate is
load-bearing for paid-model retrieval quality.
5. **Learning-loop integration** — write outcomes back to a
playbook-memory corpus (probably `lakehouse_answers_v1` analogue).
This is what makes the matrix INDEX a learning system rather than
static retrieval. Per `feedback_meta_index_vision.md`: this is the
north star, not the data structure.
**Gateway routes:** `/v1/matrix/search` (multi-corpus retrieve+merge),
`/v1/matrix/corpora` (list + metadata), `/v1/matrix/relevance` (filter
endpoint, used by both internal callers and external tooling).
**Acceptance gates:**
- G3.4.A — `/v1/matrix/search` against ≥3 corpora returns merged top-N
with corpus attribution per result.
- G3.4.B — Relevance filter drops at least the threshold-margin chunks
on a known adjacency-pollution test case.
- G3.4.C — Strong-model downgrade gate flips composed→isolation when
the model is non-weak; bypassed when caller sets `force_mode`.
- G3.4.D — Concurrent Adds across N=4 corpora parallelize (no shared
write-lock); Add throughput scales near-linearly with corpus count.
**Persistence:** each corpus's vectord index persists via the existing
G1P LHV1 format. The matrix layer is stateless above that — corpus
list lives in catalog, retrieval params in config.
**Why this is its own §3.x:** in Rust the matrix indexer was emergent
and got reduced to "we have vectord" in earlier port-planning. The
SPEC names it explicitly so the port preserves the multi-corpus
retrieval shape AND the learning loop, not just the HNSW substrate.
### §3.5 — Drift quantification (loop 5 of the PRD)
**What it is.** PRD names "drift" as the 5th loop: quantify when
historical decisions stop matching current reality. Distinct from
the rating+distillation loop because drift is MEASUREMENT, not
LEARNING. The learning loop says "this match worked, remember it";
the drift loop says "this 4-month-old playbook entry — does it
still match what the substrate would surface today?"
**What's shipped (commit `be65f85`):**
- SCORER drift: re-runs current `distillation.ScoreRecord` over
historical (EvidenceRecord, persisted_category) pairs and
reports mismatches + a sorted shift matrix
- `internal/drift/drift.go` — pure-function `ComputeScorerDrift`
- 6 unit tests covering no-drift, shift detection, multi-shift
sorted-by-count, includeEntries flag, empty input, scorer-version
stamping
**Future drift shapes (not shipped):**
- PLAYBOOK drift: re-run playbook queries through current
matrix-search; recorded answer not in top-K = drift
- EMBEDDING drift: KS-test on vector distribution at T1 vs T2
- AUDIT BASELINE drift: matches Rust `audit_baselines.jsonl`
longitudinal signal
**Acceptance gates:**
- G3.5.A — A scorer-version bump triggers a non-zero `Drifted` count
on a corpus of historical ScoredRuns where the new logic produces
different categories than the persisted ones.
- G3.5.B — `ScorerDriftReport.ShiftMatrix` is deterministic-ordered
(count desc, ties broken alphabetically) so JSON output is stable
across runs.
### §3.6 — Staffing-side structured filter
**What it is.** Reality tests on the candidates + workers corpora
(commits `0d1553c`, `a97881d`) surfaced that pure semantic retrieval
can't gate by location/status/availability — the matrix indexer
returns Production Workers for a Forklift+OSHA-30 query because
nomic-embed-text's geometry doesn't separate the role labels well.
Structured filtering is the addressable piece: pre-filter the
candidate set on metadata fields BEFORE semantic ranking.
**What's shipped (commit `b199093`):**
- `SearchRequest.MetadataFilter` — `map[string]any` of metadata
field → expected value (single value or list-of-values for OR
semantics within a key, AND across keys)
- Post-retrieval filter applied before top-K truncation in
`internal/matrix/retrieve.go`
- `SearchResponse.MetadataFilterDropped` for telemetry on filter
aggressiveness
- 7 unit tests covering nil filter, missing metadata, exact match,
AND across keys, OR within list, bool match, malformed JSON
**Deferred:**
- Pre-retrieval SQL gate via `queryd` (the actual hybrid). The
post-retrieval filter is an MVP that helps when the candidate
set is mostly relevant; for aggressive filters that drop most
results, a SQL pre-filter into matrix retrieval would surface
the right candidates with less wasted embedding work.
- Filter language richer than equality (e.g. range, prefix, regex).
**Acceptance gates:**
- G3.6.A — `MetadataFilter: {"state": "IL"}` against a mixed-state
corpus drops every non-IL result; `MetadataFilterDropped` reports
the count.
- G3.6.B — List filter `{"state": ["IL", "WI"]}` keeps both states,
drops the rest (OR within key).
- G3.6.C — Multi-key filter is AND: a result missing any key is
dropped, no exception.
### §3.7 — Operational rating wiring
**What it is.** PRD loop 4 (rating + distillation) needs real
inflows to be a learning system rather than a substrate. The
playbook-record endpoint (`06e7152`) takes one (query, answer,
score) per call; productizing it into actual signal sources is what
makes the system get smarter with use.
**What's shipped (commit `6392772`):**
- `POST /v1/matrix/playbooks/bulk` — bulk-record N successes;
per-entry success/failure response so callers can see which of
a 4,701-row historical placement import succeeded vs which
failed validation.
- Single-record path from `06e7152` unchanged.
**Deferred:**
- UI shim for click-tracking (no Go demo UI yet — the Bun demo at
`devop.live/lakehouse/` is still serving the public surface).
When the Go UI lands or a feedback API is added to the Bun UI,
every coordinator click → bulk-batched POST → playbook entry.
- Negative feedback (this match didn't work). Currently only
positive scores are recorded; a rejection signal would help the
learning loop avoid pushing bad matches.
- Time-decay on playbook scores so stale recommendations attenuate.
**Acceptance gates:**
- G3.7.A — Bulk POST of N entries returns `{recorded, failed,
results[]}` with per-entry IDs/errors, no single-entry failure
aborting the batch.
- G3.7.B — Each recorded entry surfaces in `/v1/matrix/search` with
`use_playbook=true` after a re-query.
### §3.8 — Observer-KB workflow runner (Archon-style multi-pass)
**What it is.** The architectural pattern documented in the Rust
`observer-kb` branch (10 commits ahead of main, never merged) and
proven by `/home/profit/external/Archon`'s workflow engine. Multiple
mode passes processing data, with each pass an objective measurement
that contributes to the KB:
```
Raw data
↓ Mode: EXTRACT structured facts/entities/relationships
↓ Mode: VALIDATOR fact-check, confidence 1-10
↓ Mode: HALLUCINATION verify each claim, flag likely fabrications
↓ Mode: CONSENSUS multiple passes until extraction converges
↓ Mode: REDTEAM attack what survived, patch what fails
↓ Mode: PIPELINE clean → Q&A structure → topic group → rank
↓ RENDER curated doc anchored on questions
```
This is the *orchestrator* missing from §3.4 components 1-5: each
SPEC §3.4/§3.5 piece (relevance, downgrade, scorer, drift) is a "mode";
what's missing is the workflow engine that chains them.
**Why it matters.** Per the PRD's product vision: the observer
should make actionable decisions based on watching what's
successful. The workflow runner is how observers compose modes
into multi-pass pipelines that score outcomes rigorously enough
to feed the KB and inform the playbook substrate.
**Reference materials on the system:**
- `/home/profit/lakehouse/.archon/workflows/lakehouse-architect-review.yaml`
(committed `69919d9` in main) — proves Archon-via-Lakehouse
works with a 3-node `shape → weakness → improvement` workflow
- `/home/profit/external/Archon` — the upstream workflow engine
(cloned 2026-04-26); `packages/providers/src/community/pi/provider.ts`
has the local Lakehouse-routing mod committed locally as
`3f2afc8` (not pushed to upstream `coleam00/Archon`)
- Rust `observer-kb` branch (10 commits, +4338/-55506 LoC) —
`apps/observer-kb/docs/PRD.md` documents the multi-pass
architecture; `scripts/{deep_analysis,extract_knowledge,process_knowledge}.py`
are the Python prototypes that proved it on real ChatGPT/Claude
PDF data (496 topics, 300 decisions, 100 insights extracted)
**Components to port (in dependency order):**
1. **Workflow definition** (`internal/workflow/types.go`) — YAML
schema matching Archon's shape: `name`, `description`, `provider`,
`model`, list of `nodes` each with `id`, `prompt`, `allowed_tools`,
`effort`, `idle_timeout`, `depends_on`. The depends_on edges form
a DAG; the runner resolves topologically.
2. **Node executor** (`internal/workflow/runner.go`) — given a
workflow and a starting context, walks the DAG, executes each
node by dispatching to the configured backend (matrix.Search,
distillation.ScoreRecord, drift.ComputeScorerDrift, or a generic
prompt-against-LLM via gateway `/v1/chat`), captures per-node
output, makes it available as `$<node_id>.output` in subsequent
nodes.
3. **Provenance recording** — every node execution lands an
ObservedOp (via the observerd substrate from `bc9ab93`) with
`source: "workflow"`, the workflow name + node ID, input/output
summaries, and timing. The ring buffer + JSONL log become the
substrate for the rating+distillation loop's KB feed.
4. **Mode catalog** (`internal/workflow/modes.go`) — registry of
the modes the runner can dispatch to. Each mode is a Go function
matching a uniform `func(ctx, input map[string]any) (map[string]any, error)`
signature so workflows can compose them. Initial modes from
§3.4: `matrix.search`, `matrix.relevance`, `matrix.downgrade`,
`playbook.record`, `playbook.lookup`, `distillation.score`,
`drift.scorer`. Plus `llm.chat` for free-form mode prompts.
5. **HTTP surface** — `POST /v1/observer/workflow/run` accepts a
workflow YAML body + a starting context; returns the per-node
results + the chain of ObservedOps generated. `GET
/v1/observer/workflow/list` lists workflows in a known directory
for operator discoverability.
**Why integrate into observerd, not a new service.** The observer
is the system resource that watches and records. Workflows ARE
observation patterns — multi-step processes whose every step is
recorded. Putting the runner inside observerd keeps the
"measurement → KB feed" wiring tight; a separate service would
re-implement the recording layer.
**Acceptance gates:**
- G3.8.A — Load a workflow YAML matching the Archon `lakehouse-architect-review.yaml`
shape; runner executes the 3-node DAG topologically.
- G3.8.B — Each node execution lands an ObservedOp with
`source: "workflow"` and the node's input/output. Stats endpoint
shows the workflow ops.
- G3.8.C — A node referencing `$<prior_node>.output` in its prompt
resolves correctly; missing reference is a clear error not a
silent empty string.
- G3.8.D — Mode catalog dispatches `matrix.search` invocation to
the matrixd backend without going through HTTP (in-process
function call when matrixd is co-resident).
**Status:** PORT TARGET, not yet started. SPEC commits the design;
implementation is its own wave (estimated **L** effort given the
DAG runner + mode dispatch + provenance recording).
### §3.3 — UI (HTMX)
**Approach:** server-rendered Go templates using `html/template`,

View File

@ -1,437 +0,0 @@
// Package corpusingest is the generalized text→vector ingestion
// pipeline. Originally extracted from scripts/staffing_500k/main.go;
// reusable by any corpus-builder script that needs to embed a stream
// of (id, text, metadata) rows and push them into a vectord index.
//
// Design: per-corpus Source impls own the parsing/column-mapping;
// this package owns the parallel-embed dispatcher, batching, vectord
// index lifecycle, and progress reporting. Adding a corpus is one
// Source struct + one main.go that calls Run; no copy-pasted pipeline.
//
// Per docs/SPEC.md §3.4 component 1 (corpus builders): this is the
// substrate the rest of the matrix indexer's value depends on. Get
// the pipeline right, then iterate on builders.
package corpusingest
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"sync"
"sync/atomic"
"time"
)
// Row is a single logical document headed for a corpus index.
//
// ID must be non-empty (the drain loop aborts the run on an empty
// ID); a row whose Text is empty is skipped with a warning. Metadata
// accepts any JSON-marshalable value — a struct, a map, or an
// already-encoded json.RawMessage — and is marshaled once per row
// before the row is pushed to vectord.
type Row struct {
	ID, Text string
	Metadata any
}
// Source produces a stream of rows. Source lifecycle (open/close) is
// owned by the caller; this package only consumes Next() until io.EOF.
//
// Within a Run, Next is called sequentially by the drain loop, one
// call at a time, while the embed workers run concurrently on
// batches that were already read.
type Source interface {
// Next returns the next row or io.EOF when the source is drained.
// Other errors cause Run to abort with the error wrapped (the wrap
// adds the index of the row that failed).
Next() (Row, error)
}
// Config drives one Run. Defaults match the Ollama-on-A4000 sweet
// spot from the 500K validation; override per-deployment if needed.
//
// Run passes the value through applyDefaults and validateConfig
// before use. NOTE(review): both helpers live outside this chunk, so
// the defaults quoted per field below are taken from the existing
// field comments — confirm against applyDefaults.
type Config struct {
GatewayURL string // default "http://127.0.0.1:3110"
IndexName string // required
Dimension int // required, must match the embed model output
Distance string // default "cosine"
EmbedModel string // optional; empty = embedd's default
EmbedBatch int // default 16, texts per /v1/embed call
// EmbedWorkers also sizes the job queue: Run buffers the channel
// between the drain loop and the workers at 2× this value.
EmbedWorkers int // default 8, parallel embed goroutines
// NOTE(review): AddBatch is not read by any code visible in this
// chunk; addBatch issues one add request per embed job — confirm
// whether this knob is still honored elsewhere.
AddBatch int // default 1000, items per /v1/vectors/index/add call
// Limit is compared against the count of rows pulled from the
// Source, which includes rows skipped for having empty text.
Limit int // 0 = no limit (process all rows)
DropExisting bool // true = DELETE index first; false = idempotent reuse
// HTTPClient carries every gateway request (index delete/create,
// embed, add). NOTE(review): presumably defaulted when nil by
// applyDefaults — confirm; the HTTP helpers call it unguarded.
HTTPClient *http.Client
// LogProgress is the interval between progress logs. 0 disables.
LogProgress time.Duration
}
// Stats summarizes one Run.
//
//   - Scanned: rows pulled from the Source, including rows skipped
//     because their text was empty.
//   - Embedded: vectors returned by embed calls whose result count
//     matched the batch size.
//   - Added: items accepted by a successful add call.
//   - Wall: elapsed time measured from just before index preparation.
//   - FailedBatches: embed-or-add batches that errored and were
//     skipped (partial-failure semantics).
//
// Whenever FailedBatches is non-zero Run returns ErrPartialFailure,
// so a caller cannot mistake "1 of 313 batches succeeded" for a
// successful run.
type Stats struct {
	Scanned, Embedded, Added int64
	Wall                     time.Duration
	FailedBatches            int64
}
// ErrPartialFailure signals that one or more batches errored during
// Run. Stats.FailedBatches has the count; the caller decides
// whether to retry / log / abort. Run returns it wrapped (with the
// batch and row counts appended to the message), so detect it with
// errors.Is(err, ErrPartialFailure) rather than ==.
//
// History, per the 2026-04-29 cross-lineage scrum (Opus WARN): the
// original behavior returned nil even when 100% of batches failed
// silently, making "embedded=0/scanned=N" look like an empty corpus
// rather than a broken pipeline.
var ErrPartialFailure = errors.New("corpusingest: one or more batches failed")
// Run drives one ingest: it prepares the vectord index, fans batches
// out to cfg.EmbedWorkers embed goroutines, and waits for every
// in-flight job to finish before reporting.
//
// A config or index-preparation problem returns a zero Stats and the
// error. Once the pipeline is running, a failed embed or add batch is
// logged and counted rather than aborting the run (partial-failure
// semantics). The returned error is then, in priority order: the
// error from draining the source (which includes context
// cancellation), an error wrapping ErrPartialFailure if any batch
// failed, or nil. Stats is populated in all three of those cases.
func Run(ctx context.Context, cfg Config, src Source) (Stats, error) {
	cfg = applyDefaults(cfg)
	if err := validateConfig(cfg); err != nil {
		return Stats{}, err
	}

	started := time.Now()
	if err := prepareIndex(ctx, cfg); err != nil {
		return Stats{}, fmt.Errorf("prepare index: %w", err)
	}

	var embedded, added, failed atomic.Int64

	// process takes one batch through embed → add. Every failure is
	// logged and counted, never propagated: one wedged batch must not
	// throw away the other workers' progress, and the non-zero failure
	// count makes Run return ErrPartialFailure so the signal cannot be
	// missed.
	process := func(j job) {
		vecs, err := embedBatch(ctx, cfg, j.texts)
		if err != nil {
			slog.Warn("corpusingest: embed batch failed",
				"index", cfg.IndexName, "items", len(j.texts), "err", err)
			failed.Add(1)
			return
		}
		// A degraded embed backend may hand back fewer vectors than
		// texts; indexing vecs by id position in addBatch would panic.
		if got, want := len(vecs), len(j.ids); got != want {
			slog.Warn("corpusingest: embed returned wrong count",
				"index", cfg.IndexName, "want", want, "got", got)
			failed.Add(1)
			return
		}
		embedded.Add(int64(len(vecs)))

		if err := addBatch(ctx, cfg, j.ids, vecs, j.metas); err != nil {
			slog.Warn("corpusingest: add batch failed",
				"index", cfg.IndexName, "items", len(j.ids), "err", err)
			failed.Add(1)
			return
		}
		added.Add(int64(len(j.ids)))
	}

	jobs := make(chan job, cfg.EmbedWorkers*2)
	var workers sync.WaitGroup
	for w := 0; w < cfg.EmbedWorkers; w++ {
		workers.Add(1)
		go func() {
			defer workers.Done()
			for j := range jobs {
				process(j)
			}
		}()
	}

	// progressDone is closed by the reporter goroutine on exit, or
	// right away when reporting is disabled, so the wait below never
	// blocks on a goroutine that was not started.
	stopProgress := make(chan struct{})
	progressDone := make(chan struct{})
	if cfg.LogProgress <= 0 {
		close(progressDone)
	} else {
		go func() {
			defer close(progressDone)
			tick := time.NewTicker(cfg.LogProgress)
			defer tick.Stop()
			for {
				select {
				case <-stopProgress:
					return
				case <-ctx.Done():
					return
				case <-tick.C:
					slog.Info("corpusingest: progress",
						"index", cfg.IndexName,
						"embedded", embedded.Load(),
						"added", added.Load())
				}
			}
		}()
	}

	scanned, drainErr := drainSource(ctx, cfg, src, jobs)
	close(jobs)
	workers.Wait()
	// Without this explicit stop the reporter would only exit on ctx
	// cancellation and Run would hang on the receive below.
	close(stopProgress)
	<-progressDone

	stats := Stats{
		Scanned:       scanned,
		Embedded:      embedded.Load(),
		Added:         added.Load(),
		Wall:          time.Since(started),
		FailedBatches: failed.Load(),
	}
	switch {
	case drainErr != nil:
		return stats, drainErr
	case stats.FailedBatches > 0:
		return stats, fmt.Errorf("%w: %d batches failed (embedded=%d added=%d scanned=%d)",
			ErrPartialFailure, stats.FailedBatches, stats.Embedded, stats.Added, stats.Scanned)
	}
	return stats, nil
}
// drainSource pulls rows from src, groups them into batches of
// cfg.EmbedBatch, and hands each batch to the embed workers via jobs.
//
// It returns the number of rows pulled from src (rows skipped for
// empty text included) and:
//   - nil when src reports io.EOF (possibly wrapped) or cfg.Limit
//     rows have been pulled;
//   - ctx.Err() when the context is cancelled;
//   - a wrapped error when src fails, a row has an empty ID, or a
//     row's metadata cannot be marshaled.
//
// Every exit path first dispatches the rows accumulated so far, so
// no accepted row is silently dropped. The caller owns jobs and is
// responsible for closing it.
func drainSource(ctx context.Context, cfg Config, src Source, jobs chan<- job) (int64, error) {
	var (
		ids     = make([]string, 0, cfg.EmbedBatch)
		texts   = make([]string, 0, cfg.EmbedBatch)
		metas   = make([]json.RawMessage, 0, cfg.EmbedBatch)
		scanned int64
	)

	// flush hands the pending batch to the workers. The slices are
	// reallocated rather than truncated because the job now owns the
	// old backing arrays.
	flush := func() {
		if len(ids) == 0 {
			return
		}
		jobs <- job{ids: ids, texts: texts, metas: metas}
		ids = make([]string, 0, cfg.EmbedBatch)
		texts = make([]string, 0, cfg.EmbedBatch)
		metas = make([]json.RawMessage, 0, cfg.EmbedBatch)
	}

	for {
		// Checked before every pull so rows skipped for empty text
		// count against the limit too; checking only after an append
		// let the loop read past Limit whenever the Limit-th row was
		// a skipped one.
		if cfg.Limit > 0 && scanned >= int64(cfg.Limit) {
			flush()
			return scanned, nil
		}
		if err := ctx.Err(); err != nil {
			flush()
			return scanned, err
		}

		row, err := src.Next()
		if errors.Is(err, io.EOF) {
			flush()
			return scanned, nil
		}
		if err != nil {
			flush()
			return scanned, fmt.Errorf("source row %d: %w", scanned, err)
		}
		if row.ID == "" {
			flush()
			return scanned, fmt.Errorf("source row %d: empty id", scanned)
		}
		// Empty Text would 400 at embedd; skip-with-warn rather than
		// abort the whole run — a stray empty row shouldn't kill 500K.
		if row.Text == "" {
			slog.Warn("corpusingest: skipping row with empty text",
				"index", cfg.IndexName, "id", row.ID)
			scanned++
			continue
		}
		meta, err := marshalMeta(row.Metadata)
		if err != nil {
			flush()
			return scanned, fmt.Errorf("row %s: marshal metadata: %w", row.ID, err)
		}

		ids = append(ids, row.ID)
		texts = append(texts, row.Text)
		metas = append(metas, meta)
		scanned++
		if len(ids) >= cfg.EmbedBatch {
			flush()
		}
	}
}
// job carries one batch from drainSource to an embed worker. The
// three slices are parallel: position i of ids, texts and metas all
// describe the same row. It holds only slice headers so the buffered
// jobs channel stays cheap.
type job struct {
	ids, texts []string
	metas      []json.RawMessage
}
// marshalMeta turns a row's metadata into raw JSON. A nil value
// yields nil (no metadata); a json.RawMessage is passed through
// untouched and unvalidated; anything else goes through json.Marshal,
// whose error is returned as-is.
func marshalMeta(v any) (json.RawMessage, error) {
	switch m := v.(type) {
	case nil:
		return nil, nil
	case json.RawMessage:
		return m, nil
	default:
		return json.Marshal(m)
	}
}
// prepareIndex makes sure the vectord index named cfg.IndexName
// exists before ingestion starts.
//
// When cfg.DropExisting is set the index is deleted first. A missing
// index is not an error (httpDelete reports 404 as success), but any
// other delete failure aborts: carrying on would let the create call
// below answer 409 and silently reuse the very index the caller asked
// to drop, mixing stale vectors into the new corpus.
//
// The create call is idempotent from the caller's point of view:
// 201 means the index was created, 409 means it already exists and is
// reused as-is. Any other status, or a transport error, is returned.
func prepareIndex(ctx context.Context, cfg Config) error {
	indexURL := cfg.GatewayURL + "/v1/vectors/index"

	if cfg.DropExisting {
		if err := httpDelete(ctx, cfg.HTTPClient, indexURL+"/"+cfg.IndexName); err != nil {
			return fmt.Errorf("drop existing index %q: %w", cfg.IndexName, err)
		}
	}

	// Marshal cannot fail for a map of two strings and an int, so the
	// error is deliberately ignored.
	body, _ := json.Marshal(map[string]any{
		"name":      cfg.IndexName,
		"dimension": cfg.Dimension,
		"distance":  cfg.Distance,
	})
	code, msg, err := httpPost(ctx, cfg.HTTPClient, indexURL, body)
	if err != nil {
		return err
	}

	switch code {
	case http.StatusCreated:
		slog.Info("corpusingest: created index",
			"name", cfg.IndexName, "dim", cfg.Dimension, "distance", cfg.Distance)
	case http.StatusConflict:
		// Already exists — vectord didn't change params on conflict.
		// Caller's responsibility to ensure existing dim/distance match.
		slog.Info("corpusingest: index already exists, reusing", "name", cfg.IndexName)
	default:
		return fmt.Errorf("create index %d: %s", code, msg)
	}
	return nil
}
// embedBatch posts texts to the gateway's /v1/embed endpoint and
// returns the vectors from the response's "vectors" field.
// cfg.EmbedModel is sent as "model" only when non-empty. A transport
// error, a non-200 status, or an undecodable body is returned as an
// error. The vector count is not checked here; callers must compare
// it with len(texts).
func embedBatch(ctx context.Context, cfg Config, texts []string) ([][]float32, error) {
	payload := map[string]any{"texts": texts}
	if model := cfg.EmbedModel; model != "" {
		payload["model"] = model
	}
	// Marshal cannot fail for a payload made only of strings.
	reqBody, _ := json.Marshal(payload)

	status, preview, raw, err := httpPostRaw(ctx, cfg.HTTPClient, cfg.GatewayURL+"/v1/embed", reqBody)
	switch {
	case err != nil:
		return nil, err
	case status != http.StatusOK:
		return nil, fmt.Errorf("embed status %d: %s", status, preview)
	}

	var decoded struct {
		Vectors [][]float32 `json:"vectors"`
	}
	if err := json.Unmarshal(raw, &decoded); err != nil {
		return nil, fmt.Errorf("embed decode: %w", err)
	}
	return decoded.Vectors, nil
}
// addBatch sends one add request for the given ids to
// /v1/vectors/index/{IndexName}/add. ids, vecs and metas are parallel
// slices: element i of each describes the same item. Any status other
// than 200 is returned as an error with the response preview.
func addBatch(ctx context.Context, cfg Config, ids []string, vecs [][]float32, metas []json.RawMessage) error {
	type wireItem struct {
		ID       string          `json:"id"`
		Vector   []float32       `json:"vector"`
		Metadata json.RawMessage `json:"metadata,omitempty"`
	}

	// Add-batch may exceed cfg.AddBatch when EmbedBatch divides into it
	// non-evenly; vectord handles that fine. Keep one HTTP per job.
	// The slice is allocated up front so an empty batch still encodes as
	// [] rather than null.
	batch := make([]wireItem, 0, len(ids))
	for i, id := range ids {
		batch = append(batch, wireItem{ID: id, Vector: vecs[i], Metadata: metas[i]})
	}
	encoded, _ := json.Marshal(map[string]any{"items": batch})

	endpoint := cfg.GatewayURL + "/v1/vectors/index/" + cfg.IndexName + "/add"
	status, preview, err := httpPost(ctx, cfg.HTTPClient, endpoint, encoded)
	switch {
	case err != nil:
		return err
	case status != http.StatusOK:
		return fmt.Errorf("add status %d: %s", status, preview)
	}
	return nil
}
// ── HTTP helpers — small, no extra deps ─────────────────────────
// httpPost is httpPostRaw without the full response body: it returns the
// status code, the truncated body preview, and any error.
func httpPost(ctx context.Context, hc *http.Client, url string, body []byte) (int, string, error) {
	status, preview, _, err := httpPostRaw(ctx, hc, url, body)
	return status, preview, err
}
// httpPostRaw POSTs body as application/json to url and reads the whole
// response.
//
// Returns, in order: the HTTP status code, a preview of the response body
// capped at 256 bytes (meant for error messages), the full response body,
// and an error. When building or sending the request fails the status is
// 0; when reading the body fails the status is still reported but both
// the preview and the body are empty.
func httpPostRaw(ctx context.Context, hc *http.Client, url string, body []byte) (int, string, []byte, error) {
	const previewLimit = 256

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		return 0, "", nil, err
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := hc.Do(req)
	if err != nil {
		return 0, "", nil, err
	}
	defer resp.Body.Close()

	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		return resp.StatusCode, "", nil, err
	}

	cut := len(payload)
	if cut > previewLimit {
		cut = previewLimit
	}
	return resp.StatusCode, string(payload[:cut]), payload, nil
}
// httpDelete issues a DELETE to url and drains the response body.
// Statuses below 400 and 404 Not Found count as success; any other
// status is reported as "delete status N".
func httpDelete(ctx context.Context, hc *http.Client, url string) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodDelete, url, nil)
	if err != nil {
		return err
	}

	resp, err := hc.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// The body is not needed; it is read to the end before closing.
	_, _ = io.Copy(io.Discard, resp.Body)

	status := resp.StatusCode
	if status < 400 || status == http.StatusNotFound {
		return nil
	}
	return fmt.Errorf("delete status %d", status)
}
// ── config validation + defaults ────────────────────────────────
// applyDefaults returns a copy of cfg with unset knobs filled in. cfg is
// received by value, so the caller's struct is not modified.
//
// Defaults applied:
//   - GatewayURL   "" → "http://127.0.0.1:3110"
//   - Distance     "" → "cosine"
//   - EmbedBatch   <= 0 → 16
//   - EmbedWorkers <= 0 → 8
//   - AddBatch     <= 0 → 1000
//   - HTTPClient   nil → client with a 5-minute timeout
//   - LogProgress  negative → 0
func applyDefaults(cfg Config) Config {
	// Transport.
	if cfg.HTTPClient == nil {
		cfg.HTTPClient = &http.Client{Timeout: 5 * time.Minute}
	}
	if cfg.GatewayURL == "" {
		cfg.GatewayURL = "http://127.0.0.1:3110"
	}

	// Index parameters.
	if cfg.Distance == "" {
		cfg.Distance = "cosine"
	}

	// Batching and concurrency.
	if cfg.AddBatch <= 0 {
		cfg.AddBatch = 1000
	}
	if cfg.EmbedWorkers <= 0 {
		cfg.EmbedWorkers = 8
	}
	if cfg.EmbedBatch <= 0 {
		cfg.EmbedBatch = 16
	}

	// A negative progress interval is clamped to zero.
	if cfg.LogProgress < 0 {
		cfg.LogProgress = 0
	}
	return cfg
}
// validateConfig rejects configurations that lack the two fields with no
// usable default: IndexName must be non-empty and Dimension must be
// positive. IndexName is checked first.
func validateConfig(cfg Config) error {
	switch {
	case cfg.IndexName == "":
		return errors.New("corpusingest: IndexName is required")
	case cfg.Dimension <= 0:
		return errors.New("corpusingest: Dimension must be > 0")
	}
	return nil
}

View File

@ -1,455 +0,0 @@
package corpusingest
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"sync"
"testing"
"time"
)
// fakeGateway records the embed + add calls corpusingest fires and
// returns canned responses. The whole point of the unit test is to
// validate the pipeline shape (request payloads, batching, stats)
// without needing live embedd/vectord.
type fakeGateway struct {
// mu is held by the handlers while they read or update the recording
// fields below.
mu sync.Mutex
// embedCalls counts /v1/embed requests whose body decoded successfully.
embedCalls int
embedTexts [][]string // texts per call
// addCalls counts .../add requests whose body decoded successfully.
addCalls int
addItems [][]addItem // items per call
// createCalled is set when a POST to /v1/vectors/index arrives.
createCalled bool
// deleteCalled is set when a DELETE arrives on a non-/add path under
// /v1/vectors/index/.
deleteCalled bool
indexConflict bool // simulate "index already exists" → 409
// embedDimension is the length of every synthesized embedding vector.
embedDimension int
}
// addItem is one element of the "items" array that the fake gateway
// decodes from an add request body.
type addItem struct {
ID string `json:"id"`
Vector []float32 `json:"vector"`
// Metadata is kept as raw JSON so the fake does not need to know its shape.
Metadata json.RawMessage `json:"metadata,omitempty"`
}
// newFakeGateway returns a fake gateway whose embed handler produces
// vectors of length dim. All recording fields start at their zero value.
func newFakeGateway(dim int) *fakeGateway {
	gw := new(fakeGateway)
	gw.embedDimension = dim
	return gw
}
// handler builds the mux that stands in for the gateway. It serves three
// routes:
//
//   - POST /v1/vectors/index         → 201, or 409 when indexConflict is set
//   - /v1/embed                      → deterministic vectors, one per text
//   - /v1/vectors/index/{name}/add   → records the items, replies {"added":N}
//   - DELETE /v1/vectors/index/...   → 204 (any non-/add path under the prefix)
//
// Every request that reaches a recording branch is noted on f under f.mu.
func (f *fakeGateway) handler() http.Handler {
	createIndex := func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "wrong method", http.StatusMethodNotAllowed)
			return
		}
		f.mu.Lock()
		f.createCalled = true
		alreadyExists := f.indexConflict
		f.mu.Unlock()

		if alreadyExists {
			http.Error(w, "exists", http.StatusConflict)
			return
		}
		w.WriteHeader(http.StatusCreated)
	}

	embed := func(w http.ResponseWriter, r *http.Request) {
		var payload struct {
			Texts []string `json:"texts"`
		}
		if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}

		// Deterministic vectors: component j of the i-th vector is i+j+1.
		vectors := make([][]float32, 0, len(payload.Texts))
		for i := range payload.Texts {
			vec := make([]float32, f.embedDimension)
			for j := range vec {
				vec[j] = float32(i + j + 1)
			}
			vectors = append(vectors, vec)
		}

		// Record a private copy of the texts so the log does not alias
		// the decoded request slice.
		recorded := append([]string(nil), payload.Texts...)
		f.mu.Lock()
		f.embedCalls++
		f.embedTexts = append(f.embedTexts, recorded)
		f.mu.Unlock()

		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(map[string]any{
			"vectors":   vectors,
			"dimension": f.embedDimension,
			"model":     "fake-embed",
		})
	}

	indexOps := func(w http.ResponseWriter, r *http.Request) {
		// /v1/vectors/index/{name}/add is the only non-DELETE route here.
		isAdd := strings.HasSuffix(r.URL.Path, "/add")
		switch {
		case !isAdd && r.Method == http.MethodDelete:
			f.mu.Lock()
			f.deleteCalled = true
			f.mu.Unlock()
			w.WriteHeader(http.StatusNoContent)
			return
		case !isAdd:
			http.Error(w, "unhandled "+r.URL.Path, http.StatusNotFound)
			return
		}

		var payload struct {
			Items []addItem `json:"items"`
		}
		if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}

		batch := append([]addItem(nil), payload.Items...)
		f.mu.Lock()
		f.addCalls++
		f.addItems = append(f.addItems, batch)
		f.mu.Unlock()

		_, _ = io.WriteString(w, `{"added":`+fmt.Sprint(len(payload.Items))+`}`)
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/v1/vectors/index", createIndex)
	mux.HandleFunc("/v1/embed", embed)
	mux.HandleFunc("/v1/vectors/index/", indexOps)
	return mux
}
// staticSource yields a fixed slice of rows, one per Next call, in order.
type staticSource struct {
// rows is the fixed sequence handed out by Next.
rows []Row
// i is the index of the next row to return.
i int
}
// Next returns the next row and advances the cursor. Once every row has
// been handed out it returns a zero Row and io.EOF.
func (s *staticSource) Next() (Row, error) {
	if s.i < len(s.rows) {
		row := s.rows[s.i]
		s.i++
		return row, nil
	}
	return Row{}, io.EOF
}
// TestRun_PipelineShapeAndStats ingests 50 rows with an embed batch of 16
// and checks the stats, the number of embed/add calls seen by the fake
// gateway, and that every row ID is added exactly once with a vector of
// the configured dimension.
func TestRun_PipelineShapeAndStats(t *testing.T) {
	const dim = 4
	gateway := newFakeGateway(dim)
	server := httptest.NewServer(gateway.handler())
	defer server.Close()

	var rows []Row
	for i := 0; i < 50; i++ {
		rows = append(rows, Row{
			ID:       fmt.Sprintf("r-%03d", i),
			Text:     fmt.Sprintf("row %d text", i),
			Metadata: map[string]any{"i": i, "kind": "test"},
		})
	}

	cfg := Config{
		GatewayURL:   server.URL,
		IndexName:    "test_corpus",
		Dimension:    dim,
		Distance:     "cosine",
		EmbedBatch:   16,
		EmbedWorkers: 4,
		HTTPClient:   server.Client(),
		LogProgress:  0,
	}
	stats, err := Run(context.Background(), cfg, &staticSource{rows: rows})
	if err != nil {
		t.Fatalf("Run: %v", err)
	}

	// Stats reported by Run.
	if stats.Scanned != 50 {
		t.Errorf("Scanned: want 50, got %d", stats.Scanned)
	}
	if stats.Embedded != 50 {
		t.Errorf("Embedded: want 50, got %d", stats.Embedded)
	}
	if stats.Added != 50 {
		t.Errorf("Added: want 50, got %d", stats.Added)
	}

	// Calls observed by the gateway.
	if !gateway.createCalled {
		t.Error("expected create-index to be called")
	}
	// 50 rows / 16 batch = ceil(50/16) = 4 batches → 4 embed calls + 4 add calls
	if gateway.embedCalls != 4 {
		t.Errorf("embedCalls: want 4 (50 rows / 16 batch), got %d", gateway.embedCalls)
	}
	if gateway.addCalls != 4 {
		t.Errorf("addCalls: want 4, got %d", gateway.addCalls)
	}

	// Texts across all embed calls must total 50.
	embedded := 0
	for _, batch := range gateway.embedTexts {
		embedded += len(batch)
	}
	if embedded != 50 {
		t.Errorf("total embedded texts: want 50, got %d", embedded)
	}

	// IDs across all add calls must be every r-NNN exactly once.
	ids := make(map[string]bool)
	for _, batch := range gateway.addItems {
		for _, item := range batch {
			if ids[item.ID] {
				t.Errorf("duplicate id in add stream: %s", item.ID)
			}
			ids[item.ID] = true
			if got := len(item.Vector); got != dim {
				t.Errorf("vector dim: want %d, got %d", dim, got)
			}
		}
	}
	if len(ids) != 50 {
		t.Errorf("unique ids added: want 50, got %d", len(ids))
	}
}
// TestRun_DropExistingFiresDelete checks that DropExisting=true makes Run
// send a DELETE for the index to the gateway.
func TestRun_DropExistingFiresDelete(t *testing.T) {
	gateway := newFakeGateway(4)
	server := httptest.NewServer(gateway.handler())
	defer server.Close()

	source := &staticSource{rows: []Row{{ID: "x", Text: "y", Metadata: nil}}}
	cfg := Config{
		GatewayURL:   server.URL,
		IndexName:    "drops_first",
		Dimension:    4,
		DropExisting: true,
		HTTPClient:   server.Client(),
	}
	if _, err := Run(context.Background(), cfg, source); err != nil {
		t.Fatalf("Run: %v", err)
	}
	if !gateway.deleteCalled {
		t.Error("expected delete-index to fire when DropExisting=true")
	}
}
// TestRun_IndexAlreadyExistsIsReused checks that a 409 from create-index
// is not fatal: the run proceeds and the single row is added.
func TestRun_IndexAlreadyExistsIsReused(t *testing.T) {
	gateway := newFakeGateway(4)
	gateway.indexConflict = true // first POST /v1/vectors/index → 409
	server := httptest.NewServer(gateway.handler())
	defer server.Close()

	source := &staticSource{rows: []Row{{ID: "x", Text: "y", Metadata: nil}}}
	cfg := Config{
		GatewayURL:   server.URL,
		IndexName:    "exists_already",
		Dimension:    4,
		HTTPClient:   server.Client(),
		EmbedWorkers: 1,
	}
	stats, err := Run(context.Background(), cfg, source)
	if err != nil {
		t.Fatalf("Run with existing index should succeed: %v", err)
	}
	if got := stats.Added; got != 1 {
		t.Errorf("Added: want 1, got %d", got)
	}
}
// TestRun_LimitStopsEarly feeds 100 rows with Limit=25 and checks that
// exactly 25 are reported as scanned.
func TestRun_LimitStopsEarly(t *testing.T) {
	gateway := newFakeGateway(4)
	server := httptest.NewServer(gateway.handler())
	defer server.Close()

	var rows []Row
	for i := 0; i < 100; i++ {
		rows = append(rows, Row{ID: fmt.Sprintf("r-%d", i), Text: "t", Metadata: nil})
	}

	cfg := Config{
		GatewayURL:   server.URL,
		IndexName:    "limited",
		Dimension:    4,
		Limit:        25,
		EmbedBatch:   8,
		EmbedWorkers: 2,
		HTTPClient:   server.Client(),
	}
	stats, err := Run(context.Background(), cfg, &staticSource{rows: rows})
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	if got := stats.Scanned; got != 25 {
		t.Errorf("Scanned: want 25 (limit), got %d", got)
	}
}
// TestRun_EmptyTextSkipped checks that a row with empty Text still counts
// as scanned but is not added to the index.
func TestRun_EmptyTextSkipped(t *testing.T) {
	gateway := newFakeGateway(4)
	server := httptest.NewServer(gateway.handler())
	defer server.Close()

	source := &staticSource{rows: []Row{
		{ID: "a", Text: "real text", Metadata: nil},
		{ID: "b", Text: "", Metadata: nil}, // skipped
		{ID: "c", Text: "more text", Metadata: nil},
	}}
	cfg := Config{
		GatewayURL: server.URL,
		IndexName:  "skip",
		Dimension:  4,
		HTTPClient: server.Client(),
	}
	stats, err := Run(context.Background(), cfg, source)
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	if got := stats.Scanned; got != 3 {
		t.Errorf("Scanned: want 3 (b is skipped but counted as scanned), got %d", got)
	}
	if got := stats.Added; got != 2 {
		t.Errorf("Added: want 2 (b excluded from embed), got %d", got)
	}
}
// TestRun_ProgressLoggerExits guards the bug caught 2026-04-29 in the
// candidates e2e: when LogProgress > 0, the progress goroutine's only
// exit was ctx.Done(). With context.Background() in the production
// driver, Run hung forever after the pipeline finished.
//
// Run is executed in a goroutine and must report back within 2 seconds;
// if the hang regresses, the timeout branch fails the test.
func TestRun_ProgressLoggerExits(t *testing.T) {
	gateway := newFakeGateway(4)
	server := httptest.NewServer(gateway.handler())
	defer server.Close()

	source := &staticSource{rows: []Row{
		{ID: "a", Text: "x", Metadata: nil},
		{ID: "b", Text: "y", Metadata: nil},
	}}
	cfg := Config{
		GatewayURL:  server.URL,
		IndexName:   "progress_test",
		Dimension:   4,
		HTTPClient:  server.Client(),
		LogProgress: 50 * time.Millisecond,
	}

	// Buffered so the goroutine can finish even after a timeout.
	finished := make(chan error, 1)
	go func() {
		_, runErr := Run(context.Background(), cfg, source)
		finished <- runErr
	}()

	select {
	case <-time.After(2 * time.Second):
		t.Fatal("Run did not return within 2s — progress goroutine likely hanging")
	case runErr := <-finished:
		if runErr != nil {
			t.Fatalf("Run: %v", runErr)
		}
	}
}
// TestRun_NonzeroFailedBatchesReturnsError guards the 2026-04-29 scrum
// WARN: original behavior returned nil even when 100% of batches failed,
// making "embedded=0/scanned=N" look like an empty corpus rather than a
// broken pipeline. Every embed call here answers 502, so Run must return
// ErrPartialFailure with FailedBatches > 0 and nothing added.
func TestRun_NonzeroFailedBatchesReturnsError(t *testing.T) {
	createOK := func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusCreated)
	}
	embedAlwaysFails := func(w http.ResponseWriter, r *http.Request) {
		http.Error(w, "embed failure injected", http.StatusBadGateway)
	}
	addUnexpected := func(w http.ResponseWriter, r *http.Request) {
		// shouldn't reach here since embed fails first
		http.Error(w, "should not be called", http.StatusInternalServerError)
	}

	// Fake gateway that fails every embed call.
	mux := http.NewServeMux()
	mux.HandleFunc("/v1/vectors/index", createOK)
	mux.HandleFunc("/v1/embed", embedAlwaysFails)
	mux.HandleFunc("/v1/vectors/index/", addUnexpected)
	server := httptest.NewServer(mux)
	defer server.Close()

	var rows []Row
	for i := 0; i < 5; i++ {
		rows = append(rows, Row{ID: fmt.Sprintf("r-%d", i), Text: "x"})
	}

	cfg := Config{
		GatewayURL:   server.URL,
		IndexName:    "fail_only",
		Dimension:    4,
		EmbedBatch:   1,
		EmbedWorkers: 1,
		HTTPClient:   server.Client(),
	}
	stats, err := Run(context.Background(), cfg, &staticSource{rows: rows})
	if !errors.Is(err, ErrPartialFailure) {
		t.Errorf("want ErrPartialFailure, got %v", err)
	}
	if stats.FailedBatches == 0 {
		t.Error("FailedBatches should be > 0 when embeds fail")
	}
	if got := stats.Added; got != 0 {
		t.Errorf("Added: want 0 (all failed), got %d", got)
	}
}
func TestRun_RequiresIndexName(t *testing.T) {
_, err := Run(context.Background(), Config{Dimension: 4},
&staticSource{rows: nil})
if err == nil || !strings.Contains(err.Error(), "IndexName") {
t.Errorf("want IndexName-required error, got %v", err)
}
}
func TestRun_RequiresDimension(t *testing.T) {
_, err := Run(context.Background(), Config{IndexName: "x"},
&staticSource{rows: nil})
if err == nil || !strings.Contains(err.Error(), "Dimension") {
t.Errorf("want Dimension-required error, got %v", err)
}
}
// TestRun_ContextCancel verifies the pipeline drains cleanly when
// ctx is cancelled mid-run. Source returns rows fast enough that
// without ctx the run would complete; cancelling early should stop
// well before all 1000 rows are processed.
func TestRun_ContextCancel(t *testing.T) {
fg := newFakeGateway(4)
// Slow embed handler: each call sleeps 50ms.
mux := http.NewServeMux()
mux.HandleFunc("/v1/vectors/index", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusCreated)
})
mux.HandleFunc("/v1/embed", func(w http.ResponseWriter, r *http.Request) {
var req struct {
Texts []string `json:"texts"`
}
_ = json.NewDecoder(r.Body).Decode(&req)
// Simulate slow-but-valid backend so we test ctx cancel, not
// degraded-payload handling (that's covered in production by
// the len-mismatch guard in Run's worker).
time.Sleep(50 * time.Millisecond)
_ = fg
vecs := make([][]float32, len(req.Texts))
for i := range vecs {
vecs[i] = []float32{1, 2, 3, 4}
}
_ = json.NewEncoder(w).Encode(map[string]any{
"vectors": vecs,
"dimension": 4,
"model": "x",
})
})
mux.HandleFunc("/v1/vectors/index/", func(w http.ResponseWriter, r *http.Request) {
_, _ = io.WriteString(w, `{}`)
})
srv := httptest.NewServer(mux)
defer srv.Close()
rows := make([]Row, 1000)
for i := range rows {
rows[i] = Row{ID: fmt.Sprintf("r-%d", i), Text: "t"}
}
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
stats, err := Run(ctx, Config{
GatewayURL: srv.URL, IndexName: "cancel_me", Dimension: 4,
EmbedBatch: 1, EmbedWorkers: 1, HTTPClient: srv.Client(),
}, &staticSource{rows: rows})
// Either an error or a partial stats; the point is "didn't process all 1000."
if stats.Scanned >= 1000 {
t.Errorf("ctx cancel did not stop early: scanned=%d err=%v", stats.Scanned, err)
}
}

View File

@ -1,410 +0,0 @@
package distillation
// scorer.go — pure deterministic Success Scorer (port of Rust
// scripts/distillation/scorer.ts at e7636f2).
//
// Takes one EvidenceRecord, returns category + reasons + sub_scores.
// NO I/O, NO LLM, NO clock reads, NO mutable state. Identical input
// → identical output forever. Same contract as the Rust source —
// future scoring-rule changes bump ScorerVersion atomically with
// the logic.
//
// Three-class strategy mirrors the Rust source taxonomy
// (docs/recon/local-distillation-recon.md + data/_kb/evidence_health.md):
//
// CLASS A — verdict-bearing
// scrum_reviews, observer_reviews, audits, contract_analyses
// Direct scoring from existing markers / observer_verdict
//
// CLASS B — telemetry-rich
// auto_apply, outcomes, mode_experiments
// Markers exist but partial; needs_human_review fills the gap
//
// CLASS C — pure-extraction (no native scoring signal)
// distilled_*, audit_facts, observer_escalations
// Default needs_human_review; v2 will JOIN to parent verdict
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"strconv"
"strings"
)
// sourceClass categorizes an EvidenceRecord's source_file for the
// scorer's three-class dispatch.
type sourceClass string
// The three classes sourceClassFor can return.
const (
// classVerdict: scrum_reviews, observer_reviews, audits, contract_analyses.
classVerdict sourceClass = "verdict"
// classTelemetry: auto_apply, outcomes, mode_experiments.
classTelemetry sourceClass = "telemetry"
// classExtraction: distilled_*, audit_facts, observer_escalations, and
// any stem sourceClassFor does not recognize.
classExtraction sourceClass = "extraction"
)
// sourceClassFor maps a source_file (from provenance) to a class.
// Centralized so adding a new source is a one-line change. Mirrors
// the Rust switch on the stem (data/_kb/X.jsonl → X).
//
// The stem is obtained from stemOf so this function and ScoreRecord's
// per-stem dispatch always agree on how a path is reduced to a stem.
// Unrecognized stems fall into classExtraction.
func sourceClassFor(sourceFile string) sourceClass {
	switch stemOf(sourceFile) {
	case "scrum_reviews", "observer_reviews", "audits", "contract_analyses":
		return classVerdict
	case "auto_apply", "outcomes", "mode_experiments":
		return classTelemetry
	case "distilled_facts", "distilled_procedures", "distilled_config_hints",
		"audit_facts", "observer_escalations":
		return classExtraction
	default:
		// Unknown source → most conservative path (forces
		// needs_human_review until a transform is added).
		return classExtraction
	}
}
// stemOf extracts the stable corpus identifier from a source_file by
// removing a leading "data/_kb/" and a trailing ".jsonl", each only when
// present. E.g. "data/_kb/scrum_reviews.jsonl" → "scrum_reviews".
func stemOf(sourceFile string) string {
	const (
		kbDir    = "data/_kb/"
		jsonlExt = ".jsonl"
	)
	stem := strings.TrimPrefix(sourceFile, kbDir)
	stem = strings.TrimSuffix(stem, jsonlExt)
	return stem
}
// ScoreOutput is the scorer's return shape — category + reasons +
// the captured sub-signals. Reasons is always non-empty (validator
// requires it).
type ScoreOutput struct {
// Category is the verdict chosen by the class scorer.
Category ScoreCategory
// Reasons justifies Category; every scorer in this file supplies at
// least one entry.
Reasons []string
// SubScores carries the sub-signals captured while scoring; every
// scorer in this file sets it to a non-nil value.
SubScores *SubScores
}
// ScoreRecord dispatches an EvidenceRecord to the appropriate class
// scorer and returns the verdict + reasons + sub-scores. Pure
// function. Caller wraps the output in a ScoredRun via BuildScoredRun
// for the on-wire shape.
//
// Dispatch is two-level: the class from sourceClassFor selects a group,
// then the stem selects the scorer inside it. Anything without a
// dedicated scorer falls through to scoreExtraction.
func ScoreRecord(rec EvidenceRecord) ScoreOutput {
	source := rec.Provenance.SourceFile
	stem := stemOf(source)

	var score func(EvidenceRecord) ScoreOutput
	switch sourceClassFor(source) {
	case classVerdict:
		switch stem {
		case "scrum_reviews":
			score = scoreScrumReview
		case "observer_reviews":
			score = scoreObserverReview
		case "audits":
			score = scoreAudit
		case "contract_analyses":
			score = scoreContractAnalysis
		}
	case classTelemetry:
		switch stem {
		case "auto_apply":
			score = scoreAutoApply
		case "outcomes":
			score = scoreOutcomes
		case "mode_experiments":
			score = scoreModeExperiment
		}
	}

	if score == nil {
		return scoreExtraction()
	}
	return score(rec)
}
// BuildScoredRun composes a complete ScoredRun for persistence.
// Caller supplies recorded_at + the source file path/line offset.
// SigHash is computed deterministically from the EvidenceRecord
// JSON; ScoredRun traces to the materialized evidence row.
//
// recordedAt is used for both ScoredAt and Provenance.RecordedAt. A
// hashing failure yields a zero ScoredRun and a wrapped error.
func BuildScoredRun(rec EvidenceRecord, sourceFile string, lineOffset int64, recordedAt string) (ScoredRun, error) {
	scored := ScoreRecord(rec)

	sigHash, err := canonicalSha256(rec)
	if err != nil {
		return ScoredRun{}, fmt.Errorf("scoredrun sig hash: %w", err)
	}

	provenance := Provenance{
		SourceFile: sourceFile,
		LineOffset: lineOffset,
		SigHash:    sigHash,
		RecordedAt: recordedAt,
	}
	run := ScoredRun{
		SchemaVersion:  ScoredRunSchemaVersion,
		EvidenceRunID:  rec.RunID,
		EvidenceTaskID: rec.TaskID,
		Category:       scored.Category,
		Reasons:        scored.Reasons,
		ScoredAt:       recordedAt,
		ScorerVersion:  ScorerVersion,
		SubScores:      scored.SubScores,
		Provenance:     provenance,
	}
	return run, nil
}
// canonicalSha256 hashes a value's canonical JSON encoding. Used
// for ScoredRun.Provenance.SigHash. Matches the Rust pattern of
// "hash the structured object, not the raw source bytes" so
// re-materialization with same logic produces same hash.
//
// The encoding is whatever json.Marshal produces for v; the result is the
// lowercase hex SHA-256 of those bytes. A marshal failure returns "" and
// the error.
func canonicalSha256(v any) (string, error) {
	encoded, err := json.Marshal(v)
	if err != nil {
		return "", err
	}
	hasher := sha256.New()
	hasher.Write(encoded) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil)), nil
}
// ─── Class A: verdict-bearing ────────────────────────────────────
// scoreScrumReview scores a scrum_reviews record from its
// accepted_on_attempt_N success marker:
//
//   - marker missing or N not an integer → needs_human_review
//   - N == 1                             → accepted
//   - any other N                        → partially_accepted (the reason
//     text distinguishes N <= 3 from the high-cost path)
//
// When N parses, it is recorded in SubScores.AcceptedOnAttempt.
func scoreScrumReview(r EvidenceRecord) ScoreOutput {
	const markerPrefix = "accepted_on_attempt_"

	subs := &SubScores{}
	verdict := func(category ScoreCategory, reason string) ScoreOutput {
		return ScoreOutput{Category: category, Reasons: []string{reason}, SubScores: subs}
	}

	marker := findPrefix(r.SuccessMarkers, markerPrefix)
	if marker == "" {
		return verdict(CategoryNeedsHumanReview,
			"scrum_review missing accepted_on_attempt_* success marker")
	}

	suffix := strings.TrimPrefix(marker, markerPrefix)
	attempt, err := strconv.Atoi(suffix)
	if err != nil {
		return verdict(CategoryNeedsHumanReview,
			"scrum_review accepted_on_attempt_* marker has non-integer suffix: "+suffix)
	}
	subs.AcceptedOnAttempt = &attempt

	if attempt == 1 {
		return verdict(CategoryAccepted, "scrum: accepted on first attempt")
	}
	if attempt <= 3 {
		return verdict(CategoryPartiallyAccepted,
			fmt.Sprintf("scrum: accepted after %d attempts", attempt))
	}
	return verdict(CategoryPartiallyAccepted,
		fmt.Sprintf("scrum: accepted only after %d attempts (high-cost path)", attempt))
}
// scoreObserverReview maps the record's observer verdict straight to a
// category: accept → accepted, reject → rejected, cycle →
// partially_accepted. A recognized verdict is echoed into
// SubScores.ObserverVerdict; anything else yields needs_human_review
// with the raw verdict quoted in the reason.
func scoreObserverReview(r EvidenceRecord) ScoreOutput {
	subs := &SubScores{}

	var (
		category ScoreCategory
		reason   string
	)
	switch r.ObserverVerdict {
	case VerdictAccept:
		subs.ObserverVerdict = VerdictAccept
		category = CategoryAccepted
		reason = "observer accepted the reviewed attempt"
	case VerdictReject:
		subs.ObserverVerdict = VerdictReject
		category = CategoryRejected
		reason = "observer rejected the reviewed attempt"
	case VerdictCycle:
		subs.ObserverVerdict = VerdictCycle
		category = CategoryPartiallyAccepted
		reason = "observer flagged the attempt as cycling — partial signal"
	default:
		category = CategoryNeedsHumanReview
		reason = fmt.Sprintf("observer_verdict missing or unrecognized: %q", r.ObserverVerdict)
	}

	return ScoreOutput{Category: category, Reasons: []string{reason}, SubScores: subs}
}
// scoreAudit scores an audits record from its markers. Rules are tried
// in this order and the first match wins:
//
//  1. legacy success marker "approved"              → accepted
//  2. legacy failure marker "blocked"               → rejected
//  3. legacy failure marker "request_changes"       → partially_accepted
//  4. any success marker starting audit_severity_   → accepted
//  5. first failure severity marker is medium       → partially_accepted
//  6. first failure severity marker is high/critical → rejected
//  7. otherwise                                     → needs_human_review
func scoreAudit(r EvidenceRecord) ScoreOutput {
	subs := &SubScores{}
	verdict := func(category ScoreCategory, reason string) ScoreOutput {
		return ScoreOutput{Category: category, Reasons: []string{reason}, SubScores: subs}
	}
	succ, fail := r.SuccessMarkers, r.FailureMarkers

	// Legacy markers (back-compat with pre-fix materializations).
	switch {
	case contains(succ, "approved"):
		return verdict(CategoryAccepted, "audit overall=approved (legacy marker)")
	case contains(fail, "blocked"):
		return verdict(CategoryRejected, "audit overall=block (legacy marker)")
	case contains(fail, "request_changes"):
		return verdict(CategoryPartiallyAccepted, "audit overall=request_changes (legacy marker)")
	}

	// Severity-derived markers (Phase 2 transform).
	const severityPrefix = "audit_severity_"
	sevSucc := findPrefix(succ, severityPrefix)
	sevFail := findPrefix(fail, severityPrefix)
	switch {
	case sevSucc != "":
		return verdict(CategoryAccepted, sevSucc+" → minor finding")
	case sevFail == "audit_severity_medium":
		return verdict(CategoryPartiallyAccepted, "audit_severity_medium → finding warrants review")
	case sevFail == "audit_severity_high", sevFail == "audit_severity_critical":
		return verdict(CategoryRejected, sevFail+" → blocking finding")
	}

	return verdict(CategoryNeedsHumanReview, "audit row has no severity or overall marker")
}
// scoreContractAnalysis scores a contract_analyses record from the
// observer signal. Rejection is checked first — either the
// "observer_rejected" failure marker or a reject verdict — so an explicit
// rejection wins over everything else. Then accept → accepted, cycle →
// partially_accepted, and no signal → needs_human_review.
func scoreContractAnalysis(r EvidenceRecord) ScoreOutput {
	subs := &SubScores{}
	verdict := func(category ScoreCategory, reason string) ScoreOutput {
		return ScoreOutput{Category: category, Reasons: []string{reason}, SubScores: subs}
	}

	// failure_markers takes precedence: explicit rejection beats absent verdict.
	rejected := contains(r.FailureMarkers, "observer_rejected") || r.ObserverVerdict == VerdictReject
	if rejected {
		subs.ObserverVerdict = VerdictReject
		return verdict(CategoryRejected, "contract analysis: observer rejected")
	}

	if r.ObserverVerdict == VerdictAccept {
		subs.ObserverVerdict = VerdictAccept
		return verdict(CategoryAccepted, "contract analysis: observer accepted")
	}
	if r.ObserverVerdict == VerdictCycle {
		subs.ObserverVerdict = VerdictCycle
		return verdict(CategoryPartiallyAccepted, "contract analysis: observer cycled (partial)")
	}

	return verdict(CategoryNeedsHumanReview, "contract analysis: no observer verdict signal")
}
// ─── Class B: telemetry-rich ─────────────────────────────────────
// scoreAutoApply scores an auto_apply record:
//
//   - success marker "committed" → accepted, SubScores.CargoGreen = true
//   - first failure marker containing "reverted" → rejected, with the
//     marker in the reason; CargoGreen = false only when that marker also
//     contains "build_red"
//   - neither → needs_human_review
func scoreAutoApply(r EvidenceRecord) ScoreOutput {
	subs := &SubScores{}

	if contains(r.SuccessMarkers, "committed") {
		green := true
		subs.CargoGreen = &green
		return ScoreOutput{
			Category:  CategoryAccepted,
			Reasons:   []string{"auto_apply: patch committed (cargo green + warning baseline + rationale alignment passed)"},
			SubScores: subs,
		}
	}

	if revertMarker := findContaining(r.FailureMarkers, "reverted"); revertMarker != "" {
		if strings.Contains(revertMarker, "build_red") {
			green := false
			subs.CargoGreen = &green
		}
		return ScoreOutput{
			Category:  CategoryRejected,
			Reasons:   []string{"auto_apply: " + revertMarker},
			SubScores: subs,
		}
	}

	return ScoreOutput{
		Category:  CategoryNeedsHumanReview,
		Reasons:   []string{"auto_apply: no commit + no revert (likely no_patches or dry_run)"},
		SubScores: subs,
	}
}
// scoreOutcomes scores an outcomes record: the "all_events_ok" success
// marker means accepted; otherwise a positive validation_results
// "gap_signals" value means partially_accepted (the count is reported
// truncated to an integer); otherwise needs_human_review.
func scoreOutcomes(r EvidenceRecord) ScoreOutput {
	subs := &SubScores{}
	category := CategoryNeedsHumanReview
	reason := "outcomes: no decisive marker — defer to human"

	if contains(r.SuccessMarkers, "all_events_ok") {
		category = CategoryAccepted
		reason = "outcomes: all events ok"
	} else if gap := numericFromMap(r.ValidationResults, "gap_signals"); gap > 0 {
		category = CategoryPartiallyAccepted
		reason = fmt.Sprintf("outcomes: %d gap signal(s) detected", int(gap))
	}

	return ScoreOutput{Category: category, Reasons: []string{reason}, SubScores: subs}
}
// scoreModeExperiment scores a mode_experiments record: blank response
// text (after trimming whitespace) is rejected; a latency above 120000ms
// is partially_accepted; everything else is needs_human_review because
// no verdict is wired for this source yet.
func scoreModeExperiment(r EvidenceRecord) ScoreOutput {
	subs := &SubScores{}
	category := CategoryNeedsHumanReview
	reason := "mode_experiment: response present, latency within bounds; verdict not yet wired"

	switch {
	case strings.TrimSpace(r.Text) == "":
		category = CategoryRejected
		reason = "mode_experiment: empty response text"
	case r.LatencyMs > 120_000:
		category = CategoryPartiallyAccepted
		reason = fmt.Sprintf("mode_experiment: latency %dms exceeds 2-minute soft cap", r.LatencyMs)
	}

	return ScoreOutput{Category: category, Reasons: []string{reason}, SubScores: subs}
}
// ─── Class C: pure-extraction ────────────────────────────────────
// scoreExtraction is the fallback verdict for sources without a dedicated
// scorer: always needs_human_review with a fresh, empty SubScores.
func scoreExtraction() ScoreOutput {
	var out ScoreOutput
	out.Category = CategoryNeedsHumanReview
	out.Reasons = []string{"extraction-class source has no native scoring signal — JOIN to parent verdict pending Phase 3 v2"}
	out.SubScores = &SubScores{}
	return out
}
// ─── Internal helpers ────────────────────────────────────────────
// contains reports whether any element of slice is exactly equal to want.
func contains(slice []string, want string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == want {
			return true
		}
	}
	return false
}
// findPrefix returns the first element of slice that starts with prefix,
// or "" when no element does.
func findPrefix(slice []string, prefix string) string {
	match := ""
	for i := range slice {
		if candidate := slice[i]; strings.HasPrefix(candidate, prefix) {
			match = candidate
			break
		}
	}
	return match
}
// findContaining returns the first element of slice that contains sub
// anywhere within it, or "" when no element does.
func findContaining(slice []string, sub string) string {
	match := ""
	for i := range slice {
		if candidate := slice[i]; strings.Contains(candidate, sub) {
			match = candidate
			break
		}
	}
	return match
}
// numericFromMap reads m[key] as a float64. It understands int, int64,
// float32, float64 and json.Number values. A nil map, a missing key, or
// a value of any other type yields 0. A json.Number that fails to parse
// yields whatever value Float64 returns alongside its error.
func numericFromMap(m map[string]any, key string) float64 {
	// Indexing a nil map is safe and simply reports the key as absent.
	raw, present := m[key]
	if !present {
		return 0
	}

	var out float64
	switch num := raw.(type) {
	case float64:
		out = num
	case float32:
		out = float64(num)
	case int64:
		out = float64(num)
	case int:
		out = float64(num)
	case json.Number:
		out, _ = num.Float64()
	}
	return out
}

View File

@ -1,375 +0,0 @@
package distillation
import (
"errors"
"strings"
"testing"
)
// mkRecord builds a minimal EvidenceRecord whose provenance points at
// sourceFile. Tests set markers, verdicts, text, etc. on the result.
func mkRecord(sourceFile string) EvidenceRecord {
	var rec EvidenceRecord
	rec.RunID = "run-1"
	rec.TaskID = "task-1"
	rec.Timestamp = "2026-04-29T12:00:00Z"
	rec.SchemaVersion = EvidenceSchemaVersion
	rec.Provenance = Provenance{
		SourceFile: sourceFile,
		SigHash:    "deadbeef",
		RecordedAt: "2026-04-29T12:00:01Z",
	}
	return rec
}
// TestSourceClassFor pins the path → class mapping, including the
// conservative fallback for an unknown source.
func TestSourceClassFor(t *testing.T) {
	type expectation struct {
		path string
		want sourceClass
	}
	table := []expectation{
		{path: "data/_kb/scrum_reviews.jsonl", want: classVerdict},
		{path: "data/_kb/observer_reviews.jsonl", want: classVerdict},
		{path: "data/_kb/audits.jsonl", want: classVerdict},
		{path: "data/_kb/contract_analyses.jsonl", want: classVerdict},
		{path: "data/_kb/auto_apply.jsonl", want: classTelemetry},
		{path: "data/_kb/outcomes.jsonl", want: classTelemetry},
		{path: "data/_kb/mode_experiments.jsonl", want: classTelemetry},
		{path: "data/_kb/distilled_facts.jsonl", want: classExtraction},
		{path: "data/_kb/audit_facts.jsonl", want: classExtraction},
		{path: "data/_kb/observer_escalations.jsonl", want: classExtraction},
		{path: "data/_kb/wholly_unknown.jsonl", want: classExtraction}, // unknown → extraction (conservative)
	}
	for i := range table {
		path, want := table[i].path, table[i].want
		if got := sourceClassFor(path); got != want {
			t.Errorf("sourceClassFor(%q): want %q, got %q", path, want, got)
		}
	}
}
// TestScoreScrumReview checks the category and a reason substring for
// each accepted_on_attempt_N bucket, plus the missing-marker case.
func TestScoreScrumReview(t *testing.T) {
	type scenario struct {
		name           string
		successMarkers []string
		wantCategory   ScoreCategory
		wantReasonSub  string
	}
	scenarios := []scenario{
		{
			name:           "first attempt → accepted",
			successMarkers: []string{"accepted_on_attempt_1"},
			wantCategory:   CategoryAccepted,
			wantReasonSub:  "first attempt",
		},
		{
			name:           "second attempt → partial",
			successMarkers: []string{"accepted_on_attempt_2"},
			wantCategory:   CategoryPartiallyAccepted,
			wantReasonSub:  "after 2 attempts",
		},
		{
			name:           "fourth attempt → partial (high-cost)",
			successMarkers: []string{"accepted_on_attempt_4"},
			wantCategory:   CategoryPartiallyAccepted,
			wantReasonSub:  "high-cost",
		},
		{
			name:           "missing marker → needs_human_review",
			successMarkers: []string{},
			wantCategory:   CategoryNeedsHumanReview,
			wantReasonSub:  "missing accepted_on_attempt",
		},
	}

	for _, sc := range scenarios {
		sc := sc
		t.Run(sc.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/scrum_reviews.jsonl")
			rec.SuccessMarkers = sc.successMarkers

			got := ScoreRecord(rec)
			if got.Category != sc.wantCategory {
				t.Errorf("category: want %q, got %q (reasons=%v)", sc.wantCategory, got.Category, got.Reasons)
			}
			if !reasonsContain(got.Reasons, sc.wantReasonSub) {
				t.Errorf("reasons missing %q: %v", sc.wantReasonSub, got.Reasons)
			}
		})
	}
}
// TestScoreObserverReview checks the verdict → category mapping,
// including empty and unrecognized verdicts.
func TestScoreObserverReview(t *testing.T) {
	type expectation struct {
		verdict ObserverVerdict
		want    ScoreCategory
	}
	table := []expectation{
		{verdict: VerdictAccept, want: CategoryAccepted},
		{verdict: VerdictReject, want: CategoryRejected},
		{verdict: VerdictCycle, want: CategoryPartiallyAccepted},
		{verdict: "", want: CategoryNeedsHumanReview},
		{verdict: "weird-verdict", want: CategoryNeedsHumanReview},
	}
	for i := range table {
		verdict, want := table[i].verdict, table[i].want

		rec := mkRecord("data/_kb/observer_reviews.jsonl")
		rec.ObserverVerdict = verdict
		if got := ScoreRecord(rec).Category; got != want {
			t.Errorf("verdict=%q: want %q, got %q", verdict, want, got)
		}
	}
}
// TestScoreAudit_LegacyAndSeverityMarkers covers each audit rule: the
// three legacy markers, the severity markers on both marker lists, and
// the no-marker fallback.
func TestScoreAudit_LegacyAndSeverityMarkers(t *testing.T) {
	type scenario struct {
		name string
		succ []string
		fail []string
		want ScoreCategory
	}
	scenarios := []scenario{
		{name: "legacy approved", succ: []string{"approved"}, want: CategoryAccepted},
		{name: "legacy blocked", fail: []string{"blocked"}, want: CategoryRejected},
		{name: "legacy request_changes", fail: []string{"request_changes"}, want: CategoryPartiallyAccepted},
		{name: "severity_low → accepted", succ: []string{"audit_severity_low"}, want: CategoryAccepted},
		{name: "severity_info → accepted", succ: []string{"audit_severity_info"}, want: CategoryAccepted},
		{name: "severity_medium fail → partial", fail: []string{"audit_severity_medium"}, want: CategoryPartiallyAccepted},
		{name: "severity_high → rejected", fail: []string{"audit_severity_high"}, want: CategoryRejected},
		{name: "severity_critical → rejected", fail: []string{"audit_severity_critical"}, want: CategoryRejected},
		{name: "no markers", want: CategoryNeedsHumanReview},
	}

	for _, sc := range scenarios {
		sc := sc
		t.Run(sc.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/audits.jsonl")
			rec.SuccessMarkers, rec.FailureMarkers = sc.succ, sc.fail

			got := ScoreRecord(rec)
			if got.Category != sc.want {
				t.Errorf("want %q, got %q (reasons=%v)", sc.want, got.Category, got.Reasons)
			}
		})
	}
}
// TestScoreAutoApply covers the committed, reverted (two variants) and
// no-signal outcomes for auto_apply records.
func TestScoreAutoApply(t *testing.T) {
	type scenario struct {
		name string
		succ []string
		fail []string
		want ScoreCategory
	}
	scenarios := []scenario{
		{name: "committed → accepted", succ: []string{"committed"}, want: CategoryAccepted},
		{name: "reverted_build_red → rejected", fail: []string{"reverted_build_red"}, want: CategoryRejected},
		{name: "reverted other → rejected", fail: []string{"reverted_warning_count_up"}, want: CategoryRejected},
		{name: "no signal → needs_human", want: CategoryNeedsHumanReview},
	}

	for _, sc := range scenarios {
		sc := sc
		t.Run(sc.name, func(t *testing.T) {
			rec := mkRecord("data/_kb/auto_apply.jsonl")
			rec.SuccessMarkers, rec.FailureMarkers = sc.succ, sc.fail

			if got := ScoreRecord(rec).Category; got != sc.want {
				t.Errorf("want %q, got %q", sc.want, got)
			}
		})
	}
}
// TestScoreOutcomes exercises three records from
// data/_kb/outcomes.jsonl: one carrying the all_events_ok success
// marker, one carrying a gap_signals validation result, and one with no
// signal at all.
func TestScoreOutcomes(t *testing.T) {
	const source = "data/_kb/outcomes.jsonl"

	// Success marker present.
	clean := mkRecord(source)
	clean.SuccessMarkers = []string{"all_events_ok"}
	got := ScoreRecord(clean)
	if got.Category != CategoryAccepted {
		t.Errorf("all_events_ok: want accepted, got %q", got.Category)
	}

	// gap_signals is stored as float64, the type encoding/json produces
	// when decoding a number into an `any`.
	gappy := mkRecord(source)
	gappy.ValidationResults = map[string]any{"gap_signals": float64(2)}
	got = ScoreRecord(gappy)
	if got.Category != CategoryPartiallyAccepted {
		t.Errorf("gap_signals=2: want partial, got %q (reasons=%v)", got.Category, got.Reasons)
	}

	// Untouched record from the helper.
	bare := mkRecord(source)
	got = ScoreRecord(bare)
	if got.Category != CategoryNeedsHumanReview {
		t.Errorf("no signal: want needs_human, got %q", got.Category)
	}
}
// TestScoreModeExperiment walks a single data/_kb/mode_experiments.jsonl
// record through three states, mutating it between calls: empty text,
// non-empty text with a 130 000 ms latency, and non-empty text with a
// 5 000 ms latency.
func TestScoreModeExperiment(t *testing.T) {
	rec := mkRecord("data/_kb/mode_experiments.jsonl")

	// State 1: no response text.
	rec.Text = ""
	if got := ScoreRecord(rec).Category; got != CategoryRejected {
		t.Errorf("empty text: want rejected, got %q", got)
	}

	// State 2: real text, latency the test treats as over the cap.
	rec.Text = "real response"
	rec.LatencyMs = 130_000
	if got := ScoreRecord(rec).Category; got != CategoryPartiallyAccepted {
		t.Errorf("over latency cap: want partial, got %q", got)
	}

	// State 3: same text, ordinary latency.
	rec.LatencyMs = 5000
	if got := ScoreRecord(rec).Category; got != CategoryNeedsHumanReview {
		t.Errorf("normal: want needs_human (verdict not yet wired), got %q", got)
	}
}
func TestScoreExtraction_Defaults(t *testing.T) {
for _, src := range []string{
"data/_kb/distilled_facts.jsonl",
"data/_kb/distilled_procedures.jsonl",
"data/_kb/audit_facts.jsonl",
"data/_kb/observer_escalations.jsonl",
} {
rec := mkRecord(src)
out := ScoreRecord(rec)
if out.Category != CategoryNeedsHumanReview {
t.Errorf("%s: want needs_human_review, got %q", src, out.Category)
}
}
}
// ─── Contamination firewall — the safety-critical guarantee ───────
// TestValidateSftSample_RejectsContaminationCategories feeds an
// otherwise-valid sample through ValidateSftSample with quality_score
// set to "rejected" and then "needs_human_review". Both must fail, and
// the failure must match ErrSftContamination under errors.Is.
func TestValidateSftSample_RejectsContaminationCategories(t *testing.T) {
	forbidden := []SftQualityScore{"rejected", "needs_human_review"}
	for _, score := range forbidden {
		sample := goodSftSample()
		sample.QualityScore = score
		switch err := ValidateSftSample(sample); {
		case err == nil:
			t.Errorf("contaminated quality_score=%q should fail validation", score)
		case !errors.Is(err, ErrSftContamination):
			t.Errorf("contaminated %q: want errors.Is(err, ErrSftContamination), got %v", score, err)
		}
	}
}
// TestValidateSftSample_AcceptsLegalCategories confirms that both
// SftQualityAccepted and SftQualityPartiallyAccepted pass validation on
// an otherwise-valid sample.
func TestValidateSftSample_AcceptsLegalCategories(t *testing.T) {
	allowed := []SftQualityScore{SftQualityAccepted, SftQualityPartiallyAccepted}
	for i := range allowed {
		sample := goodSftSample()
		sample.QualityScore = allowed[i]
		err := ValidateSftSample(sample)
		if err == nil {
			continue
		}
		t.Errorf("legal quality_score=%q failed: %v", allowed[i], err)
	}
}
// TestValidateSftSample_RejectsTypoCategory uses a quality_score that is
// neither legal nor one of the firewall categories ("approved"). The
// sample must be rejected, but NOT with ErrSftContamination: an unknown
// value is an ordinary validation failure, which keeps "you typo'd"
// distinguishable from "you broke the spec".
func TestValidateSftSample_RejectsTypoCategory(t *testing.T) {
	sample := goodSftSample()
	sample.QualityScore = "approved" // close to "accepted" but wrong

	err := ValidateSftSample(sample)
	switch {
	case err == nil:
		t.Fatal("typo category should fail validation")
	case errors.Is(err, ErrSftContamination):
		t.Error("typo should not surface as ErrSftContamination")
	}
}
// TestValidateSftSample_RejectsEmptyPair checks both halves of the
// instruction/response pair: a whitespace-only instruction and an empty
// response must each make ValidateSftSample return an error.
func TestValidateSftSample_RejectsEmptyPair(t *testing.T) {
	blankInstruction := goodSftSample()
	blankInstruction.Instruction = " "
	err := ValidateSftSample(blankInstruction)
	if err == nil {
		t.Error("whitespace-only instruction should fail")
	}

	noResponse := goodSftSample()
	noResponse.Response = ""
	err = ValidateSftSample(noResponse)
	if err == nil {
		t.Error("empty response should fail")
	}
}
// TestValidateScoredRun_ReasonsRequired builds a ScoredRun that is valid
// in every field except Reasons (left nil) and asserts that
// ValidateScoredRun fails with a message mentioning "reasons".
func TestValidateScoredRun_ReasonsRequired(t *testing.T) {
	const stamp = "2026-04-29T12:00:00Z"

	var run ScoredRun
	run.SchemaVersion = ScoredRunSchemaVersion
	run.EvidenceRunID = "x"
	run.EvidenceTaskID = "y"
	run.Category = CategoryAccepted
	run.Reasons = nil // empty — must fail
	run.ScoredAt = stamp
	run.ScorerVersion = ScorerVersion
	run.Provenance = Provenance{
		SourceFile: "data/_kb/scrum_reviews.jsonl",
		SigHash:    "abc",
		RecordedAt: stamp,
	}

	err := ValidateScoredRun(run)
	switch {
	case err == nil:
		t.Fatal("empty reasons should fail")
	case !strings.Contains(err.Error(), "reasons"):
		t.Errorf("error should mention reasons: %v", err)
	}
}
// TestBuildScoredRun_DeterministicSigHash calls BuildScoredRun twice
// with the same arguments and requires the two results to carry the
// same provenance sig_hash. It also pins the resulting category and the
// stamped scorer version on the first result.
func TestBuildScoredRun_DeterministicSigHash(t *testing.T) {
	const (
		outPath  = "data/scored-runs/2026/04/29/x.jsonl"
		scoredAt = "2026-04-29T12:00:00Z"
	)
	rec := mkRecord("data/_kb/scrum_reviews.jsonl")
	rec.SuccessMarkers = []string{"accepted_on_attempt_1"}

	first, err := BuildScoredRun(rec, outPath, 0, scoredAt)
	if err != nil {
		t.Fatal(err)
	}
	second, err := BuildScoredRun(rec, outPath, 0, scoredAt)
	if err != nil {
		t.Fatal(err)
	}

	if a, b := first.Provenance.SigHash, second.Provenance.SigHash; a != b {
		t.Errorf("identical EvidenceRecord should produce identical sig_hash: %s vs %s",
			a, b)
	}
	if first.Category != CategoryAccepted {
		t.Errorf("scored category: %q", first.Category)
	}
	if first.ScorerVersion != ScorerVersion {
		t.Errorf("scorer version stamped wrong: %q", first.ScorerVersion)
	}
}
// TestScoreRecord_PureFunction_NoMutationOfInput is a belt-and-braces
// check of the scorer contract: "NO mutable state, identical input →
// identical output forever." It scores the same record twice, then
// verifies that the input is untouched and that both calls agree.
//
// The record is handed to ScoreRecord by value, so scalar fields and the
// slice header (length) on the caller's copy can never change. The only
// mutation a callee could leak back is a write through the shared
// backing array of SuccessMarkers. The markers are therefore snapshotted
// into an independent slice up front and compared element by element
// afterwards; comparing against a plain struct copy would share the same
// backing array and could not detect such a write.
func TestScoreRecord_PureFunction_NoMutationOfInput(t *testing.T) {
	rec := mkRecord("data/_kb/scrum_reviews.jsonl")
	rec.SuccessMarkers = []string{"accepted_on_attempt_2"}

	wantRunID := rec.RunID
	// Deep copy: must not alias rec.SuccessMarkers.
	wantMarkers := append([]string(nil), rec.SuccessMarkers...)

	out1 := ScoreRecord(rec)
	out2 := ScoreRecord(rec)

	mutated := rec.RunID != wantRunID || len(rec.SuccessMarkers) != len(wantMarkers)
	for i := 0; !mutated && i < len(wantMarkers); i++ {
		mutated = rec.SuccessMarkers[i] != wantMarkers[i]
	}
	if mutated {
		t.Error("ScoreRecord mutated its input")
	}
	if out1.Category != out2.Category {
		t.Error("ScoreRecord is non-deterministic")
	}
}
// ─── Helpers ─────────────────────────────────────────────────────
// goodSftSample returns a fresh SftSample with every field populated
// (schema version, non-blank instruction/response, an accepted quality
// score, RFC 3339 timestamps and full provenance). Tests break exactly
// one field on the returned copy.
func goodSftSample() SftSample {
	var s SftSample
	s.SchemaVersion = SftSampleSchemaVersion
	s.ID = "sft-1"
	s.Instruction = "summarize the diff"
	s.Context = "diff body..."
	s.Response = "the diff adds a function"
	s.SourceRunID = "run-1"
	s.QualityScore = SftQualityAccepted
	s.CreatedAt = "2026-04-29T12:00:00Z"
	s.Provenance.SourceFile = "data/scored-runs/2026/04/29/x.jsonl"
	s.Provenance.SigHash = "deadbeef"
	s.Provenance.RecordedAt = "2026-04-29T12:00:01Z"
	return s
}
// reasonsContain reports whether at least one entry of reasons has sub
// as a substring. A nil or empty reasons slice yields false.
func reasonsContain(reasons []string, sub string) bool {
	for i := 0; i < len(reasons); i++ {
		if !strings.Contains(reasons[i], sub) {
			continue
		}
		return true
	}
	return false
}

View File

@ -1,484 +0,0 @@
// Package distillation is the Go port of the Rust v1.0.0 distillation
// substrate (frozen at e7636f2). Per ADR-001 #4: port LOGIC, not
// bit-identical reproducibility.
//
// What this package owns (this commit):
// - The deterministic scorer: EvidenceRecord → ScoredRun
// - Score categories + scorer version constant
// - SftSample type + validator with the contamination firewall
// (the safety-critical piece — rejected/needs_human_review must
// NEVER ship to SFT)
//
// What's deferred to follow-up commits:
// - Materialization layer (file iteration, jsonl read/write,
// date-partitioned storage) — operational tooling on top of
// the scorer logic
// - export_preference, export_rag (other export shapes)
// - acceptance harness (the gate that locks v1.0.0)
// - replay, receipts, evidence-index builders
//
// The scorer + SftSample validator are the LOAD-BEARING pieces
// per project_distillation_substrate.md memory. The rest is plumbing
// that can land incrementally without changing the logic the
// downstream learning loop depends on.
package distillation
import (
"encoding/json"
"errors"
"fmt"
"strings"
"time"
)
// ScoreCategory is one of the 4 deterministic verdicts. Matches Rust
// SCORE_CATEGORIES exactly.
//
// The type is a plain string, so values other than the four constants
// below can still be constructed (for example when decoding JSON).
// ValidateScoredRun rejects any value not present in AllScoreCategories.
type ScoreCategory string
const (
CategoryAccepted ScoreCategory = "accepted"
CategoryPartiallyAccepted ScoreCategory = "partially_accepted"
CategoryRejected ScoreCategory = "rejected"
CategoryNeedsHumanReview ScoreCategory = "needs_human_review"
)
// AllScoreCategories lists every legal category — used by validators
// (isValidCategory scans it linearly). It is a package-level slice
// variable, so it is technically mutable; treat it as read-only.
var AllScoreCategories = []ScoreCategory{
CategoryAccepted,
CategoryPartiallyAccepted,
CategoryRejected,
CategoryNeedsHumanReview,
}
// ScorerVersion is hardcoded — the deterministic-output contract
// requires this. Bump the literal in the same commit as any scoring-
// rule change so the version stamp moves atomically with logic.
// Mirrors the Rust SCORER_VERSION (also v1.0.0 at e7636f2).
//
// The value is what tests expect to find on ScoredRun.ScorerVersion;
// ValidateScoredRun itself only requires that field to be non-empty,
// not equal to this constant.
const ScorerVersion = "v1.0.0"
// SftQualityScore enumerates the categories LEGAL in SFT exports.
// SftNever (defined below) is the inverse — categories that NEVER
// ship to SFT under any flag combination. The contamination firewall
// is enforced at the schema layer (ValidateSftSample) AND by the
// exporter; defense in depth.
type SftQualityScore string
const (
SftQualityAccepted SftQualityScore = "accepted"
SftQualityPartiallyAccepted SftQualityScore = "partially_accepted"
)
// SftQualityScores lists quality scores legal in SFT samples.
// Default is SftQualityAccepted only; --include-partial CLI flag
// expands to both. rejected and needs_human_review are NEVER legal.
//
// NOTE(review): the default/--include-partial selection is not
// implemented in this file; ValidateSftSample accepts every entry of
// this slice unconditionally. Confirm the flag handling in the exporter.
var SftQualityScores = []SftQualityScore{
SftQualityAccepted,
SftQualityPartiallyAccepted,
}
// SftNever is the contamination firewall: ScoreCategories that NEVER
// ship to SFT under ANY caller flag. Enforced at the schema layer
// (ValidateSftSample) AND at the exporter layer. Per the Rust
// e7636f2 spec: "Hard non-negotiable: this set never expands. If you
// find yourself adding 'needs_human_review' or 'rejected' here, stop
// — that's the contamination the spec forbids."
//
// Exported so callers AND the validator share the same source of
// truth. Note that this is a package-level slice variable, not a Go
// constant, so nothing in the language stops code from mutating it at
// runtime. Modifying this slice changes the contract; reviewers
// should treat any commit that touches it as a security review.
//
// Elements are ScoreCategory values while SFT samples carry
// SftQualityScore; isContaminationCategory bridges the two by comparing
// the underlying strings.
var SftNever = []ScoreCategory{
CategoryRejected,
CategoryNeedsHumanReview,
}
// SftSampleSchemaVersion bumps when the on-wire SftSample shape
// changes incompatibly. Match the Rust SFT_SAMPLE_SCHEMA_VERSION.
// ValidateSftSample rejects any sample whose SchemaVersion differs
// from this value.
const SftSampleSchemaVersion = 1
// ScoredRunSchemaVersion bumps when the on-wire ScoredRun shape
// changes incompatibly. Match the Rust SCORED_RUN_SCHEMA_VERSION.
// ValidateScoredRun rejects any run whose SchemaVersion differs from
// this value.
const ScoredRunSchemaVersion = 1
// EvidenceSchemaVersion mirrors the Rust EVIDENCE_SCHEMA_VERSION.
// This package consumes EvidenceRecord; producing it is a separate
// concern (the materialization layer not yet ported).
// No validator in this file checks EvidenceRecord.SchemaVersion
// against this constant.
const EvidenceSchemaVersion = 1
// ModelRole categorizes the kind of model output represented by an
// EvidenceRecord. Used by the SFT exporter to filter "real model
// output" from pure-extraction rows.
//
// The type is a plain string. EvidenceRecord tags its ModelRole field
// with omitempty, so an unset role is dropped from the JSON rather
// than written as "".
type ModelRole string
const (
RoleExecutor ModelRole = "executor"
RoleReviewer ModelRole = "reviewer"
RoleExtractor ModelRole = "extractor"
RoleVerifier ModelRole = "verifier"
RoleCategorizer ModelRole = "categorizer"
RoleTiebreaker ModelRole = "tiebreaker"
RoleApplier ModelRole = "applier"
RoleEmbedder ModelRole = "embedder"
RoleOther ModelRole = "other"
)
// Provenance is the source-linkage every distillation record carries.
// SourceFile is required (no record without source linkage). The
// validators in this package are stricter than "best-effort" for the
// remaining fields: validateProvenance also requires SigHash to be
// non-empty and RecordedAt to parse as an RFC 3339 timestamp. Only
// LineOffset is left unchecked.
type Provenance struct {
SourceFile string `json:"source_file"`
// LineOffset is omitted from JSON when zero (omitempty), so an
// offset of 0 and an absent offset look the same on the wire.
LineOffset int64 `json:"line_offset,omitempty"`
SigHash string `json:"sig_hash"`
RecordedAt string `json:"recorded_at"` // ISO 8601
}
// ObserverVerdict is what an observer returned for an executor's
// output. Matches the Rust enum but as a string type for JSON
// flexibility.
//
// Because it is a plain string, values outside the three constants
// below are representable; nothing in this file validates the field.
type ObserverVerdict string
const (
VerdictAccept ObserverVerdict = "accept"
VerdictReject ObserverVerdict = "reject"
VerdictCycle ObserverVerdict = "cycle"
)
// EvidenceRecord is one row in the canonical evidence stream.
// Producing it (transforms from raw KB streams) is a separate
// concern; this package consumes it.
//
// Fields mirror the Rust EvidenceRecord at e7636f2. Optional fields
// use Go pointers / slices so missing-vs-empty stays distinguishable
// for the scorer's heuristics.
//
// Caveat: that distinction only holds for the pointer fields
// (RetrievedContext, HumanOverride) and, in memory, for nil vs empty
// slices/maps. The scalar optionals (ObserverConfidence, CostUSD,
// LatencyMs, Text, and the string-typed fields) are plain values tagged
// omitempty, so a zero value and an absent value are indistinguishable
// after a JSON round trip.
type EvidenceRecord struct {
RunID string `json:"run_id"`
TaskID string `json:"task_id"`
Timestamp string `json:"timestamp"`
SchemaVersion int `json:"schema_version"`
Provenance Provenance `json:"provenance"`
ModelName string `json:"model_name,omitempty"`
ModelProvider string `json:"model_provider,omitempty"`
ModelRole ModelRole `json:"model_role,omitempty"`
InputHash string `json:"input_hash,omitempty"`
OutputHash string `json:"output_hash,omitempty"`
SourceFiles []string `json:"source_files,omitempty"`
CommandsRun []string `json:"commands_run,omitempty"`
RetrievedContext *RetrievedContext `json:"retrieved_context,omitempty"`
ObserverNotes []string `json:"observer_notes,omitempty"`
ObserverVerdict ObserverVerdict `json:"observer_verdict,omitempty"`
ObserverConfidence float64 `json:"observer_confidence,omitempty"`
ScratchpadSummary string `json:"scratchpad_summary,omitempty"`
// SuccessMarkers / FailureMarkers are the string tags the package
// tests set to drive ScoreRecord (e.g. "committed", "blocked").
SuccessMarkers []string `json:"success_markers,omitempty"`
FailureMarkers []string `json:"failure_markers,omitempty"`
// ValidationResults is free-form; numbers decoded from JSON arrive
// as float64, which is how the tests populate it.
ValidationResults map[string]any `json:"validation_results,omitempty"`
HumanOverride *HumanOverride `json:"human_override,omitempty"`
CostUSD float64 `json:"cost_usd,omitempty"`
LatencyMs int64 `json:"latency_ms,omitempty"`
Text string `json:"text,omitempty"`
}
// RetrievedContext captures what the model saw via retrieval. Matches
// the Rust shape exactly so the JSON round-trips byte-identical (per
// ADR-001 #4 "logic, not bit-identical" — but on-wire compatibility
// is desirable for tooling that consumes EvidenceRecord JSONL).
//
// NOTE(review): every field is tagged omitempty, so a counter that is
// explicitly 0 (or an empty corpora list) in the input is dropped when
// re-encoded. Byte-identical round-trips therefore only hold if the
// producer omits zero values too — confirm against the Rust writer.
type RetrievedContext struct {
MatrixCorpora []string `json:"matrix_corpora,omitempty"`
MatrixHits int `json:"matrix_hits,omitempty"`
MatrixChunksKept int `json:"matrix_chunks_kept,omitempty"`
MatrixChunksDropped int `json:"matrix_chunks_dropped,omitempty"`
PathwayFingerprintsSeen int `json:"pathway_fingerprints_seen,omitempty"`
}
// HumanOverride captures a human-in-the-loop decision overriding the
// scorer's verdict. Recorded but doesn't change the scorer's output;
// downstream consumers (UI, distillation acceptance) decide how to
// treat it.
//
// All four fields are always written to JSON (no omitempty). Decision
// is an untyped string; nothing in this file checks it against the
// accept|reject|needs_review vocabulary noted on the field.
type HumanOverride struct {
Overrider string `json:"overrider"`
Decision string `json:"decision"` // accept|reject|needs_review
Reason string `json:"reason"`
OverriddenAt string `json:"overridden_at"`
}
// SubScores carries the deterministic scorer's intermediate signals
// alongside the final ScoreCategory. Persisted on every ScoredRun
// so a downstream UI can show "why" without re-running the scorer.
//
// Typed signals are pointers so "not measured" (nil, omitted from
// JSON) stays distinct from a measured false/0.
type SubScores struct {
CargoGreen *bool `json:"cargo_green,omitempty"`
// AnchorGrounding, when set, must lie in [0, 1]; ValidateScoredRun
// enforces the range.
AnchorGrounding *float64 `json:"anchor_grounding,omitempty"`
SchemaValid *bool `json:"schema_valid,omitempty"`
PathwayReplaySucceeded *bool `json:"pathway_replay_succeeded,omitempty"`
ObserverVerdict ObserverVerdict `json:"observer_verdict,omitempty"`
AcceptedOnAttempt *int `json:"accepted_on_attempt,omitempty"`
// Extra fields the Rust schema accepted as `[key: string]: unknown`.
// Captured here as a free-form map so future signals don't require
// type-system changes.
//
// The `json:"-"` tag means encoding/json ignores this field in both
// directions: a plain json.Marshal drops it, and json.Unmarshal never
// fills it. Use MarshalSubScores to emit Extras merged into the
// object.
Extras map[string]any `json:"-"`
}
// ScoredRun is the deterministic scorer's output. One per
// EvidenceRecord. Provenance ties back to the materialized evidence
// row (not the raw source stream).
//
// ValidateScoredRun checks: SchemaVersion equals
// ScoredRunSchemaVersion; EvidenceRunID, EvidenceTaskID and
// ScorerVersion are non-empty; ScoredAt is an RFC 3339 timestamp;
// Reasons has at least one entry; Category is one of
// AllScoreCategories; Provenance passes validateProvenance.
type ScoredRun struct {
SchemaVersion int `json:"schema_version"`
EvidenceRunID string `json:"evidence_run_id"`
EvidenceTaskID string `json:"evidence_task_id"`
Category ScoreCategory `json:"category"`
Reasons []string `json:"reasons"` // non-empty
ScoredAt string `json:"scored_at"`
ScorerVersion string `json:"scorer_version"`
// SubScores is optional. When encoded through this struct with
// encoding/json, SubScores.Extras is not emitted (it is tagged "-").
SubScores *SubScores `json:"sub_scores,omitempty"`
Provenance Provenance `json:"provenance"`
}
// SftSample is one entry in exports/sft/instruction_response.jsonl.
// The contamination firewall lives in ValidateSftSample.
//
// ValidateSftSample requires Instruction and Response to contain
// non-whitespace text, ID and SourceRunID to be non-empty, CreatedAt
// to be an RFC 3339 timestamp, and QualityScore to be one of
// SftQualityScores.
type SftSample struct {
SchemaVersion int `json:"schema_version"`
ID string `json:"id"`
Instruction string `json:"instruction"`
Context string `json:"context"` // empty allowed; null/missing not
Response string `json:"response"`
SourceRunID string `json:"source_run_id"`
QualityScore SftQualityScore `json:"quality_score"`
CreatedAt string `json:"created_at"`
Provenance Provenance `json:"provenance"`
}
// ─── Validators ──────────────────────────────────────────────────
// ValidationError describes one schema violation: the offending field
// (dotted path, e.g. "provenance.sig_hash") and a human-readable
// message. Field order matters: the validators build values with
// unkeyed literals.
type ValidationError struct {
	Field   string
	Message string
}

// Error renders the violation as "<field>: <message>".
func (e ValidationError) Error() string {
	return fmt.Sprint(e.Field, ": ", e.Message)
}

// ValidationErrors is the aggregate error the validators return when
// at least one field violates the schema. Entries keep the order in
// which the checks ran.
type ValidationErrors []ValidationError

// Error joins the individual violations with "; ". An empty collection
// renders as "no errors".
func (es ValidationErrors) Error() string {
	if len(es) == 0 {
		return "no errors"
	}
	var b strings.Builder
	for i := range es {
		if i > 0 {
			b.WriteString("; ")
		}
		b.WriteString(es[i].Error())
	}
	return b.String()
}

// HasErrors reports whether the collection holds at least one violation.
func (es ValidationErrors) HasErrors() bool { return len(es) != 0 }
// ValidateScoredRun mirrors the Rust validateScoredRun. It returns nil
// when r satisfies the schema, otherwise a ValidationErrors value
// listing every field-level violation in check order.
//
// Checks performed:
//   - schema_version equals ScoredRunSchemaVersion
//   - evidence_run_id, evidence_task_id, scorer_version are non-empty
//   - scored_at parses as an RFC 3339 timestamp
//   - reasons has at least one entry
//   - category is one of AllScoreCategories
//   - provenance passes validateProvenance
//   - sub_scores.anchor_grounding, when present, lies in [0, 1]
func ValidateScoredRun(r ScoredRun) error {
	var errs ValidationErrors
	if r.SchemaVersion != ScoredRunSchemaVersion {
		errs = append(errs, ValidationError{
			"schema_version",
			fmt.Sprintf("expected %d, got %d", ScoredRunSchemaVersion, r.SchemaVersion),
		})
	}
	if r.EvidenceRunID == "" {
		errs = append(errs, ValidationError{"evidence_run_id", "must be non-empty"})
	}
	if r.EvidenceTaskID == "" {
		errs = append(errs, ValidationError{"evidence_task_id", "must be non-empty"})
	}
	if !validISOTimestamp(r.ScoredAt) {
		errs = append(errs, ValidationError{"scored_at", "must be ISO 8601 timestamp"})
	}
	if r.ScorerVersion == "" {
		errs = append(errs, ValidationError{"scorer_version", "must be non-empty"})
	}
	if len(r.Reasons) == 0 {
		errs = append(errs, ValidationError{"reasons", "must be non-empty (every score needs a reason)"})
	}
	if !isValidCategory(r.Category) {
		errs = append(errs, ValidationError{"category", fmt.Sprintf("must be one of %v, got %q", AllScoreCategories, r.Category)})
	}
	if err := validateProvenance(r.Provenance, "provenance"); err != nil {
		errs = append(errs, err...)
	}
	if r.SubScores != nil && r.SubScores.AnchorGrounding != nil {
		ag := *r.SubScores.AnchorGrounding
		// Written as a negated "inside the range" test on purpose: every
		// ordered comparison with NaN is false, so the seemingly
		// equivalent `ag < 0 || ag > 1` would let NaN through. Do not
		// "simplify" this with De Morgan.
		if !(ag >= 0 && ag <= 1) {
			errs = append(errs, ValidationError{"sub_scores.anchor_grounding", "must be in [0, 1]"})
		}
	}
	if errs.HasErrors() {
		return errs
	}
	// Return a literal nil so callers never see a non-nil error
	// interface wrapping an empty ValidationErrors.
	return nil
}
// ValidateSftSample validates one SFT export row and acts as the
// contamination firewall.
//
// Return values:
//   - nil when the sample is valid.
//   - an error wrapping ErrSftContamination when quality_score is one
//     of the SftNever categories. This is returned immediately; any
//     field violations collected before that point are NOT included.
//   - a ValidationErrors value listing every field violation otherwise,
//     including an unknown quality_score that is neither legal nor in
//     SftNever.
//
// Context may be empty: the Go field is always a string, so the
// "missing vs empty" distinction only exists at the JSON layer.
func ValidateSftSample(s SftSample) error {
	var errs ValidationErrors
	add := func(field, msg string) {
		errs = append(errs, ValidationError{field, msg})
	}

	if s.SchemaVersion != SftSampleSchemaVersion {
		add("schema_version", fmt.Sprintf("expected %d, got %d", SftSampleSchemaVersion, s.SchemaVersion))
	}
	if len(s.ID) == 0 {
		add("id", "must be non-empty")
	}
	if len(strings.TrimSpace(s.Instruction)) == 0 {
		add("instruction", "must be non-whitespace (no empty pairs)")
	}
	if len(strings.TrimSpace(s.Response)) == 0 {
		add("response", "must be non-whitespace (no empty pairs)")
	}
	if len(s.SourceRunID) == 0 {
		add("source_run_id", "must be non-empty")
	}
	if !validISOTimestamp(s.CreatedAt) {
		add("created_at", "must be ISO 8601 timestamp")
	}
	errs = append(errs, validateProvenance(s.Provenance, "provenance")...)

	// Contamination firewall. Hard non-negotiable per the spec.
	switch q := s.QualityScore; {
	case isLegalSftQualityScore(q):
		// Legal score: nothing to record.
	case isContaminationCategory(q):
		// Surface the sentinel so callers can use
		// errors.Is(err, ErrSftContamination) to tell "the spec said
		// never" apart from "you typo'd a category".
		return fmt.Errorf("%w: quality_score %q in SftNever (rejected/needs_human_review never legal in SFT)",
			ErrSftContamination, q)
	default:
		add("quality_score", fmt.Sprintf("must be one of %v, got %q", SftQualityScores, q))
	}

	if len(errs) == 0 {
		return nil
	}
	return errs
}
// ErrSftContamination is the firewall sentinel — when ValidateSftSample
// rejects a sample because its quality_score is in SftNever, callers
// can errors.Is(err, ErrSftContamination) to reliably distinguish
// "spec violation" from "typo'd category."
//
// ValidateSftSample never returns this value directly; it wraps it with
// fmt.Errorf and %w to add the offending score, so compare with
// errors.Is rather than ==.
var ErrSftContamination = errors.New("distillation: SFT contamination — quality_score in SftNever")
// ─── Internal helpers ────────────────────────────────────────────
// isValidCategory reports whether c equals one of the entries in
// AllScoreCategories.
func isValidCategory(c ScoreCategory) bool {
	for i := range AllScoreCategories {
		if AllScoreCategories[i] == c {
			return true
		}
	}
	return false
}
// isLegalSftQualityScore reports whether q equals one of the entries in
// SftQualityScores, i.e. whether the score may appear in an SFT sample.
func isLegalSftQualityScore(q SftQualityScore) bool {
	for i := range SftQualityScores {
		if SftQualityScores[i] == q {
			return true
		}
	}
	return false
}
// isContaminationCategory reports whether q names one of the SftNever
// categories. SftNever holds ScoreCategory values while q is an
// SftQualityScore; both are string types sharing the same on-wire
// spelling, so q is converted and compared by value.
func isContaminationCategory(q SftQualityScore) bool {
	asCategory := ScoreCategory(q)
	for i := range SftNever {
		if SftNever[i] == asCategory {
			return true
		}
	}
	return false
}
// validISOTimestamp reports whether s parses as an RFC 3339 timestamp
// (the ISO 8601 profile used on the wire), with or without fractional
// seconds. The empty string is rejected.
//
// A single parse with time.RFC3339 is sufficient: when parsing,
// time.Parse accepts a fractional-second field right after the seconds
// even if the layout does not contain one. A second attempt with
// time.RFC3339Nano can never succeed where this one failed.
func validISOTimestamp(s string) bool {
	if s == "" {
		return false
	}
	_, err := time.Parse(time.RFC3339, s)
	return err == nil
}
// validateProvenance checks the three enforced Provenance fields and
// returns the violations found, in order: source_file (non-empty),
// sig_hash (non-empty), recorded_at (valid timestamp). field is the
// path prefix used in each reported field name, e.g. "provenance" gives
// "provenance.sig_hash". The result is nil when nothing is wrong.
func validateProvenance(p Provenance, field string) ValidationErrors {
	var found ValidationErrors
	report := func(suffix, msg string) {
		found = append(found, ValidationError{field + suffix, msg})
	}
	if len(p.SourceFile) == 0 {
		report(".source_file", "must be non-empty")
	}
	if len(p.SigHash) == 0 {
		report(".sig_hash", "must be non-empty")
	}
	if ok := validISOTimestamp(p.RecordedAt); !ok {
		report(".recorded_at", "must be ISO 8601 timestamp")
	}
	return found
}
// MarshalSubScores encodes s as a single JSON object containing both
// the typed fields and the entries of s.Extras. It exists because
// encoding/json has no built-in way to flatten a "rest" map into the
// enclosing struct's object.
//
// Behaviour:
//   - nil s encodes as the JSON literal null.
//   - With no Extras, the result is the ordinary struct encoding.
//   - Otherwise the struct encoding is decoded into a map, Extras
//     entries are added for keys not already present, and the map is
//     re-encoded (so object keys come out in sorted order).
//
// A typed field wins over an Extras entry of the same name only when
// the typed field is actually emitted. Every typed field is tagged
// omitempty, so an unset one leaves its key free and a same-named
// Extras entry will be written in its place.
func MarshalSubScores(s *SubScores) ([]byte, error) {
	if s == nil {
		return []byte("null"), nil
	}

	// Encode the typed fields via a local defined type.
	type plain SubScores
	typed, err := json.Marshal((*plain)(s))
	if err != nil {
		return nil, err
	}
	if len(s.Extras) == 0 {
		return typed, nil
	}

	// Round-trip through a map so the extras can be merged in. Less
	// efficient than a streaming merge, but simple.
	var merged map[string]any
	if err := json.Unmarshal(typed, &merged); err != nil {
		return nil, err
	}
	for key, val := range s.Extras {
		if _, taken := merged[key]; taken {
			continue // already-emitted typed key is kept
		}
		merged[key] = val
	}
	return json.Marshal(merged)
}

View File

@ -1,151 +0,0 @@
// Package drift quantifies when historical decisions stop matching
// current reality. Per the PRD's 5-loop substrate, this is loop 5
// (drift) — distinct from the rating+distillation loop because
// drift is about MEASUREMENT, not learning. The learning loop says
// "this match worked, remember it"; the drift loop says "the
// playbook entry from 4 months ago — does it still match what the
// substrate would surface today?"
//
// First-shipped drift shape: SCORER drift. When the deterministic
// scorer's logic changes (ScorerVersion bumped), historical
// ScoredRuns may no longer match what the current scorer would
// produce on the same EvidenceRecord. ComputeScorerDrift re-runs
// the current scorer over a slice of (EvidenceRecord, persisted
// category) pairs and reports mismatches.
//
// Why this matters: the rating+distillation loop only learns
// forward. Without a drift quantifier, a scorer-rule change
// silently invalidates the historical training data feeding the
// loop. With drift quantification, a rule change surfaces a
// concrete number ("847 of 4701 historical scoredruns now
// disagree") that triggers a re-score-and-retrain cycle rather
// than letting the substrate quietly rot.
//
// Future drift shapes (not in this commit):
// - PLAYBOOK drift: for each playbook entry, re-run its query
// through current matrix-search; if the recorded answer is no
// longer in top-K, the world has moved.
// - EMBEDDING drift: KS-test on the distribution of embedding
// vectors at T1 vs T2; large shifts = the corpus has changed
// materially.
// - AUDIT BASELINE drift: track how PR audit verdicts shift over
// scorer/auditor versions; matches the Rust audit_baselines.jsonl
// longitudinal signal.
package drift
import (
"sort"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
)
// ScorerDriftEntry is one mismatch — a historical (record, category)
// pair where the current scorer disagrees with the persisted
// verdict. Reasons captures the current scorer's explanation so
// operators can see WHY the verdict changed.
//
// ComputeScorerDrift fills the identifiers from the input record
// (RunID, TaskID, Provenance.SourceFile) and the Current* fields from
// the fresh distillation.ScoreRecord result.
type ScorerDriftEntry struct {
EvidenceRunID string `json:"evidence_run_id"`
EvidenceTaskID string `json:"evidence_task_id"`
PersistedCategory distillation.ScoreCategory `json:"persisted_category"`
CurrentCategory distillation.ScoreCategory `json:"current_category"`
CurrentReasons []string `json:"current_reasons"`
SourceFile string `json:"source_file"`
}
// CategoryShift is one cell in the drift matrix — "X persisted
// records that NOW classify as Y." e.g. "12 records that were
// 'rejected' yesterday are 'partially_accepted' today."
//
// From is the persisted category, To is the category the current
// scorer produced, and Count is how many inputs made that transition.
// Only transitions where From != To are ever recorded.
type CategoryShift struct {
From distillation.ScoreCategory `json:"from"`
To distillation.ScoreCategory `json:"to"`
Count int `json:"count"`
}
// ScorerDriftReport is the summary returned by ComputeScorerDrift.
// The shape is intentionally machine-readable so a downstream
// dashboard / alerting layer can threshold on Drifted / TotalChecked
// without parsing the entries list.
//
// Invariants as produced by ComputeScorerDrift: Matched + Drifted ==
// TotalChecked; DriftRate stays 0 when TotalChecked is 0; ShiftMatrix
// is ordered by Count descending, then From, then To; Entries is only
// populated when the caller asked for it.
type ScorerDriftReport struct {
ScorerVersion string `json:"scorer_version"` // current scorer's version
TotalChecked int `json:"total_checked"`
Matched int `json:"matched"` // current == persisted
Drifted int `json:"drifted"` // current != persisted
DriftRate float64 `json:"drift_rate"` // Drifted / TotalChecked
ShiftMatrix []CategoryShift `json:"shift_matrix,omitempty"`
Entries []ScorerDriftEntry `json:"entries,omitempty"` // mismatches only
}
// ScorerDriftInput is one (record, persisted_category) pair to check.
// Caller is responsible for materializing these from disk; this
// package is pure compute.
//
// The struct carries no JSON tags; it is an in-memory argument type
// for ComputeScorerDrift.
type ScorerDriftInput struct {
Record distillation.EvidenceRecord
PersistedCategory distillation.ScoreCategory
}
// ComputeScorerDrift re-scores every input with the current
// distillation.ScoreRecord and compares the result to the persisted
// category. It performs no I/O; the caller supplies the inputs
// (typically read from scored-runs JSONL and the matching evidence
// JSONL).
//
// The returned report always carries the current scorer version, the
// total/matched/drifted counters and the drift rate (left at 0 for an
// empty input). When at least one mismatch exists, ShiftMatrix holds
// one cell per distinct (persisted → current) transition, ordered by
// count descending, then by From, then by To, so output is stable
// regardless of map iteration order.
//
// includeEntries controls whether the per-mismatch detail list is
// built. For large corpora the summary numbers may be all the caller
// needs; passing false avoids allocating the entries slice.
func ComputeScorerDrift(inputs []ScorerDriftInput, includeEntries bool) ScorerDriftReport {
	type transition struct{ from, to distillation.ScoreCategory }

	var (
		matched, drifted int
		details          []ScorerDriftEntry
		tally            = make(map[transition]int)
	)
	for i := range inputs {
		in := &inputs[i]
		fresh := distillation.ScoreRecord(in.Record)
		if fresh.Category == in.PersistedCategory {
			matched++
			continue
		}
		drifted++
		tally[transition{from: in.PersistedCategory, to: fresh.Category}]++
		if !includeEntries {
			continue
		}
		details = append(details, ScorerDriftEntry{
			EvidenceRunID:     in.Record.RunID,
			EvidenceTaskID:    in.Record.TaskID,
			PersistedCategory: in.PersistedCategory,
			CurrentCategory:   fresh.Category,
			CurrentReasons:    fresh.Reasons,
			SourceFile:        in.Record.Provenance.SourceFile,
		})
	}

	report := ScorerDriftReport{
		ScorerVersion: distillation.ScorerVersion,
		TotalChecked:  len(inputs),
		Matched:       matched,
		Drifted:       drifted,
		Entries:       details,
	}
	if report.TotalChecked > 0 {
		report.DriftRate = float64(drifted) / float64(report.TotalChecked)
	}
	if len(tally) == 0 {
		return report
	}

	matrix := make([]CategoryShift, 0, len(tally))
	for tr, n := range tally {
		matrix = append(matrix, CategoryShift{From: tr.from, To: tr.to, Count: n})
	}
	// Largest shifts first; ties broken lexically on From, then To.
	// (From, To) pairs are unique, so this is a total order.
	sort.Slice(matrix, func(i, j int) bool {
		switch a, b := matrix[i], matrix[j]; {
		case a.Count != b.Count:
			return a.Count > b.Count
		case a.From != b.From:
			return string(a.From) < string(b.From)
		default:
			return string(a.To) < string(b.To)
		}
	})
	report.ShiftMatrix = matrix
	return report
}

View File

@ -1,155 +0,0 @@
package drift
import (
"testing"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
)
// mkInput builds a ScorerDriftInput for tests. The embedded evidence
// record uses fixed IDs and timestamps; only the provenance source
// file, the success markers and the persisted category vary.
func mkInput(sourceFile string, persisted distillation.ScoreCategory, succ []string) ScorerDriftInput {
	var rec distillation.EvidenceRecord
	rec.RunID = "run-x"
	rec.TaskID = "task-x"
	rec.Timestamp = "2026-01-01T00:00:00Z"
	rec.SchemaVersion = distillation.EvidenceSchemaVersion
	rec.Provenance.SourceFile = sourceFile
	rec.Provenance.SigHash = "abc"
	rec.Provenance.RecordedAt = "2026-01-01T00:00:01Z"
	rec.SuccessMarkers = succ

	return ScorerDriftInput{Record: rec, PersistedCategory: persisted}
}
// TestComputeScorerDrift_NoDrift feeds three identical scrum_review
// inputs whose persisted category (accepted) is what the test expects
// the current scorer to return for accepted_on_attempt_1. The report
// must show zero drift, a zero rate and no entries.
func TestComputeScorerDrift_NoDrift(t *testing.T) {
	const copies = 3
	inputs := make([]ScorerDriftInput, 0, copies)
	for i := 0; i < copies; i++ {
		inputs = append(inputs, mkInput(
			"data/_kb/scrum_reviews.jsonl",
			distillation.CategoryAccepted,
			[]string{"accepted_on_attempt_1"},
		))
	}

	report := ComputeScorerDrift(inputs, true)

	countersOK := report.TotalChecked == 3 && report.Matched == 3 && report.Drifted == 0
	if !countersOK {
		t.Errorf("no-drift case: total=%d matched=%d drifted=%d",
			report.TotalChecked, report.Matched, report.Drifted)
	}
	if report.DriftRate != 0 {
		t.Errorf("drift_rate: want 0, got %v", report.DriftRate)
	}
	if n := len(report.Entries); n != 0 {
		t.Errorf("entries: want 0, got %d", n)
	}
}
// TestComputeScorerDrift_ShiftDetected simulates a historical labelling
// in which every scrum_review acceptance was persisted as "accepted".
// The test expects today's scorer to keep attempt-1 as accepted but to
// return partially_accepted for attempts 2, 3 and 5, so three of the
// five inputs must be reported as drift, collapsed into a single
// accepted → partially_accepted matrix cell.
func TestComputeScorerDrift_ShiftDetected(t *testing.T) {
	const src = "data/_kb/scrum_reviews.jsonl"
	persistedAccepted := func(marker string) ScorerDriftInput {
		return mkInput(src, distillation.CategoryAccepted, []string{marker})
	}
	inputs := []ScorerDriftInput{
		// Match: attempt 1 → accepted (still).
		persistedAccepted("accepted_on_attempt_1"),
		persistedAccepted("accepted_on_attempt_1"),
		// Drift: persisted said accepted, the test expects partial today.
		persistedAccepted("accepted_on_attempt_2"),
		persistedAccepted("accepted_on_attempt_3"),
		// Drift: attempt 5, also expected to be partial today.
		persistedAccepted("accepted_on_attempt_5"),
	}

	report := ComputeScorerDrift(inputs, true)

	if report.TotalChecked != 5 {
		t.Errorf("total: want 5, got %d", report.TotalChecked)
	}
	if report.Matched != 2 {
		t.Errorf("matched: want 2, got %d", report.Matched)
	}
	if report.Drifted != 3 {
		t.Errorf("drifted: want 3, got %d", report.Drifted)
	}

	wantRate := 3.0 / 5.0
	lo, hi := wantRate-1e-9, wantRate+1e-9
	if report.DriftRate < lo || report.DriftRate > hi {
		t.Errorf("drift_rate: want %v, got %v", wantRate, report.DriftRate)
	}
	if n := len(report.Entries); n != 3 {
		t.Errorf("entries: want 3 mismatches, got %d", n)
	}

	// Shift matrix should show one shift: accepted → partially_accepted, count=3.
	if n := len(report.ShiftMatrix); n != 1 {
		t.Errorf("shift matrix: want 1 shift, got %d (%+v)", n, report.ShiftMatrix)
		return
	}
	cell := report.ShiftMatrix[0]
	asExpected := cell.From == distillation.CategoryAccepted &&
		cell.To == distillation.CategoryPartiallyAccepted &&
		cell.Count == 3
	if !asExpected {
		t.Errorf("shift: got %+v", cell)
	}
}
// TestComputeScorerDrift_MultipleShiftsSortedByCount feeds two distinct
// category shifts (3× accepted→partial, 1× rejected→needs_human) and checks
// that the shift matrix lists them in descending count order.
func TestComputeScorerDrift_MultipleShiftsSortedByCount(t *testing.T) {
	inputs := []ScorerDriftInput{
		// 3× accepted→partial
		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
		mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
		// 1× rejected→needs_human (no marker)
		{
			Record: distillation.EvidenceRecord{
				RunID: "r1", TaskID: "t1",
				Timestamp:     "2026-01-01T00:00:00Z",
				SchemaVersion: distillation.EvidenceSchemaVersion,
				Provenance: distillation.Provenance{
					SourceFile: "data/_kb/scrum_reviews.jsonl",
					SigHash:    "x", RecordedAt: "2026-01-01T00:00:01Z",
				},
				// no markers → needs_human_review
			},
			PersistedCategory: distillation.CategoryRejected,
		},
	}
	r := ComputeScorerDrift(inputs, false)
	if r.Drifted != 4 {
		t.Errorf("drifted: want 4, got %d", r.Drifted)
	}
	// Fatalf rather than Errorf: the ordering check below indexes [0] and
	// [1], which would panic (and hide this message) on a shorter matrix.
	if len(r.ShiftMatrix) != 2 {
		t.Fatalf("shift matrix: want 2 distinct shifts, got %d", len(r.ShiftMatrix))
	}
	// Sorted by count desc, so accepted→partial (3) before rejected→needs_human (1)
	if r.ShiftMatrix[0].Count != 3 || r.ShiftMatrix[1].Count != 1 {
		t.Errorf("shift order wrong: got %+v", r.ShiftMatrix)
	}
}
func TestComputeScorerDrift_IncludeEntriesFalse(t *testing.T) {
inputs := []ScorerDriftInput{
mkInput("data/_kb/scrum_reviews.jsonl", distillation.CategoryAccepted, []string{"accepted_on_attempt_2"}),
}
r := ComputeScorerDrift(inputs, false)
if r.Drifted != 1 {
t.Errorf("drifted: want 1, got %d", r.Drifted)
}
if len(r.Entries) != 0 {
t.Errorf("entries: want 0 when includeEntries=false, got %d", len(r.Entries))
}
}
// TestComputeScorerDrift_EmptyInput checks that a nil input yields zero
// counters and a zero drift rate.
func TestComputeScorerDrift_EmptyInput(t *testing.T) {
	report := ComputeScorerDrift(nil, true)

	allZero := report.TotalChecked == 0 && report.Drifted == 0 && report.Matched == 0
	if !allZero {
		t.Errorf("empty: want all-zero, got %+v", report)
	}
	if rate := report.DriftRate; rate != 0 {
		t.Errorf("drift_rate on empty: want 0, got %v", rate)
	}
}
// TestComputeScorerDrift_ScorerVersionStamped checks that the report carries
// distillation.ScorerVersion even when there is nothing to score.
func TestComputeScorerDrift_ScorerVersionStamped(t *testing.T) {
	if got := ComputeScorerDrift(nil, false).ScorerVersion; got != distillation.ScorerVersion {
		t.Errorf("scorer_version: want %q, got %q", distillation.ScorerVersion, got)
	}
}

View File

@ -1,137 +0,0 @@
package matrix
// Strong-model auto-downgrade gate. Port of mode.rs::execute's
// downgrade block (Rust system, 2026-04-26 pass5).
//
// What it does: if the caller resolves `codereview_lakehouse` against
// a strong model and didn't force the mode, flip to
// `codereview_isolation` so we don't pollute the prompt with matrix
// chunks the model demonstrably does better without.
//
// Why: pass5 variance test on x-ai/grok-4.1-fast — composing matrix
// corpora into codereview_lakehouse LOST 5/5 head-to-head reps
// against matrix-free codereview_isolation, p=0.031. Strong models
// have enough native capacity that bug fingerprints + adversarial
// framing + file content carry them; matrix chunks displace
// depth-of-analysis.
//
// Defaults: assume "strong" (downgrade matrix off). The explicit
// IsWeakModel predicate keeps the weak-list small — anything
// matching `:free` (OpenRouter free tier) or the local last-resort
// rungs (qwen3.5/qwen3) stays on the full lakehouse path where
// matrix demonstrably helped during the 2026-04-26 free-tier
// bake-off.
import (
"os"
"strings"
)
// Mode constants — exported so callers don't string-literal them.
const (
// ModeCodeReviewLakehouse is the only mode MaybeDowngrade acts on; any
// other mode passes through the gate unchanged.
ModeCodeReviewLakehouse = "codereview_lakehouse"
// ModeCodeReviewIsolation is the mode MaybeDowngrade switches to when the
// downgrade fires.
ModeCodeReviewIsolation = "codereview_isolation"
)
// EnvForceFullEnrichment is the env var that bypasses the gate for
// diagnostic runs. envForceFullEnrichment treats it as set when the value,
// after trimming whitespace and lowercasing, equals "1" or "true"
// (e.g. "LH_FORCE_FULL_ENRICHMENT=1"). Any other value leaves the gate on.
const EnvForceFullEnrichment = "LH_FORCE_FULL_ENRICHMENT"
// IsWeakModel reports whether model belongs to the small set of models for
// which matrix-corpus composition demonstrably helped during the 2026-04-26
// pass5 bake-off. Everything else is treated as strong and gets matrix
// dropped, avoiding the "composed lost 5/5 vs isolation" effect.
//
// A model is weak when any of these hold:
//   - it ends in `:free` (OpenRouter free tier, e.g. `gpt-oss-120b:free`)
//   - it contains `:free/` (routing-prefixed names like `or:free/x`)
//   - it is exactly `qwen3.5:latest` or `qwen3:latest` (local last-resort rung)
//
// Extend this function only alongside variance data that justifies the
// addition.
func IsWeakModel(model string) bool {
	switch {
	case strings.HasSuffix(model, ":free"):
		return true
	case strings.Contains(model, ":free/"):
		return true
	case model == "qwen3.5:latest", model == "qwen3:latest":
		return true
	default:
		return false
	}
}
// DowngradeInput is what MaybeDowngrade evaluates.
//
// ForcedMode: caller explicitly set their mode (mirrors Rust's
// req.force_mode.is_some()) — treated as opt-in to the chosen mode,
// skips the downgrade. Experiments need exact-mode control.
//
// ForceFullOverride: the LH_FORCE_FULL_ENRICHMENT escape hatch —
// usually populated from the env var via NewDowngradeInputFromEnv,
// but the field is explicit so callers can pass it from a config or
// test deterministically.
type DowngradeInput struct {
// Mode is the resolved mode; the gate only applies when it equals
// ModeCodeReviewLakehouse.
Mode string
// Model is the model name handed to IsWeakModel.
Model string
// ForcedMode, when true, keeps Mode as-is. MaybeDowngrade checks it
// before ForceFullOverride and before the weak-model test.
ForcedMode bool
// ForceFullOverride, when true, keeps Mode as-is (diagnostic bypass).
ForceFullOverride bool
}
// DowngradeDecision is the output. DowngradedFrom is non-empty
// only when a downgrade fired — callers should record it for audit
// (matches the Rust EnrichmentSources.downgraded_from field).
//
// Reason is a short human-readable string for logs/responses;
// useful for debugging "why did/didn't the gate fire."
type DowngradeDecision struct {
// Mode is the mode to run: the input mode when no downgrade fired,
// ModeCodeReviewIsolation when it did.
Mode string `json:"mode"`
// DowngradedFrom is the original mode; omitted from JSON when empty.
DowngradedFrom string `json:"downgraded_from,omitempty"`
// Reason is set by MaybeDowngrade on every return path.
Reason string `json:"reason"`
}
// MaybeDowngrade applies the strong-model auto-downgrade gate. It is a pure
// function and performs no env reads; env-driven callers should build the
// input with NewDowngradeInputFromEnv.
//
// The bypass conditions are evaluated in a fixed order — non-lakehouse mode,
// forced mode, force-full override, weak model — and the first one that
// holds returns the input mode unchanged with a matching Reason. Only when
// none holds is the mode switched to ModeCodeReviewIsolation and
// DowngradedFrom populated.
func MaybeDowngrade(in DowngradeInput) DowngradeDecision {
	// keep builds the "no downgrade" decision for a given reason.
	keep := func(reason string) DowngradeDecision {
		return DowngradeDecision{Mode: in.Mode, Reason: reason}
	}

	switch {
	case in.Mode != ModeCodeReviewLakehouse:
		return keep("mode is not " + ModeCodeReviewLakehouse + "; gate not applicable")
	case in.ForcedMode:
		return keep("caller forced mode; skip downgrade")
	case in.ForceFullOverride:
		return keep(EnvForceFullEnrichment + " bypass")
	case IsWeakModel(in.Model):
		return keep("weak model; matrix composition demonstrably helped (2026-04-26 free-tier bake-off)")
	}

	// Nothing bypassed the gate: the downgrade fires.
	return DowngradeDecision{
		Mode:           ModeCodeReviewIsolation,
		DowngradedFrom: ModeCodeReviewLakehouse,
		Reason:         "strong model; matrix composes anti-additively (pass5: composed lost 5/5 vs isolation on grok-4.1-fast, p=0.031)",
	}
}
// NewDowngradeInputFromEnv builds a DowngradeInput from the given arguments
// and fills ForceFullOverride from the LH_FORCE_FULL_ENRICHMENT process
// environment variable. Most production callers want this; tests should
// construct DowngradeInput directly to avoid env pollution.
func NewDowngradeInputFromEnv(mode, model string, forcedMode bool) DowngradeInput {
	in := DowngradeInput{Mode: mode, Model: model, ForcedMode: forcedMode}
	in.ForceFullOverride = envForceFullEnrichment()
	return in
}
// envForceFullEnrichment reports whether the EnvForceFullEnrichment variable
// is set to "1" or "true", ignoring surrounding whitespace and letter case.
func envForceFullEnrichment() bool {
	switch strings.ToLower(strings.TrimSpace(os.Getenv(EnvForceFullEnrichment))) {
	case "1", "true":
		return true
	}
	return false
}

View File

@ -1,100 +0,0 @@
package matrix
import "testing"
// TestIsWeakModel pins the weak/strong classification for representative
// model names: the `:free` suffix, the `:free/` infix, the two local
// last-resort tags, and a handful of models that must stay strong.
func TestIsWeakModel(t *testing.T) {
	weak := []string{
		// :free suffix
		"openai/gpt-4o:free",
		"meta-llama/llama-3-8b:free",
		// :free/ infix (routing-prefixed names)
		"openrouter:free/anthropic/claude-3.5-sonnet",
		// local last-resort rungs
		"qwen3.5:latest",
		"qwen3:latest",
	}
	strong := []string{
		"x-ai/grok-4.1-fast",
		"opencode/claude-opus-4-7",
		"openai/gpt-5",
		"qwen3-coder:480b", // not the :latest tag
		"",
	}

	check := func(models []string, want bool) {
		for _, model := range models {
			if got := IsWeakModel(model); got != want {
				t.Errorf("IsWeakModel(%q): want %v, got %v", model, want, got)
			}
		}
	}
	check(weak, true)
	check(strong, false)
}
// TestMaybeDowngrade_TruthTable walks the gate's decision table: the one
// combination that downgrades, and each bypass (forced mode, env override,
// weak model, non-lakehouse mode). Every decision must carry a Reason.
func TestMaybeDowngrade_TruthTable(t *testing.T) {
	const strong = "x-ai/grok-4.1-fast"

	table := []struct {
		name     string
		in       DowngradeInput
		wantMode string
		wantFrom string
	}{
		{
			name:     "downgrade fires: lakehouse mode + strong model + no force",
			in:       DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: strong},
			wantMode: ModeCodeReviewIsolation,
			wantFrom: ModeCodeReviewLakehouse,
		},
		{
			name:     "no downgrade: forced mode bypasses gate",
			in:       DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: strong, ForcedMode: true},
			wantMode: ModeCodeReviewLakehouse,
		},
		{
			name:     "no downgrade: env override bypasses gate",
			in:       DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: strong, ForceFullOverride: true},
			wantMode: ModeCodeReviewLakehouse,
		},
		{
			name:     "no downgrade: weak model keeps lakehouse",
			in:       DowngradeInput{Mode: ModeCodeReviewLakehouse, Model: "openai/gpt-4o:free"},
			wantMode: ModeCodeReviewLakehouse,
		},
		{
			name:     "no downgrade: non-lakehouse mode (gate not applicable)",
			in:       DowngradeInput{Mode: "codereview_isolation", Model: strong},
			wantMode: "codereview_isolation",
		},
	}

	for _, tc := range table {
		decision := MaybeDowngrade(tc.in)
		if decision.Mode != tc.wantMode {
			t.Errorf("%s: Mode want %q, got %q", tc.name, tc.wantMode, decision.Mode)
		}
		if decision.DowngradedFrom != tc.wantFrom {
			t.Errorf("%s: DowngradedFrom want %q, got %q", tc.name, tc.wantFrom, decision.DowngradedFrom)
		}
		if decision.Reason == "" {
			t.Errorf("%s: Reason should be non-empty", tc.name)
		}
	}
}
// TestMaybeDowngrade_ForcedTrumpsOthers verifies precedence: when
// multiple bypass conditions hit, ForcedMode wins (explicit caller
// intent always overrides). Caught a subtle ordering bug in the
// original Rust code where this was tested only by happy path.
//
// Every bypass branch leaves Mode and DowngradedFrom untouched, so those
// two checks alone cannot tell which branch fired. The Reason comparison
// against a forced-only decision is what actually pins the ordering.
func TestMaybeDowngrade_ForcedTrumpsOthers(t *testing.T) {
	in := DowngradeInput{
		Mode:              ModeCodeReviewLakehouse,
		Model:             "qwen3.5:latest", // weak — would otherwise hit weak-bypass
		ForcedMode:        true,
		ForceFullOverride: true,
	}
	got := MaybeDowngrade(in)
	if got.Mode != ModeCodeReviewLakehouse {
		t.Errorf("forced mode should keep mode: got %q", got.Mode)
	}
	if got.DowngradedFrom != "" {
		t.Errorf("no downgrade expected; got DowngradedFrom=%q", got.DowngradedFrom)
	}

	// Reference decision where ForcedMode is the only bypass in play.
	forcedOnly := MaybeDowngrade(DowngradeInput{
		Mode:       ModeCodeReviewLakehouse,
		Model:      "x-ai/grok-4.1-fast",
		ForcedMode: true,
	})
	if got.Reason != forcedOnly.Reason {
		t.Errorf("forced mode should take precedence: want reason %q, got %q", forcedOnly.Reason, got.Reason)
	}
}

View File

@ -1,95 +0,0 @@
package matrix
import (
"encoding/json"
"testing"
)
// TestMatchesMetadataFilter_NoFilter_KeepsAll checks that both a nil filter
// and an empty filter accept any metadata.
func TestMatchesMetadataFilter_NoFilter_KeepsAll(t *testing.T) {
	meta := json.RawMessage(`{"role":"Forklift Operator","state":"IL"}`)

	if matched := matchesMetadataFilter(meta, nil); !matched {
		t.Error("nil filter should match everything")
	}

	empty := map[string]any{}
	if matched := matchesMetadataFilter(meta, empty); !matched {
		t.Error("empty filter should match everything")
	}
}
// TestMatchesMetadataFilter_NoMetadata_AlwaysFails checks that a non-empty
// filter rejects an item that has no metadata at all.
func TestMatchesMetadataFilter_NoMetadata_AlwaysFails(t *testing.T) {
	filter := map[string]any{"x": "y"}
	if matched := matchesMetadataFilter(nil, filter); matched {
		t.Error("missing metadata should fail any filter")
	}
}
// TestMatchesMetadataFilter_SingleValueExactMatch exercises single-key
// filters against one metadata object: string equality, numeric equality
// for both float and int filter values, and a key that is absent.
func TestMatchesMetadataFilter_SingleValueExactMatch(t *testing.T) {
	meta := json.RawMessage(`{"state":"IL","status":"active","years":5}`)

	table := []struct {
		key   string
		value any
		want  bool
	}{
		{"state", "IL", true},
		{"state", "TX", false},
		{"status", "active", true},
		{"status", "inactive", false},
		// JSON normalizes both sides, so 5 matches 5.0
		{"years", 5.0, true},
		{"years", 5, true},
		// Missing key = fail
		{"city", "Chicago", false},
	}

	for _, tc := range table {
		filter := map[string]any{tc.key: tc.value}
		if got := matchesMetadataFilter(meta, filter); got != tc.want {
			t.Errorf("filter %v on %s: want %v, got %v", filter, meta, tc.want, got)
		}
	}
}
// TestMatchesMetadataFilter_AllKeysAND checks that multiple filter keys are
// combined with AND: all must match for the item to pass.
func TestMatchesMetadataFilter_AllKeysAND(t *testing.T) {
	meta := json.RawMessage(`{"state":"IL","status":"active","role":"Forklift Operator"}`)

	bothMatch := map[string]any{
		"state":  "IL",
		"status": "active",
	}
	if ok := matchesMetadataFilter(meta, bothMatch); !ok {
		t.Error("both keys match: should pass")
	}

	oneMismatch := map[string]any{
		"state":  "IL",
		"status": "inactive", // mismatch
	}
	if ok := matchesMetadataFilter(meta, oneMismatch); ok {
		t.Error("one key mismatches: should fail (AND across keys)")
	}
}
// TestMatchesMetadataFilter_ListValueOR checks that a list-valued filter
// entry matches when the metadata value equals any element of the list.
func TestMatchesMetadataFilter_ListValueOR(t *testing.T) {
	meta := json.RawMessage(`{"state":"IL"}`)

	// state in {"IL","WI","IN"} → match
	containing := map[string]any{"state": []any{"IL", "WI", "IN"}}
	if ok := matchesMetadataFilter(meta, containing); !ok {
		t.Error("list with matching element: should pass")
	}

	// state in {"TX","CA"} → fail
	excluding := map[string]any{"state": []any{"TX", "CA"}}
	if ok := matchesMetadataFilter(meta, excluding); ok {
		t.Error("list with no matching element: should fail")
	}
}
// TestMatchesMetadataFilter_BoolMatch checks boolean equality: a true
// metadata value matches a true filter and not a false one.
func TestMatchesMetadataFilter_BoolMatch(t *testing.T) {
	meta := json.RawMessage(`{"available":true,"placed":false}`)

	wantAvailable := map[string]any{"available": true}
	if ok := matchesMetadataFilter(meta, wantAvailable); !ok {
		t.Error("bool true match")
	}

	wantUnavailable := map[string]any{"available": false}
	if ok := matchesMetadataFilter(meta, wantUnavailable); ok {
		t.Error("bool true should not match false filter")
	}
}
// TestMatchesMetadataFilter_MalformedMetadataFails checks that metadata
// which is not valid JSON never satisfies a filter.
func TestMatchesMetadataFilter_MalformedMetadataFails(t *testing.T) {
	broken := json.RawMessage(`{not valid json}`)
	filter := map[string]any{"x": "y"}
	if ok := matchesMetadataFilter(broken, filter); ok {
		t.Error("malformed metadata should fail")
	}
}

View File

@ -1,196 +0,0 @@
package matrix
// Playbook memory — SPEC §3.4 component 5 (learning-loop integration).
//
// Concept: every time an external system confirms "(query → answer_id)
// was a successful match," record it. Future similar queries get that
// answer's score boosted, so the matrix indexer learns from outcomes
// rather than relying solely on the base embedder's geometry.
//
// Per feedback_meta_index_vision.md: this is the north star — a
// meta-index that LEARNS from playbooks over time, not a static
// hybrid search engine.
//
// Storage shape: a vectord index named DefaultPlaybookCorpus where:
// - The vector is embed(query_text)
// - The metadata is a serialized PlaybookEntry
// Retrieval shape: at /matrix/search time, when use_playbook=true,
// matrixd searches the playbook corpus with the same query vector,
// looks up each hit's answer_id, and if that answer is in the current
// matrix-search results, applies a boost to its distance.
//
// Composition: this layer is additive on top of the existing
// retrieve+merge — when use_playbook=false, behavior is unchanged.
// The boost only re-ranks results that ALREADY surfaced from the
// regular retrieval. A v1 enhancement would inject playbook hits
// directly even when they weren't in the top-K (Shape B from the
// design conversation), but v0 keeps the safer "boost-only" stance.
import (
"encoding/json"
"errors"
"sort"
"time"
)
// DefaultPlaybookCorpus is the vectord index name where playbook
// entries land by default. Callers can override per-request, but
// having one default makes the system observable from the outside
// (operator hits /vectors/index and sees this corpus in the list).
const DefaultPlaybookCorpus = "playbook_memory"
// DefaultPlaybookTopK is how many similar past queries to consider
// when applying boost. 3 keeps the influence focused — we want the
// boost to reward consistent matches, not let one stale playbook
// dominate. Callers can override it.
const DefaultPlaybookTopK = 3
// DefaultPlaybookMaxDistance is the cosine-distance ceiling for "this past
// query is similar enough to count." 0.5 lets in genuinely related
// queries while excluding pure-coincidence neighbors. Callers can
// override per-request as we learn what works for staffing data.
// It is an untyped constant, so it can be compared directly against the
// float32 PlaybookHit.Distance.
const DefaultPlaybookMaxDistance = 0.5
// PlaybookEntry is what gets stored as metadata on each playbook
// vector. RecordedAtNs is captured at write time (NewPlaybookEntry stamps
// it); callers should not set it themselves.
type PlaybookEntry struct {
	QueryText    string   `json:"query_text"`
	AnswerID     string   `json:"answer_id"`
	AnswerCorpus string   `json:"answer_corpus"`
	Score        float64  `json:"score"` // 0..1; higher = better outcome
	RecordedAtNs int64    `json:"recorded_at_ns"`
	Tags         []string `json:"tags,omitempty"`
}

// Validate returns an error if the entry is missing required fields or its
// score is outside [0, 1]. Callers should validate before storage so bad
// data doesn't pollute the corpus.
//
// A NaN score is rejected as out of range.
func (p PlaybookEntry) Validate() error {
	switch {
	case p.QueryText == "":
		return errors.New("playbook: query_text required")
	case p.AnswerID == "":
		return errors.New("playbook: answer_id required")
	case p.AnswerCorpus == "":
		return errors.New("playbook: answer_corpus required")
	}
	// Written as a negated in-range test on purpose: every ordered
	// comparison with NaN is false, so `Score < 0 || Score > 1` would let
	// NaN through. Do not "simplify" this with De Morgan.
	if !(p.Score >= 0 && p.Score <= 1) {
		return errors.New("playbook: score must be in [0, 1]")
	}
	return nil
}

// BoostFactor returns the multiplier applied to a result's distance
// when this playbook entry matches it. Lower is better:
//
//	score = 0   → 1.0  (no boost)
//	score = 0.5 → 0.75 (mild boost)
//	score = 1.0 → 0.5  (halve the distance — strong boost)
//
// Math: 1 - 0.5*score, with score clamped to [0, 1] so the result always
// lies in [0.5, 1.0]. A NaN score is treated as 0 (no boost) so it cannot
// turn a result's distance into NaN.
//
// Why halving as the maximum boost: a perfect-confidence playbook
// entry shouldn't completely override the base embedding (that
// invites runaway feedback loops where one early playbook
// dominates forever). Halving is enough to move a mid-rank result
// to the top in most cases without erasing the base ranking
// signal.
func (p PlaybookEntry) BoostFactor() float64 {
	score := p.Score
	switch {
	case score > 1:
		score = 1
	case !(score > 0):
		// Negative, zero, or NaN: no boost.
		score = 0
	}
	return 1.0 - 0.5*score
}

// MarshalMetadata serializes the entry as the JSON RawMessage that
// vectord stores per item. Convenience for the recorder.
func (p PlaybookEntry) MarshalMetadata() (json.RawMessage, error) {
	return json.Marshal(p)
}
// UnmarshalPlaybookMetadata decodes a stored metadata blob back into a
// PlaybookEntry — the inverse of MarshalMetadata, used when fetching
// playbook hits. Empty input is an error; otherwise the result of
// json.Unmarshal is returned as-is.
func UnmarshalPlaybookMetadata(raw json.RawMessage) (PlaybookEntry, error) {
	var entry PlaybookEntry
	if len(raw) == 0 {
		return entry, errors.New("playbook: empty metadata")
	}
	err := json.Unmarshal(raw, &entry)
	return entry, err
}
// NewPlaybookEntry builds an entry from the given fields and stamps
// RecordedAtNs with the current time in Unix nanoseconds. It does not
// validate; validation happens at storage.
func NewPlaybookEntry(query, answerID, answerCorpus string, score float64, tags []string) PlaybookEntry {
	var entry PlaybookEntry
	entry.QueryText = query
	entry.AnswerID = answerID
	entry.AnswerCorpus = answerCorpus
	entry.Score = score
	entry.Tags = tags
	entry.RecordedAtNs = time.Now().UnixNano()
	return entry
}
// PlaybookHit is one similarity-search result from the playbook
// corpus, paired with its decoded entry. Distance is the cosine
// distance between the current query and this past playbook's
// query vector — used by the caller to filter out "too far"
// matches via PlaybookMaxDistance.
type PlaybookHit struct {
// PlaybookID identifies the playbook item — presumably its ID in the
// playbook corpus; confirm against the code that builds hits.
PlaybookID string `json:"playbook_id"`
// Distance is not consulted by ApplyPlaybookBoost; any distance
// filtering has to happen before hits are passed to it.
Distance float32 `json:"distance"`
// Entry supplies the (AnswerID, AnswerCorpus) join key and the boost
// factor used by ApplyPlaybookBoost.
Entry PlaybookEntry `json:"entry"`
}
// ApplyPlaybookBoost re-ranks results in place using matched playbook hits.
//
// A hit matches a result when both its AnswerID and AnswerCorpus equal the
// result's ID and Corpus. A matched result has its distance multiplied by
// the hit's BoostFactor; when several hits match the same result, the
// smallest factor (i.e. the highest score, the largest reduction) is used.
// Results are then stably re-sorted ascending by distance.
//
// The return value is the number of results that received a boost, which
// callers can log as a measure of how much the playbook influenced the
// query. With no hits or no results the slice is left untouched and 0 is
// returned.
func ApplyPlaybookBoost(results []Result, hits []PlaybookHit) int {
	if len(results) == 0 || len(hits) == 0 {
		return 0
	}

	boosted := 0
	for i := range results {
		res := &results[i]

		// Find the strongest (lowest) factor among the hits that point
		// at this result. Matching only reads ID/Corpus, so adjusting
		// Distance as we go does not affect later iterations.
		best, matched := 0.0, false
		for j := range hits {
			entry := hits[j].Entry
			if entry.AnswerID != res.ID || entry.AnswerCorpus != res.Corpus {
				continue
			}
			if factor := entry.BoostFactor(); !matched || factor < best {
				best, matched = factor, true
			}
		}
		if !matched {
			continue
		}
		res.Distance = float32(float64(res.Distance) * best)
		boosted++
	}

	sort.SliceStable(results, func(a, b int) bool {
		return results[a].Distance < results[b].Distance
	})
	return boosted
}

View File

@ -1,180 +0,0 @@
package matrix
import (
"encoding/json"
"testing"
)
// TestPlaybookEntry_Validate checks that a fully populated entry validates
// and that each single-field defect (missing required field, score out of
// range) is rejected.
func TestPlaybookEntry_Validate(t *testing.T) {
	valid := PlaybookEntry{QueryText: "x", AnswerID: "y", AnswerCorpus: "z", Score: 0.5}
	if err := valid.Validate(); err != nil {
		t.Errorf("good entry should validate: %v", err)
	}

	// Each case starts from the valid entry and breaks exactly one field.
	broken := []struct {
		name   string
		mutate func(e *PlaybookEntry)
	}{
		{"empty query", func(e *PlaybookEntry) { e.QueryText = "" }},
		{"empty answer id", func(e *PlaybookEntry) { e.AnswerID = "" }},
		{"empty corpus", func(e *PlaybookEntry) { e.AnswerCorpus = "" }},
		{"score too high", func(e *PlaybookEntry) { e.Score = 1.5 }},
		{"score negative", func(e *PlaybookEntry) { e.Score = -0.1 }},
	}
	for _, tc := range broken {
		entry := valid
		tc.mutate(&entry)
		if entry.Validate() == nil {
			t.Errorf("%s: expected validation error, got nil", tc.name)
		}
	}
}
// TestPlaybookEntry_BoostFactor pins the score → multiplier mapping,
// including clamping of out-of-range scores.
func TestPlaybookEntry_BoostFactor(t *testing.T) {
	table := []struct {
		score float64
		want  float64
	}{
		{score: 0.0, want: 1.0},
		{score: 0.5, want: 0.75},
		{score: 1.0, want: 0.5},
		{score: -0.1, want: 1.0}, // clamped
		{score: 1.5, want: 0.5},  // clamped
	}
	for _, tc := range table {
		entry := PlaybookEntry{Score: tc.score}
		if got := entry.BoostFactor(); abs(got-tc.want) > 1e-9 {
			t.Errorf("BoostFactor(score=%.2f): want %.4f, got %.4f", tc.score, tc.want, got)
		}
	}
}
// TestApplyPlaybookBoost_NoHitsLeaveResultsAlone checks that with no hits
// nothing is boosted and the result order is preserved.
func TestApplyPlaybookBoost_NoHitsLeaveResultsAlone(t *testing.T) {
	results := []Result{
		{ID: "a", Distance: 0.1, Corpus: "x"},
		{ID: "b", Distance: 0.2, Corpus: "x"},
	}

	if boosted := ApplyPlaybookBoost(results, nil); boosted != 0 {
		t.Errorf("expected 0 boosted, got %d", boosted)
	}

	orderKept := results[0].ID == "a" && results[1].ID == "b"
	if !orderKept {
		t.Errorf("results reordered without hits: %v", results)
	}
}
// TestApplyPlaybookBoost_BoostMovesResultUp starts with a (0.10) < b (0.20)
// < c (0.30) and one playbook hit for c with score 1.0. The boost halves
// c's distance to 0.15, so the expected order afterwards is a, c, b.
func TestApplyPlaybookBoost_BoostMovesResultUp(t *testing.T) {
	results := []Result{
		{ID: "a", Distance: 0.10, Corpus: "x"},
		{ID: "b", Distance: 0.20, Corpus: "x"},
		{ID: "c", Distance: 0.30, Corpus: "x"},
	}
	boostC := PlaybookHit{
		PlaybookID: "p1",
		Distance:   0.05,
		Entry:      PlaybookEntry{AnswerID: "c", AnswerCorpus: "x", Score: 1.0},
	}

	if boosted := ApplyPlaybookBoost(results, []PlaybookHit{boostC}); boosted != 1 {
		t.Errorf("expected 1 boosted, got %d", boosted)
	}

	orderOK := results[0].ID == "a" && results[1].ID == "c" && results[2].ID == "b"
	if !orderOK {
		t.Errorf("expected order a,c,b after boost; got %v", idsOf(results))
	}
	if diff := abs(float64(results[1].Distance) - 0.15); diff > 1e-6 {
		t.Errorf("expected c distance 0.15 after boost; got %.4f", results[1].Distance)
	}
}
// TestApplyPlaybookBoost_HighestScoreWinsForSameAnswer gives one result two
// matching hits (score 0.4 and score 0.9). The stronger one must win, so the
// distance is multiplied by 1-0.5*0.9 = 0.55 rather than 1-0.5*0.4 = 0.80.
func TestApplyPlaybookBoost_HighestScoreWinsForSameAnswer(t *testing.T) {
	results := []Result{{ID: "a", Distance: 0.30, Corpus: "x"}}

	hitFor := func(id string, score float64) PlaybookHit {
		return PlaybookHit{
			PlaybookID: id,
			Distance:   0.05,
			Entry:      PlaybookEntry{AnswerID: "a", AnswerCorpus: "x", Score: score},
		}
	}
	hits := []PlaybookHit{
		hitFor("p_weak", 0.4),
		hitFor("p_strong", 0.9),
	}

	ApplyPlaybookBoost(results, hits)

	wantDist := 0.30 * 0.55
	if diff := abs(float64(results[0].Distance) - wantDist); diff > 1e-6 {
		t.Errorf("strong-score boost should win: want %.4f, got %.4f", wantDist, results[0].Distance)
	}
}
// TestApplyPlaybookBoost_CorpusAttributionRespected checks that the join key
// is the (id, corpus) pair rather than the id alone: a playbook entry for
// answer "a" in corpus "x" must not boost a result "a" from corpus "y",
// otherwise ID collisions across corpora would create false positives.
func TestApplyPlaybookBoost_CorpusAttributionRespected(t *testing.T) {
	results := []Result{{ID: "a", Distance: 0.30, Corpus: "y"}}
	otherCorpus := PlaybookHit{
		PlaybookID: "p1",
		Distance:   0.05,
		Entry:      PlaybookEntry{AnswerID: "a", AnswerCorpus: "x", Score: 1.0},
	}

	if boosted := ApplyPlaybookBoost(results, []PlaybookHit{otherCorpus}); boosted != 0 {
		t.Errorf("cross-corpus collision should not boost: got %d", boosted)
	}

	// The 1e-6 tolerance only absorbs the float32→float64 conversion; the
	// assertion that matters is "unchanged from input."
	if diff := abs(float64(results[0].Distance) - 0.30); diff > 1e-6 {
		t.Errorf("distance should be unchanged: got %.6f", results[0].Distance)
	}
}
// TestPlaybookEntry_RoundTripJSON marshals an entry built by
// NewPlaybookEntry and decodes it again, checking that the core fields and
// tags survive and that RecordedAtNs was stamped.
func TestPlaybookEntry_RoundTripJSON(t *testing.T) {
	want := NewPlaybookEntry("forklift query", "w-12345", "workers", 0.85, []string{"chicago", "verified"})

	encoded, err := want.MarshalMetadata()
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	got, err := UnmarshalPlaybookMetadata(encoded)
	if err != nil {
		t.Fatalf("unmarshal: %v", err)
	}

	sameCore := got.QueryText == want.QueryText &&
		got.AnswerID == want.AnswerID &&
		got.AnswerCorpus == want.AnswerCorpus &&
		got.Score == want.Score
	if !sameCore {
		t.Errorf("round-trip mismatch: want %+v, got %+v", want, got)
	}
	if len(got.Tags) != 2 || got.Tags[0] != "chicago" {
		t.Errorf("tags lost in round-trip: %v", got.Tags)
	}
	if got.RecordedAtNs == 0 {
		t.Error("RecordedAtNs not set by NewPlaybookEntry")
	}
}
// TestUnmarshalPlaybookMetadata_RejectsEmpty checks that an empty metadata
// blob is reported as an error rather than decoded to a zero entry.
func TestUnmarshalPlaybookMetadata_RejectsEmpty(t *testing.T) {
	_, err := UnmarshalPlaybookMetadata(json.RawMessage{})
	if err == nil {
		t.Error("empty metadata should error")
	}
}
// abs returns the absolute value of f. Test helper for tolerance checks.
func abs(f float64) float64 {
	switch {
	case f < 0:
		return -f
	default:
		return f
	}
}
// idsOf returns the IDs of rs in order. Test helper for readable failure
// messages.
func idsOf(rs []Result) []string {
	ids := make([]string, 0, len(rs))
	for i := range rs {
		ids = append(ids, rs[i].ID)
	}
	return ids
}

View File

@ -1,376 +0,0 @@
package matrix
// Heuristic relevance filter for matrix-retrieved chunks. Port of
// /home/profit/lakehouse/mcp-server/relevance.ts (Rust system).
//
// What it does: drops "adjacency pollution" — chunks that scored
// well on cosine but are actually about code the focus file IMPORTS,
// not the focus file itself. Without this, a reviewer LLM
// hallucinates imported-crate internals as belonging to the focus
// file ("I see main.rs does X" when X is in queryd::context that
// main.rs only calls through).
//
// IMPORTANT: this filter is CODE-aware. The signals are pub fn,
// struct, enum, use, import, file paths. It works for the eventual
// lakehouse_arch_v1 / lakehouse_symbols_v1 / scrum_findings_v1
// corpora ports. It will NOT meaningfully filter staffing data
// (candidates, workers, placements) — those need a different
// mechanism (structured constraints + status gates) that lives
// outside this package. See the candidates reality test 2026-04-29
// for the kind of staffing-side mismatch this filter doesn't fix.
//
// Scoring signals (additive weights; import_penalty is negative, so the
// total can drop below zero):
// path_match +1.0 chunk.source/doc_id encodes focus.path
// filename_match +0.6 chunk text mentions focus's filename
// defined_match +0.6 chunk text mentions focus.defined_symbols
// token_overlap +0.4 jaccard of non-stopword tokens
// prefix_match +0.3 chunk source shares first-2-segment prefix
// import_penalty -0.5 mentions ONLY imported symbols, no defined ones
//
// Threshold default 0.3 — same value the Rust observer ships.
import (
"fmt"
"regexp"
"strings"
)
// DefaultRelevanceThreshold is the value the Rust observer ships.
// Empirically tuned to keep direct hits and drop adjacency pollution.
// For scale: of the signals listed in the file header, path_match (+1.0),
// filename_match (+0.6) and defined_match (+0.6) each exceed it on their
// own, and prefix_match (+0.3) equals it.
const DefaultRelevanceThreshold = 0.3
// stopwords is the set of tokens Tokenize discards. It is the same list as
// relevance.ts: English articles and function words plus common Rust/TS
// keywords that would otherwise flood jaccard scores between any two source
// files. The set is built once at package initialization.
var stopwords = func() map[string]struct{} {
	// Whitespace-separated; a word listed twice simply lands in the set once.
	const words = `
		the a an and or but if then else is are was were
		be been being of in on at to for with by from as
		that this these those it its they them their we our
		you your i me my not no so do does did done
		will would could should can may might must shall
		fn let mut pub use mod struct enum trait impl self
		type const static async await return match ok err some
		none into from ref box arc rc vec string str
	`
	set := make(map[string]struct{})
	for _, word := range strings.Fields(words) {
		set[word] = struct{}{}
	}
	return set
}()
// FocusFile is what we're filtering chunks against. Path is required
// for path_match; Content lets the filter auto-extract DefinedSymbols and
// ImportedSymbols when callers haven't already done so.
type FocusFile struct {
// Path is the focus file's path. ScoreRelevance also splits it on "/" and
// uses the last segment as the file's base name.
Path string
// Content is the file's source text — presumably the input handed to
// ExtractDefinedSymbols / ExtractImportedSymbols when the symbol lists
// below are empty; confirm against the filter entry point.
Content string
// DefinedSymbols are names the focus file itself declares.
DefinedSymbols []string
// ImportedSymbols are names the focus file only imports.
ImportedSymbols []string
}
// CandidateChunk is a single retrieved item to score. Source is the
// corpus name; DocID is the chunk identifier; Score is the upstream
// cosine signal (carried through but not used by this filter — the
// matrix layer uses cosine for ranking, this filter for retention).
// For path_match, ScoreRelevance looks for the focus path inside Source,
// DocID and Text.
type CandidateChunk struct {
Source string `json:"source"`
DocID string `json:"doc_id"`
Text string `json:"text"`
Score float64 `json:"score"`
}
// ScoredChunk wraps a chunk with its computed relevance + the list
// of signals that fired. Reasons makes the filter auditable —
// debugging "why did this chunk get kept/dropped" is the hard part.
type ScoredChunk struct {
// CandidateChunk is embedded, so its fields (Source, DocID, Text, Score)
// are promoted onto ScoredChunk.
CandidateChunk
// Relevance is the additive score; it can be negative.
Relevance float64 `json:"relevance"`
// Reasons holds signal names such as "path_match".
Reasons []string `json:"reasons"`
}
// FilterResult is the output of FilterChunks. Kept + Dropped are
// disjoint and together cover the input. TotalIn is for sanity
// checks; FocusPath echoes input for logging.
type FilterResult struct {
// Kept and Dropped partition the scored input; presumably split by
// comparing Relevance with Threshold — confirm in FilterChunks.
Kept []ScoredChunk `json:"kept"`
Dropped []ScoredChunk `json:"dropped"`
// Threshold is the cut-off reported alongside the result.
Threshold float64 `json:"threshold"`
FocusPath string `json:"focus_path"`
// TotalIn should equal len(Kept) + len(Dropped) per the contract above.
TotalIn int `json:"total_in"`
}
// tokenRe matches identifier-like runs of at least three characters in
// already-lowercased text. It mirrors the TS regex /[a-z_][a-z0-9_]{2,}/g,
// which is RE2-compatible as written.
var tokenRe = regexp.MustCompile(`[a-z_][a-z0-9_]{2,}`)

// Tokenize lowercases text, extracts every tokenRe match, and returns the
// set of matches that are not stopwords. The returned map is never nil;
// empty input yields an empty set. Used by Jaccard for token_overlap.
func Tokenize(text string) map[string]struct{} {
	tokens := map[string]struct{}{}
	for _, word := range tokenRe.FindAllString(strings.ToLower(text), -1) {
		if _, isStop := stopwords[word]; !isStop {
			tokens[word] = struct{}{}
		}
	}
	return tokens
}
// Jaccard returns |A ∩ B| / |A ∪ B| for two token sets. It returns 0 when
// either set is empty (matches the TS contract).
func Jaccard(a, b map[string]struct{}) float64 {
	if len(a) == 0 || len(b) == 0 {
		return 0
	}

	// Walk the smaller set and probe the larger one; the intersection
	// count is the same either way.
	small, large := a, b
	if len(small) > len(large) {
		small, large = large, small
	}
	shared := 0
	for tok := range small {
		if _, ok := large[tok]; ok {
			shared++
		}
	}

	// Both sets are non-empty here, so the union size is at least 1.
	return float64(shared) / float64(len(a)+len(b)-shared)
}
// definedPatterns holds one regexp per kind of public declaration that
// ExtractDefinedSymbols recognises in Rust/TS source; capture group 1 is the
// declared name. The patterns match the TS implementation exactly (\b and
// (?:...) are RE2-supported). Only the "pub fn" pattern carries (?i),
// because the TS original uses /gi there and plain /g everywhere else.
var definedPatterns = func() []*regexp.Regexp {
	exprs := []string{
		`(?i)\bpub\s+(?:async\s+)?fn\s+([a-z_][a-z0-9_]*)`,
		`\bpub\s+struct\s+([A-Z][A-Za-z0-9_]*)`,
		`\bpub\s+enum\s+([A-Z][A-Za-z0-9_]*)`,
		`\bpub\s+trait\s+([A-Z][A-Za-z0-9_]*)`,
		`\bpub\s+const\s+([A-Z_][A-Z0-9_]*)`,
		`\bpub\s+type\s+([A-Z][A-Za-z0-9_]*)`,
		`\bexport\s+(?:async\s+)?function\s+([a-z_][a-zA-Z0-9_]*)`,
		`\bexport\s+class\s+([A-Z][A-Za-z0-9_]*)`,
		`\bexport\s+interface\s+([A-Z][A-Za-z0-9_]*)`,
		`\bexport\s+(?:const|let|var)\s+([a-zA-Z_][a-zA-Z0-9_]*)`,
	}
	compiled := make([]*regexp.Regexp, len(exprs))
	for i, expr := range exprs {
		compiled[i] = regexp.MustCompile(expr)
	}
	return compiled
}()

// ExtractDefinedSymbols pulls the names of public declarations out of
// Rust/TS source. It is deliberately conservative — it would rather miss a
// symbol than over-match.
//
// Names are returned de-duplicated, grouped by pattern in definedPatterns
// order and, within a pattern, in order of appearance. The result is nil
// when content is empty or nothing matches.
func ExtractDefinedSymbols(content string) []string {
	if content == "" {
		return nil
	}
	var symbols []string
	known := map[string]bool{}
	for _, pattern := range definedPatterns {
		for _, groups := range pattern.FindAllStringSubmatch(content, -1) {
			if len(groups) < 2 {
				continue
			}
			name := groups[1]
			if name == "" || known[name] {
				continue
			}
			known[name] = true
			symbols = append(symbols, name)
		}
	}
	return symbols
}
var (
	// rustUseRe matches `use foo::bar::Baz;`, `use foo::{Bar, Baz};` and
	// `use foo::bar as alias;`. Group 1 is everything between `use` and
	// the terminating `;`; the lazy `*?` stops at the first one.
	rustUseRe = regexp.MustCompile(`\buse\s+([A-Za-z_][A-Za-z0-9_:{}, \n]*?);`)

	// tsImportRe matches `import { X, Y } from "foo"` (group 1 = brace
	// contents) and `import X from "foo"` (group 2 = the default name).
	tsImportRe = regexp.MustCompile(`\bimport\s+(?:\{([^}]+)\}|([A-Za-z_][A-Za-z0-9_]*))\s+from`)

	// identRe extracts identifiers from a use/import block.
	identRe = regexp.MustCompile(`[A-Za-z_][A-Za-z0-9_]*`)
)

// ExtractImportedSymbols returns the identifiers named in Rust `use`
// statements and TS `import ... from` statements found in content.
//
// Identifiers of two characters or fewer and the path keywords use, as,
// crate, super, self and mod are skipped. Names are de-duplicated and
// returned Rust matches first, then TS matches, each in order of
// appearance. The result is nil when content is empty or nothing qualifies.
func ExtractImportedSymbols(content string) []string {
	if content == "" {
		return nil
	}

	var symbols []string
	recorded := make(map[string]struct{})

	// collect appends every qualifying, not-yet-seen identifier in block.
	collect := func(block string) {
		for _, ident := range identRe.FindAllString(block, -1) {
			if len(ident) <= 2 {
				continue
			}
			switch ident {
			case "use", "as", "crate", "super", "self", "mod":
				continue
			}
			if _, dup := recorded[ident]; dup {
				continue
			}
			recorded[ident] = struct{}{}
			symbols = append(symbols, ident)
		}
	}

	for _, groups := range rustUseRe.FindAllStringSubmatch(content, -1) {
		if len(groups) >= 2 {
			collect(groups[1])
		}
	}
	for _, groups := range tsImportRe.FindAllStringSubmatch(content, -1) {
		if len(groups) < 3 {
			continue
		}
		// Braced form fills group 1; the default-import form fills group 2.
		block := groups[1]
		if block == "" {
			block = groups[2]
		}
		collect(block)
	}
	return symbols
}
// FilePrefix returns everything before the second "/" in path, i.e. the
// first two slash-separated segments — "crates/queryd/src/foo.rs"
// becomes "crates/queryd". A path with fewer than two slashes is
// returned unchanged. Used for cheap "same crate" comparisons.
func FilePrefix(path string) string {
	first := strings.IndexByte(path, '/')
	if first < 0 {
		return path
	}
	second := strings.IndexByte(path[first+1:], '/')
	if second < 0 {
		return path
	}
	return path[:first+1+second]
}
// ScoreRelevance scores how relevant chunk is to the focus file and
// reports which signals fired. The score is additive:
//
//	path_match      +1.0        focus path appears in source, doc ID or text
//	filename_match  +0.6        else: base name (>4 chars) appears in text or doc ID
//	defined_match   +0.6*ratio  share of the focus file's defined symbols in the text
//	token_overlap   +0.4*j      Jaccard of token sets, only when j > 0.05
//	prefix_match    +0.3        FilePrefix of the focus path appears anywhere
//	import_only     -0.5        text names imported symbols but no defined ones
//
// Defined/imported symbols are taken from the focus struct when present
// and extracted from focus.Content otherwise. The result can be negative
// when only the penalty fires. The function performs no I/O.
func ScoreRelevance(focus FocusFile, chunk CandidateChunk) (float64, []string) {
	var (
		total   float64
		signals []string
	)
	text, source, docID := chunk.Text, chunk.Source, chunk.DocID

	// anywhere reports whether needle occurs in the chunk's provenance
	// fields or its body.
	anywhere := func(needle string) bool {
		return strings.Contains(source, needle) ||
			strings.Contains(docID, needle) ||
			strings.Contains(text, needle)
	}
	// mentioned counts the symbols (longer than two characters) that
	// occur in the chunk text.
	mentioned := func(symbols []string) int {
		n := 0
		for _, sym := range symbols {
			if len(sym) > 2 && strings.Contains(text, sym) {
				n++
			}
		}
		return n
	}

	// path_match / filename_match: provenance encodes the focus path or
	// at least its file name.
	path := focus.Path
	base := ""
	if path != "" {
		base = path[strings.LastIndex(path, "/")+1:]
	}
	switch {
	case path != "" && anywhere(path):
		total += 1.0
		signals = append(signals, "path_match")
	case len(base) > 4 && (strings.Contains(text, base) || strings.Contains(docID, base)):
		total += 0.6
		signals = append(signals, "filename_match")
	}

	// defined_match: the text talks about symbols the focus file defines.
	defined := focus.DefinedSymbols
	if len(defined) == 0 && focus.Content != "" {
		defined = ExtractDefinedSymbols(focus.Content)
	}
	definedHits := mentioned(defined)
	if definedHits > 0 {
		// definedHits never exceeds len(defined), so the ratio is in (0, 1].
		ratio := float64(definedHits) / float64(len(defined))
		total += 0.6 * ratio
		signals = append(signals, fmt.Sprintf("defined_match(%d/%d)", definedHits, len(defined)))
	}

	// token_overlap: Jaccard similarity of the two token sets.
	if focus.Content != "" {
		if overlap := Jaccard(Tokenize(focus.Content), Tokenize(text)); overlap > 0.05 {
			total += 0.4 * overlap
			signals = append(signals, fmt.Sprintf("token_overlap(%.2f)", overlap))
		}
	}

	// prefix_match: same first two path segments (e.g. crates/queryd).
	if path != "" {
		if prefix := FilePrefix(path); prefix != "" && anywhere(prefix) {
			total += 0.3
			signals = append(signals, "prefix_match")
		}
	}

	// import_only: the chunk is about what the focus file imports, not
	// what it defines — adjacency pollution.
	imported := focus.ImportedSymbols
	if len(imported) == 0 && focus.Content != "" {
		imported = ExtractImportedSymbols(focus.Content)
	}
	if len(imported) > 0 && len(defined) > 0 && definedHits == 0 {
		if importHits := mentioned(imported); importHits > 0 {
			total -= 0.5
			signals = append(signals, fmt.Sprintf("import_only(%d)", importHits))
		}
	}
	return total, signals
}
// FilterChunks runs ScoreRelevance on every chunk and splits the scored
// chunks into Kept (score >= threshold) and Dropped (everything else),
// preserving input order within each group. The threshold is used
// exactly as given — no default is substituted, so a literal 0 keeps
// every chunk whose score is not negative. Both result slices are
// always non-nil.
func FilterChunks(focus FocusFile, chunks []CandidateChunk, threshold float64) FilterResult {
	result := FilterResult{
		Kept:      make([]ScoredChunk, 0, len(chunks)),
		Dropped:   make([]ScoredChunk, 0),
		Threshold: threshold,
		FocusPath: focus.Path,
		TotalIn:   len(chunks),
	}
	for i := range chunks {
		relevance, why := ScoreRelevance(focus, chunks[i])
		scored := ScoredChunk{CandidateChunk: chunks[i], Relevance: relevance, Reasons: why}
		if relevance >= threshold {
			result.Kept = append(result.Kept, scored)
			continue
		}
		result.Dropped = append(result.Dropped, scored)
	}
	return result
}

View File

@ -1,289 +0,0 @@
package matrix
import (
"strings"
"testing"
)
// TestTokenize checks that Tokenize lowercases its input and drops
// stopwords and tokens shorter than three characters. Only set size and
// membership are compared, so token order is irrelevant.
func TestTokenize(t *testing.T) {
	type tokenizeCase struct {
		input    string
		expected []string // tokens that must be present; length must match too
	}
	table := []tokenizeCase{
		{input: "", expected: nil},
		{input: "the quick brown fox", expected: []string{"quick", "brown", "fox"}}, // stopwords dropped
		{input: "hello WORLD", expected: []string{"hello", "world"}},                // lowercase
		{input: "a b c", expected: nil},                                             // all under 3 chars
		{input: "struct Foo", expected: []string{"foo"}},                            // "struct" is a stopword, identifiers OK
		{input: "crates/queryd/db.go", expected: []string{"crates", "queryd"}},      // db.go: "db" is 2 chars, "go" is 2 chars
	}
	for i := range table {
		tc := table[i]
		tokens := Tokenize(tc.input)
		if len(tokens) != len(tc.expected) {
			t.Errorf("Tokenize(%q): want %d tokens %v, got %d %v", tc.input, len(tc.expected), tc.expected, len(tokens), tokens)
			continue
		}
		for _, tok := range tc.expected {
			if _, found := tokens[tok]; found {
				continue
			}
			t.Errorf("Tokenize(%q): missing token %q in %v", tc.input, tok, tokens)
		}
	}
}
// TestJaccard covers the empty, identical, disjoint and partially
// overlapping cases of Jaccard. Each case carries its own tolerance so
// exact results are compared exactly.
func TestJaccard(t *testing.T) {
	// set builds a token set from its arguments.
	set := func(members ...string) map[string]struct{} {
		out := make(map[string]struct{})
		for _, member := range members {
			out[member] = struct{}{}
		}
		return out
	}
	table := []struct {
		name    string
		a, b    map[string]struct{}
		want    float64
		epsilon float64
	}{
		{"both empty", set(), set(), 0, 0},
		{"a empty", set(), set("x"), 0, 0},
		{"identical", set("x", "y"), set("x", "y"), 1, 0},
		{"disjoint", set("a", "b"), set("c", "d"), 0, 0},
		{"half overlap", set("a", "b"), set("b", "c"), 1.0 / 3.0, 0.001},
	}
	for _, tc := range table {
		got := Jaccard(tc.a, tc.b)
		lo, hi := tc.want-tc.epsilon, tc.want+tc.epsilon
		if got < lo || got > hi {
			t.Errorf("%s: want %.3f, got %.3f", tc.name, tc.want, got)
		}
	}
}
func TestExtractDefinedSymbols(t *testing.T) {
rust := `
pub fn search_chunks(query: &str) -> Vec<Chunk> { todo!() }
pub async fn build_index() {}
pub struct ChunkRegistry {}
pub enum Distance { Cosine, Euclidean }
pub trait Searcher {}
pub const MAX_K: usize = 1000;
pub type ChunkMap = HashMap<String, Chunk>;
fn private_helper() {} // not pub, must NOT match
struct PrivateOnly {} // not pub, must NOT match
`
got := ExtractDefinedSymbols(rust)
want := []string{"search_chunks", "build_index", "ChunkRegistry", "Distance", "Searcher", "MAX_K", "ChunkMap"}
if len(got) != len(want) {
t.Errorf("Rust extract: want %v, got %v", want, got)
}
for _, w := range want {
if !contains(got, w) {
t.Errorf("Rust: missing %q in %v", w, got)
}
}
// Negative cases — these should NOT match.
for _, neg := range []string{"private_helper", "PrivateOnly"} {
if contains(got, neg) {
t.Errorf("Rust: should not match %q in %v", neg, got)
}
}
ts := `
export function tokenize(text: string) {}
export async function loadCorpus() {}
export class IndexRegistry {}
export interface FocusFile {}
export const STOPWORDS = new Set();
export let counter = 0;
function privateTs() {} // not export, must NOT match
class Internal {} // not export, must NOT match
`
got = ExtractDefinedSymbols(ts)
want = []string{"tokenize", "loadCorpus", "IndexRegistry", "FocusFile", "STOPWORDS", "counter"}
for _, w := range want {
if !contains(got, w) {
t.Errorf("TS: missing %q in %v", w, got)
}
}
for _, neg := range []string{"privateTs", "Internal"} {
if contains(got, neg) {
t.Errorf("TS: should not match %q in %v", neg, got)
}
}
}
// TestExtractImportedSymbols checks that identifiers inside Rust `use`
// statements and TypeScript `import ... from` statements are reported,
// and that the Rust keywords `use` and `as` are not.
func TestExtractImportedSymbols(t *testing.T) {
	rustSrc := `
use catalogd::Registry;
use vectord::{Index, IndexParams};
use std::collections::HashMap;
`
	rustGot := ExtractImportedSymbols(rustSrc)
	rustWant := []string{"catalogd", "Registry", "vectord", "Index", "IndexParams", "collections", "HashMap"}
	for i := range rustWant {
		if contains(rustGot, rustWant[i]) {
			continue
		}
		t.Errorf("Rust use: missing %q in %v", rustWant[i], rustGot)
	}
	for _, keyword := range []string{"use", "as"} {
		if !contains(rustGot, keyword) {
			continue
		}
		t.Errorf("Rust use: should not match keyword %q in %v", keyword, rustGot)
	}

	tsSrc := `
import { tokenize, jaccard } from "./relevance";
import express from "express";
`
	tsGot := ExtractImportedSymbols(tsSrc)
	tsWant := []string{"tokenize", "jaccard", "express"}
	for i := range tsWant {
		if contains(tsGot, tsWant[i]) {
			continue
		}
		t.Errorf("TS import: missing %q in %v", tsWant[i], tsGot)
	}
}
// TestFilePrefix checks the two-segment truncation for a deep path, a
// bare file name, a four-segment path and the empty string.
func TestFilePrefix(t *testing.T) {
	type prefixCase struct {
		path, want string
	}
	table := []prefixCase{
		{path: "crates/queryd/src/foo.rs", want: "crates/queryd"},
		{path: "top.rs", want: "top.rs"},
		{path: "a/b/c/d", want: "a/b"},
		{path: "", want: ""},
	}
	for i := range table {
		if got := FilePrefix(table[i].path); got != table[i].want {
			t.Errorf("FilePrefix(%q): want %q, got %q", table[i].path, table[i].want, got)
		}
	}
}
// TestScoreRelevance_PathMatch checks that a chunk whose text contains
// the full focus path scores at least 1.0 and reports "path_match".
func TestScoreRelevance_PathMatch(t *testing.T) {
	var focus FocusFile
	focus.Path = "crates/queryd/db.go"

	chunk := CandidateChunk{
		Source: "lakehouse_arch_v1",
		DocID:  "phase:queryd",
		Text:   "code at crates/queryd/db.go does X",
	}

	got, why := ScoreRelevance(focus, chunk)
	if got < 1.0 {
		t.Errorf("path_match should give >=1.0; got %.2f reasons=%v", got, why)
	}
	if hasPathMatch := contains(why, "path_match"); !hasPathMatch {
		t.Errorf("expected path_match in reasons: %v", why)
	}
}
// TestScoreRelevance_ImportPenalty sets up a focus file that defines Foo
// and imports Bar, then scores a chunk that mentions only Bar. The
// import_only penalty must fire, and the test requires the net score to
// be negative.
func TestScoreRelevance_ImportPenalty(t *testing.T) {
	focus := FocusFile{
		Path:            "crates/foo/main.go",
		Content:         "pub fn run() {}\npub struct Foo {}\nuse barlib::Bar;\n",
		DefinedSymbols:  []string{"Foo"},
		ImportedSymbols: []string{"Bar"},
	}
	barOnly := CandidateChunk{
		Source: "barlib_corpus",
		DocID:  "barlib:Bar:42",
		Text:   "Bar handles the actual lookup logic and returns a Result.",
	}

	got, why := ScoreRelevance(focus, barOnly)
	if penalized := contains(why, "import_only(1)"); !penalized {
		t.Errorf("expected import_only penalty: reasons=%v score=%.2f", why, got)
	}
	// Without other positive signals, score should be net-negative.
	if got >= 0 {
		t.Errorf("expected negative net score; got %.2f reasons=%v", got, why)
	}
}
// TestFilterChunks_ThresholdSplitsKeptDropped filters one path-matching
// chunk and one unrelated chunk at DefaultRelevanceThreshold. It checks
// the 1/1 split, the echoed TotalIn and FocusPath, and that every scored
// chunk sits on the correct side of the threshold.
func TestFilterChunks_ThresholdSplitsKeptDropped(t *testing.T) {
	focus := FocusFile{Path: "crates/queryd/db.go"}
	candidates := []CandidateChunk{
		{Source: "code", DocID: "queryd:db.go", Text: "crates/queryd/db.go is the focus"}, // path match → kept
		{Source: "elsewhere", DocID: "phase:0", Text: "no match anywhere"},                // dropped
	}

	res := FilterChunks(focus, candidates, DefaultRelevanceThreshold)

	if nKept, nDropped := len(res.Kept), len(res.Dropped); nKept != 1 || nDropped != 1 {
		t.Errorf("split: kept=%d dropped=%d (want 1/1)", nKept, nDropped)
	}
	if res.TotalIn != 2 {
		t.Errorf("TotalIn: want 2, got %d", res.TotalIn)
	}
	if res.FocusPath != focus.Path {
		t.Errorf("FocusPath echo: want %q, got %q", focus.Path, res.FocusPath)
	}

	// Sanity: everything in Kept has Relevance >= threshold, and
	// everything in Dropped is strictly below it.
	for i := range res.Kept {
		if sc := res.Kept[i]; sc.Relevance < DefaultRelevanceThreshold {
			t.Errorf("kept chunk below threshold: %v", sc)
		}
	}
	for i := range res.Dropped {
		if sc := res.Dropped[i]; sc.Relevance >= DefaultRelevanceThreshold {
			t.Errorf("dropped chunk at/above threshold: %v", sc)
		}
	}
}
// TestFilterChunks_AdjacencyPollutionScenario is the headline test — the
// case the filter exists to catch. The focus file defines Connector and
// imports catalogd::Registry. The chunk about Connector must be kept,
// and it must score strictly higher than the chunk about Registry
// (which is merely adjacent to the focus file).
func TestFilterChunks_AdjacencyPollutionScenario(t *testing.T) {
	focus := FocusFile{
		Path: "crates/queryd/src/db.go",
		Content: `
package queryd
import "catalogd"
pub struct Connector {}
pub fn open_connector() *Connector { return nil }
use catalogd::Registry;
`,
	}
	candidates := []CandidateChunk{
		{
			Source: "lakehouse_symbols_v1",
			DocID:  "symbol:queryd::struct::Connector",
			Text:   "Connector wraps the DuckDB handle. open_connector creates one.",
		},
		{
			Source: "lakehouse_symbols_v1",
			DocID:  "symbol:catalogd::struct::Registry",
			Text:   "Registry stores manifests. Used by ingestd and queryd.",
		},
	}
	res := FilterChunks(focus, candidates, DefaultRelevanceThreshold)

	// Connector chunk should be kept (defined_match).
	keptIDs := make([]string, 0, len(res.Kept))
	for i := range res.Kept {
		keptIDs = append(keptIDs, res.Kept[i].DocID)
	}
	if !contains(keptIDs, "symbol:queryd::struct::Connector") {
		t.Errorf("expected Connector chunk kept; got %v", keptIDs)
	}

	// The Registry chunk MIGHT pass threshold depending on token_overlap
	// noise (queryd appears in its text too). The load-bearing assertion
	// is the ranking: Connector must outscore Registry. The -999
	// sentinels make the assertion fail if either chunk is missing.
	connectorRel, registryRel := -999.0, -999.0
	note := func(scored []ScoredChunk) {
		for i := range scored {
			if strings.Contains(scored[i].DocID, "Connector") {
				connectorRel = scored[i].Relevance
			}
			if strings.Contains(scored[i].DocID, "Registry") {
				registryRel = scored[i].Relevance
			}
		}
	}
	note(res.Kept)
	note(res.Dropped)
	if connectorRel <= registryRel {
		t.Errorf("Connector should outrank Registry: connector=%.2f registry=%.2f", connectorRel, registryRel)
	}
}
// contains reports whether needle is an element of haystack.
func contains(haystack []string, needle string) bool {
	for i := 0; i < len(haystack); i++ {
		if haystack[i] == needle {
			return true
		}
	}
	return false
}

View File

@ -1,551 +0,0 @@
// Package matrix is the multi-corpus retrieval layer above vectord.
// Per docs/SPEC.md §3.4: the matrix indexer composes N single-corpus
// vectord indexes into one retrieve+merge surface, with corpus
// attribution preserved per result. Future work in the same package:
// relevance filter, strong-model downgrade gate, learning-loop
// integration. This file is component 2 of the dependency-ordered
// port plan — multi-corpus retrieve+merge, no filter yet.
//
// Why corpus-as-shard rather than hash-shard a single index:
// different corpora have distinct topology and distinct retrieval
// intent (workers vs candidates vs scrum_findings vs lakehouse_arch).
// Multi-corpus search merges across them by distance — that IS the
// matrix indexer's whole purpose. See feedback_meta_index_vision.md
// and project_small_model_pipeline_vision.md.
package matrix
import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"net/url"
	"sort"
	"sync"
	"time"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/vectord"
)
// Result is one merged hit with corpus attribution. The corpus field
// is load-bearing — losing it would defeat the matrix's purpose
// (knowing WHICH corpus contributed each hit is half the signal).
type Result struct {
	// ID is the item ID as returned by the corpus search.
	ID string `json:"id"`
	// Distance is the hit's distance to the query vector; Search sorts
	// merged results by it in ascending order.
	Distance float32 `json:"distance"`
	// Corpus is the name of the corpus the hit came from.
	Corpus string `json:"corpus"`
	// Metadata is the item's raw JSON metadata, passed through
	// undecoded; omitted from output when empty.
	Metadata json.RawMessage `json:"metadata,omitempty"`
}
// SearchRequest is the matrix search input. Either QueryText (matrix
// embeds it via embedd) or QueryVector (already embedded by caller)
// must be set; QueryVector takes precedence if both supplied.
//
// Playbook fields (component 5 — learning loop):
//
//	UsePlaybook=true: after normal retrieve+merge, fetch top similar
//	  past queries from PlaybookCorpus and apply distance boost to
//	  any current results that match a recorded answer.
//	PlaybookCorpus: index name; empty = DefaultPlaybookCorpus.
//	PlaybookTopK: number of similar past queries to consider; 0 =
//	  DefaultPlaybookTopK.
//	PlaybookMaxDistance: cosine ceiling for "similar enough"; 0 =
//	  DefaultPlaybookMaxDistance.
//
// Metadata filter (post-retrieval structured gate):
//
//	MetadataFilter: map of metadata-field → expected value. Results
//	  whose metadata doesn't match every key are dropped. Addresses
//	  the reality-test gap surfaced in the candidates/workers
//	  experiments — pure semantic retrieval can't gate by status,
//	  state, etc. Caller can compensate for filter shrinkage by
//	  requesting larger PerCorpusK.
//	  Each filter value can be a single value (string|number|bool —
//	  whatever JSON unmarshals to `any`) or a []any meaning "any
//	  of these values" (OR semantics within one key, AND across keys).
type SearchRequest struct {
	// QueryText is embedded via embedd when QueryVector is empty.
	QueryText string `json:"query_text,omitempty"`
	// QueryVector, when non-empty, is used as-is and QueryText is ignored.
	QueryVector []float32 `json:"query_vector,omitempty"`
	// Corpora names the corpora to search; must be non-empty.
	Corpora []string `json:"corpora"`
	// K is the maximum number of merged results returned; must be > 0.
	K int `json:"k"`
	// PerCorpusK is the k sent to each corpus; values <= 0 fall back to K.
	PerCorpusK int `json:"per_corpus_k,omitempty"`
	// Model is forwarded to embedd when QueryText has to be embedded.
	Model string `json:"model,omitempty"`
	// UsePlaybook enables the playbook boost after retrieve+merge.
	UsePlaybook bool `json:"use_playbook,omitempty"`
	// PlaybookCorpus overrides DefaultPlaybookCorpus when non-empty.
	PlaybookCorpus string `json:"playbook_corpus,omitempty"`
	// PlaybookTopK overrides DefaultPlaybookTopK when > 0.
	PlaybookTopK int `json:"playbook_top_k,omitempty"`
	// PlaybookMaxDistance overrides DefaultPlaybookMaxDistance when > 0.
	PlaybookMaxDistance float64 `json:"playbook_max_distance,omitempty"`
	// MetadataFilter drops merged results whose metadata does not
	// satisfy every key; it is applied before truncation to K.
	MetadataFilter map[string]any `json:"metadata_filter,omitempty"`
}
// SearchResponse wraps the merged results plus per-corpus return
// counts so callers can detect "this corpus returned nothing"
// without re-querying. PlaybookBoosted is the count of results that
// received a boost from playbook memory; useful for telemetry on
// "how much the learning loop influenced this query."
// MetadataFilterDropped is the count of results dropped by the
// post-retrieval structured filter (when set in the request).
type SearchResponse struct {
	// Results holds the merged hits after sorting, filtering and
	// truncation to the requested K.
	Results []Result `json:"results"`
	// PerCorpusCounts maps each searched corpus to the number of hits it
	// returned, counted before the metadata filter and truncation.
	PerCorpusCounts map[string]int `json:"per_corpus_counts"`
	// PlaybookBoosted is the value returned by ApplyPlaybookBoost; it
	// stays zero when the playbook is unused or its lookup fails.
	PlaybookBoosted int `json:"playbook_boosted,omitempty"`
	// MetadataFilterDropped counts hits removed by the metadata filter.
	MetadataFilterDropped int `json:"metadata_filter_dropped,omitempty"`
}
// Retriever holds the HTTP client used to call embedd and vectord,
// plus the two base URLs. It carries no other state.
// NOTE(review): the original comment calls it safe to share across
// goroutines; that holds as long as no method mutates these fields
// after New (none of the methods visible here do).
type Retriever struct {
	// httpClient is shared by every outgoing request.
	httpClient *http.Client
	// embeddURL is the base URL that "/embed" is appended to.
	embeddURL string
	// vectordURL is the base URL that "/vectors/index..." paths are
	// appended to.
	vectordURL string
}
// New builds a Retriever that sends embedding requests to embeddURL and
// index requests to vectordURL. Every request goes through one shared
// HTTP client with a 30-second timeout.
// NOTE(review): the original comment says both are gateway-internal
// upstreams, usually 127.0.0.1:3216 and :3215 — not verifiable here.
func New(embeddURL, vectordURL string) *Retriever {
	client := &http.Client{Timeout: 30 * time.Second}
	return &Retriever{
		httpClient: client,
		embeddURL:  embeddURL,
		vectordURL: vectordURL,
	}
}
// Errors surfaced to HTTP handlers. Match them with errors.Is: Search
// wraps ErrEmbed and ErrCorpus around the underlying cause, and
// searchCorpus wraps ErrCorpusNotFound around the corpus name.
var (
	// ErrEmptyCorpora is returned by Search when no corpora are named.
	ErrEmptyCorpora = errors.New("matrix: corpora must be non-empty")
	// ErrEmptyQuery is returned by Search when neither a query vector
	// nor query text is supplied.
	ErrEmptyQuery = errors.New("matrix: query_text or query_vector required")
	// ErrCorpus marks a failed search against one of the requested corpora.
	ErrCorpus = errors.New("matrix: corpus search failed") // wraps vectord errors
	// ErrEmbed marks a failure to embed the query text.
	ErrEmbed = errors.New("matrix: embed failed")
	// ErrCorpusNotFound marks an HTTP 404 from a corpus search.
	ErrCorpusNotFound = errors.New("matrix: corpus not found") // distinct sentinel for vectord 404
)
// Search runs the matrix retrieve+merge: it resolves the query to a
// vector (embedding QueryText when no QueryVector is supplied), searches
// every requested corpus in parallel, merges the hits by ascending
// distance, applies the optional metadata filter, truncates to K and
// finally applies the optional playbook boost.
//
// It returns ErrEmptyCorpora when no corpora are named, ErrEmptyQuery
// when neither query form is set, an ErrEmbed-wrapped error when
// embedding fails and an ErrCorpus-wrapped error when any corpus search
// fails.
//
// Error policy: fail-loud on any corpus error. Silent partial results
// would lie about what was actually searched, which defeats the
// indexer's coverage guarantee. Callers that want best-effort can
// catch the error and re-issue with a smaller corpora list.
//
// Results in the response is always a non-nil slice, so an empty result
// set marshals as the JSON array [] rather than null.
func (r *Retriever) Search(ctx context.Context, req SearchRequest) (*SearchResponse, error) {
	if len(req.Corpora) == 0 {
		return nil, ErrEmptyCorpora
	}
	if req.K <= 0 {
		return nil, errors.New("matrix: k must be > 0")
	}
	if req.PerCorpusK <= 0 {
		req.PerCorpusK = req.K
	}

	// Resolve query → vector.
	qvec := req.QueryVector
	if len(qvec) == 0 {
		if req.QueryText == "" {
			return nil, ErrEmptyQuery
		}
		v, err := r.embed(ctx, req.QueryText, req.Model)
		if err != nil {
			return nil, fmt.Errorf("%w: %v", ErrEmbed, err)
		}
		qvec = v
	}

	// Parallel search across corpora. Each shard is independent and
	// writes only its own slot, so no locking is needed;
	// fan-out + collect with WaitGroup is cleaner than channels-only.
	type shardResult struct {
		corpus string
		hits   []vectord.Result
		err    error
	}
	results := make([]shardResult, len(req.Corpora))
	var wg sync.WaitGroup
	for i, c := range req.Corpora {
		wg.Add(1)
		go func(i int, corpus string) {
			defer wg.Done()
			hits, err := r.searchCorpus(ctx, corpus, qvec, req.PerCorpusK)
			results[i] = shardResult{corpus: corpus, hits: hits, err: err}
		}(i, c)
	}
	wg.Wait()

	// First pass: surface the first shard error (in request order) and
	// tally hits so the merged slice can be sized once.
	perCorpus := make(map[string]int, len(req.Corpora))
	total := 0
	for _, s := range results {
		if s.err != nil {
			return nil, fmt.Errorf("%w: %s: %v", ErrCorpus, s.corpus, s.err)
		}
		perCorpus[s.corpus] = len(s.hits)
		total += len(s.hits)
	}

	// Second pass: merge with corpus attribution. make() yields a
	// non-nil slice even when total is 0, which keeps the JSON response
	// an array ("results": []) instead of null.
	allHits := make([]Result, 0, total)
	for _, s := range results {
		for _, h := range s.hits {
			allHits = append(allHits, Result{
				ID: h.ID, Distance: h.Distance, Corpus: s.corpus, Metadata: h.Metadata,
			})
		}
	}

	// Stable sort so equal-distance ties keep input order (request
	// order across corpora, returned order within one corpus). This
	// matters for deterministic test assertions.
	sort.SliceStable(allHits, func(i, j int) bool {
		return allHits[i].Distance < allHits[j].Distance
	})

	// Metadata filter (component B — staffing-side structured gate).
	// Applied BEFORE top-K truncation so the filter doesn't accidentally
	// reduce coverage further. Caller can request larger PerCorpusK to
	// compensate when filters are aggressive.
	var dropped int
	if len(req.MetadataFilter) > 0 {
		filtered := make([]Result, 0, len(allHits))
		for _, h := range allHits {
			if matchesMetadataFilter(h.Metadata, req.MetadataFilter) {
				filtered = append(filtered, h)
			} else {
				dropped++
			}
		}
		allHits = filtered
	}
	if len(allHits) > req.K {
		allHits = allHits[:req.K]
	}

	resp := &SearchResponse{
		Results:               allHits,
		PerCorpusCounts:       perCorpus,
		MetadataFilterDropped: dropped,
	}

	// Playbook boost (component 5). Reuses the query vector — no
	// extra embed call. If the playbook corpus doesn't exist (first
	// search before any Record), the lookup gracefully no-ops.
	if req.UsePlaybook {
		hits, err := r.fetchPlaybookHits(ctx, qvec, req)
		if err != nil {
			// Don't fail the whole search on playbook errors — the
			// boost is opportunistic. Log + continue.
			slog.Warn("matrix: playbook lookup failed; skipping boost", "err", err)
		} else if len(hits) > 0 {
			resp.PlaybookBoosted = ApplyPlaybookBoost(resp.Results, hits)
		}
	}
	return resp, nil
}
// fetchPlaybookHits searches the playbook corpus with the query vector
// and returns the hits that lie within the distance ceiling and whose
// metadata decodes into a playbook entry. Corpus name, top-K and
// ceiling come from req, falling back to the package defaults when
// unset (empty name, non-positive numbers).
//
// A missing playbook corpus is not an error: it yields (nil, nil), the
// normal state before anything has been recorded. Entries with
// undecodable metadata are logged and skipped.
func (r *Retriever) fetchPlaybookHits(ctx context.Context, qvec []float32, req SearchRequest) ([]PlaybookHit, error) {
	name := req.PlaybookCorpus
	if name == "" {
		name = DefaultPlaybookCorpus
	}
	limit := req.PlaybookTopK
	if limit <= 0 {
		limit = DefaultPlaybookTopK
	}
	ceiling := req.PlaybookMaxDistance
	if ceiling <= 0 {
		ceiling = DefaultPlaybookMaxDistance
	}

	candidates, err := r.searchCorpus(ctx, name, qvec, limit)
	switch {
	case errors.Is(err, ErrCorpusNotFound):
		// Cold start: no Record call has created the corpus yet.
		return nil, nil
	case err != nil:
		return nil, err
	}

	hits := make([]PlaybookHit, 0, len(candidates))
	for i := range candidates {
		cand := candidates[i]
		if float64(cand.Distance) > ceiling {
			continue
		}
		entry, decodeErr := UnmarshalPlaybookMetadata(cand.Metadata)
		if decodeErr != nil {
			slog.Warn("matrix: skip malformed playbook entry", "id", cand.ID, "err", decodeErr)
			continue
		}
		hits = append(hits, PlaybookHit{
			PlaybookID: cand.ID,
			Distance:   cand.Distance,
			Entry:      entry,
		})
	}
	return hits, nil
}
// Record stores a (query → answer) playbook entry in the playbook
// corpus and returns the ID it was written under. Steps, in order:
// validate the entry, embed its query text, make sure the corpus exists
// (sized to the embedding's length), stamp RecordedAtNs if unset, and
// add one item whose metadata is the entry's marshaled form.
//
// The ID is derived deterministically from (QueryText, AnswerID,
// AnswerCorpus), so recording the same triple again targets the same
// item. NOTE(review): the original comment says this upserts with the
// last score winning — that depends on vectord's add semantics, not
// visible here.
//
// corpus="" defaults to DefaultPlaybookCorpus.
func (r *Retriever) Record(ctx context.Context, entry PlaybookEntry, corpus string) (string, error) {
	if invalid := entry.Validate(); invalid != nil {
		return "", invalid
	}
	target := corpus
	if target == "" {
		target = DefaultPlaybookCorpus
	}

	vec, err := r.embed(ctx, entry.QueryText, "")
	if err != nil {
		return "", fmt.Errorf("playbook record embed: %w", err)
	}
	if err = r.ensureCorpus(ctx, target, len(vec)); err != nil {
		return "", fmt.Errorf("playbook ensure corpus: %w", err)
	}

	// Stamp before marshaling so the stored metadata carries the time.
	if entry.RecordedAtNs == 0 {
		entry.RecordedAtNs = time.Now().UnixNano()
	}
	id := playbookID(entry.QueryText, entry.AnswerID, entry.AnswerCorpus)
	payload, err := entry.MarshalMetadata()
	if err != nil {
		return "", err
	}
	if err = r.addItem(ctx, target, id, vec, payload); err != nil {
		return "", fmt.Errorf("playbook add: %w", err)
	}
	return id, nil
}
// playbookID derives a deterministic item ID from the triple
// (query, answerID, answerCorpus): "pb-" followed by the first 8 bytes
// (16 hex characters) of the SHA-256 of the three parts joined by "|".
func playbookID(query, answerID, answerCorpus string) string {
	hasher := sha256.New()
	for i, part := range [...]string{query, answerID, answerCorpus} {
		if i > 0 {
			hasher.Write([]byte{'|'}) // hash.Hash writes never fail
		}
		hasher.Write([]byte(part))
	}
	digest := hasher.Sum(nil)
	return "pb-" + hex.EncodeToString(digest[:8])
}
// ensureCorpus creates the vectord index called name with the given
// vector dimension and cosine distance, treating "already exists" as
// success so the call is idempotent.
//
// 201 = created; 409 = already exists; both return nil. Any other
// status yields an error carrying the status code and the response
// body, matching the other vectord helpers in this file.
func (r *Retriever) ensureCorpus(ctx context.Context, name string, dim int) error {
	body, err := json.Marshal(map[string]any{
		"name": name, "dimension": dim, "distance": "cosine",
	})
	if err != nil {
		return err
	}
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost,
		r.vectordURL+"/vectors/index", bytes.NewReader(body))
	if err != nil {
		return err
	}
	httpReq.Header.Set("Content-Type", "application/json")
	resp, err := r.httpClient.Do(httpReq)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusConflict {
		// Drain so the transport can reuse the connection. A failure
		// here does not change the outcome, so it is ignored.
		_, _ = io.Copy(io.Discard, resp.Body)
		return nil
	}
	// Keep the body for diagnostics; a read error only truncates the message.
	b, _ := io.ReadAll(resp.Body)
	return fmt.Errorf("ensure %q: status %d: %s", name, resp.StatusCode, b)
}
// addItem POSTs a single-item batch to /vectors/index/{name}/add,
// writing one vector with its ID and raw JSON metadata into corpus.
//
// The corpus name is path-escaped before it is placed in the URL, so a
// name containing "/", "?" or "#" cannot redirect the request to a
// different endpoint. Any status other than 200 yields an error
// carrying the status code and response body.
func (r *Retriever) addItem(ctx context.Context, corpus, id string, vec []float32, meta json.RawMessage) error {
	body, err := json.Marshal(map[string]any{
		"items": []map[string]any{
			{"id": id, "vector": vec, "metadata": meta},
		},
	})
	if err != nil {
		return err
	}
	endpoint := r.vectordURL + "/vectors/index/" + url.PathEscape(corpus) + "/add"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		return err
	}
	httpReq.Header.Set("Content-Type", "application/json")
	resp, err := r.httpClient.Do(httpReq)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Keep the body for diagnostics; a read error only truncates the message.
		b, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("add %q: status %d: %s", corpus, resp.StatusCode, b)
	}
	// Drain so the transport can reuse the connection. A failure here
	// does not change the outcome, so it is ignored.
	_, _ = io.Copy(io.Discard, resp.Body)
	return nil
}
// matchesMetadataFilter reports whether a result's raw JSON metadata
// satisfies filter.
//
// An empty filter matches everything. Otherwise the metadata must
// decode as a JSON object, and every filter key must be present with a
// matching value (AND across keys). Empty or undecodable metadata
// never matches a non-empty filter.
//
// Filter value semantics (delegated to valueMatches):
//
//	string|number|bool → equality after JSON normalization
//	[]any              → OR within the key (any element matching wins)
func matchesMetadataFilter(rawMeta json.RawMessage, filter map[string]any) bool {
	switch {
	case len(filter) == 0:
		return true
	case len(rawMeta) == 0:
		return false // no metadata can't satisfy any filter
	}

	var decoded map[string]any
	if json.Unmarshal(rawMeta, &decoded) != nil {
		return false
	}
	for key, want := range filter {
		have, ok := decoded[key]
		if !ok || !valueMatches(have, want) {
			return false
		}
	}
	return true
}
// valueMatches compares one metadata value against one filter value.
// When expected is a []any it acts as a list of alternatives and got
// must equal at least one of them; otherwise got must equal expected.
// Equality is decided by jsonEqual, so 1 matches 1.0 while the string
// "true" does not match the boolean true.
func valueMatches(got, expected any) bool {
	alternatives, isList := expected.([]any)
	if !isList {
		return jsonEqual(got, expected)
	}
	for i := range alternatives {
		if jsonEqual(got, alternatives[i]) {
			return true
		}
	}
	return false
}
// jsonEqual reports whether a and b marshal to byte-identical JSON.
// Comparing encoded forms sidesteps Go's numeric type differences: an
// int 1 and a float64 1.0 both encode as `1`. A value that cannot be
// marshaled is never equal to anything.
func jsonEqual(a, b any) bool {
	left, err := json.Marshal(a)
	if err != nil {
		return false
	}
	right, err := json.Marshal(b)
	if err != nil {
		return false
	}
	return string(left) == string(right)
}
// Corpora lists the index names known to vectord by issuing
// GET {vectordURL}/vectors/index and returning the "names" array of the
// JSON response. It is exposed at the matrix layer so callers do not
// need direct vectord access. A non-200 status yields an error carrying
// the status code and response body.
func (r *Retriever) Corpora(ctx context.Context) ([]string, error) {
	listReq, err := http.NewRequestWithContext(ctx, http.MethodGet, r.vectordURL+"/vectors/index", nil)
	if err != nil {
		return nil, err
	}
	resp, err := r.httpClient.Do(listReq)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if status := resp.StatusCode; status != http.StatusOK {
		// Keep the body for diagnostics; a read error only truncates the message.
		payload, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("vectord index list: status %d: %s", status, payload)
	}

	var listing struct {
		Names []string `json:"names"`
	}
	if err = json.NewDecoder(resp.Body).Decode(&listing); err != nil {
		return nil, err
	}
	return listing.Names, nil
}
// embed turns one text into a vector by POSTing a one-element batch
// ({"texts": [text], "model": model}) to {embeddURL}/embed and
// returning the first vector of the response. A non-200 status, or a
// response with no vectors, is an error.
// NOTE(review): the original comment says embedd's LRU cache absorbs
// repeat queries (commit 56844c3) — not verifiable from this file.
func (r *Retriever) embed(ctx context.Context, text, model string) ([]float32, error) {
	payload, err := json.Marshal(map[string]any{"texts": []string{text}, "model": model})
	if err != nil {
		return nil, err
	}
	embedReq, err := http.NewRequestWithContext(ctx, http.MethodPost, r.embeddURL+"/embed", bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	embedReq.Header.Set("Content-Type", "application/json")

	resp, err := r.httpClient.Do(embedReq)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if status := resp.StatusCode; status != http.StatusOK {
		// Keep the body for diagnostics; a read error only truncates the message.
		detail, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("embed status %d: %s", status, detail)
	}

	var decoded struct {
		Vectors [][]float32 `json:"vectors"`
	}
	if err = json.NewDecoder(resp.Body).Decode(&decoded); err != nil {
		return nil, err
	}
	if len(decoded.Vectors) < 1 {
		return nil, errors.New("embed returned no vectors")
	}
	return decoded.Vectors[0], nil
}
// searchCorpus calls vectord /vectors/index/{name}/search with the
// query vector and k, and returns the "results" array of the response.
//
// HTTP 404 is reported as ErrCorpusNotFound (wrapped with the corpus
// name) so callers can distinguish "this corpus doesn't exist" from
// "this corpus errored" via errors.Is instead of matching on the error
// text. Any other non-200 status yields an error carrying the status
// code and response body.
//
// The corpus name comes from the search request, so it is path-escaped
// before being placed in the URL: a name containing "/", "?" or "#"
// cannot redirect the request to a different endpoint.
func (r *Retriever) searchCorpus(ctx context.Context, corpus string, vec []float32, k int) ([]vectord.Result, error) {
	body, err := json.Marshal(map[string]any{"vector": vec, "k": k})
	if err != nil {
		return nil, err
	}
	endpoint := r.vectordURL + "/vectors/index/" + url.PathEscape(corpus) + "/search"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	httpReq.Header.Set("Content-Type", "application/json")
	resp, err := r.httpClient.Do(httpReq)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusNotFound {
		// Drain so the transport can reuse the connection. A failure
		// here does not change the outcome, so it is ignored.
		_, _ = io.Copy(io.Discard, resp.Body)
		return nil, fmt.Errorf("%w: %s", ErrCorpusNotFound, corpus)
	}
	if resp.StatusCode != http.StatusOK {
		// Keep the body for diagnostics; a read error only truncates the message.
		b, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, b)
	}
	var out struct {
		Results []vectord.Result `json:"results"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, err
	}
	return out.Results, nil
}

View File

@ -1,249 +0,0 @@
package observer
// Store: in-memory ring buffer + optional JSONL persistor. Same
// shape as internal/pathway's persistor (afbb506) — opens the file
// per Append rather than holding an fd, which is fine at the
// observer's expected write rate (≤ a few hundred ops/min) and
// keeps the substrate restartable mid-stream.
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log/slog"
"os"
"path/filepath"
"sync"
)
const (
	// DefaultRingCap is the maximum number of ops a Store created by
	// NewStore keeps in memory.
	// NOTE(review): per the original note this mirrors the Rust Phase 24
	// limit of 2000 (recordExternalOp shifts the head when length >
	// 2000) — not verifiable from this file.
	DefaultRingCap = 2000

	// DefaultRecentScenariosCap is how many recent source=scenario ops
	// the Stats aggregation collects.
	// NOTE(review): per the original note this matches the TS
	// hard-coded slice(-10) — not verifiable from this file.
	DefaultRecentScenariosCap = 10
)
// Store holds the ring buffer + the optional persistor. Access to the
// ring is guarded by a single RWMutex: Record takes the write lock,
// Recent and Stats take the read lock.
type Store struct {
	// mu guards ring.
	mu sync.RWMutex
	// ring holds observed ops in append order, oldest first.
	ring []ObservedOp
	// cap is the maximum length of ring; Record drops the oldest entry
	// once it is exceeded.
	cap int
	// persistor, when non-nil, is handed every op before it enters the ring.
	persistor *Persistor
}
// NewStore returns an empty Store whose ring is capped at
// DefaultRingCap entries, with backing storage allocated up front.
// A nil persistor gives an in-memory-only store (unit tests, ephemeral
// runs); a non-nil one receives every recorded op.
func NewStore(persistor *Persistor) *Store {
	s := new(Store)
	s.cap = DefaultRingCap
	s.ring = make([]ObservedOp, 0, DefaultRingCap)
	s.persistor = persistor
	return s
}
// Record fills in default timestamp/source, validates the op, then
// persists it and appends it to the ring. A validation failure is
// returned as-is (the visible Validate returns ErrInvalidOp) and
// nothing is persisted or appended. The log append is attempted
// before the ring append; it is best-effort, so a failed append is
// logged and the op is still recorded in memory.
func (s *Store) Record(op ObservedOp) error {
	// Defaults go in first so a body with no timestamp or source
	// still passes validation.
	op.EnsureTimestamp()
	op.DefaultSource()
	if invalid := op.Validate(); invalid != nil {
		return invalid
	}

	// Mirrors the Rust catch{} in persistOp; the ring buffer is the
	// source of truth in flight.
	if sink := s.persistor; sink != nil {
		if err := sink.Append(op); err != nil {
			slog.Warn("observer: persist failed", "err", err)
		}
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	s.ring = append(s.ring, op)
	if len(s.ring) <= s.cap {
		return nil
	}
	// Over capacity: slide everything one slot left inside the
	// existing backing array, dropping the oldest entry.
	kept := copy(s.ring, s.ring[1:])
	s.ring = s.ring[:kept]
	return nil
}
// Recent returns a snapshot copy of the ring, oldest entry first and
// newest last. The returned slice is independent of the Store's own
// slice, so callers may reorder or truncate it freely.
func (s *Store) Recent() []ObservedOp {
	s.mu.RLock()
	defer s.mu.RUnlock()

	snapshot := make([]ObservedOp, 0, len(s.ring))
	return append(snapshot, s.ring...)
}
// Stats summarises the current ring: total, success/failure counts,
// a per-source tally (an empty source is counted under SourceMCP),
// and the most recent source=scenario ops, capped at
// DefaultRecentScenariosCap and listed oldest-first. Mirrors the Rust
// /stats response shape.
func (s *Store) Stats() Stats {
	s.mu.RLock()
	defer s.mu.RUnlock()

	out := Stats{
		Total:    len(s.ring),
		BySource: map[string]int{},
	}
	for i := range s.ring {
		entry := &s.ring[i]
		if entry.Success {
			out.Successes++
		} else {
			out.Failures++
		}
		label := string(entry.Source)
		if label == "" {
			label = string(SourceMCP)
		}
		out.BySource[label]++
	}

	// Walk newest → oldest collecting scenario ops until the cap is
	// reached, then flip the slice so it reads oldest-first like the
	// Rust slice(-10).
	recent := make([]ScenarioOpDigest, 0, DefaultRecentScenariosCap)
	for i := len(s.ring) - 1; i >= 0; i-- {
		if len(recent) >= DefaultRecentScenariosCap {
			break
		}
		entry := &s.ring[i]
		if entry.Source != SourceScenario {
			continue
		}
		recent = append(recent, ScenarioOpDigest{
			TS:      entry.Timestamp,
			OK:      entry.Success,
			Staffer: entry.StafferID,
			Kind:    entry.EventKind,
			Role:    entry.Role,
		})
	}
	for lo, hi := 0, len(recent)-1; lo < hi; lo, hi = lo+1, hi-1 {
		recent[lo], recent[hi] = recent[hi], recent[lo]
	}
	out.RecentScenarios = recent
	return out
}
// Load discards the current ring and rebuilds it by replaying the
// persistor's JSONL log, keeping only the newest s.cap ops. Same
// semantics as pathway.Store.Load. With no persistor it is a no-op
// returning (0, nil). Malformed lines are tolerated by the replay.
//
// Returns the number of ops the replay reports as applied, plus any
// error it returns.
func (s *Store) Load() (int, error) {
	if s.persistor == nil {
		return 0, nil
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	s.ring = s.ring[:0]
	push := func(op ObservedOp) error {
		s.ring = append(s.ring, op)
		if len(s.ring) > s.cap {
			// Drop the oldest entry by shifting left in place.
			s.ring = s.ring[:copy(s.ring, s.ring[1:])]
		}
		return nil
	}
	return s.persistor.Replay(push)
}
// ─── Persistor ──────────────────────────────────────────────────

// Persistor is a handle on one JSONL log file in which every line is
// a single ObservedOp. It holds only the path: the file is opened per
// operation, the same pattern internal/pathway uses.
type Persistor struct {
	path string // location of the JSONL log
}
// NewPersistor returns a Persistor bound to path, creating the
// parent directory (mode 0755) if it is missing. The log file itself
// is not touched here. An empty path is rejected; callers that want
// no persistence pass nil to NewStore instead.
func NewPersistor(path string) (*Persistor, error) {
	if len(path) == 0 {
		return nil, errors.New("observer: persistor path is empty")
	}
	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		return nil, fmt.Errorf("observer: create dir: %w", mkErr)
	}
	p := &Persistor{path: path}
	return p, nil
}
// Path reports the location of the JSONL log this persistor appends
// to and replays from.
func (p *Persistor) Path() string {
	return p.path
}
// Append serialises op as JSON and appends it to the log as one
// newline-terminated line, creating the file (mode 0644) if needed.
//
// The record and its trailing newline are handed to the OS in a
// single Write. Writing them separately left a window in which a
// crash, or another appender on the same file, could land between
// the two and glue two records onto one line, which Replay would then
// skip as malformed. The Close error is reported too, since for a
// freshly written file it can be the first sign the data did not
// reach the disk.
//
// Returns a wrapped error if marshalling, opening, writing or closing
// fails.
func (p *Persistor) Append(op ObservedOp) error {
	line, err := json.Marshal(op)
	if err != nil {
		return fmt.Errorf("observer: marshal op: %w", err)
	}
	line = append(line, '\n')

	f, err := os.OpenFile(p.path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return fmt.Errorf("observer: open log: %w", err)
	}
	if _, err := f.Write(line); err != nil {
		// The write error is the one worth reporting; a close error
		// on top of it adds nothing.
		_ = f.Close()
		return fmt.Errorf("observer: write op: %w", err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("observer: close log: %w", err)
	}
	return nil
}
// Replay streams the JSONL log and hands each decoded op to apply,
// returning how many ops apply accepted. A log file that does not
// exist yet is the normal cold-start state and yields (0, nil).
// Blank lines are ignored. Lines that fail to decode, and ops that
// apply rejects, are logged, counted as skipped, and do not stop the
// replay. A scanner error (for example a line over the 1 MiB cap)
// ends the replay and is returned with the count applied so far.
func (p *Persistor) Replay(apply func(ObservedOp) error) (int, error) {
	logFile, err := os.Open(p.path)
	switch {
	case errors.Is(err, fs.ErrNotExist):
		return 0, nil
	case err != nil:
		return 0, fmt.Errorf("observer: open log: %w", err)
	}
	defer logFile.Close()

	lines := bufio.NewScanner(logFile)
	lines.Buffer(make([]byte, 0, 64*1024), 1<<20) // 1 MiB per line cap

	var applied, skipped int
	for lineNo := 1; lines.Scan(); lineNo++ {
		payload := lines.Bytes()
		if len(payload) == 0 {
			continue
		}
		var op ObservedOp
		if decodeErr := json.Unmarshal(payload, &op); decodeErr != nil {
			slog.Warn("observer: replay skipped malformed line",
				"path", p.path, "line", lineNo, "err", decodeErr.Error())
			skipped++
			continue
		}
		if applyErr := apply(op); applyErr != nil {
			slog.Warn("observer: replay apply failed",
				"path", p.path, "line", lineNo, "err", applyErr.Error())
			skipped++
			continue
		}
		applied++
	}
	if scanErr := lines.Err(); scanErr != nil {
		return applied, fmt.Errorf("observer: scan log: %w", scanErr)
	}
	if skipped != 0 {
		slog.Info("observer: replay completed with skips",
			"path", p.path, "applied", applied, "skipped", skipped)
	}
	return applied, nil
}

View File

@ -1,193 +0,0 @@
package observer
import (
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// mkOp builds a valid ObservedOp fixture stamped with the current
// UTC time; only the success flag and source vary between callers.
func mkOp(success bool, source Source) ObservedOp {
	var op ObservedOp
	op.Timestamp = time.Now().UTC().Format(time.RFC3339)
	op.Endpoint = "/v1/test"
	op.InputSummary = "test op"
	op.OutputSummary = "ok"
	op.DurationMs = 42
	op.Success = success
	op.Source = source
	return op
}
// TestRecord_RequiresEndpointAndTimestamp checks that Record rejects
// an op with no endpoint and accepts a fully populated one.
func TestRecord_RequiresEndpointAndTimestamp(t *testing.T) {
	store := NewStore(nil)

	// Record fills the timestamp in itself, so the empty endpoint is
	// what has to trip validation here.
	if err := store.Record(ObservedOp{}); err == nil {
		t.Error("expected error on empty endpoint")
	}

	if err := store.Record(mkOp(true, SourceMCP)); err != nil {
		t.Errorf("good op: %v", err)
	}
}
// TestRecord_DefaultsTimestampAndSource checks that an op recorded
// without a timestamp or source comes back with both filled in.
func TestRecord_DefaultsTimestampAndSource(t *testing.T) {
	store := NewStore(nil)
	err := store.Record(ObservedOp{
		Endpoint:     "/x",
		InputSummary: "no ts no source",
		Success:      true,
	})
	if err != nil {
		t.Fatal(err)
	}

	got := store.Recent()[0]
	if got.Timestamp == "" {
		t.Error("Timestamp should be defaulted")
	}
	if got.Source != SourceMCP {
		t.Errorf("Source: want %q, got %q", SourceMCP, got.Source)
	}
}
// TestStats_Aggregates seeds a mix of sources and outcomes and checks
// every aggregate Stats reports.
func TestStats_Aggregates(t *testing.T) {
	store := NewStore(nil)
	batches := []struct {
		n   int
		ok  bool
		src Source
	}{
		{5, true, SourceMCP},
		{3, false, SourceScenario},
		{2, true, SourceLangfuse},
	}
	for _, b := range batches {
		for i := 0; i < b.n; i++ {
			_ = store.Record(mkOp(b.ok, b.src))
		}
	}

	st := store.Stats()
	expect := func(format string, want, got int) {
		if got != want {
			t.Errorf(format, got)
		}
	}
	expect("total: want 10, got %d", 10, st.Total)
	expect("successes: want 7, got %d", 7, st.Successes)
	expect("failures: want 3, got %d", 3, st.Failures)
	if !(st.BySource["mcp"] == 5 && st.BySource["scenario"] == 3 && st.BySource["langfuse"] == 2) {
		t.Errorf("by_source mismatch: %+v", st.BySource)
	}
	expect("recent scenarios: want 3, got %d", 3, len(st.RecentScenarios))
}
// TestStats_RecentScenariosCappedAndOrdered records more scenario ops
// than the recent-scenarios cap and checks that Stats returns exactly
// the newest DefaultRecentScenariosCap of them, oldest first.
//
// Ordering is asserted through StafferID on every returned digest, so
// no sleep between records is needed: the RFC3339 timestamps have
// only second resolution and were never compared.
func TestStats_RecentScenariosCappedAndOrdered(t *testing.T) {
	const total = 15
	s := NewStore(nil)
	// Record staffer-a .. staffer-o; only the last 10 should appear.
	for i := 0; i < total; i++ {
		op := mkOp(true, SourceScenario)
		op.StafferID = "staffer-" + string(rune('a'+i))
		if err := s.Record(op); err != nil {
			t.Fatalf("record %d: %v", i, err)
		}
	}

	recent := s.Stats().RecentScenarios
	// Fatal, not Error: the per-index checks below assume the length.
	if len(recent) != DefaultRecentScenariosCap {
		t.Fatalf("cap: want %d, got %d", DefaultRecentScenariosCap, len(recent))
	}

	// The window is the tail of the recorded sequence in record
	// order, so the last entry is the most recently added
	// (staffer-o, the 15th).
	first := total - len(recent)
	for i, digest := range recent {
		want := "staffer-" + string(rune('a'+first+i))
		if digest.Staffer != want {
			t.Errorf("recent[%d]: want %s, got %q", i, want, digest.Staffer)
		}
	}
}
// TestRingBuffer_BoundedByDefaultCap checks that the ring never holds
// more than its cap and that the oldest entries are the ones dropped.
// The cap is shrunk to 5 so rollover happens after a handful of
// records.
func TestRingBuffer_BoundedByDefaultCap(t *testing.T) {
	s := NewStore(nil)
	s.cap = 5 // shrink for testability
	for i := 0; i < 12; i++ {
		op := mkOp(true, SourceMCP)
		op.InputSummary = string(rune('a' + i))
		if err := s.Record(op); err != nil {
			t.Fatalf("record %d: %v", i, err)
		}
	}

	r := s.Recent()
	// Fatal rather than Error: r[0] below would panic on an empty ring.
	if len(r) != 5 {
		t.Fatalf("ring size: want 5, got %d", len(r))
	}
	// Oldest 7 dropped; first remaining should have InputSummary "h" (8th).
	if r[0].InputSummary != "h" {
		t.Errorf("oldest after rollover: want 'h', got %q", r[0].InputSummary)
	}
}
// TestPersistor_RoundTrip records ops through a persisted Store, then
// loads the log into a second Store and checks that count, content
// and order all survive.
func TestPersistor_RoundTrip(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "ops.jsonl")
	p, err := NewPersistor(path)
	if err != nil {
		t.Fatal(err)
	}
	s := NewStore(p)
	for i := 0; i < 4; i++ {
		op := mkOp(i%2 == 0, SourceMCP)
		op.InputSummary = string(rune('a' + i))
		if err := s.Record(op); err != nil {
			t.Fatal(err)
		}
	}

	// Sanity: file has 4 lines.
	bs, err := os.ReadFile(path)
	if err != nil {
		t.Fatal(err)
	}
	lines := strings.Split(strings.TrimSuffix(string(bs), "\n"), "\n")
	if len(lines) != 4 {
		t.Errorf("file lines: want 4, got %d", len(lines))
	}

	// Rehydrate into a fresh Store.
	s2 := NewStore(p)
	n, err := s2.Load()
	if err != nil {
		t.Fatal(err)
	}
	if n != 4 {
		t.Errorf("loaded: want 4, got %d", n)
	}
	r := s2.Recent()
	// Fatal rather than Error: the order check indexes r[0..3] and
	// would panic on a short ring instead of reporting the mismatch.
	if len(r) != 4 {
		t.Fatalf("rehydrated ring: want 4, got %d", len(r))
	}
	// Order preserved.
	for i, want := range []string{"a", "b", "c", "d"} {
		if r[i].InputSummary != want {
			t.Errorf("op %d: want %q, got %q", i, want, r[i].InputSummary)
		}
	}
}
// TestPersistor_CorruptionTolerant hand-writes a log with a corrupt
// line between two valid ones and checks that Load applies the valid
// pair and skips the corrupt line without failing.
func TestPersistor_CorruptionTolerant(t *testing.T) {
	logPath := filepath.Join(t.TempDir(), "ops.jsonl")

	// One valid + one corrupt + one valid line.
	seed := []string{
		`{"timestamp":"2026-04-29T12:00:00Z","endpoint":"/x","input_summary":"a","success":true,"duration_ms":1,"output_summary":"ok","source":"mcp"}`,
		`{this is not json`,
		`{"timestamp":"2026-04-29T12:00:01Z","endpoint":"/y","input_summary":"b","success":false,"duration_ms":2,"output_summary":"err","source":"scenario"}`,
	}
	contents := strings.Join(seed, "\n") + "\n"
	if err := os.WriteFile(logPath, []byte(contents), 0o644); err != nil {
		t.Fatal(err)
	}

	persistor, err := NewPersistor(logPath)
	if err != nil {
		t.Fatal(err)
	}
	applied, err := NewStore(persistor).Load()
	if err != nil {
		t.Fatal(err)
	}
	if applied != 2 {
		t.Errorf("applied: want 2 (valid pair), got %d (corrupt should skip)", applied)
	}
}

View File

@ -1,131 +0,0 @@
// Package observer is the Go port of mcp-server/observer.ts (Rust
// system, 852 lines TS) — the "third-party witness" loop that records
// every observed operation, surfaces failures, and feeds learnings
// back into the substrate.
//
// What this package owns (this commit):
// - ObservedOp data model + ring buffer + JSONL persistence
// - Stats aggregation (total / successes / failures / by_source)
// - Source taxonomy (mcp / scenario / langfuse / overseer_correction)
//
// What's deferred to follow-up commits:
// - /review endpoint with cloud-LLM hand-review (the heuristic
// plus qwen3-coder fall-back path)
// - tailOverseerCorrections (background loop reading
// overseer_corrections.jsonl)
// - analyzeErrors / consolidatePlaybooks periodic loops
// - escalateFailureClusterToLLMTeam (failure clustering trigger)
//
// /relevance was already ported in 9588bd8 (component 3 of SPEC §3.4)
// and lives in internal/matrix/relevance.go; the observer package
// doesn't re-implement it.
package observer
import (
"errors"
"time"
)
// Source names where an observed op came from. The empty string is
// treated as SourceMCP for back-compat with Phase 24 callers.
type Source string

// Known provenance values.
const (
	SourceMCP                Source = "mcp"
	SourceScenario           Source = "scenario"
	SourceLangfuse           Source = "langfuse"
	SourceOverseerCorrection Source = "overseer_correction"
)
// ObservedOp is a single record in the observer's ring buffer, and
// one line of the JSONL log when persistence is configured. The JSON
// shape mirrors the Rust ObservedOp so that on-wire records
// round-trip between the two implementations during the Rust→Go
// cutover.
//
// Fields tagged omitempty are left out of the JSON when they hold
// their zero value, which keeps the JSONL file small. That means a
// numeric zero in such a field reads as "not set"; if a real zero
// ever has to be persisted, a future schema-version bump can switch
// those fields to pointers.
type ObservedOp struct {
	// Always serialised.
	Timestamp     string `json:"timestamp"` // ISO 8601
	Endpoint      string `json:"endpoint"`
	InputSummary  string `json:"input_summary"`
	Success       bool   `json:"success"`
	DurationMs    int64  `json:"duration_ms"`
	OutputSummary string `json:"output_summary"`

	// Serialised only when non-zero.
	Error           string `json:"error,omitempty"`
	Source          Source `json:"source,omitempty"`
	StafferID       string `json:"staffer_id,omitempty"`
	SigHash         string `json:"sig_hash,omitempty"`
	EventKind       string `json:"event_kind,omitempty"`
	Role            string `json:"role,omitempty"`
	City            string `json:"city,omitempty"`
	State           string `json:"state,omitempty"`
	Count           int    `json:"count,omitempty"`
	RescueAttempted bool   `json:"rescue_attempted,omitempty"`
	RescueSucceeded bool   `json:"rescue_succeeded,omitempty"`
	TaskClass       string `json:"task_class,omitempty"`
	Correction      string `json:"correction,omitempty"`
	AppliedAtTurn   int    `json:"applied_at_turn,omitempty"`
}
// Stats is the aggregate view of the ring buffer, intended for
// dashboards and the GET /stats endpoint. RecentScenarios carries the
// most recent N source=scenario ops (default cap 10) so operators can
// see at a glance what the staffing scenarios are emitting.
type Stats struct {
	Total     int `json:"total"`
	Successes int `json:"successes"`
	Failures  int `json:"failures"`

	BySource        map[string]int     `json:"by_source"`
	RecentScenarios []ScenarioOpDigest `json:"recent_scenario_ops"`
}
// ScenarioOpDigest is the trimmed-down per-op record listed in
// Stats.RecentScenarios. Its JSON keys match the TS digest exactly:
// {ts, ok, staffer, kind, role}.
type ScenarioOpDigest struct {
	TS      string `json:"ts"`
	OK      bool   `json:"ok"`
	Staffer string `json:"staffer"`
	Kind    string `json:"kind"`
	Role    string `json:"role"`
}
// ErrInvalidOp is surfaced to HTTP handlers when an op is missing a
// required field (timestamp or endpoint).
var ErrInvalidOp = errors.New("observer: invalid op (timestamp + endpoint required)")
// Validate reports ErrInvalidOp when either required field,
// Timestamp or Endpoint, is empty, and nil otherwise. Record calls it
// before an op is added to the ring buffer.
func (op ObservedOp) Validate() error {
	if op.Timestamp == "" || op.Endpoint == "" {
		return ErrInvalidOp
	}
	return nil
}
// EnsureTimestamp stamps the op with the current UTC time in RFC 3339
// form when Timestamp is empty, and leaves an existing value alone.
// Handy for HTTP handlers that treat the request body as
// authoritative but must supply a timestamp when the body omits one.
func (op *ObservedOp) EnsureTimestamp() {
	if op.Timestamp != "" {
		return
	}
	now := time.Now().UTC()
	op.Timestamp = now.Format(time.RFC3339)
}
// DefaultSource fills an empty Source with SourceMCP and leaves any
// other value untouched. Mirrors the Rust `op.source ?? "mcp"`
// pattern in recordExternalOp.
func (op *ObservedOp) DefaultSource() {
	if op.Source != "" {
		return
	}
	op.Source = SourceMCP
}

View File

@ -1,130 +0,0 @@
// persistor.go — JSONL append-only persistence for pathway memory.
//
// Each event is one JSON line. Append is O(1) (open append, write,
// close — Go's *os.File default fsync policy is "rely on OS" which
// is fine here; correctness on power-loss is best-effort, not
// transactional). Replay reads the file once at startup.
//
// Corruption recovery: malformed lines are logged at warn level and
// skipped (they are left out of the applied count Replay returns)
// but do not stop the load. Partial state is better than no state
// for an agent substrate.
//
// What's NOT here:
// - Compaction. JSONL grows linearly with mutations; below 100K
// traces this is fine. Compaction will land when needed and
// will emit a snapshot file + tail JSONL.
// - fsync per write. We rely on the OS's eventual fsync; trace
// loss on hard crash is acceptable for the substrate's
// "remember most things" guarantee.
package pathway
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log/slog"
"os"
"path/filepath"
)
// Persistor is a handle on one JSONL log file. Build it with
// NewPersistor. Construction does NOT read the file; callers replay
// prior state explicitly through Store.Load().
type Persistor struct {
	path string // location of the JSONL log
}
// NewPersistor returns a persistor bound to path, creating the
// parent directory (mode 0755) if it is missing. The log file itself
// is not created here; that happens lazily on the first Append. An
// empty path is an error.
func NewPersistor(path string) (*Persistor, error) {
	if len(path) == 0 {
		return nil, errors.New("pathway: persistor path is empty")
	}
	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		return nil, fmt.Errorf("pathway: create dir: %w", mkErr)
	}
	p := &Persistor{path: path}
	return p, nil
}
// Path reports the location of the underlying JSONL file. Useful for
// tests and log messages.
func (p *Persistor) Path() string {
	return p.path
}
// Append serialises e as JSON and appends it to the log as one
// newline-terminated line. Each call opens the file in append mode
// (creating it with mode 0644 if needed), writes, and closes. Simple
// but correct; a pooled persistent fd is a future optimization if
// profiling shows append rate matters.
//
// The record and its trailing newline are handed to the OS in a
// single Write. Writing them separately left a window in which a
// crash could persist the record without its newline, so the next
// append would be glued onto the same line and both events would be
// skipped as malformed on replay. The Close error is reported too:
// Store applies an event in memory only after Append succeeds, so a
// failure to finish the write must not be swallowed.
//
// Returns a wrapped error if marshalling, opening, writing or closing
// fails.
func (p *Persistor) Append(e event) error {
	line, err := json.Marshal(e)
	if err != nil {
		return fmt.Errorf("pathway: marshal event: %w", err)
	}
	line = append(line, '\n')

	f, err := os.OpenFile(p.path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return fmt.Errorf("pathway: open log: %w", err)
	}
	if _, err := f.Write(line); err != nil {
		// The write error is the one worth reporting; a close error
		// on top of it adds nothing.
		_ = f.Close()
		return fmt.Errorf("pathway: write event: %w", err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("pathway: close log: %w", err)
	}
	return nil
}
// Replay streams the JSONL log and hands each decoded event to
// apply, returning how many events apply accepted. A missing file is
// NOT an error (it means "no prior state") and yields (0, nil). Blank
// lines are ignored. Lines that fail to decode, and events that apply
// rejects, are logged, counted as skipped, and do not stop the
// replay. A scanner error (for example a line over the 1 MiB cap)
// ends the replay and is returned with the count applied so far.
func (p *Persistor) Replay(apply func(event) error) (int, error) {
	logFile, err := os.Open(p.path)
	switch {
	case errors.Is(err, fs.ErrNotExist):
		return 0, nil
	case err != nil:
		return 0, fmt.Errorf("pathway: open log: %w", err)
	}
	defer logFile.Close()

	// Big buffer for unusually long content: 1 MiB per line cap.
	lines := bufio.NewScanner(logFile)
	lines.Buffer(make([]byte, 0, 64*1024), 1<<20)

	var applied, skipped int
	for lineNo := 1; lines.Scan(); lineNo++ {
		payload := lines.Bytes()
		if len(payload) == 0 {
			continue
		}
		var e event
		if decodeErr := json.Unmarshal(payload, &e); decodeErr != nil {
			slog.Warn("pathway: replay skipped malformed line",
				"path", p.path, "line", lineNo, "err", decodeErr.Error())
			skipped++
			continue
		}
		if applyErr := apply(e); applyErr != nil {
			slog.Warn("pathway: replay event apply failed",
				"path", p.path, "line", lineNo, "op", e.Op, "err", applyErr.Error())
			skipped++
			continue
		}
		applied++
	}
	if scanErr := lines.Err(); scanErr != nil {
		return applied, fmt.Errorf("pathway: scan log: %w", scanErr)
	}
	if skipped != 0 {
		slog.Info("pathway: replay completed with skips",
			"path", p.path, "applied", applied, "skipped", skipped)
	}
	return applied, nil
}

View File

@ -1,184 +0,0 @@
package pathway
import (
"encoding/json"
"errors"
"os"
"path/filepath"
"strings"
"testing"
)
// persistor_test covers the corruption-recovery contract per
// Sprint 2 row 7: malformed JSONL lines must not halt replay.
// TestPersistor_MissingFileIsNotError checks the cold-start path: a
// persistor whose log file does not exist yet constructs cleanly and
// replays zero events without error.
func TestPersistor_MissingFileIsNotError(t *testing.T) {
	missing := filepath.Join(t.TempDir(), "nonexistent.jsonl")
	p, err := NewPersistor(missing)
	if err != nil {
		t.Fatalf("NewPersistor on missing file should not error, got %v", err)
	}

	noop := func(event) error { return nil }
	replayed, replayErr := p.Replay(noop)
	if replayErr != nil {
		t.Errorf("Replay on missing file should be 0,nil; got %d, %v", replayed, replayErr)
	}
	if replayed != 0 {
		t.Errorf("Replay on missing file replayed %d events, want 0", replayed)
	}
}
// TestPersistor_AppendThenReplay appends two add events and checks
// that Replay hands them back in append order.
func TestPersistor_AppendThenReplay(t *testing.T) {
	p := mustPersistor(t)
	for _, uid := range []string{"A", "B"} {
		ev := event{Op: opAdd, Trace: &Trace{UID: uid, Content: json.RawMessage(`{}`)}}
		if err := p.Append(ev); err != nil {
			t.Fatalf("Append: %v", err)
		}
	}

	var seen []string
	collect := func(e event) error {
		if e.Trace != nil {
			seen = append(seen, e.Trace.UID)
		}
		return nil
	}
	count, err := p.Replay(collect)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}
	if count != 2 {
		t.Errorf("Replay applied %d events, want 2", count)
	}
	if !(len(seen) == 2 && seen[0] == "A" && seen[1] == "B") {
		t.Errorf("seen = %v, want [A B]", seen)
	}
}
// TestPersistor_CorruptedLines_Skipped writes a log that mixes valid
// events with a malformed line and a blank line, and checks that
// Replay applies exactly the valid events, in order.
func TestPersistor_CorruptedLines_Skipped(t *testing.T) {
	p := mustPersistor(t)

	addLine := func(uid string) []byte {
		return mustMarshal(t, event{Op: opAdd, Trace: &Trace{UID: uid, Content: json.RawMessage(`{}`)}})
	}
	// Mix of valid and corrupted lines.
	rows := [][]byte{
		addLine("A"),
		[]byte(`{this is not json}`),
		addLine("B"),
		[]byte(``), // blank line
		addLine("C"),
	}
	var file strings.Builder
	for _, row := range rows {
		file.Write(row)
		file.WriteByte('\n')
	}
	if err := os.WriteFile(p.Path(), []byte(file.String()), 0o644); err != nil {
		t.Fatalf("write file: %v", err)
	}

	var applied []string
	count, err := p.Replay(func(e event) error {
		if e.Trace != nil {
			applied = append(applied, e.Trace.UID)
		}
		return nil
	})
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}
	// 3 valid + 1 bad + 1 empty (skipped silently) = 3 applied.
	if count != 3 {
		t.Errorf("Replay applied %d, want 3 (1 corrupt line skipped)", count)
	}
	if !(len(applied) == 3 && applied[0] == "A" && applied[1] == "B" && applied[2] == "C") {
		t.Errorf("applied = %v, want [A B C]", applied)
	}
}
// TestPersistor_ApplyError_Skipped checks that when the apply
// callback rejects one event, Replay logs it and carries on with the
// rest instead of returning the error.
func TestPersistor_ApplyError_Skipped(t *testing.T) {
	p := mustPersistor(t)
	for _, uid := range []string{"A", "B", "C"} {
		// Append failures would surface below as a count mismatch.
		_ = p.Append(event{Op: opAdd, Trace: &Trace{UID: uid, Content: json.RawMessage(`{}`)}})
	}

	accepted := 0
	rejectB := func(e event) error {
		if e.Trace != nil && e.Trace.UID == "B" {
			return errors.New("simulated apply error on B")
		}
		accepted++
		return nil
	}
	n, err := p.Replay(rejectB)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}
	if !(n == 2 && accepted == 2) {
		t.Errorf("Replay applied %d (callback called %d), want 2 each (B's error skipped)", n, accepted)
	}
}
// TestPersistor_NewPersistor_EmptyPath_Errors checks that an empty
// path is rejected at construction time.
func TestPersistor_NewPersistor_EmptyPath_Errors(t *testing.T) {
	if _, err := NewPersistor(""); err == nil {
		t.Error("NewPersistor with empty path should error")
	}
}
// TestPersistor_CreatesParentDir checks that NewPersistor creates
// missing parent directories, so the first Append into a nested path
// succeeds.
func TestPersistor_CreatesParentDir(t *testing.T) {
	nested := filepath.Join(t.TempDir(), "nested", "deep", "pathway.jsonl")
	p, err := NewPersistor(nested)
	if err != nil {
		t.Fatalf("NewPersistor: %v", err)
	}

	first := event{Op: opAdd, Trace: &Trace{UID: "A", Content: json.RawMessage(`{}`)}}
	if appendErr := p.Append(first); appendErr != nil {
		t.Fatalf("Append after creating nested dir: %v", appendErr)
	}
}
// TestPersistor_LongLine_HandlesUpTo1MiB checks that a single event
// close to, but under, the scanner's 1 MiB per-line cap survives an
// Append/Replay round trip.
//
// The Marshal and Replay errors are checked explicitly, so a
// regression in the line cap is reported as the scanner error itself
// rather than as an unexplained count mismatch.
func TestPersistor_LongLine_HandlesUpTo1MiB(t *testing.T) {
	p := mustPersistor(t)
	// Build a content blob ~750 KiB so the JSON line is ~800 KiB
	// (under the 1 MiB scanner cap).
	blob := strings.Repeat("x", 750*1024)
	bigContent, err := json.Marshal(map[string]string{"data": blob})
	if err != nil {
		t.Fatalf("marshal blob: %v", err)
	}
	tr := &Trace{UID: "BIG", Content: bigContent}
	if err := p.Append(event{Op: opAdd, Trace: tr}); err != nil {
		t.Fatalf("Append big trace: %v", err)
	}

	count := 0
	n, err := p.Replay(func(e event) error {
		if e.Trace != nil && e.Trace.UID == "BIG" {
			count++
		}
		return nil
	})
	if err != nil {
		t.Fatalf("Replay big trace: %v", err)
	}
	if n != 1 || count != 1 {
		t.Errorf("big-line replay: got %d events / %d matches, want 1 each", n, count)
	}
}
// ── helpers ──
// mustPersistor returns a Persistor whose log lives in a fresh
// per-test temp directory, failing the test if construction errors.
func mustPersistor(t *testing.T) *Persistor {
	t.Helper()
	logPath := filepath.Join(t.TempDir(), "test.jsonl")
	persistor, err := NewPersistor(logPath)
	if err != nil {
		t.Fatalf("NewPersistor: %v", err)
	}
	return persistor
}
// mustMarshal JSON-encodes e, failing the test if encoding errors.
func mustMarshal(t *testing.T, e event) []byte {
	t.Helper()
	encoded, marshalErr := json.Marshal(e)
	if marshalErr != nil {
		t.Fatalf("marshal: %v", marshalErr)
	}
	return encoded
}

View File

@ -1,381 +0,0 @@
// store.go — the in-memory side of pathway memory. Persistence
// (load/append-on-mutate) is in persistor.go; the Store can be
// constructed without persistence for tests and ephemeral uses.
package pathway
import (
	"bytes"
	"encoding/json"
	"errors"
	"sort"
	"sync"
	"time"

	"github.com/google/uuid"
)
// Store is the in-memory pathway memory. One RWMutex makes it safe
// for concurrent use: read-heavy workloads are the norm, and
// mutations are individual operations rather than hot loops.
type Store struct {
	mu sync.RWMutex

	// traces maps UID → trace. Retired and active traces share the
	// one map; Search leaves retired ones out unless asked.
	traces map[string]*Trace

	// persistor is optional. nil means in-memory only (test mode and
	// ephemeral G2 uses).
	persistor *Persistor

	// nowFn yields the current time in nanoseconds. Tests override
	// it for deterministic timestamps.
	nowFn func() int64

	// uidFn mints new UIDs. Tests override it for deterministic UID
	// sequences.
	uidFn func() string
}
// NewStore builds an empty Store wired to the real clock
// (nanoseconds since the Unix epoch) and a random-UUID generator.
// Pass a nil persistor for in-memory mode. The store is usable
// immediately; when persistor is non-nil, call Load() before issuing
// operations so prior state is rehydrated first.
func NewStore(persistor *Persistor) *Store {
	s := &Store{persistor: persistor}
	s.traces = make(map[string]*Trace)
	s.nowFn = func() int64 { return time.Now().UnixNano() }
	s.uidFn = func() string { return uuid.New().String() }
	return s
}
// Load throws away the current in-memory state and rebuilds it by
// replaying the persistor's JSONL log through applyEventLocked. It
// is safe to call repeatedly: each call leaves memory matching
// whatever the log says. Corruption (malformed lines, events that
// fail to apply) is logged rather than fatal, so the load finishes
// with whatever state it could recover. With no persistor it is a
// no-op returning (0, nil).
//
// Returns the number of events successfully applied.
func (s *Store) Load() (int, error) {
	if s.persistor == nil {
		return 0, nil
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	s.traces = map[string]*Trace{} // reset
	return s.persistor.Replay(s.applyEventLocked)
}
// applyEventLocked is the one place where an event changes the
// in-memory map. Load uses it while replaying the log, and the
// mutating methods use it after appending to the log. Caller MUST
// hold s.mu in write mode.
//
// Returns ErrInvalidContent for an add/revise event with no trace or
// an empty UID, ErrNotFound when an update/retire/replay targets an
// unknown UID, and a generic error for an unrecognised op.
//
// NOTE(review): update and retire stamp UpdatedAtNs from s.nowFn()
// here, so events replayed by Load appear to get load-time stamps
// rather than the time of the original mutation. Confirm whether
// that is intended.
func (s *Store) applyEventLocked(e event) error {
	switch e.Op {
	case opAdd, opRevise:
		if e.Trace == nil || e.Trace.UID == "" {
			return ErrInvalidContent
		}
		// Permissive on purpose: an add for a UID that already
		// exists simply overwrites, so older logs still replay.
		s.traces[e.Trace.UID] = e.Trace
		return nil

	case opUpdate, opRetire, opReplay:
		// All three act on an existing trace addressed by e.UID.
		target, found := s.traces[e.UID]
		if !found {
			return ErrNotFound
		}
		switch e.Op {
		case opUpdate:
			target.Content = e.Content
			target.UpdatedAtNs = s.nowFn()
		case opRetire:
			target.Retired = true
			target.UpdatedAtNs = s.nowFn()
		case opReplay:
			target.ReplayCount++
		}
		return nil

	default:
		return errors.New("pathway: unknown op")
	}
}
// Add stores a new trace under a freshly generated UID with
// replay_count=1 and identical created/updated timestamps, then
// returns a copy of it.
//
// The content bytes are copied before being stored, exactly as the
// tags are. Previously the trace aliased the caller's slice, so a
// caller that reused its buffer after Add returned would silently
// rewrite the stored trace.
//
// Returns ErrInvalidContent if content is not valid JSON, or the
// error from persisting/applying the add event.
func (s *Store) Add(content json.RawMessage, tags ...string) (*Trace, error) {
	if !json.Valid(content) {
		return nil, ErrInvalidContent
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	now := s.nowFn()
	t := &Trace{
		UID:         s.uidFn(),
		Content:     append(json.RawMessage(nil), content...), // detach from caller's buffer
		CreatedAtNs: now,
		UpdatedAtNs: now,
		ReplayCount: 1,
		Tags:        copyTags(tags),
	}
	if err := s.appendAndApplyLocked(event{Op: opAdd, Trace: t}); err != nil {
		return nil, err
	}
	// Clone before returning so the caller can't mutate the in-memory
	// trace through the returned pointer (matches Get's contract).
	return cloneTrace(t), nil
}
// AddIdempotent stores a trace under the caller-chosen UID, OR, if
// that UID already exists, increments its ReplayCount and leaves the
// stored content and tags as they were (the supplied content and
// tags are ignored on a repeat). Used by agent loops that want to
// record "I tried this same thing again." Returns a copy of the
// stored trace.
//
// On first insert the content bytes are copied before being stored,
// exactly as the tags are. Previously the trace aliased the caller's
// slice, so a caller that reused its buffer afterwards would silently
// rewrite the stored trace.
//
// Returns ErrEmptyUID for an empty uid, ErrInvalidContent if content
// is not valid JSON, or the error from persisting/applying the event.
func (s *Store) AddIdempotent(uid string, content json.RawMessage, tags ...string) (*Trace, error) {
	if uid == "" {
		return nil, ErrEmptyUID
	}
	if !json.Valid(content) {
		return nil, ErrInvalidContent
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	if existing, ok := s.traces[uid]; ok {
		// Replay: increment count, persist as opReplay event.
		if err := s.appendAndApplyLocked(event{Op: opReplay, UID: uid}); err != nil {
			return nil, err
		}
		// Return a copy to avoid the caller mutating the in-memory
		// trace through the returned pointer.
		return cloneTrace(existing), nil
	}
	now := s.nowFn()
	t := &Trace{
		UID:         uid,
		Content:     append(json.RawMessage(nil), content...), // detach from caller's buffer
		CreatedAtNs: now,
		UpdatedAtNs: now,
		ReplayCount: 1,
		Tags:        copyTags(tags),
	}
	if err := s.appendAndApplyLocked(event{Op: opAdd, Trace: t}); err != nil {
		return nil, err
	}
	return cloneTrace(t), nil
}
// Update replaces the content of an existing trace. Same UID, new
// content. NOT a revision: use Revise when the new content
// represents a change-of-belief that should preserve the old.
//
// The content bytes are copied before being handed to the event, so
// the stored trace does not alias the caller's slice; a caller that
// reuses its buffer after Update returns can no longer rewrite the
// stored trace.
//
// Returns ErrEmptyUID for an empty uid, ErrInvalidContent if content
// is not valid JSON, ErrNotFound if the UID is unknown, or the error
// from persisting/applying the update event.
func (s *Store) Update(uid string, content json.RawMessage) error {
	if uid == "" {
		return ErrEmptyUID
	}
	if !json.Valid(content) {
		return ErrInvalidContent
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	if _, ok := s.traces[uid]; !ok {
		return ErrNotFound
	}
	stored := append(json.RawMessage(nil), content...) // detach from caller's buffer
	return s.appendAndApplyLocked(event{Op: opUpdate, UID: uid, Content: stored})
}
// Revise creates a new trace, under a freshly generated UID, whose
// PredecessorUID points at an existing trace. The old trace is left
// in place and stays accessible via Get and History. Returns a copy
// of the new trace.
//
// The content bytes are copied before being stored, exactly as the
// tags are. Previously the trace aliased the caller's slice, so a
// caller that reused its buffer after Revise returned would silently
// rewrite the stored trace.
//
// Returns ErrEmptyUID for an empty predecessorUID, ErrInvalidContent
// if content is not valid JSON, ErrPredecessorMissing if the
// predecessor is unknown, or the error from persisting/applying the
// revise event.
func (s *Store) Revise(predecessorUID string, content json.RawMessage, tags ...string) (*Trace, error) {
	if predecessorUID == "" {
		return nil, ErrEmptyUID
	}
	if !json.Valid(content) {
		return nil, ErrInvalidContent
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	if _, ok := s.traces[predecessorUID]; !ok {
		return nil, ErrPredecessorMissing
	}
	now := s.nowFn()
	t := &Trace{
		UID:            s.uidFn(),
		Content:        append(json.RawMessage(nil), content...), // detach from caller's buffer
		PredecessorUID: predecessorUID,
		CreatedAtNs:    now,
		UpdatedAtNs:    now,
		ReplayCount:    1,
		Tags:           copyTags(tags),
	}
	if err := s.appendAndApplyLocked(event{Op: opRevise, Trace: t}); err != nil {
		return nil, err
	}
	return cloneTrace(t), nil
}
// Retire flags the trace with the given UID as retired. Retired
// traces drop out of Search by default but remain reachable through
// Get and History. Returns ErrEmptyUID for an empty uid and
// ErrNotFound when no such trace exists.
func (s *Store) Retire(uid string) error {
	if uid == "" {
		return ErrEmptyUID
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	if _, known := s.traces[uid]; known {
		return s.appendAndApplyLocked(event{Op: opRetire, UID: uid})
	}
	return ErrNotFound
}
// Get looks a trace up by UID and returns a copy of it, or
// ErrNotFound. Retired traces are returned too; the caller decides
// what to do with them.
func (s *Store) Get(uid string) (*Trace, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if found, ok := s.traces[uid]; ok {
		return cloneTrace(found), nil
	}
	return nil, ErrNotFound
}
// History walks PredecessorUID links backwards from uid and returns
// copies of the traces met on the way: slot 0 is the queried trace,
// slot 1 its predecessor, and so on. An unknown starting UID yields
// ErrNotFound; a predecessor missing part-way through simply ends the
// chain. The walk is cycle-safe: meeting a UID twice returns ErrCycle
// (only possible if the persistence file was hand-edited or there is
// a bug elsewhere).
func (s *Store) History(uid string) ([]*Trace, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var lineage []*Trace
	seen := map[string]bool{}
	for next := uid; next != ""; {
		if seen[next] {
			return nil, ErrCycle
		}
		seen[next] = true

		node, found := s.traces[next]
		if !found {
			if len(lineage) == 0 {
				return nil, ErrNotFound
			}
			// Predecessor missing mid-chain: return what we have.
			break
		}
		lineage = append(lineage, cloneTrace(node))
		next = node.PredecessorUID
	}
	return lineage, nil
}
// Search returns copies of the traces matching filter, ordered by
// creation time (oldest first) with UID as the tie-breaker. Retired
// traces are excluded unless filter.IncludeRetired is set. The
// returned slice and traces are copies, so the caller can mutate
// them freely.
//
// A zero-valued filter field means "no constraint":
//   - Tag: trace must carry this tag.
//   - ContentContains: raw content bytes must contain this substring.
//   - CreatedAfterNs / CreatedBeforeNs: inclusive creation-time bounds.
//
// The explicit sort matters because traces live in a map: without it
// the same query returns its results in a different order on every
// call.
func (s *Store) Search(filter SearchFilter) []*Trace {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Convert the needle once rather than once per trace.
	var needle []byte
	if filter.ContentContains != "" {
		needle = []byte(filter.ContentContains)
	}

	var out []*Trace
	for _, t := range s.traces {
		if t.Retired && !filter.IncludeRetired {
			continue
		}
		if filter.Tag != "" && !containsTag(t.Tags, filter.Tag) {
			continue
		}
		if needle != nil && !bytes.Contains(t.Content, needle) {
			continue
		}
		if filter.CreatedAfterNs > 0 && t.CreatedAtNs < filter.CreatedAfterNs {
			continue
		}
		if filter.CreatedBeforeNs > 0 && t.CreatedAtNs > filter.CreatedBeforeNs {
			continue
		}
		out = append(out, cloneTrace(t))
	}

	sort.Slice(out, func(i, j int) bool {
		if out[i].CreatedAtNs != out[j].CreatedAtNs {
			return out[i].CreatedAtNs < out[j].CreatedAtNs
		}
		return out[i].UID < out[j].UID
	})
	return out
}
// Stats holds trace counts useful for /stats endpoints and operator
// dashboards: the total number of traces, and how many of them are
// active versus retired.
type Stats struct {
	Total   int
	Active  int
	Retired int
}

// Stats counts the traces currently held in memory, split into active
// and retired.
func (s *Store) Stats() Stats {
	s.mu.RLock()
	defer s.mu.RUnlock()

	retired := 0
	for _, t := range s.traces {
		if t.Retired {
			retired++
		}
	}
	total := len(s.traces)
	return Stats{Total: total, Active: total - retired, Retired: retired}
}
// appendAndApplyLocked is the one write path for the store. When a
// persistor is configured the event is appended to it first and only
// then applied in memory, so a crash mid-mutation cannot leave the
// in-memory state ahead of the log. A failed append aborts before any
// in-memory change. The caller must hold s.mu in write mode.
func (s *Store) appendAndApplyLocked(e event) error {
	if s.persistor == nil {
		return s.applyEventLocked(e)
	}
	if err := s.persistor.Append(e); err != nil {
		return err
	}
	return s.applyEventLocked(e)
}
// cloneTrace duplicates t, including fresh backing arrays for Content
// and Tags, so a caller holding the result cannot mutate the stored
// trace through it. Nil Content or Tags stay nil in the copy.
func cloneTrace(t *Trace) *Trace {
	dup := new(Trace)
	*dup = *t
	if body := t.Content; body != nil {
		dup.Content = append(json.RawMessage(nil), body...)
	}
	if labels := t.Tags; labels != nil {
		dup.Tags = append([]string(nil), labels...)
	}
	return dup
}
// copyTags returns an independent copy of in. A nil or empty input
// yields nil rather than an empty slice.
func copyTags(in []string) []string {
	if len(in) > 0 {
		return append(make([]string, 0, len(in)), in...)
	}
	return nil
}
// containsTag reports whether want appears in tags (exact string
// match).
func containsTag(tags []string, want string) bool {
	for i := 0; i < len(tags); i++ {
		if tags[i] == want {
			return true
		}
	}
	return false
}

View File

@ -1,398 +0,0 @@
package pathway
import (
"encoding/json"
"errors"
"path/filepath"
"strconv"
"strings"
"testing"
)
// Closes Sprint 2 design-bar work from the audit. Tests cover all 7
// claim rows from claim-coverage-table.md: ADD, UPDATE, REVISE,
// RETIRE, HISTORY chain cycle-safe, replay-count duplicate ADD,
// corrupted memory row recovery (corrupted_test.go).
// installDeterministicSources replaces the store's UID and clock
// sources with counters, so successive calls yield "uid-1", "uid-2", …
// and timestamps 1, 2, … . Tests can then assert on exact values.
// Both store constructors below share this wiring.
func installDeterministicSources(s *Store) {
	var counter int
	var clock int64
	s.uidFn = func() string {
		counter++
		return "uid-" + strconv.Itoa(counter)
	}
	s.nowFn = func() int64 {
		clock++
		return clock
	}
}

// newTestStore returns an in-memory Store (nil persistor) with
// deterministic UID + time generation for repeatable assertions.
func newTestStore(t *testing.T) *Store {
	t.Helper()
	s := NewStore(nil)
	installDeterministicSources(s)
	return s
}

// newPersistedStore returns a Store backed by a persistor writing to
// pathway.jsonl inside a per-test temp dir, with the same
// deterministic UID + time generation as newTestStore. The log path
// is returned so a test can reopen the file.
func newPersistedStore(t *testing.T) (*Store, string) {
	t.Helper()
	path := filepath.Join(t.TempDir(), "pathway.jsonl")
	p, err := NewPersistor(path)
	if err != nil {
		t.Fatalf("NewPersistor: %v", err)
	}
	s := NewStore(p)
	installDeterministicSources(s)
	return s, path
}
// ── Sprint 2 row 1: ADD a new pathway trace ────────────────────
// TestAdd_AssignsUIDAndTimestamps checks what Add fills in on a new
// trace: the generated UID, a replay count of 1, both timestamps and
// the supplied tags. A new trace must also start out active.
func TestAdd_AssignsUIDAndTimestamps(t *testing.T) {
	store := newTestStore(t)
	added, err := store.Add(json.RawMessage(`{"k":"v"}`), "tag-a")
	if err != nil {
		t.Fatalf("Add: %v", err)
	}
	if got := added.UID; got != "uid-1" {
		t.Errorf("UID = %q, want uid-1", got)
	}
	if got := added.ReplayCount; got != 1 {
		t.Errorf("ReplayCount = %d, want 1", got)
	}
	if added.Retired {
		t.Error("freshly-added trace should NOT be retired")
	}
	if added.CreatedAtNs == 0 || added.UpdatedAtNs == 0 {
		t.Error("timestamps unset")
	}
	if tags := added.Tags; len(tags) != 1 || tags[0] != "tag-a" {
		t.Errorf("Tags = %v, want [tag-a]", tags)
	}
}
// TestAdd_RejectsInvalidJSON checks that content which is not JSON is
// refused with ErrInvalidContent.
func TestAdd_RejectsInvalidJSON(t *testing.T) {
	store := newTestStore(t)
	if _, err := store.Add(json.RawMessage(`not json`)); !errors.Is(err, ErrInvalidContent) {
		t.Errorf("expected ErrInvalidContent, got %v", err)
	}
}
// ── Sprint 2 row 2: UPDATE replaces existing trace by uid ──────
// TestUpdate_ReplacesContentSameUID checks that Update swaps the
// content stored under an existing UID and bumps UpdatedAtNs. Setup
// and read-back errors are fatal so a failure there is reported as
// itself rather than as a nil-pointer panic further down.
func TestUpdate_ReplacesContentSameUID(t *testing.T) {
	s := newTestStore(t)
	tr, err := s.Add(json.RawMessage(`{"v":1}`))
	if err != nil {
		t.Fatalf("Add: %v", err)
	}
	if err := s.Update(tr.UID, json.RawMessage(`{"v":2}`)); err != nil {
		t.Fatalf("Update: %v", err)
	}
	got, err := s.Get(tr.UID)
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	if string(got.Content) != `{"v":2}` {
		t.Errorf("content = %s, want updated", got.Content)
	}
	if got.UpdatedAtNs == tr.UpdatedAtNs {
		t.Error("UpdatedAtNs should bump on Update")
	}
}
// TestUpdate_MissingUID_Errors checks that updating a UID the store
// has never seen fails with ErrNotFound.
func TestUpdate_MissingUID_Errors(t *testing.T) {
	store := newTestStore(t)
	if err := store.Update("nonexistent", json.RawMessage(`{}`)); !errors.Is(err, ErrNotFound) {
		t.Errorf("expected ErrNotFound, got %v", err)
	}
}
// ── Sprint 2 row 3: REVISE creates a new revision linked via history ──
// TestRevise_LinksToPredecessorViaHistory checks that Revise creates
// a trace under a new UID whose PredecessorUID points at the revised
// trace. A failed root Add is fatal; otherwise root would be
// dereferenced while nil.
func TestRevise_LinksToPredecessorViaHistory(t *testing.T) {
	s := newTestStore(t)
	root, err := s.Add(json.RawMessage(`{"v":1}`))
	if err != nil {
		t.Fatalf("Add: %v", err)
	}
	rev, err := s.Revise(root.UID, json.RawMessage(`{"v":2}`))
	if err != nil {
		t.Fatalf("Revise: %v", err)
	}
	if rev.PredecessorUID != root.UID {
		t.Errorf("PredecessorUID = %q, want %q", rev.PredecessorUID, root.UID)
	}
	if rev.UID == root.UID {
		t.Error("Revise must produce a NEW UID")
	}
}
// TestRevise_PredecessorMissing_Errors checks that revising a UID
// that does not exist fails with ErrPredecessorMissing.
func TestRevise_PredecessorMissing_Errors(t *testing.T) {
	store := newTestStore(t)
	if _, err := store.Revise("ghost-uid", json.RawMessage(`{}`)); !errors.Is(err, ErrPredecessorMissing) {
		t.Errorf("expected ErrPredecessorMissing, got %v", err)
	}
}
// TestRevise_ChainOfThree_BackwardWalk builds a -> b -> c through two
// revisions and checks that History(c) walks back newest-first as
// [c, b, a]. Each setup step is checked so a failure is reported where
// it happens instead of as a nil dereference on the next line.
func TestRevise_ChainOfThree_BackwardWalk(t *testing.T) {
	s := newTestStore(t)
	a, err := s.Add(json.RawMessage(`{"v":1}`))
	if err != nil {
		t.Fatalf("Add: %v", err)
	}
	b, err := s.Revise(a.UID, json.RawMessage(`{"v":2}`))
	if err != nil {
		t.Fatalf("Revise a: %v", err)
	}
	c, err := s.Revise(b.UID, json.RawMessage(`{"v":3}`))
	if err != nil {
		t.Fatalf("Revise b: %v", err)
	}
	chain, err := s.History(c.UID)
	if err != nil {
		t.Fatalf("History: %v", err)
	}
	want := []string{c.UID, b.UID, a.UID}
	if len(chain) != 3 {
		t.Fatalf("chain length = %d, want 3", len(chain))
	}
	for i, tr := range chain {
		if tr.UID != want[i] {
			t.Errorf("chain[%d].UID = %q, want %q", i, tr.UID, want[i])
		}
	}
}
// ── Sprint 2 row 4: RETIRE marks trace excluded from retrieval ──
// TestRetire_ExcludedFromSearch checks that a retired trace drops out
// of a default Search and comes back when IncludeRetired is set.
// Setup errors are fatal so a and b are never dereferenced while nil.
func TestRetire_ExcludedFromSearch(t *testing.T) {
	s := newTestStore(t)
	a, err := s.Add(json.RawMessage(`{"v":1}`), "common")
	if err != nil {
		t.Fatalf("Add a: %v", err)
	}
	b, err := s.Add(json.RawMessage(`{"v":2}`), "common")
	if err != nil {
		t.Fatalf("Add b: %v", err)
	}
	if err := s.Retire(a.UID); err != nil {
		t.Fatalf("Retire: %v", err)
	}
	results := s.Search(SearchFilter{Tag: "common"})
	if len(results) != 1 || results[0].UID != b.UID {
		t.Errorf("Search excluded retired? got %d results, want 1 (active only)", len(results))
	}
	// IncludeRetired flag returns both.
	withRetired := s.Search(SearchFilter{Tag: "common", IncludeRetired: true})
	if len(withRetired) != 2 {
		t.Errorf("IncludeRetired Search returned %d, want 2", len(withRetired))
	}
}
// TestRetire_StillAccessibleViaGet locks the ADR-004 contract:
// "Retired traces are excluded from Search by default but accessible
// via Get and History." The Retire call is checked here; an unnoticed
// failure would otherwise surface only as a confusing "Get should
// preserve retired flag".
func TestRetire_StillAccessibleViaGet(t *testing.T) {
	s := newTestStore(t)
	tr, err := s.Add(json.RawMessage(`{"v":1}`))
	if err != nil {
		t.Fatalf("Add: %v", err)
	}
	if err := s.Retire(tr.UID); err != nil {
		t.Fatalf("Retire: %v", err)
	}
	got, err := s.Get(tr.UID)
	if err != nil {
		t.Fatalf("retired trace Get: %v", err)
	}
	if !got.Retired {
		t.Error("Get should preserve retired flag")
	}
}
// TestRetire_StillAccessibleViaHistory checks that a retired
// predecessor still shows up in History with its Retired flag set.
// The length check is fatal because chain[1] is indexed right after
// it; a short chain would otherwise panic instead of failing cleanly.
func TestRetire_StillAccessibleViaHistory(t *testing.T) {
	s := newTestStore(t)
	a, err := s.Add(json.RawMessage(`{"v":1}`))
	if err != nil {
		t.Fatalf("Add: %v", err)
	}
	b, err := s.Revise(a.UID, json.RawMessage(`{"v":2}`))
	if err != nil {
		t.Fatalf("Revise: %v", err)
	}
	if err := s.Retire(a.UID); err != nil {
		t.Fatalf("Retire: %v", err)
	}
	chain, err := s.History(b.UID)
	if err != nil {
		t.Fatalf("History: %v", err)
	}
	if len(chain) != 2 {
		t.Fatalf("chain length = %d, want 2 (revision + retired root)", len(chain))
	}
	if !chain[1].Retired {
		t.Error("retired predecessor should still appear in History with Retired=true")
	}
}
// ── Sprint 2 row 5: HISTORY chain is cycle-safe ────────────────
// TestHistory_CycleDetected plants a two-node loop (A <-> B) straight
// into the store's internal map and expects History to refuse it with
// ErrCycle. The public API mints a new UID on every Revise, so a loop
// can only come from corruption.
func TestHistory_CycleDetected(t *testing.T) {
	store := newTestStore(t)
	for uid, pred := range map[string]string{"A": "B", "B": "A"} {
		store.traces[uid] = &Trace{UID: uid, PredecessorUID: pred}
	}
	if _, err := store.History("A"); !errors.Is(err, ErrCycle) {
		t.Errorf("expected ErrCycle, got %v", err)
	}
}
// TestHistory_PredecessorMissing_TruncatesChain checks that a
// dangling PredecessorUID ends the walk quietly: History returns the
// part of the chain it could resolve and no error.
func TestHistory_PredecessorMissing_TruncatesChain(t *testing.T) {
	store := newTestStore(t)
	store.traces["X"] = &Trace{UID: "X", PredecessorUID: "ghost"}
	chain, err := store.History("X")
	if err != nil {
		t.Fatalf("History on partial chain: %v", err)
	}
	if got := len(chain); got != 1 {
		t.Errorf("partial chain returned %d, want 1 (truncate at missing predecessor)", got)
	}
}
// TestHistory_UnknownUID_ErrorsClean checks that asking for the
// history of a UID the store does not hold yields ErrNotFound.
func TestHistory_UnknownUID_ErrorsClean(t *testing.T) {
	store := newTestStore(t)
	if _, err := store.History("nope"); !errors.Is(err, ErrNotFound) {
		t.Errorf("expected ErrNotFound, got %v", err)
	}
}
// ── Sprint 2 row 6: replay_count increments on duplicate ADD ───
// TestAddIdempotent_IncrementsReplayCount adds the same caller-chosen
// UID twice. The second call must bump ReplayCount to 2 and leave the
// first call's content in place instead of overwriting it.
func TestAddIdempotent_IncrementsReplayCount(t *testing.T) {
	store := newTestStore(t)
	const uid = "custom-uid"

	first, err := store.AddIdempotent(uid, json.RawMessage(`{"v":1}`))
	if err != nil {
		t.Fatalf("first AddIdempotent: %v", err)
	}
	if got := first.ReplayCount; got != 1 {
		t.Errorf("first ReplayCount = %d, want 1", got)
	}

	replayed, err := store.AddIdempotent(uid, json.RawMessage(`{"v":"different"}`))
	if err != nil {
		t.Fatalf("second AddIdempotent: %v", err)
	}
	if got := replayed.ReplayCount; got != 2 {
		t.Errorf("after second add, ReplayCount = %d, want 2", got)
	}

	// Original content preserved (replay does NOT overwrite).
	body := string(replayed.Content)
	if !(strings.Contains(body, "v") && strings.Contains(body, "1")) {
		t.Errorf("replay should preserve original content, got %s", replayed.Content)
	}
}
// TestAddIdempotent_RejectsEmptyUID checks that an empty
// caller-supplied UID is refused with ErrEmptyUID.
func TestAddIdempotent_RejectsEmptyUID(t *testing.T) {
	store := newTestStore(t)
	if _, err := store.AddIdempotent("", json.RawMessage(`{}`)); !errors.Is(err, ErrEmptyUID) {
		t.Errorf("expected ErrEmptyUID, got %v", err)
	}
}
// ── Sprint 2 row 7: corrupted memory row recovery ─────────────
// TestPersistor_RoundTrip writes three events (add, revise, retire)
// through a persisted store, then opens a second store on the same
// file and replays it. The replay must see 3 events and 2 traces, and
// the retired flag must survive. Every step is checked so a broken
// write or reopen is reported where it happens, not as a wrong count
// or a nil dereference later on.
func TestPersistor_RoundTrip(t *testing.T) {
	s, path := newPersistedStore(t)
	a, err := s.Add(json.RawMessage(`{"v":1}`), "alpha")
	if err != nil {
		t.Fatalf("Add: %v", err)
	}
	if _, err := s.Revise(a.UID, json.RawMessage(`{"v":2}`), "alpha"); err != nil {
		t.Fatalf("Revise: %v", err)
	}
	if err := s.Retire(a.UID); err != nil {
		t.Fatalf("Retire: %v", err)
	}

	// Open fresh store against same file, replay.
	p, err := NewPersistor(path)
	if err != nil {
		t.Fatalf("NewPersistor (reopen): %v", err)
	}
	s2 := NewStore(p)
	n, err := s2.Load()
	if err != nil {
		t.Fatalf("Load: %v", err)
	}
	if n != 3 {
		t.Errorf("replayed %d events, want 3", n)
	}
	stats := s2.Stats()
	if stats.Total != 2 {
		t.Errorf("Stats.Total = %d, want 2", stats.Total)
	}
	if stats.Retired != 1 {
		t.Errorf("Stats.Retired = %d, want 1", stats.Retired)
	}
	got, err := s2.Get(a.UID)
	if err != nil {
		t.Fatalf("Get after reload: %v", err)
	}
	if !got.Retired {
		t.Error("retired flag lost across persistence round-trip")
	}
}
// ── Search filter coverage ─────────────────────────────────────
// TestSearch_TagFilter seeds three traces with overlapping tag sets
// and checks that filtering by Tag returns exactly the traces that
// carry the tag.
func TestSearch_TagFilter(t *testing.T) {
	store := newTestStore(t)
	seed := []struct {
		body string
		tags []string
	}{
		{`{"v":1}`, []string{"production"}},
		{`{"v":2}`, []string{"test"}},
		{`{"v":3}`, []string{"production", "edge"}},
	}
	for _, row := range seed {
		store.Add(json.RawMessage(row.body), row.tags...)
	}

	if got := len(store.Search(SearchFilter{Tag: "production"})); got != 2 {
		t.Errorf("tag=production returned %d, want 2", got)
	}
	if got := len(store.Search(SearchFilter{Tag: "edge"})); got != 1 {
		t.Errorf("tag=edge returned %d, want 1", got)
	}
}
// TestSearch_ContentContainsFilter checks the raw-substring filter:
// two of the three seeded documents mention "Chicago".
func TestSearch_ContentContainsFilter(t *testing.T) {
	store := newTestStore(t)
	for _, body := range []string{
		`{"role":"welder","city":"Chicago"}`,
		`{"role":"electrician","city":"Detroit"}`,
		`{"role":"safety","city":"Chicago"}`,
	} {
		store.Add(json.RawMessage(body))
	}
	hits := store.Search(SearchFilter{ContentContains: "Chicago"})
	if got := len(hits); got != 2 {
		t.Errorf("ContentContains=Chicago returned %d, want 2", got)
	}
}
// TestStats_TracksAllStates adds three traces, retires the first and
// checks that Stats reports 3 total, 2 active and 1 retired.
func TestStats_TracksAllStates(t *testing.T) {
	store := newTestStore(t)
	first, _ := store.Add(json.RawMessage(`{}`))
	for extra := 0; extra < 2; extra++ {
		store.Add(json.RawMessage(`{}`))
	}
	store.Retire(first.UID)

	snapshot := store.Stats()
	if got := snapshot.Total; got != 3 {
		t.Errorf("Total = %d, want 3", got)
	}
	if got := snapshot.Active; got != 2 {
		t.Errorf("Active = %d, want 2", got)
	}
	if got := snapshot.Retired; got != 1 {
		t.Errorf("Retired = %d, want 1", got)
	}
}
// ── Concurrency safety ────────────────────────────────────────
// TestStore_ConcurrentAdd fires N Adds from N goroutines, waits for
// all of them and checks that the store ends up with exactly N traces.
func TestStore_ConcurrentAdd(t *testing.T) {
	store := newTestStore(t)
	const N = 100
	finished := make(chan bool, N)
	for worker := 0; worker < N; worker++ {
		go func() {
			if _, err := store.Add(json.RawMessage(`{"x":1}`)); err != nil {
				t.Errorf("concurrent Add: %v", err)
			}
			finished <- true
		}()
	}
	for pending := N; pending > 0; pending-- {
		<-finished
	}
	if total := store.Stats().Total; total != N {
		t.Errorf("after %d concurrent Adds, Total = %d", N, total)
	}
}

View File

@ -1,89 +0,0 @@
// Package pathway implements Mem0-style versioned trace memory per
// ADR-004. Pathway memory is an append-only event log of opaque
// traces with Add / Update / Revise / Retire / History / Search
// operations. Persisted via JSONL (one event per line) with
// corruption recovery on load.
//
// Why this exists: agents need to remember what they tried and
// what worked. Mem0 is the lowest-common-denominator memory
// substrate; building on its surface means agent loops written
// against any Mem0-aware library work here. See feedback_meta_
// index_vision.md for the north-star learning-loop framing.
package pathway
import (
"encoding/json"
"errors"
)
// Trace is one entry in pathway memory. Content is opaque to the
// substrate — callers store whatever JSON shape they want; this
// layer just preserves and indexes it.
type Trace struct {
// UID is the key the Store files the trace under.
UID string `json:"uid"`
// Content is the caller's JSON payload, kept as raw bytes.
Content json.RawMessage `json:"content"`
// PredecessorUID links a revision to the trace it revised; History
// follows these links backward. Empty when there is no predecessor.
PredecessorUID string `json:"predecessor_uid,omitempty"`
// CreatedAtNs and UpdatedAtNs are timestamps taken from the store's
// clock (presumably Unix nanoseconds, going by the Ns suffix — confirm).
CreatedAtNs int64 `json:"created_at_ns"`
UpdatedAtNs int64 `json:"updated_at_ns"`
// Retired traces are skipped by Search unless the filter sets
// IncludeRetired; Get and History still return them.
Retired bool `json:"retired"`
// ReplayCount is 1 on a freshly added trace and goes up each time
// AddIdempotent is called again with the same UID.
ReplayCount int `json:"replay_count"`
// Tags are free-form labels matched by SearchFilter.Tag.
Tags []string `json:"tags,omitempty"`
}
// op is the wire-format kind tag for JSONL persistence. Internal
// to the package — operations exposed publicly are method calls
// on Store; the JSONL form is its own concern.
type op string
// The op values written to the "op" field of each JSONL event. The
// names mirror the Store operations (Add, Update, Revise, Retire);
// opReplay presumably records a repeated AddIdempotent — confirm
// against the event-apply code.
const (
opAdd op = "add"
opUpdate op = "update"
opRevise op = "revise"
opRetire op = "retire"
opReplay op = "replay"
)
// event is one line of the JSONL log. Trace is included for ops
// that introduce or replace a trace; UID alone suffices for retire
// and replay; Content alone suffices for update (reuses the
// existing trace's UID via the UID field).
type event struct {
// Op says which operation this line records.
Op op `json:"op"`
// Trace is omitted from the JSON when nil.
Trace *Trace `json:"trace,omitempty"`
// UID is omitted from the JSON when empty.
UID string `json:"uid,omitempty"`
// Content is omitted from the JSON when empty.
Content json.RawMessage `json:"content,omitempty"`
}
// Errors surfaced to callers. Sentinel-based so HTTP handlers (when
// cmd/pathwayd lands) can map to status codes via errors.Is.
var (
// ErrNotFound: the requested UID names no stored trace (Get, History, Update).
ErrNotFound = errors.New("pathway: trace not found")
// ErrAlreadyExists — NOTE(review): no call site is visible here; confirm where it is returned.
ErrAlreadyExists = errors.New("pathway: trace already exists")
// ErrPredecessorMissing: Revise was asked to revise a UID that does not exist.
ErrPredecessorMissing = errors.New("pathway: predecessor trace missing")
// ErrCycle: History reached the same UID twice while following PredecessorUID links.
ErrCycle = errors.New("pathway: history cycle detected")
// ErrEmptyUID: AddIdempotent was given an empty UID.
ErrEmptyUID = errors.New("pathway: empty uid")
// ErrInvalidContent: Add was given content that is not JSON.
ErrInvalidContent = errors.New("pathway: invalid content")
)
// SearchFilter narrows a Search to matching traces. Empty filter
// returns everything (excluding retired; flip IncludeRetired to
// override). All set fields are AND-combined.
type SearchFilter struct {
// Tag returns traces whose Tags slice contains this string.
// Empty = no tag filtering.
Tag string
// ContentContains returns traces whose Content contains this
// substring (treats Content as raw bytes; caller's contract
// for whether that's meaningful). Empty = no content filtering.
ContentContains string
// CreatedAfterNs returns traces with CreatedAtNs >= this value.
// Zero (or negative) = no lower bound.
CreatedAfterNs int64
// CreatedBeforeNs returns traces with CreatedAtNs <= this value.
// Zero (or negative) = no upper bound.
CreatedBeforeNs int64
// IncludeRetired flips the default "exclude retired" behavior.
IncludeRetired bool
}

View File

@ -26,10 +26,7 @@ type Config struct {
Queryd QuerydConfig `toml:"queryd"`
Vectord VectordConfig `toml:"vectord"`
Embedd EmbeddConfig `toml:"embedd"`
Pathwayd PathwaydConfig `toml:"pathwayd"`
Matrixd MatrixdConfig `toml:"matrixd"`
Observerd ObserverdConfig `toml:"observerd"`
S3 S3Config `toml:"s3"`
S3 S3Config `toml:"s3"`
Log LogConfig `toml:"log"`
Auth AuthConfig `toml:"auth"`
}
@ -53,20 +50,17 @@ type IngestConfig struct {
// GatewayConfig adds the upstream URLs the reverse proxy fronts.
// Each route family (/v1/storage, /v1/catalog, /v1/ingest, /v1/sql,
// /v1/vectors, /v1/embed, /v1/pathway, /v1/matrix, /v1/observer)
// has its own upstream so we can scale services independently or
// move them to different boxes without touching gateway code.
// /v1/vectors, /v1/embed) has its own upstream so we can scale
// services independently or move them to different boxes without
// touching gateway code.
type GatewayConfig struct {
Bind string `toml:"bind"`
StoragedURL string `toml:"storaged_url"`
CatalogdURL string `toml:"catalogd_url"`
IngestdURL string `toml:"ingestd_url"`
QuerydURL string `toml:"queryd_url"`
VectordURL string `toml:"vectord_url"`
EmbeddURL string `toml:"embedd_url"`
PathwaydURL string `toml:"pathwayd_url"`
MatrixdURL string `toml:"matrixd_url"`
ObserverdURL string `toml:"observerd_url"`
Bind string `toml:"bind"`
StoragedURL string `toml:"storaged_url"`
CatalogdURL string `toml:"catalogd_url"`
IngestdURL string `toml:"ingestd_url"`
QuerydURL string `toml:"queryd_url"`
VectordURL string `toml:"vectord_url"`
EmbeddURL string `toml:"embedd_url"`
}
// EmbeddConfig drives the embed service. ProviderURL points at the
@ -91,35 +85,6 @@ type VectordConfig struct {
StoragedURL string `toml:"storaged_url"`
}
// PathwaydConfig drives the pathway-memory service (cmd/pathwayd).
// PersistPath: file path to the JSONL log; empty = in-memory only
// (test/dev). Production sets a stable path under /var/lib/lakehouse
// or similar so traces survive restart.
type PathwaydConfig struct {
// Bind is presumably the host:port the service listens on;
// DefaultConfig sets it to "127.0.0.1:3217".
Bind string `toml:"bind"`
// PersistPath is left empty by DefaultConfig (in-memory only).
PersistPath string `toml:"persist_path"`
}
// MatrixdConfig drives the matrix-indexer service (cmd/matrixd).
// Per docs/SPEC.md §3.4: multi-corpus retrieve+merge over vectord
// with embed-via-embedd for query text. Both upstream URLs are
// required — matrixd has no in-process fallback.
type MatrixdConfig struct {
// Bind is presumably the host:port the service listens on;
// DefaultConfig sets it to "127.0.0.1:3218".
Bind string `toml:"bind"`
// EmbeddURL is the embedd upstream; DefaultConfig points it at
// "http://127.0.0.1:3216".
EmbeddURL string `toml:"embedd_url"`
// VectordURL is the vectord upstream; DefaultConfig points it at
// "http://127.0.0.1:3215".
VectordURL string `toml:"vectord_url"`
}
// ObserverdConfig drives the observer service (cmd/observerd).
// PersistPath: file path to the JSONL ops log; empty = in-memory
// only (test/dev). Production sets a stable path under
// /var/lib/lakehouse/observer/ops.jsonl so ops survive restart.
// Mirrors the PathwaydConfig pattern.
type ObserverdConfig struct {
// Bind is presumably the host:port the service listens on;
// DefaultConfig sets it to "127.0.0.1:3219".
Bind string `toml:"bind"`
// PersistPath is left empty by DefaultConfig (in-memory only).
PersistPath string `toml:"persist_path"`
}
// QuerydConfig adds queryd-specific knobs. queryd talks DuckDB
// directly to MinIO via DuckDB's httpfs extension (so no storaged
// URL needed), and reads the catalog over HTTP for view registration.
@ -196,9 +161,6 @@ func DefaultConfig() Config {
QuerydURL: "http://127.0.0.1:3214",
VectordURL: "http://127.0.0.1:3215",
EmbeddURL: "http://127.0.0.1:3216",
PathwaydURL: "http://127.0.0.1:3217",
MatrixdURL: "http://127.0.0.1:3218",
ObserverdURL: "http://127.0.0.1:3219",
},
Storaged: ServiceConfig{Bind: "127.0.0.1:3211"},
Catalogd: CatalogConfig{Bind: "127.0.0.1:3212", StoragedURL: "http://127.0.0.1:3211"},
@ -218,20 +180,6 @@ func DefaultConfig() Config {
DefaultModel: "nomic-embed-text",
CacheSize: 10_000, // ~30 MiB at d=768; set to 0 to disable
},
Pathwayd: PathwaydConfig{
Bind: "127.0.0.1:3217",
// PersistPath empty by default = in-memory only. Production
// sets to e.g. /var/lib/lakehouse/pathway/state.jsonl.
},
Matrixd: MatrixdConfig{
Bind: "127.0.0.1:3218",
EmbeddURL: "http://127.0.0.1:3216",
VectordURL: "http://127.0.0.1:3215",
},
Observerd: ObserverdConfig{
Bind: "127.0.0.1:3219",
// PersistPath empty by default = in-memory only.
},
Queryd: QuerydConfig{
Bind: "127.0.0.1:3214",
CatalogdURL: "http://127.0.0.1:3212",

View File

@ -1,104 +0,0 @@
package vectord
import (
"fmt"
"math/rand"
"testing"
)
// BenchmarkSingleAdd vs BenchmarkBatchAdd quantifies the lock-amortization
// win for the HTTP-batch shape. Same N items, same vectors; one path
// takes the lock N times, the other takes it once. Run with:
// go test ./internal/vectord/ -bench=. -benchmem -benchtime=1x
//
// Only the inserts are timed. Fixture generation is excluded with
// ResetTimer and the per-iteration index construction is bracketed by
// StopTimer/StartTimer, so setup cost does not dilute the comparison.
func BenchmarkSingleAdd(b *testing.B) {
	for _, n := range []int{16, 128, 1024} {
		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
			items := makeBatch(n, 768)
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				// Every iteration needs an empty index; building it is
				// setup, not the work under test.
				b.StopTimer()
				idx := mustIndex(b)
				b.StartTimer()
				for _, it := range items {
					if err := idx.Add(it.ID, it.Vector, it.Metadata); err != nil {
						b.Fatalf("Add: %v", err)
					}
				}
			}
		})
	}
}
// BenchmarkBatchAdd measures inserting N items with a single BatchAdd
// call into a fresh index, for N in {16, 128, 1024} at dimension 768.
// Only the BatchAdd call is timed: fixture generation is excluded with
// ResetTimer and the per-iteration index construction is bracketed by
// StopTimer/StartTimer.
func BenchmarkBatchAdd(b *testing.B) {
	for _, n := range []int{16, 128, 1024} {
		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
			items := makeBatch(n, 768)
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				// Every iteration needs an empty index; building it is
				// setup, not the work under test.
				b.StopTimer()
				idx := mustIndex(b)
				b.StartTimer()
				if err := idx.BatchAdd(items); err != nil {
					b.Fatalf("BatchAdd: %v", err)
				}
			}
		})
	}
}
// TestBatchAdd_IntraBatchDedup guards the 2026-04-29 scrum BLOCK:
// without dedup, coder/hnsw's "node not added" length-invariant
// panics when one batch carries the same ID twice. The contract is
// last-write-wins: the later vector for a repeated ID replaces the
// earlier one, and the index ends up with one node per distinct ID.
func TestBatchAdd_IntraBatchDedup(t *testing.T) {
	idx := mustIndex(t)
	batch := []BatchItem{
		{ID: "a", Vector: makeVec(768, 1)},
		{ID: "b", Vector: makeVec(768, 2)},
		{ID: "a", Vector: makeVec(768, 99)}, // duplicate — should win
	}
	if err := idx.BatchAdd(batch); err != nil {
		t.Fatalf("BatchAdd: %v", err)
	}
	if got := idx.Len(); got != 2 {
		t.Errorf("Len: want 2, got %d", got)
	}

	// "a" should hold the LATER vector (the 99 one), not the first.
	vec, _, found := idx.Lookup("a")
	if !found {
		t.Fatal("a not found")
	}
	if lead := vec[0]; lead != 99 {
		t.Errorf("last-write-wins: want vec[0]=99, got %v", lead)
	}
}
// makeVec builds a dim-length vector that is zero everywhere except
// slot 0 (val) and slot 1 (fixed at 1, so the norm is non-zero under
// cosine even when val is 0). dim must be at least 2.
func makeVec(dim int, val float32) []float32 {
	vec := make([]float32, dim)
	vec[0], vec[1] = val, 1
	return vec
}
// mustIndex builds a fresh 768-dimension cosine index named "bench"
// with the package's default M and EfSearch. A construction error
// fails the calling test or benchmark immediately.
func mustIndex(tb testing.TB) *Index {
	tb.Helper()
	params := IndexParams{
		Name:      "bench",
		Dimension: 768,
		M:         DefaultM,
		EfSearch:  DefaultEfSearch,
		Distance:  DistanceCosine,
	}
	built, err := NewIndex(params)
	if err != nil {
		tb.Fatalf("NewIndex: %v", err)
	}
	return built
}
// makeBatch produces n items with IDs "k-000000", "k-000001", … and
// dim-length vectors whose components are pseudo-random values in
// [-1, 1). The generator is seeded with n, so equal arguments always
// give the same batch.
func makeBatch(n, dim int) []BatchItem {
	rng := rand.New(rand.NewSource(int64(n)))
	batch := make([]BatchItem, 0, n)
	for i := 0; i < n; i++ {
		vec := make([]float32, 0, dim)
		for len(vec) < dim {
			vec = append(vec, rng.Float32()*2-1)
		}
		batch = append(batch, BatchItem{ID: fmt.Sprintf("k-%06d", i), Vector: vec})
	}
	return batch
}

View File

@ -225,106 +225,6 @@ func validateVector(vec []float32, distance string) error {
return nil
}
// BatchItem is one entry in a BatchAdd call. Same per-field
// contract as Add: ID + Vector required, Metadata follows
// upsert-style semantics (nil = leave existing alone).
type BatchItem struct {
// ID is the key of the node. If one batch carries the same ID more
// than once, BatchAdd keeps only the last occurrence.
ID string
// Vector is the embedding stored for ID; BatchAdd does not validate it.
Vector []float32
// Metadata is written to the index's metadata map only when non-nil.
Metadata json.RawMessage
}
// BatchAdd inserts items under a single write-lock with one variadic
// call into coder/hnsw's Graph.Add. Compared with a loop of single Add
// calls this cuts an HTTP batch from N lock acquisitions to 1, and
// from N library calls to 1.
//
// Contract: the caller MUST pre-validate the items (non-empty id,
// matching vector dimension, finite vector with non-zero norm under
// cosine). That validation lives in the HTTP handler so per-item
// error messages stay precise; nothing is re-checked here and the
// method always returns nil.
//
// Duplicate IDs inside one batch are collapsed first, last-write-wins
// (the later occurrence replaces the earlier one, like repeated map
// assignment). Without that step coder/hnsw's "node not added"
// length-invariant panics on the second occurrence. Caught by the
// 2026-04-29 cross-lineage scrum (Opus BLOCK).
func (i *Index) BatchAdd(items []BatchItem) error {
	if len(items) == 0 {
		return nil
	}
	if containsDuplicateID(items) {
		items = dedupBatchLastWins(items)
	}

	i.mu.Lock()
	defer i.mu.Unlock()

	// Evict IDs that are already indexed so the variadic Add below
	// never sees a re-add. Same library quirk as single Add: deleting
	// the last remaining node leaves layers[0] entryless, so when
	// Len()==1 the whole graph is reset instead.
	for pos := range items {
		id := items[pos].ID
		if _, present := i.g.Lookup(id); !present {
			continue
		}
		if i.g.Len() == 1 {
			i.resetGraphLocked()
			continue
		}
		i.g.Delete(id)
	}

	nodes := make([]hnsw.Node[string], 0, len(items))
	for _, it := range items {
		nodes = append(nodes, hnsw.MakeNode(it.ID, it.Vector))
	}
	i.g.Add(nodes...)

	// Upsert-style metadata: a nil Metadata leaves whatever is stored.
	for _, it := range items {
		if it.Metadata == nil {
			continue
		}
		i.meta[it.ID] = it.Metadata
	}
	return nil
}
// containsDuplicateID reports whether any ID occurs more than once in
// items. BatchAdd uses it as a cheap pre-check so the common no-dup
// batch skips building a deduplicated copy.
func containsDuplicateID(items []BatchItem) bool {
	ids := make(map[string]struct{}, len(items))
	for pos := range items {
		ids[items[pos].ID] = struct{}{}
		// With no repeats so far the set holds pos+1 IDs; a smaller
		// set means this ID was already present.
		if len(ids) != pos+1 {
			return true
		}
	}
	return false
}
// dedupBatchLastWins returns a new slice holding only the final
// occurrence of every ID, in the order those final occurrences appear
// in items. This is map-style "set X to A, then to B": B wins.
func dedupBatchLastWins(items []BatchItem) []BatchItem {
	// First pass: remember where each ID appears last.
	final := make(map[string]int, len(items))
	for pos := range items {
		final[items[pos].ID] = pos
	}
	// Second pass: keep an item only at that recorded position.
	kept := make([]BatchItem, 0, len(final))
	for pos := range items {
		if final[items[pos].ID] != pos {
			continue
		}
		kept = append(kept, items[pos])
	}
	return kept
}
// Delete removes id from the index. Returns true if present.
func (i *Index) Delete(id string) bool {
i.mu.Lock()

View File

@ -1,214 +0,0 @@
package workflow
// modes.go — adapters that wrap §3.4 capabilities + §3.5 drift +
// distillation scorer as workflow.Mode functions. Each mode follows
// the same glue pattern: marshal the generic input map through a
// typed struct (so workflow YAML schemas are self-documenting and
// validation errors are clear), call the underlying capability,
// return a generic output map.
//
// Pure modes (no I/O): MatrixRelevance, MatrixDowngrade,
// DistillationScore, DriftScorer.
//
// HTTP modes: MatrixSearch + PlaybookRecord — observerd talks to
// matrixd over HTTP since the search/record paths need vectord
// access. Constructed via factory funcs that take the matrixd base
// URL + an http.Client.
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/drift"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/matrix"
)
// ─── Pure-function wrappers ─────────────────────────────────────
// MatrixRelevance adapts matrix.FilterChunks to the workflow.Mode
// shape. Input:
//
//	{
//	  "focus": {"Path":"...", "Content":"...", ...},
//	  "chunks": [{"source":"...", "doc_id":"...", "text":"...", "score":0.8}, ...],
//	  "threshold": 0.3 # optional; default = matrix.DefaultRelevanceThreshold
//	}
//
// A threshold that is absent or exactly 0 is replaced by the default,
// so an explicit 0 cannot be requested through this mode.
//
// Output: {"kept":[...], "dropped":[...], "threshold":N, "total_in":N}.
// Input that does not decode into the request shape is returned as an
// error prefixed with "matrix.relevance:".
func MatrixRelevance(_ Context, input map[string]any) (map[string]any, error) {
	type request struct {
		Focus     matrix.FocusFile        `json:"focus"`
		Chunks    []matrix.CandidateChunk `json:"chunks"`
		Threshold float64                 `json:"threshold"`
	}
	var req request
	if err := remarshalInput(input, &req); err != nil {
		return nil, fmt.Errorf("matrix.relevance: %w", err)
	}
	if req.Threshold == 0 {
		req.Threshold = matrix.DefaultRelevanceThreshold
	}

	filtered := matrix.FilterChunks(req.Focus, req.Chunks, req.Threshold)
	out := make(map[string]any, 4)
	out["kept"] = filtered.Kept
	out["dropped"] = filtered.Dropped
	out["threshold"] = filtered.Threshold
	out["total_in"] = filtered.TotalIn
	return out, nil
}
// MatrixDowngrade adapts matrix.MaybeDowngrade to the workflow.Mode
// shape. Input:
//
//	{
//	  "mode": "codereview_lakehouse",
//	  "model": "x-ai/grok-4.1-fast",
//	  "forced_mode": false, # optional
//	  "force_full_override": false # optional
//	}
//
// "mode" and "model" are both required; leaving either empty is an
// error. Output carries the decision's "mode", "downgraded_from" and
// "reason".
func MatrixDowngrade(_ Context, input map[string]any) (map[string]any, error) {
	type request struct {
		Mode              string `json:"mode"`
		Model             string `json:"model"`
		ForcedMode        bool   `json:"forced_mode"`
		ForceFullOverride bool   `json:"force_full_override"`
	}
	var req request
	if err := remarshalInput(input, &req); err != nil {
		return nil, fmt.Errorf("matrix.downgrade: %w", err)
	}
	if !(req.Mode != "" && req.Model != "") {
		return nil, fmt.Errorf("matrix.downgrade: mode and model are required")
	}

	in := matrix.DowngradeInput{
		Mode:              req.Mode,
		Model:             req.Model,
		ForcedMode:        req.ForcedMode,
		ForceFullOverride: req.ForceFullOverride,
	}
	decision := matrix.MaybeDowngrade(in)
	out := map[string]any{}
	out["mode"] = decision.Mode
	out["downgraded_from"] = decision.DowngradedFrom
	out["reason"] = decision.Reason
	return out, nil
}
// DistillationScore adapts distillation.ScoreRecord to the
// workflow.Mode shape: it re-runs the scorer over one EvidenceRecord,
// which makes it usable as a workflow node that grades a
// freshly-produced evidence row.
//
// Input: a JSON EvidenceRecord under the key "record":
//
//	{"record": {"run_id":"...", "task_id":"...", ...}}
//
// record.run_id must be non-empty. Output has "category" (as a plain
// string), "reasons" and "sub_scores".
func DistillationScore(_ Context, input map[string]any) (map[string]any, error) {
	type request struct {
		Record distillation.EvidenceRecord `json:"record"`
	}
	var req request
	if err := remarshalInput(input, &req); err != nil {
		return nil, fmt.Errorf("distillation.score: %w", err)
	}
	if req.Record.RunID == "" {
		return nil, fmt.Errorf("distillation.score: record.run_id required")
	}

	scored := distillation.ScoreRecord(req.Record)
	out := make(map[string]any, 3)
	out["category"] = string(scored.Category)
	out["reasons"] = scored.Reasons
	out["sub_scores"] = scored.SubScores
	return out, nil
}
// DriftScorer wraps drift.ComputeScorerDrift. Input shape:
//
//	{
//	  "inputs": [
//	    {"record": {...EvidenceRecord...}, "persisted_category": "accepted"},
//	    ...
//	  ],
//	  "include_entries": false # optional, default false
//	}
//
// "inputs" must be non-empty.
//
// Output: the ScorerDriftReport, round-tripped through JSON into a
// generic map so its keys follow the report's JSON tags.
//
// Every error this mode returns is prefixed with "drift.scorer:" and
// wraps its cause, matching the other modes in this file.
func DriftScorer(_ Context, input map[string]any) (map[string]any, error) {
	var req struct {
		Inputs         []drift.ScorerDriftInput `json:"inputs"`
		IncludeEntries bool                     `json:"include_entries"`
	}
	if err := remarshalInput(input, &req); err != nil {
		return nil, fmt.Errorf("drift.scorer: %w", err)
	}
	if len(req.Inputs) == 0 {
		return nil, fmt.Errorf("drift.scorer: inputs must be non-empty")
	}
	report := drift.ComputeScorerDrift(req.Inputs, req.IncludeEntries)

	// The report is a typed struct; encode then decode it to obtain the
	// map[string]any a Mode has to return.
	bs, err := json.Marshal(report)
	if err != nil {
		return nil, fmt.Errorf("drift.scorer: encode report: %w", err)
	}
	var asMap map[string]any
	if err := json.Unmarshal(bs, &asMap); err != nil {
		return nil, fmt.Errorf("drift.scorer: decode report: %w", err)
	}
	return asMap, nil
}
// ─── HTTP-backed modes ──────────────────────────────────────────
// MatrixSearch returns a workflow.Mode bound to a matrixd base URL
// and HTTP client. Each invocation JSON-encodes the input map and
// POSTs it to matrixdURL + "/matrix/search", using ctx.Ctx as the
// request context.
//
// NOTE(review): the gateway exposes this route family as /v1/matrix/*,
// but only "/matrix/search" is appended here, so matrixdURL presumably
// has to carry any "/v1" prefix itself — confirm against the caller.
//
// A nil hc falls back to http.DefaultClient instead of panicking on
// first use.
//
// Input shape mirrors matrix.SearchRequest (see retrieve.go).
// Output is the response JSON decoded into a generic map.
//
// Errors are all prefixed with "matrix.search:". A non-200 reply is
// reported with its status code and up to 64 KiB of the response
// body; the cap keeps a misbehaving upstream from being buffered in
// full just to build an error message.
func MatrixSearch(matrixdURL string, hc *http.Client) Mode {
	const maxErrorBody = 64 << 10

	if hc == nil {
		hc = http.DefaultClient
	}
	return func(ctx Context, input map[string]any) (map[string]any, error) {
		bs, err := json.Marshal(input)
		if err != nil {
			return nil, fmt.Errorf("matrix.search: marshal: %w", err)
		}
		req, err := http.NewRequestWithContext(ctx.Ctx, http.MethodPost,
			matrixdURL+"/matrix/search", bytes.NewReader(bs))
		if err != nil {
			return nil, fmt.Errorf("matrix.search: build request: %w", err)
		}
		req.Header.Set("Content-Type", "application/json")
		resp, err := hc.Do(req)
		if err != nil {
			return nil, fmt.Errorf("matrix.search: %w", err)
		}
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			// Best effort: a failed read still reports the status code.
			body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
			return nil, fmt.Errorf("matrix.search: status %d: %s", resp.StatusCode, body)
		}
		var out map[string]any
		if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
			return nil, fmt.Errorf("matrix.search: decode: %w", err)
		}
		return out, nil
	}
}
// ─── Helpers ─────────────────────────────────────────────────────
// remarshalInput converts a generic input map into a typed value by
// encoding it to JSON and decoding that JSON into target (which must
// be a pointer). The typed struct's json tags and field types thus do
// the coercion and type checking, with no hand-written per-field
// code. Returns whichever of the two JSON steps fails first.
func remarshalInput(input map[string]any, target any) error {
	encoded, encErr := json.Marshal(input)
	if encErr == nil {
		return json.Unmarshal(encoded, target)
	}
	return encErr
}
// Blank assignment that keeps the "context" import in use. No other
// declaration in this file refers to the package by name: MatrixSearch
// passes ctx.Ctx through without spelling out its type. If a direct
// use of the package is added, this line can go.
var _ = context.Background

View File

@ -1,211 +0,0 @@
package workflow
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
// TestMatrixRelevance_FiltersAdjacencyPollution feeds the mode two
// candidate chunks for a focus file that defines Connector and only
// imports Registry. It checks that both chunks were counted and that
// the Connector chunk lands in "kept".
func TestMatrixRelevance_FiltersAdjacencyPollution(t *testing.T) {
	chunk := func(docID, text string, score float64) map[string]any {
		return map[string]any{
			"source": "lakehouse_symbols_v1",
			"doc_id": docID,
			"text":   text,
			"score":  score,
		}
	}
	focus := map[string]any{
		"Path":    "crates/queryd/src/db.go",
		"Content": "pub struct Connector {}\nuse catalogd::Registry;",
	}
	input := map[string]any{
		"focus": focus,
		"chunks": []any{
			chunk("symbol:queryd::struct::Connector", "Connector wraps the DuckDB handle.", 0.9),
			chunk("symbol:catalogd::struct::Registry", "Registry stores manifests. Used by ingestd.", 0.85),
		},
		"threshold": 0.3,
	}

	out, err := MatrixRelevance(Context{}, input)
	if err != nil {
		t.Fatalf("MatrixRelevance: %v", err)
	}
	if total := out["total_in"].(int); total != 2 {
		t.Errorf("total_in: want 2, got %v", out["total_in"])
	}
	// Connector should be in kept (path/symbol match), Registry in dropped (import-only).
	keptJSON, _ := json.Marshal(out["kept"])
	if !strings.Contains(string(keptJSON), "Connector") {
		t.Errorf("expected Connector in kept; kept=%s", keptJSON)
	}
}
// TestMatrixDowngrade_StrongModelDowngrades expects the lakehouse mode
// to be swapped for the isolation mode for this model, with the
// original mode reported under "downgraded_from".
func TestMatrixDowngrade_StrongModelDowngrades(t *testing.T) {
	request := map[string]any{
		"mode":  "codereview_lakehouse",
		"model": "x-ai/grok-4.1-fast",
	}
	got, err := MatrixDowngrade(Context{}, request)
	if err != nil {
		t.Fatalf("MatrixDowngrade: %v", err)
	}
	if mode := got["mode"]; mode != "codereview_isolation" {
		t.Errorf("strong model should downgrade; got mode=%v", mode)
	}
	if from := got["downgraded_from"]; from != "codereview_lakehouse" {
		t.Errorf("downgraded_from: %v", from)
	}
}
// TestMatrixDowngrade_WeakModelKept expects the requested lakehouse
// mode to come back unchanged for this model.
func TestMatrixDowngrade_WeakModelKept(t *testing.T) {
	request := map[string]any{
		"mode":  "codereview_lakehouse",
		"model": "qwen3.5:latest",
	}
	got, err := MatrixDowngrade(Context{}, request)
	if err != nil {
		t.Fatal(err)
	}
	if mode := got["mode"]; mode != "codereview_lakehouse" {
		t.Errorf("weak model should keep lakehouse; got %v", mode)
	}
}
// TestMatrixDowngrade_MissingFieldsError omits "model" from the input
// and expects MatrixDowngrade to return an error.
func TestMatrixDowngrade_MissingFieldsError(t *testing.T) {
	onlyMode := map[string]any{"mode": "codereview_lakehouse"}
	if _, err := MatrixDowngrade(Context{}, onlyMode); err != nil {
		return
	}
	t.Error("missing model should error")
}
// TestDistillationScore_ScrumReviewAccepted scores a scrum-review
// record carrying the accepted_on_attempt_1 marker and expects the
// "accepted" category with a first reason mentioning "first attempt".
func TestDistillationScore_ScrumReviewAccepted(t *testing.T) {
	provenance := map[string]any{
		"source_file": "data/_kb/scrum_reviews.jsonl",
		"sig_hash":    "abc",
		"recorded_at": "2026-04-29T12:00:01Z",
	}
	record := map[string]any{
		"run_id":          "r-1",
		"task_id":         "t-1",
		"timestamp":       "2026-04-29T12:00:00Z",
		"schema_version":  1,
		"provenance":      provenance,
		"success_markers": []any{"accepted_on_attempt_1"},
	}

	got, err := DistillationScore(Context{}, map[string]any{"record": record})
	if err != nil {
		t.Fatal(err)
	}
	if category := got["category"]; category != "accepted" {
		t.Errorf("scrum_review attempt_1: want accepted, got %v", category)
	}

	reasons, _ := got["reasons"].([]string)
	if len(reasons) > 0 && strings.Contains(reasons[0], "first attempt") {
		return
	}
	t.Errorf("reasons missing 'first attempt': %v", reasons)
}
// TestDistillationScore_RejectsEmptyRecord expects an error when the
// "record" map has no fields at all.
func TestDistillationScore_RejectsEmptyRecord(t *testing.T) {
	input := map[string]any{"record": map[string]any{}}
	if _, err := DistillationScore(Context{}, input); err != nil {
		return
	}
	t.Error("empty record should error")
}
// TestDriftScorer_AllMatchedReturnsZeroDrift submits one record whose
// persisted category is "accepted" and expects drifted=0, matched=1.
func TestDriftScorer_AllMatchedReturnsZeroDrift(t *testing.T) {
	record := map[string]any{
		"run_id": "r-1", "task_id": "t-1",
		"timestamp": "2026-04-29T12:00:00Z", "schema_version": 1,
		"provenance": map[string]any{
			"source_file": "data/_kb/scrum_reviews.jsonl",
			"sig_hash":    "x", "recorded_at": "2026-04-29T12:00:01Z",
		},
		"success_markers": []any{"accepted_on_attempt_1"},
	}
	entry := map[string]any{
		"Record":            record,
		"PersistedCategory": "accepted",
	}

	got, err := DriftScorer(Context{}, map[string]any{"inputs": []any{entry}})
	if err != nil {
		t.Fatal(err)
	}
	if drifted := got["drifted"].(float64); drifted != 0 {
		t.Errorf("no-drift case: drifted=%v", got["drifted"])
	}
	if matched := got["matched"].(float64); matched != 1 {
		t.Errorf("matched: want 1, got %v", got["matched"])
	}
}
// TestDriftScorer_RequiresInputs expects an error for an empty
// "inputs" list.
func TestDriftScorer_RequiresInputs(t *testing.T) {
	noInputs := map[string]any{"inputs": []any{}}
	if _, err := DriftScorer(Context{}, noInputs); err != nil {
		return
	}
	t.Error("empty inputs should error")
}
// TestMatrixSearch_HTTPFlow points the MatrixSearch mode at a fake
// matrixd that returns a canned single-result response, and checks
// that the decoded response map reaches the caller.
func TestMatrixSearch_HTTPFlow(t *testing.T) {
	// Fake matrixd that echoes a canned SearchResponse.
	mux := http.NewServeMux()
	mux.HandleFunc("/matrix/search", func(w http.ResponseWriter, r *http.Request) {
		var body map[string]any
		// Best-effort decode: a bad request body only leaves
		// received_corpora empty in the canned reply.
		_ = json.NewDecoder(r.Body).Decode(&body)
		w.Header().Set("Content-Type", "application/json")
		// Echo back deterministically with a synthesized result list.
		_ = json.NewEncoder(w).Encode(map[string]any{
			"results": []any{
				map[string]any{"id": "w-1", "distance": 0.1, "corpus": "workers"},
			},
			"per_corpus_counts": map[string]any{"workers": 1},
			"received_corpora":  body["corpora"], // for round-trip verification
		})
	})
	srv := httptest.NewServer(mux)
	defer srv.Close()

	mode := MatrixSearch(srv.URL, srv.Client())
	out, err := mode(
		Context{Ctx: context.Background()},
		map[string]any{
			"query_text": "forklift",
			"corpora":    []any{"workers"},
			"k":          5,
		},
	)
	if err != nil {
		t.Fatalf("MatrixSearch: %v", err)
	}

	// Fatalf, not Errorf: indexing results[0] below would panic on an
	// empty or wrongly-typed result list.
	results, ok := out["results"].([]any)
	if !ok || len(results) != 1 {
		t.Fatalf("results: %v", out["results"])
	}
	// A first element of the wrong type is a failure, not a silent pass.
	first, ok := results[0].(map[string]any)
	if !ok {
		t.Fatalf("results[0]: want map[string]any, got %T", results[0])
	}
	if first["id"] != "w-1" {
		t.Errorf("id: %v", first["id"])
	}
}
// TestMatrixSearch_NonOKStatusErrors has the fake upstream answer 502
// and expects the mode to return an error that mentions the status.
func TestMatrixSearch_NonOKStatusErrors(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Error(w, "matrixd is down", http.StatusBadGateway)
	}))
	defer srv.Close()

	mode := MatrixSearch(srv.URL, srv.Client())
	_, err := mode(Context{Ctx: context.Background()}, map[string]any{})
	if err == nil {
		// Fatal, not Error: err.Error() below would dereference nil.
		t.Fatal("502 should error")
	}
	if !strings.Contains(err.Error(), "502") {
		t.Errorf("error should mention 502: %v", err)
	}
}

View File

@ -1,389 +0,0 @@
package workflow
import (
	"context"
	"encoding/json"
	"fmt"
	"regexp"
	"strings"
	"time"
)
// Runner executes Workflows by dispatching each node to the Mode
// registered under that node's mode name. Modes are meant to be
// registered up-front (at startup) and the Runner then reused for
// repeated Run calls.
//
// NOTE(review): modes is a plain map with no lock in this type, so
// RegisterMode racing with Run looks unsafe — confirm callers only
// register before running.
type Runner struct {
// modes maps a mode name to its implementation.
modes map[string]Mode
}
// NewRunner builds a Runner with an empty, ready-to-use mode
// registry. Populate it with RegisterMode.
func NewRunner() *Runner {
	registry := map[string]Mode{}
	return &Runner{modes: registry}
}
// RegisterMode adds a capability under the given name. Re-registering
// the same name overwrites — useful for tests that want to replace a
// mode with a stub. In production, register-once-at-startup is the
// expected pattern.
//
// A zero-value Runner (one not built by NewRunner) is accepted: the
// registry map is allocated on first use instead of panicking on a
// write to a nil map.
func (r *Runner) RegisterMode(name string, mode Mode) {
	if r.modes == nil {
		r.modes = make(map[string]Mode)
	}
	r.modes[name] = mode
}
// Modes lists the names of every registered mode, e.g. for a
// /v1/observer/workflow/modes-style discovery endpoint. The names come
// from ranging over a map, so their order is unspecified.
func (r *Runner) Modes() []string {
	names := make([]string, len(r.modes))
	next := 0
	for name := range r.modes {
		names[next] = name
		next++
	}
	return names
}
// Run executes a workflow: it validates the structure, orders the
// nodes topologically, then runs each node's mode with $-references
// in its inputs resolved against earlier results. One NodeResult is
// recorded per node, in execution order.
//
// Validation failures, cycles and unregistered mode names abort before
// any node runs: the result carries StatusAborted and the error is
// returned. Once execution starts the returned error is always nil.
// A node whose reference resolution or mode call fails is recorded
// with its error text, and every node depending on a failed or skipped
// node is itself recorded as skipped with an explanatory Error, so the
// cascade is visible. Independent nodes still run. Any failure or skip
// yields StatusPartial, otherwise StatusSucceeded.
func (r *Runner) Run(ctx context.Context, w Workflow) (RunResult, error) {
	// abort builds the early-exit result shared by all pre-flight failures.
	abort := func(cause error) (RunResult, error) {
		return RunResult{
			Workflow:  w.Name,
			Status:    StatusAborted,
			StartedAt: time.Now(),
		}, cause
	}

	if err := w.Validate(); err != nil {
		return abort(err)
	}
	sequence, err := topoSort(w.Nodes)
	if err != nil {
		return abort(err)
	}
	// Reject unknown mode names before running anything, so a typo
	// surfaces immediately rather than after earlier nodes have run.
	for _, candidate := range w.Nodes {
		name := effectiveMode(candidate)
		if _, registered := r.modes[name]; !registered {
			return abort(fmt.Errorf("%w: %q (node %q)", ErrUnknownMode, name, candidate.ID))
		}
	}

	runStart := time.Now()
	byID := make(map[string]NodeResult, len(w.Nodes))
	inOrder := make([]NodeResult, 0, len(w.Nodes))
	failed := make(map[string]bool)  // nodes whose own execution errored
	skipped := make(map[string]bool) // nodes not run because of an upstream problem
	record := func(res NodeResult) {
		byID[res.NodeID] = res
		inOrder = append(inOrder, res)
	}

	for _, id := range sequence {
		node := findNode(w.Nodes, id)
		modeName := effectiveMode(node)

		// The first failed-or-skipped dependency decides the skip reason.
		reason := ""
		for _, dep := range node.DependsOn {
			if failed[dep] {
				reason = fmt.Sprintf("upstream node %q failed", dep)
			} else if skipped[dep] {
				reason = fmt.Sprintf("upstream node %q was skipped", dep)
			}
			if reason != "" {
				break
			}
		}
		if reason != "" {
			record(NodeResult{
				NodeID:    node.ID,
				Mode:      modeName,
				Error:     reason,
				StartedAt: time.Now(),
			})
			skipped[node.ID] = true
			continue
		}

		began := time.Now()
		input, refErr := buildInput(node, byID)
		if refErr != nil {
			record(NodeResult{
				NodeID:     node.ID,
				Mode:       modeName,
				Error:      refErr.Error(),
				StartedAt:  began,
				DurationMs: time.Since(began).Milliseconds(),
			})
			failed[node.ID] = true
			continue
		}

		// The lookup cannot miss: every mode name was checked above.
		output, runErr := r.modes[modeName](Context{
			Ctx:          ctx,
			WorkflowName: w.Name,
			NodeID:       node.ID,
			Provider:     w.Provider,
			Model:        w.Model,
		}, input)
		outcome := NodeResult{
			NodeID:     node.ID,
			Mode:       modeName,
			Output:     output,
			StartedAt:  began,
			DurationMs: time.Since(began).Milliseconds(),
		}
		if runErr != nil {
			outcome.Error = runErr.Error()
			failed[node.ID] = true
		}
		record(outcome)
	}

	status := StatusPartial
	if len(failed) == 0 && len(skipped) == 0 {
		status = StatusSucceeded
	}
	return RunResult{
		Workflow:   w.Name,
		Status:     status,
		Nodes:      inOrder,
		StartedAt:  runStart,
		DurationMs: time.Since(runStart).Milliseconds(),
	}, nil
}
// effectiveMode picks the mode name a node dispatches to: the node's
// own Mode when present, otherwise the implicit "llm.chat" default.
func effectiveMode(n Node) string {
	const implicitMode = "llm.chat"
	if n.Mode == "" {
		return implicitMode
	}
	return n.Mode
}
// findNode does a linear scan for the node with the given id and
// returns a copy of it. If no node matches, the zero Node comes back;
// Run only calls this with ids taken from the same node list.
func findNode(nodes []Node, id string) Node {
	for i := range nodes {
		if nodes[i].ID == id {
			return nodes[i]
		}
	}
	var none Node
	return none
}
// ─── Input building + reference substitution ────────────────────
// buildInput assembles the input map handed to a node's mode. Every
// entry of node.Inputs is copied with $-references resolved (nested
// maps and slices included). When node.Prompt is non-empty it is
// added under the "prompt" key after the same substitution, replacing
// any "prompt" entry that came from Inputs.
//
// Reference syntax: $node_id.output.key resolves to that key of the
// earlier node's output map; $node_id.output resolves to the whole
// output map. The first resolution error aborts the build and is
// returned with a nil map.
func buildInput(node Node, results map[string]NodeResult) (map[string]any, error) {
	input := make(map[string]any, len(node.Inputs)+1)
	for key, raw := range node.Inputs {
		value, err := resolveRefs(raw, results)
		if err != nil {
			return nil, err
		}
		input[key] = value
	}
	if node.Prompt == "" {
		return input, nil
	}

	prompt, err := substituteStringRefs(node.Prompt, results)
	if err != nil {
		return nil, err
	}
	input["prompt"] = prompt
	return input, nil
}
// resolveRefs returns a copy of v with $-references substituted in
// every string it contains. Strings are substituted directly;
// map[string]any and []any values are rebuilt recursively; anything
// else (numbers, bools, nil, other types) is returned as-is.
func resolveRefs(v any, results map[string]NodeResult) (any, error) {
	if text, isString := v.(string); isString {
		return substituteStringRefs(text, results)
	}

	if fields, isMap := v.(map[string]any); isMap {
		resolved := make(map[string]any, len(fields))
		for key, elem := range fields {
			value, err := resolveRefs(elem, results)
			if err != nil {
				return nil, err
			}
			resolved[key] = value
		}
		return resolved, nil
	}

	if items, isList := v.([]any); isList {
		resolved := make([]any, 0, len(items))
		for _, elem := range items {
			value, err := resolveRefs(elem, results)
			if err != nil {
				return nil, err
			}
			resolved = append(resolved, value)
		}
		return resolved, nil
	}

	return v, nil
}
// refRe matches a "$" followed by an identifier and then zero or more
// ".identifier" segments, e.g. $node_id or $node_id.output.key.
// Captures: 1=node_id, 2=the dotted suffix including its leading "."
// (empty when there is none). The pattern itself does not require the
// suffix to start with ".output"; walkPath rejects other prefixes.
var refRe = regexp.MustCompile(`\$([a-zA-Z_][a-zA-Z0-9_]*)((?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)`)
// substituteStringRefs expands every $node.output.key reference found
// in s, using stringifyValue to render the referenced value so the
// result is always a string.
//
// The first reference that cannot be resolved stops substitution: the
// text from that reference onward is kept verbatim, and the partially
// substituted string is returned together with an error wrapping
// ErrUnresolvedRef.
func substituteStringRefs(s string, results map[string]NodeResult) (string, error) {
	var b strings.Builder
	consumed := 0 // end offset of the last match already written to b

	for _, loc := range refRe.FindAllStringSubmatchIndex(s, -1) {
		// loc holds [matchStart, matchEnd, g1Start, g1End, g2Start, g2End].
		nodeID := s[loc[2]:loc[3]]
		suffix := s[loc[4]:loc[5]]

		var (
			replacement string
			failure     error
		)
		if nodeRes, known := results[nodeID]; !known {
			failure = fmt.Errorf("%w: $%s (no such node, or node not yet run)", ErrUnresolvedRef, nodeID)
		} else if val, err := walkPath(nodeRes.Output, strings.TrimPrefix(suffix, ".")); err != nil {
			failure = fmt.Errorf("%w: $%s — %v", ErrUnresolvedRef, nodeID+suffix, err)
		} else {
			replacement = stringifyValue(val)
		}

		if failure != nil {
			// Leave the failing reference and everything after it untouched.
			b.WriteString(s[consumed:])
			return b.String(), failure
		}
		b.WriteString(s[consumed:loc[0]])
		b.WriteString(replacement)
		consumed = loc[1]
	}

	b.WriteString(s[consumed:])
	return b.String(), nil
}
// walkPath follows a dotted path into a node's output map. An empty
// path yields the whole map. Otherwise the first segment must be the
// literal "output", and each remaining segment is looked up as a key
// in the current value, which must be a map[string]any at that point.
// A wrong first segment, a non-map value mid-path, or a missing key
// each produce an error.
func walkPath(output map[string]any, path string) (any, error) {
	if len(path) == 0 {
		return output, nil // bare $node — the entire output map
	}

	segments := strings.Split(path, ".")
	if head := segments[0]; head != "output" {
		return nil, fmt.Errorf("path must start with .output (got %q)", head)
	}

	var current any = output
	for i := 1; i < len(segments); i++ {
		key := segments[i]
		asMap, isMap := current.(map[string]any)
		if !isMap {
			return nil, fmt.Errorf("cannot traverse into %T at segment %q", current, key)
		}
		next, found := asMap[key]
		if !found {
			return nil, fmt.Errorf("key %q not found in output", key)
		}
		current = next
	}
	return current, nil
}
// stringifyValue renders a referenced value as text for inline
// substitution into a prompt or input string.
//
//   - a string is returned unchanged
//   - nil becomes the empty string
//   - JSON-shaped containers (map[string]any, []any, []string,
//     []map[string]any) are rendered as compact JSON, matching the
//     "JSON-stringified" contract documented on the reference syntax;
//     HTML escaping is disabled so "<", ">" and "&" stay readable
//   - every other value is rendered with fmt.Sprint
//
// If JSON encoding of a container fails, the fmt.Sprint form is used
// instead, so this function never fails.
func stringifyValue(v any) string {
	switch x := v.(type) {
	case string:
		return x
	case nil:
		return ""
	case map[string]any, []any, []string, []map[string]any:
		var buf strings.Builder
		enc := json.NewEncoder(&buf)
		enc.SetEscapeHTML(false)
		if err := enc.Encode(x); err != nil {
			return fmt.Sprint(x)
		}
		// Encode appends a newline after the value; drop it.
		return strings.TrimSuffix(buf.String(), "\n")
	default:
		return fmt.Sprint(x)
	}
}
// ─── DAG resolution ──────────────────────────────────────────────
// topoSort returns node IDs ordered so that every dependency precedes
// its dependents (Kahn's algorithm). Nodes that are ready at the same
// time keep their declaration order, so the result is deterministic.
//
// If not every node can be ordered, an error wrapping ErrCycle is
// returned. It names the first node, in declaration order, that still
// has unsatisfied dependencies. That node is on a cycle or downstream
// of one, or depends on an id that is not in nodes. The scan runs over
// the node slice rather than the indegree map so the same input always
// produces the same message.
func topoSort(nodes []Node) ([]string, error) {
	indeg := make(map[string]int, len(nodes))
	dependents := make(map[string][]string, len(nodes))
	for _, n := range nodes {
		if _, ok := indeg[n.ID]; !ok {
			indeg[n.ID] = 0
		}
		for _, dep := range n.DependsOn {
			dependents[dep] = append(dependents[dep], n.ID)
			indeg[n.ID]++
		}
	}

	// Seed the queue in declaration order with every dependency-free node.
	queue := make([]string, 0, len(nodes))
	for _, n := range nodes {
		if indeg[n.ID] == 0 {
			queue = append(queue, n.ID)
		}
	}

	out := make([]string, 0, len(nodes))
	for len(queue) > 0 {
		cur := queue[0]
		queue = queue[1:]
		out = append(out, cur)
		for _, child := range dependents[cur] {
			indeg[child]--
			if indeg[child] == 0 {
				queue = append(queue, child)
			}
		}
	}

	if len(out) != len(nodes) {
		for _, n := range nodes {
			if indeg[n.ID] > 0 {
				return nil, fmt.Errorf("%w: starting at node %q", ErrCycle, n.ID)
			}
		}
		return nil, ErrCycle
	}
	return out, nil
}
// detectCycle reports whether the node graph contains a dependency
// cycle and, if so, the ID of a node that lies on one.
//
// topoSort decides whether there is a problem at all, so the boolean
// is true exactly when topoSort fails. A depth-first walk along
// DependsOn edges then locates a node that is reached again while
// still on the current path, i.e. a node on a cycle. If topoSort
// failed but the walk finds no cycle, a placeholder string is returned
// in place of an ID.
func detectCycle(nodes []Node) (string, bool) {
	if _, err := topoSort(nodes); err == nil {
		return "", false
	}

	deps := make(map[string][]string, len(nodes))
	for _, n := range nodes {
		deps[n.ID] = n.DependsOn
	}

	const (
		unvisited = iota
		onPath    // currently on the DFS stack
		finished  // fully explored, known cycle-free
	)
	state := make(map[string]int, len(nodes))

	var visit func(id string) (string, bool)
	visit = func(id string) (string, bool) {
		switch state[id] {
		case onPath:
			return id, true // back-edge: id is on a cycle
		case finished:
			return "", false
		}
		state[id] = onPath
		for _, dep := range deps[id] {
			if hit, found := visit(dep); found {
				return hit, true
			}
		}
		state[id] = finished
		return "", false
	}

	// Walk in declaration order so the reported node is deterministic.
	for _, n := range nodes {
		if id, found := visit(n.ID); found {
			return id, true
		}
	}
	return "(see runner error for details)", true
}

View File

@ -1,284 +0,0 @@
package workflow
import (
"context"
"errors"
"fmt"
"strings"
"testing"
)
// fixtureEcho hands back a shallow copy of its input map, which lets
// tests exercise the runner without any external dependency.
func fixtureEcho(_ Context, input map[string]any) (map[string]any, error) {
	echoed := make(map[string]any, len(input))
	for key := range input {
		echoed[key] = input[key]
	}
	return echoed, nil
}
// fixtureFail is a mode that never succeeds; tests use it to trigger
// the skip-on-failed-dependency path.
func fixtureFail(_ Context, _ map[string]any) (map[string]any, error) {
	failure := errors.New("fixture: intentional failure")
	return nil, failure
}
// fixtureUpper upper-cases input["prompt"] and returns it under the
// "upper" key. A missing or non-string prompt is treated as "".
func fixtureUpper(_ Context, input map[string]any) (map[string]any, error) {
	var prompt string
	if text, isString := input["prompt"].(string); isString {
		prompt = text
	}
	result := map[string]any{}
	result["upper"] = strings.ToUpper(prompt)
	return result, nil
}
// newTestRunner returns a Runner with the three fixture modes
// (fixture.echo, fixture.fail, fixture.upper) registered.
func newTestRunner() *Runner {
	runner := NewRunner()
	fixtures := []struct {
		name string
		mode Mode
	}{
		{"fixture.echo", fixtureEcho},
		{"fixture.fail", fixtureFail},
		{"fixture.upper", fixtureUpper},
	}
	for _, f := range fixtures {
		runner.RegisterMode(f.name, f.mode)
	}
	return runner
}
// TestValidate_RequiresName expects Validate to reject a workflow
// whose Name is empty.
func TestValidate_RequiresName(t *testing.T) {
	nameless := Workflow{Nodes: []Node{{ID: "a", Mode: "fixture.echo"}}}
	err := nameless.Validate()
	if err != nil {
		return
	}
	t.Error("empty name should fail validation")
}
// TestValidate_RequiresNodes expects Validate to reject a workflow
// that declares no nodes.
func TestValidate_RequiresNodes(t *testing.T) {
	empty := Workflow{Name: "x"}
	err := empty.Validate()
	if err != nil {
		return
	}
	t.Error("empty nodes should fail validation")
}
func TestValidate_DuplicateNodeID(t *testing.T) {
w := Workflow{Name: "x", Nodes: []Node{
{ID: "a", Mode: "fixture.echo"},
{ID: "a", Mode: "fixture.echo"},
}}
if err := w.Validate(); !errors.Is(err, ErrDuplicateNodeID) {
t.Errorf("want ErrDuplicateNodeID, got %v", err)
}
}
func TestValidate_MissingDep(t *testing.T) {
w := Workflow{Name: "x", Nodes: []Node{
{ID: "a", Mode: "fixture.echo", DependsOn: []string{"ghost"}},
}}
if err := w.Validate(); !errors.Is(err, ErrMissingDep) {
t.Errorf("want ErrMissingDep, got %v", err)
}
}
func TestValidate_DetectsCycle(t *testing.T) {
w := Workflow{Name: "x", Nodes: []Node{
{ID: "a", Mode: "fixture.echo", DependsOn: []string{"b"}},
{ID: "b", Mode: "fixture.echo", DependsOn: []string{"a"}},
}}
if err := w.Validate(); !errors.Is(err, ErrCycle) {
t.Errorf("want ErrCycle, got %v", err)
}
}
// TestRun_SingleNode runs a one-node echo workflow and checks the
// status, the node count and that the prompt round-trips.
func TestRun_SingleNode(t *testing.T) {
	runner := newTestRunner()
	single := Workflow{Name: "single", Nodes: []Node{
		{ID: "a", Mode: "fixture.echo", Prompt: "hello"},
	}}

	result, err := runner.Run(context.Background(), single)
	if err != nil {
		t.Fatal(err)
	}
	if result.Status != StatusSucceeded {
		t.Errorf("status: want succeeded, got %q", result.Status)
	}
	if count := len(result.Nodes); count != 1 {
		t.Fatalf("nodes: want 1, got %d", count)
	}
	if echoed := result.Nodes[0].Output; echoed["prompt"] != "hello" {
		t.Errorf("echo round-trip: %+v", echoed)
	}
}
// TestRun_DAG_RefSubstitution chains three nodes through $-references
// and checks the execution order plus that the upper-cased text from
// the first node flows into both downstream prompts.
func TestRun_DAG_RefSubstitution(t *testing.T) {
	runner := newTestRunner()
	chain := Workflow{Name: "chain", Nodes: []Node{
		{ID: "shape", Mode: "fixture.upper", Prompt: "hello world"},
		{
			ID:        "weakness",
			Mode:      "fixture.echo",
			Prompt:    "Given $shape.output.upper find issue",
			DependsOn: []string{"shape"},
		},
		{
			ID:        "improvement",
			Mode:      "fixture.echo",
			Prompt:    "Based on $weakness.output.prompt do better",
			DependsOn: []string{"weakness"},
		},
	}}

	result, err := runner.Run(context.Background(), chain)
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	if result.Status != StatusSucceeded {
		t.Errorf("status: %q", result.Status)
	}

	// Order check: shape → weakness → improvement
	for i, want := range []string{"shape", "weakness", "improvement"} {
		if got := result.Nodes[i].NodeID; got != want {
			t.Errorf("execution order %d: want %q, got %q", i, want, got)
		}
	}

	// shape uppercases "hello world" → "HELLO WORLD"
	if up := result.Nodes[0].Output["upper"]; up != "HELLO WORLD" {
		t.Errorf("shape.upper: %q", up)
	}

	// weakness sees "Given HELLO WORLD find issue" in its prompt
	weaknessPrompt, _ := result.Nodes[1].Output["prompt"].(string)
	if !strings.Contains(weaknessPrompt, "HELLO WORLD") {
		t.Errorf("weakness ref-substitution failed: %q", weaknessPrompt)
	}

	// improvement sees the SUBSTITUTED weakness prompt
	improvementPrompt, _ := result.Nodes[2].Output["prompt"].(string)
	if !strings.Contains(improvementPrompt, "HELLO WORLD") {
		t.Errorf("improvement chain-substitution failed: %q", improvementPrompt)
	}
}
// TestRun_FailedNodeSkipsDownstream checks that a failing node marks
// its dependent as skipped (with an "upstream" reason) while an
// unrelated node still runs, and that the run reports partial status.
func TestRun_FailedNodeSkipsDownstream(t *testing.T) {
	runner := newTestRunner()
	wf := Workflow{Name: "skipchain", Nodes: []Node{
		{ID: "a", Mode: "fixture.fail"},
		{ID: "b", Mode: "fixture.echo", DependsOn: []string{"a"}},
		{ID: "c", Mode: "fixture.echo"}, // independent of a — should still run
	}}

	result, err := runner.Run(context.Background(), wf)
	if err != nil {
		t.Fatal(err)
	}
	if result.Status != StatusPartial {
		t.Errorf("status: want partial, got %q", result.Status)
	}

	outcomes := make(map[string]NodeResult, len(result.Nodes))
	for _, node := range result.Nodes {
		outcomes[node.NodeID] = node
	}

	if outcomes["a"].Error == "" {
		t.Error("a should have errored")
	}
	if bErr := outcomes["b"].Error; bErr == "" || !strings.Contains(bErr, "upstream") {
		t.Errorf("b should be skipped with upstream-failure reason; got %q", bErr)
	}
	if cErr := outcomes["c"].Error; cErr != "" {
		t.Errorf("c is independent; should run successfully; got error: %q", cErr)
	}
}
// TestRun_UnknownModeAborts expects Run to return ErrUnknownMode and
// an aborted status when a node names an unregistered mode.
func TestRun_UnknownModeAborts(t *testing.T) {
	runner := newTestRunner()
	wf := Workflow{Name: "bad", Nodes: []Node{
		{ID: "a", Mode: "fixture.does_not_exist"},
	}}

	result, err := runner.Run(context.Background(), wf)
	if !errors.Is(err, ErrUnknownMode) {
		t.Errorf("want ErrUnknownMode, got %v", err)
	}
	if status := result.Status; status != StatusAborted {
		t.Errorf("status: want aborted, got %q", status)
	}
}
// TestRun_UnresolvedReferenceErrors expects a prompt referencing an
// unknown node to fail that node (not the whole run) with a
// "no such node" explanation.
func TestRun_UnresolvedReferenceErrors(t *testing.T) {
	runner := newTestRunner()
	wf := Workflow{Name: "badref", Nodes: []Node{
		{
			ID:     "a",
			Mode:   "fixture.echo",
			Prompt: "references $ghost.output but ghost doesn't exist",
		},
	}}

	result, err := runner.Run(context.Background(), wf)
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	nodeErr := result.Nodes[0].Error
	if nodeErr == "" {
		t.Error("unresolved $ghost should error the node")
	}
	if !strings.Contains(nodeErr, "no such node") {
		t.Errorf("error should explain no-such-node; got %q", nodeErr)
	}
}
// TestRun_ImplicitLLMChatFallback checks that a node without a Mode
// is dispatched to whatever is registered as "llm.chat".
func TestRun_ImplicitLLMChatFallback(t *testing.T) {
	runner := NewRunner()
	runner.RegisterMode("llm.chat", fixtureEcho) // pretend llm.chat exists
	wf := Workflow{Name: "implicit", Nodes: []Node{
		{ID: "a", Prompt: "no Mode field — should default to llm.chat"},
	}}

	result, err := runner.Run(context.Background(), wf)
	if err != nil {
		t.Fatal(err)
	}
	if result.Status != StatusSucceeded {
		t.Errorf("implicit llm.chat: status %q", result.Status)
	}
	if mode := result.Nodes[0].Mode; mode != "llm.chat" {
		t.Errorf("effective mode: want llm.chat, got %q", mode)
	}
}
// TestRun_ProvenanceRecording checks that a node result carries its
// node ID, mode name, a non-zero start time and a non-negative
// duration.
func TestRun_ProvenanceRecording(t *testing.T) {
	runner := newTestRunner()
	wf := Workflow{Name: "trace", Nodes: []Node{
		{ID: "x", Mode: "fixture.echo", Prompt: "trace me"},
	}}

	result, err := runner.Run(context.Background(), wf)
	if err != nil {
		t.Fatal(err)
	}
	node := result.Nodes[0]
	if !(node.NodeID == "x" && node.Mode == "fixture.echo") {
		t.Errorf("provenance: node=%q mode=%q", node.NodeID, node.Mode)
	}
	if node.StartedAt.IsZero() {
		t.Error("started_at should be set")
	}
	if node.DurationMs < 0 {
		t.Errorf("duration_ms: %d", node.DurationMs)
	}
}
// TestRun_InputsResolveRefs verifies that $-references inside
// node.Inputs (not just Prompt) are substituted, and that literal
// input values pass through untouched.
func TestRun_InputsResolveRefs(t *testing.T) {
	runner := newTestRunner()
	consumerInputs := map[string]any{
		"copied": "$a.output.prompt",
		"static": "literal",
	}
	wf := Workflow{Name: "inputs", Nodes: []Node{
		{ID: "a", Mode: "fixture.echo", Prompt: "first"},
		{ID: "b", Mode: "fixture.echo", Inputs: consumerInputs, DependsOn: []string{"a"}},
	}}

	result, err := runner.Run(context.Background(), wf)
	if err != nil {
		t.Fatal(err)
	}
	got := result.Nodes[1].Output
	if copied := got["copied"]; copied != "first" {
		t.Errorf("inputs ref: want 'first', got %q", copied)
	}
	if static := got["static"]; static != "literal" {
		t.Errorf("inputs static: want 'literal', got %q", static)
	}
}
// TestTopoSort_Stable checks that nodes without dependencies come out
// of topoSort in the order they were declared.
func TestTopoSort_Stable(t *testing.T) {
	declared := []string{"z", "y", "x"}
	nodes := make([]Node, 0, len(declared))
	for _, id := range declared {
		nodes = append(nodes, Node{ID: id})
	}

	got, err := topoSort(nodes)
	if err != nil {
		t.Fatal(err)
	}
	for i, want := range declared {
		if got[i] != want {
			t.Errorf("position %d: want %q, got %q", i, want, got[i])
		}
	}
}

View File

@ -1,172 +0,0 @@
// Package workflow is the Observer-KB workflow runner per SPEC §3.8 —
// the orchestrator that chains §3.4 modes (matrix.search, relevance,
// downgrade, distillation.score, drift.scorer) plus free-form llm.chat
// into multi-pass measurement pipelines.
//
// The architectural intent is documented in PRD's "Observer as system
// resource" section: workflows ARE observation patterns whose every
// step is recorded as an ObservedOp via observerd. The mode catalog
// is the registry of capabilities; the runner is the engine that
// composes them.
//
// First slice (this commit): types + DAG runner + reference
// substitution + a fixture.echo mode for testing the mechanics.
// Real-mode integrations (matrix.search, distillation.score, etc.)
// land in follow-up commits.
//
// YAML shape mirrors /home/profit/lakehouse/.archon/workflows/
// lakehouse-architect-review.yaml so existing Archon workflows load
// directly, with one Go-side addition: an optional `mode` field on
// each node so the runner can dispatch to non-LLM modes.
package workflow
import (
"context"
"errors"
"fmt"
"time"
)
// Workflow is one loadable workflow definition: a named list of Nodes
// forming a dependency DAG. Matches Archon's YAML shape; Provider +
// Model are informational in v0 (only used by llm.chat-style modes
// that need a backend) and ignored by other modes.
type Workflow struct {
// Name identifies the workflow; Validate rejects an empty name.
Name string `yaml:"name" json:"name"`
Description string `yaml:"description" json:"description"`
// Provider and Model are copied by Run into every node's Context.
Provider string `yaml:"provider" json:"provider,omitempty"`
Model string `yaml:"model" json:"model,omitempty"`
// Nodes are the workflow steps; Validate rejects an empty list.
Nodes []Node `yaml:"nodes" json:"nodes"`
}
// Node is one step in the workflow DAG. ID must be unique within a
// workflow; DependsOn lists the IDs of nodes that must complete
// before this one runs.
//
// Mode is the registered capability the node dispatches to. When
// omitted, the runner assumes "llm.chat" using the workflow's
// Provider+Model (matching Archon's implicit-LLM convention).
//
// Inputs is a free-form map passed to the mode after $-reference
// substitution. The Prompt field is a convenience — it's added to
// the input map under the key "prompt" before mode dispatch, so
// llm.chat-style modes get free-form text without a wrapping object.
type Node struct {
// ID must be non-empty and unique; Validate enforces both.
ID string `yaml:"id" json:"id"`
Mode string `yaml:"mode" json:"mode,omitempty"`
Prompt string `yaml:"prompt" json:"prompt,omitempty"`
Inputs map[string]any `yaml:"inputs" json:"inputs,omitempty"`
// NOTE(review): AllowedTools, Effort and IdleTimeoutMs are not read
// by the runner code in this package — presumably carried over from
// the Archon YAML shape for modes to use later; confirm.
AllowedTools []string `yaml:"allowed_tools" json:"allowed_tools,omitempty"`
Effort string `yaml:"effort" json:"effort,omitempty"`
// Serialized as "idle_timeout" (no unit suffix in the key).
IdleTimeoutMs int `yaml:"idle_timeout" json:"idle_timeout,omitempty"`
// DependsOn entries must name other nodes in the same workflow;
// Validate returns ErrMissingDep otherwise.
DependsOn []string `yaml:"depends_on" json:"depends_on,omitempty"`
}
// NodeResult captures one node's execution outcome. Output is the
// mode's return map. Error is an empty string on success; the runner
// sets it to the mode's error text, to a $-reference resolution error,
// or to an "upstream node ..." reason when the node was skipped
// because a dependency failed or was skipped. StartedAt + DurationMs
// feed observerd's provenance recording.
type NodeResult struct {
NodeID string `json:"node_id"`
// Mode is the effective mode name, i.e. "llm.chat" when the node
// did not set one.
Mode string `json:"mode"`
Output map[string]any `json:"output,omitempty"`
Error string `json:"error,omitempty"`
StartedAt time.Time `json:"started_at"`
// DurationMs is left at zero for skipped nodes.
DurationMs int64 `json:"duration_ms"`
}
// RunResult is the full workflow execution outcome — every node's
// result in execution order, plus the workflow name and a summary
// status: succeeded if every node ran without error, partial if any
// node failed or was skipped, aborted if the run stopped before any
// node executed.
type RunResult struct {
Workflow string `json:"workflow"`
Status RunStatus `json:"status"`
// Nodes is nil on aborted runs, which return before executing anything.
Nodes []NodeResult `json:"nodes"`
StartedAt time.Time `json:"started_at"`
// DurationMs is not set on aborted runs.
DurationMs int64 `json:"duration_ms"`
}
// RunStatus tags the overall workflow outcome reported in RunResult.
type RunStatus string
const (
StatusSucceeded RunStatus = "succeeded"
StatusPartial RunStatus = "partial" // at least one node failed or was skipped
StatusAborted RunStatus = "aborted" // hard error halted execution (cycle, missing dep, unknown mode)
)
// Mode is the function signature every registered capability honors.
// Input + output are generic maps so workflows compose freely; the
// mode function is responsible for shape-checking its own inputs.
//
// Returning an error doesn't abort the whole workflow — the runner
// records the error in NodeResult and continues with downstream
// nodes that don't depend on this one. That mirrors observerd's
// "log + continue" partial-failure semantics so a single mode bug
// doesn't kill a 7-node measurement chain.
//
// The runner stores the returned map in NodeResult.Output even when
// the error is non-nil.
type Mode func(ctx Context, input map[string]any) (map[string]any, error)
// Context is what a Mode receives. Carries the standard Go
// context.Context (for cancellation) plus a workflow-scoped
// metadata bag for cross-mode coordination (e.g. a workflow's
// model hint that llm.chat-style modes consume).
type Context struct {
// Ctx is the context.Context the caller passed to Run, forwarded
// unchanged. Tests in this package also build Context{} with a nil
// Ctx — presumably modes that use it should tolerate that; confirm.
Ctx context.Context
// WorkflowName is the parent workflow.Name — useful when a mode
// records ObservedOps so the source can be traced back to the
// workflow that triggered it.
WorkflowName string
// NodeID is the currently-executing node — paired with
// WorkflowName forms a unique provenance key.
NodeID string
// Provider + Model carry the workflow's defaults; modes that
// need them (llm.chat) pull from here, others ignore.
Provider string
Model string
}
// Errors surfaced to callers. Cycle / missing-dependency / duplicate-
// id / unknown-mode are *aborting* errors — the runner can't proceed
// and returns them (wrapped) from Validate or Run. Per-node mode
// errors are recorded but don't abort; the same holds for
// ErrUnresolvedRef, whose text ends up in the failing node's
// NodeResult.Error. Match with errors.Is.
var (
ErrCycle = errors.New("workflow: dependency cycle detected")
ErrMissingDep = errors.New("workflow: node depends on unknown id")
ErrUnknownMode = errors.New("workflow: unknown mode")
ErrDuplicateNodeID = errors.New("workflow: duplicate node id")
ErrUnresolvedRef = errors.New("workflow: unresolved $node.output reference")
)
// Validate checks a Workflow's structural invariants before it is
// run, in this order: the name is non-empty, at least one node
// exists, every node ID is non-empty and unique, every depends_on
// entry names a declared node, and the dependency graph is acyclic.
// It returns nil when all checks pass. Duplicate IDs, unknown
// dependencies and cycles are reported by wrapping ErrDuplicateNodeID,
// ErrMissingDep and ErrCycle respectively.
func (w Workflow) Validate() error {
	switch {
	case w.Name == "":
		return fmt.Errorf("workflow: name is required")
	case len(w.Nodes) == 0:
		return fmt.Errorf("workflow: at least one node required")
	}

	known := make(map[string]struct{}, len(w.Nodes))
	for i := range w.Nodes {
		id := w.Nodes[i].ID
		if id == "" {
			return fmt.Errorf("workflow: node id must be non-empty")
		}
		if _, exists := known[id]; exists {
			return fmt.Errorf("%w: %q", ErrDuplicateNodeID, id)
		}
		known[id] = struct{}{}
	}

	for i := range w.Nodes {
		node := &w.Nodes[i]
		for _, dep := range node.DependsOn {
			if _, exists := known[dep]; exists {
				continue
			}
			return fmt.Errorf("%w: node %q depends on %q (no such node)",
				ErrMissingDep, node.ID, dep)
		}
	}

	cyclicID, hasCycle := detectCycle(w.Nodes)
	if !hasCycle {
		return nil
	}
	return fmt.Errorf("%w: starting at node %q", ErrCycle, cyclicID)
}

View File

@ -12,9 +12,6 @@ ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
observerd_url = "http://127.0.0.1:3219"
[storaged]
bind = "127.0.0.1:3211"
@ -50,26 +47,6 @@ catalogd_url = "http://127.0.0.1:3212"
secrets_path = "/etc/lakehouse/secrets-go.toml"
refresh_every = "30s"
[pathwayd]
bind = "127.0.0.1:3217"
# Empty = in-memory only (dev/test). Production sets a path under
# /var/lib/lakehouse/pathway/state.jsonl so traces survive restart.
persist_path = ""
[matrixd]
bind = "127.0.0.1:3218"
# matrixd calls embedd (query-text → vector) and vectord (per-corpus
# search) directly. Localhost defaults; in distributed deployments
# these point at the gateway's upstream addresses.
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
[observerd]
bind = "127.0.0.1:3219"
# Empty = in-memory only (dev/test). Production sets a path under
# /var/lib/lakehouse/observer/ops.jsonl so ops survive restart.
persist_path = ""
[s3]
endpoint = "http://localhost:9000"
region = "us-east-1"

View File

@ -1,217 +0,0 @@
# Audit Re-run #2 — 2026-04-29 (after Phases AH + matrix §3.4 + workflow §3.8)
**Baseline audit:** `reports/scrum/golang-lakehouse-scrum-test.md` at commit `91edd43` — composite **35 / 60**.
**Rerun-1 head:** `4840c10` — composite **43 / 60** (Δ baseline = +8).
**Rerun-2 head:** `c7e3124` — **30 commits past rerun-1**. Composite **50 / 60. Δ rerun-1 = +7. Δ baseline = +15.**
This is the second delta document. Both prior reports remain immutable history. Working tree was dirty on entry (5 in-flight files under `cmd/observerd/` + `internal/{observer,workflow}/`); audit ran on stashed-clean `c7e3124` so the score reflects shipped state, not WIP.
---
## What landed since rerun-1
| Commit | What |
|---|---|
| `4840c10` | (rerun-1 baseline — 04_query refresh-tick race fix) |
| `125e1c8` | tests close R-002 / R-003 / R-008 — `internal/{shared,storeclient,queryd/db}` Go tests |
| `6af0520` | A: fail-loud on non-loopback bind — closes worst case of R-001 |
| `423a381` | D: storaged per-prefix PUT cap — vectord `_vectors/` → 4 GiB (ADR-002) |
| `0d18ffa` | ADR-003: inter-service auth posture — Bearer + IP allowlist |
| `1ec85b0` | Batch 2: perf baseline — multi-sample + warmup + MAD threshold |
| `0f79bce` | Batch 3: `cmd/<bin>/main_test.go × 6` — closes R-005 |
| `fb08232` | Batch 4: embed fixture-mode — partial R-006 closure |
| `56844c3` | embed cache — LRU at `/v1/embed` for repeat-query elimination |
| `8f4c16f` | mcpd: Go MCP SDK port — replaces Bun mcp-server tool surface |
| `fa56134` | ADR-003 wiring: Bearer token + IP allowlist middleware |
| `ad1670d` | storaged cap smoke — verifies ADR-002 at 300 MiB |
| `2a6234f` | ADR-004 + `internal/pathway`: Mem0 versioned trace substrate |
| `afbb506` | pathwayd: HTTP service over `internal/pathway` · 11/11 smoke gate |
| `f1c1883` | vectord BatchAdd — single-lock variadic batch |
| `71b35fb` | SPEC §1 + §3.4: name matrix indexer as a port target |
| `a7620c8` | PRD: name the product vision — small-model pipeline + 5-loop substrate |
| `c1d96b7` | matrixd: multi-corpus retrieve+merge — SPEC §3.4 component 2 of 5 |
| `166470f` | corpusingest: extract reusable text→vector ingest pipeline |
| `0d1553c` | candidates corpus: first deep-field reality test on real staffing data |
| `9588bd8` | matrix relevance filter — SPEC §3.4 component 3 of 5 |
| `3968ec8` | matrix strong-model downgrade gate — SPEC §3.4 component 4 of 5 |
| `a97881d` | workers corpus + multi-corpus reality test — matrix indexer end-to-end |
| `31b4088` | multi_corpus_e2e WORKERS_LIMIT knob + embed-text-not-sample-size finding |
| `06e7152` | matrix playbook memory + boost — SPEC §3.4 component 5 of 5 (LEARNING LOOP) |
| `a730fc2` | scrum fixes: 4 real findings landed, 4 false positives dismissed |
| `7f42089` | D: embed-text iteration — clean negative finding (3 variants tested) |
| `57d0df1` | E (partial): distillation port — scorer + contamination firewall |
| `be65f85` | F: drift quantification — scorer drift first |
| `b199093` | B: matrix metadata filter — post-retrieval structured gate |
| `6392772` | C: bulk playbook record — operational rating wiring |
| `bc9ab93` | H: observerd — autonomous-iteration witness loop (SPEC §2 port) |
| `97dd3f8` | SPEC §3.5/§3.6/§3.7/§3.8 — name F/B/C as port targets + Archon-style workflow runner |
| `e30da6e` | §3.8 first slice: workflow runner skeleton + DAG executor + observerd integration |
| `c7e3124` | §3.8 second slice: real modes wired (matrix.relevance/downgrade/search, distillation.score, drift.scorer) |
This is the wave that took the system from "G0+G2 substrate plus 500K validation" to **"all five small-model-pipeline loops have at least a first port"** (per `project_small_model_pipeline_vision.md`).
---
## Score delta — double column
Same 6 dimensions, scored 0–10 with citations. `Δ R1` = vs rerun-1 (`4840c10`); `Δ Base` = vs original audit (`91edd43`).
| Dimension | Base | R1 | **R2** | Δ R1 | Δ Base | Evidence for the move |
|---|---:|---:|---:|---:|---:|---|
| **Reproducibility** | 7 | 9 | **9** | 0 | +2 | `just verify` PASS in 31s wall (`_evidence/rerun2/just_verify.log`) — vet + 30 packages of `go test -short` + 9 core smokes. `just doctor` all-green for go/gcc/minio/ollama/secrets. **8 additional domain smokes also PASS** (pathway, matrix, relevance, downgrade, observer, playbook, workflow, storaged_cap → `_evidence/rerun2/smoke_*.log`). New recipes: `smoke-g2-fixtures` (R-006 partial close) + `smoke-storaged-cap`. **Still 1**: no `.github/workflows/`; no fixture-mode for storage (only embed). |
| **Test Coverage** | 6 | 8 | **9** | +1 | +3 | **321 Go test functions** across 40 test files (was 13 at baseline, ~77 at R1 — **3× the test surface**). `internal/shared` has 4 test files (`auth_test.go`, `bind_test.go`, `config_test.go`, `server_test.go`); `internal/storeclient/client_test.go` exists; `internal/queryd/db_test.go` + `registrar_test.go` exist — **R-002 / R-003 / R-008 all closed**. Six original cmd binaries now have `main_test.go` (catalogd/embedd/ingestd/queryd/storaged/vectord) — **R-005 mostly closed**. **Still 1**: `cmd/{matrixd,observerd,pathwayd,fake_ollama}/main_test.go` absent — three of those are new daemons that need wiring tests. |
| **Trust Boundary Safety** | 7 | 7 | **9** | +2 | +2 | **ADR-003 shipped** (`docs/DECISIONS.md` §3): `internal/shared/auth.go` 64-line Bearer middleware with constant-time compare via `crypto/subtle` + IP allowlist (`internal/shared/auth.go:62-64`). 4 auth tests in `auth_test.go` cover wrong-token, raw-token-without-prefix, IP-only, both-required (`internal/shared/auth_test.go:77,86,108,162`). `redactCreds` still scrubs S3 keys from queryd error chain (`internal/queryd/db.go`). One `fmt.Sprintf` SQL site remains (`internal/queryd/registrar.go:153`) — properly escaped via `quoteIdent` + `sqlEscape`. 13 `MaxBytesReader` sites in cmd/, 5 loopback bindings. **Still 1**: auth is opt-in (empty token = G0 dev mode); no CORS posture (R-010); 2 `/home/profit/lakehouse/...` paths in `scripts/staffing_*/main.go` flag-defaults. |
| **Agent Memory Correctness** | 3 | 4 | **9** | +5 | +6 | **All five SPEC §3.4 components shipped**: corpus builders (`internal/corpusingest`), retrieve+merge (`matrixd /matrix/search`), relevance filter (`internal/matrix/relevance.go` 376 LoC + 289 LoC test), strong-model downgrade gate (`internal/matrix/downgrade.go` 137 LoC + 100 LoC test), playbook memory + boost (`internal/matrix/playbook.go` 196 LoC + 180 LoC test) — including the **learning loop**. Pathway substrate ratified (ADR-004, `internal/pathway/store.go` 381 LoC + 398 LoC test). **Mem0-style ops all proven**: `TestAdd_AssignsUIDAndTimestamps`, `TestUpdate_ReplacesContentSameUID`, `TestRevise_LinksToPredecessorViaHistory`, `TestRevise_ChainOfThree_BackwardWalk`, `TestRetire_ExcludedFromSearch`, `TestRetire_StillAccessibleViaGet`, `TestHistory_CycleDetected`, `TestHistory_PredecessorMissing_TruncatesChain`, `TestAddIdempotent_RejectsEmptyUID` — **every Sprint 2 design-bar acceptance has a test**. Observer ported (`internal/observer/store.go` 249 LoC + 193 LoC test). pathway smoke 11/11. **Still 1**: distillation port partial (scorer + firewall only — `57d0df1` "E (partial)"); drift is "scorer drift first" (`be65f85`) not full quantification. |
| **Deployment Readiness** | 4 | 5 | **5** | 0 | +1 | `just doctor` actionable per-dep install (`scripts/doctor.sh`); `just install-hooks` documented; pre-push hook still installed. **Still 5**: no `REPLICATION.md`, no `secrets-go.toml.example`, no `deploy/systemd/*.service`, no `Dockerfile`, no readiness vs. liveness split. Sprint 4 stories all open. |
| **Maintainability** | 8 | 8 | **9** | +1 | +1 | **4 ADRs ratified** (was 1 at R1): ADR-001 foundational, ADR-002 storaged per-prefix cap, ADR-003 auth posture, ADR-004 pathway data model — **the auth + cap + memory-model decisions are locked before downstream code retrofits them**. Every binary still 100–400 LoC (no god-files). Per-package test files: every `internal/` package has ≥1 test file (was: 5 packages had zero at baseline). `CLAUDE_REFACTOR_GUARDRAILS.md` codifies the maintenance discipline. `tests/proof/FINAL_REPORT.md` answers the 9 mandated questions. **Still 1**: no `CONTRIBUTING.md`; the proof harness adds 24-claim maintenance surface that needs keeping current. |
**Composite: 35 → 43 → 50. 83% of max.**
---
## Code surface delta
| Metric | Baseline (`91edd43`) | R1 (`4840c10`) | **R2 (`c7e3124`)** | Δ R1 |
|---|---:|---:|---:|---:|
| Total Go LoC | ~6,587 | ~7,800 (est) | **19,381** | ~2.5× |
| Go files | ~50 | ~62 | **93** | +31 |
| Test files | 13 | ~22 | **40** | +18 |
| Go test functions | ~77 | ~109 | **321** | +212 |
| `cmd/<bin>/` | 7 | 7 | **12** | +5 |
| `internal/<pkg>/` | 11 | 11 | **18** | +7 |
| Smoke scripts | 9 | 9 | **21** | +12 |
| ADRs ratified | 0 | 1 | **4** | +3 |
| Routes (cmd-level) | ~22 | ~22 | **37** | +15 |
| Untested cmd binaries | 6 / 7 | 6 / 7 | **4 / 12** | 2 abs, 1/3 ratio |
The wave is **substrate-bearing**, not throughput-bearing. Every internal package has tests; the gap is now the **wiring layer** for the 3 new daemons.
---
## Risk register status updates
12 risks in `reports/scrum/risk-register.md`. Status table at `c7e3124`:
| Risk | Severity | Before R2 | After R2 | Evidence |
|---|---|---|---|---|
| R-001 queryd /sql RCE-eq off-loopback | HIGH | open | **partial** | `6af0520` fail-loud on non-loopback bind (closes worst case); ADR-003 + `internal/shared/auth.go` available to wrap; **but auth is opt-in** — needs deploy story decision before fully closing |
| R-002 internal/shared zero tests | HIGH | open | **CLOSED** | 4 test files (`auth_test.go` + `bind_test.go` + `config_test.go` + `server_test.go`), all PASS in `just verify` |
| R-003 internal/storeclient zero tests | HIGH | open | **CLOSED** | `internal/storeclient/client_test.go`, PASS |
| R-004 smokes not gated | MED | closed (R1) | **CLOSED** | unchanged from R1 |
| R-005 6/7 cmd/main.go untested | MED | partial | **partial** | 6 of original 7 closed (`0f79bce` Batch 3); 4 new daemons (`fake_ollama`/`matrixd`/`observerd`/`pathwayd`) reopen the gap on different surface |
| R-006 no fixture-only smokes | MED | open | **partial** | `scripts/g2_smoke_fixtures.sh` (`fb08232`) closes embed half via fake_ollama; storage half deferred |
| R-007 zero auth middleware | MED | open | **partial** | `internal/shared/auth.go` shipped with 4 tests (`fa56134`); opt-in by default until deploy posture decision |
| R-008 queryd/db.go untested | MED | open | **CLOSED** | `internal/queryd/db_test.go` + `registrar_test.go` (`125e1c8`) |
| R-009 registrar.go fmt.Sprintf SQL | LOW | open | open | unchanged — escaping via `quoteIdent`+`sqlEscape` is correct, regression test still missing |
| R-010 no CORS posture | LOW | open | open | unchanged — no `Access-Control-*` headers anywhere |
| R-011 g2 smoke model assertion | LOW | note | note | unchanged |
| R-012 empty tests/ dir | LOW | closed (R1) | **CLOSED** | unchanged from R1 |
**Net since R1: 3 closed (R-002, R-003, R-008), 3 advanced to partial (R-001, R-006, R-007), R-005 stays partial on different surface, 3 unchanged.**
---
## Sprint backlog progress
### Sprint 0 — Reproducibility Gate
| Story | R1 | R2 |
|---|---|---|
| S0.1 `just doctor` | DONE | DONE |
| S0.2 `just smoke-fixtures` | open | **partial** (`smoke-g2-fixtures`) |
| S0.3 `just verify` + pre-push | DONE | DONE |
| S0.4 `cmd/<bin>/main_test.go` × 6 | partial | **partial → mostly DONE** (6 of original 7; 3 new daemons absent) |
| S0.5 internal/shared, storeclient, queryd/db tests | open | **DONE** |
| S0.6 `tests/` dir cleanup | DONE | DONE |
**4 of 6 done, 2 partial.** Highest-leverage open work: tests for the 3 new daemons + storage-half of fixture mode.
### Sprint 1 — Trust Boundary Gate
- Replace SQL string interp with parameterized: still 1 site, properly escaped (R-009 LOW)
- Observer fail-open → `degraded`/`cycle`: not yet codified — observer is ported but ADR-002-style fail-safe ADR not written
- Auth/localhost-only guardrails: **shipped** (ADR-003 + auth.go), opt-in posture
- Schema validation per public endpoint: per-handler validation exists (validateKey etc.); not framework-level
**Status: ~60% of Sprint 1 closed, observer fail-safe semantics ADR is the outstanding doc-only piece.**
### Sprint 2 — Memory Correctness Gate
| Story | R1 | R2 |
|---|---|---|
| ADD/UPDATE/REVISE/RETIRE/HISTORY tests | design-bar | **DONE** (`internal/pathway/store_test.go`) |
| Cycle detection tests | design-bar | **DONE** (`TestHistory_CycleDetected`) |
| Retired-trace exclusion tests | design-bar | **DONE** (`TestRetire_ExcludedFromSearch`) |
| Duplicate trace replay_count tests | design-bar | partial (`TestAddIdempotent_RejectsEmptyUID`; replay_count semantics) |
| Corrupted memory row recovery test | design-bar | open |
**Status: Sprint 2 acceptance criteria mostly green — the core invariants are tested. Audit/event receipt on every memory mutation is the missing piece.**
### Sprint 3 — Agent Loop Reality Gate
- Deterministic mini corpus: `tests/proof/fixtures/` exists
- search → verify → observer review → playbook seal → second-run retrieval: `scripts/multi_corpus_e2e.sh` + `scripts/playbook_smoke.sh` exercise this; full chain via `scripts/workflow_smoke.sh`
- Negative case observer rejects hallucinated claim: covered by observer_smoke (semantics open for review)
- Health endpoint content-type regression: covered by proof harness `00_health`
**Status: Sprint 3 has working substrate; explicit "single command proves the full loop" with input/output/verdict/receipt evidence is partial.**
### Sprint 4 — Deployment Gate
**Status: unchanged from R1.** No `REPLICATION.md`, no `.env.example`, no `*.service` units, no `Dockerfile`. `just doctor` is the closest piece. This is the largest open Sprint.
---
## New findings from this rerun
Two real findings worth recording.
### F1 — 3 new daemons lack `cmd/<bin>/main_test.go`
- **Where:** `cmd/matrixd/`, `cmd/observerd/`, `cmd/pathwayd/`
- **What:** Same gap-class as R-005 was, just on net-new code. Each daemon mounts ≥4 routes (matrixd: 6, observerd: 4, pathwayd: 9 → 19 routes total) with no wiring test.
- **Severity:** MEDIUM. The internal packages backing each daemon (`internal/matrix`, `internal/observer`, `internal/pathway`) have full unit tests — but no test proves `cmd/pathwayd/main.go` actually wires `/pathway/revise` to `(*pathway.Store).Revise`. A handler-rename refactor would silently break the route surface.
- **Action:** Re-open R-005 against the new daemons. ~1 hr to add three `main_test.go` files patterned on `cmd/storaged/main_test.go`.
### F2 — `scripts/staffing_*/main.go` has hardcoded data paths in flag defaults
- **Where:** `scripts/staffing_candidates/main.go:217` and `scripts/staffing_workers/main.go:269` reference `/home/profit/lakehouse/data/datasets/{candidates,workers_500k}.parquet`.
- **What:** Flag defaults reach into the Rust legacy tree at `/home/profit/lakehouse/...`. Throwaway driver scripts (not services), and the values are flag-overridable, but they couple the Go repo to the Rust filesystem layout.
- **Severity:** LOW. Doesn't affect any service. Worth noting because audit Sprint 4 explicitly calls out "no hardcoded `/home/profit` paths" as an acceptance criterion.
- **Action:** Either move the parquet under `golangLAKEHOUSE/data/` (preferred for self-containment) or document the cross-tree dependency in `RESEARCH_LOG_2026-04-28.md` and accept it.
---
## What this rerun does NOT change
- **Sprint 4 (deployment) remains the largest open gap.** R-1 said this; R-2 says this; without `REPLICATION.md` + systemd units, the cutover from Rust at `devop.live/lakehouse/` (G5) cannot be operator-validated.
- **Auth is opt-in.** Empty-token default is fine for G0 development but means the moment any Go binary binds non-loopback in prod, a posture decision is required. R-001 + R-007 cannot fully close until that decision is recorded.
- **CORS posture (R-010) is still unspecified.** The Bun-served Rust UI handles browser CORS today; if a Go service ever fronts a browser, this needs a decision.
- **Distillation and drift are first-port-only.** `57d0df1` ships scorer + contamination firewall (E partial); `be65f85` ships scorer-drift only (F first slice). The full distillation pipeline (sample export, audit_baselines lineage) and full drift signal are not yet ported.
---
## Recommended next moves (ordered by leverage / cost)
1. **Three `main_test.go` files for `matrixd` + `observerd` + `pathwayd`** (~1 hr). Closes the regenerated R-005, ratchets every future route addition through `just verify`.
2. **ADR-005: observer fail-safe semantics** (~30 min, doc-only). The observer is ported (`internal/observer/store.go`), but the upstream "verdict:accept on crash" anti-pattern still has no Go-side decision locked. Doing this now is half the cost of doing it after a regression.
3. **Auth posture decision for non-loopback deploy** (~1 hr, ADR or annotated decision in `RESEARCH_LOG`). Locks R-001 + R-007 from "opt-in middleware exists" to "wired-by-default for X, opt-in for Y". Required input for any G5 cutover plan.
4. **Sprint 4 minimal first slice** (~3 hr): `secrets-go.toml.example` + `deploy/systemd/<bin>.service.tmpl` × 12 binaries + `REPLICATION.md` skeleton. Highest-leverage Sprint 4 starter; the systemd units mostly mirror Rust's layout.
5. **Storage-half of fixture mode** (~3 hr): `MockS3Storage` interface satisfying `internal/storaged.Bucket`, smoke variant that points storaged at it. Closes R-006 fully and decouples CI from MinIO.
The remaining items (full drift port, full distillation port, observer audit-event receipt, corrupted-memory recovery test) are real engineering — Sprint 2/3 followups, not Sprint-0 polish.
---
## Methodology note — same as prior reports
All claims cite a file, line, or command. Evidence captured under `reports/scrum/_evidence/rerun2/`:
- `just_verify.log` — full vet + 30 packages × `go test -short` + 9 core smokes, exit 0, 31s wall
- `just_doctor.log` — 5 dependency probes, all green
- `govet.log` — `go vet ./...` exit 0
- `gotest_short.log` — full short-test pass
- `just_list.log` — recipe inventory
- `smoke_{pathway,matrix,relevance,downgrade,observer,playbook,workflow,storaged_cap}.log` — 8 additional domain smokes, all PASS
What was NOT inspected this round (deferred):
- Cross-binary failure cascades (kill matrixd mid-search, observe observerd state) — Sprint 1 follow-up
- Supply-chain audit of go.sum diffs since R1
- Performance regression vs the perf baseline shipped in `1ec85b0` — `just proof performance` exists, not run here
---
_Rerun-2 produced under the same "no vibes" rule as the original audit. The 50/60 reflects what's verifiably shipped at `c7e3124`, not what's planned. Working tree restored from stash after audit completion._

View File

@ -1,98 +0,0 @@
#!/usr/bin/env bash
# Candidates end-to-end — first deep-field reality test.
#
# Spins up storaged + embedd + vectord + matrixd + gateway, ingests
# the 1000-candidate corpus from
# /home/profit/lakehouse/data/datasets/candidates.parquet via the
# corpusingest substrate, then runs a real staffing query through
# /v1/matrix/search and prints the top 5 hits.
#
# Requires: Ollama on :11434 with nomic-embed-text loaded. If absent,
# this script exits 0 with a "skipped" message — same contract as
# g2_smoke.
#
# Usage: ./scripts/candidates_e2e.sh
# ./scripts/candidates_e2e.sh "your custom query here"
# Abort on any failing command, unset variable, or failed pipeline stage.
set -euo pipefail
# Run from the repository root (the parent of this script's directory) so the
# relative ./cmd, ./scripts and bin/ paths below resolve.
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"
# The first CLI argument overrides the default staffing query.
QUERY="${1:-Python AWS Docker engineer in Chicago available now}"
# Probe Ollama before doing any work; an unreachable Ollama is a skip (exit 0),
# not a failure.
if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
echo "[candidates-e2e] Ollama not reachable on :11434 — skipping (matches g2_smoke contract)"
exit 0
fi
echo "[candidates-e2e] building binaries..."
go build -o bin/ ./cmd/storaged ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway ./scripts/staffing_candidates
# Kill daemons left over from an earlier run. pkill exits non-zero when nothing
# matched, hence `|| true` under `set -e`.
pkill -f "bin/(storaged|embedd|vectord|matrixd|gateway)" 2>/dev/null || true
# NOTE(review): presumably gives the killed processes time to release their
# ports before the new stack binds them — confirm.
sleep 0.3
# PIDs of the daemons launched below; cleanup() kills each of them.
PIDS=()
# Scratch directory holding the generated config; removed by cleanup().
TMP="$(mktemp -d)"
CFG="$TMP/e2e.toml"
# cleanup — kill every daemon recorded in PIDS and remove the scratch dir.
# Runs from the EXIT trap, so it fires exactly once on any exit path.
cleanup() {
  echo "[candidates-e2e] cleanup"
  local pid
  # ${PIDS[@]+...} expands to nothing when PIDS is empty; a plain
  # "${PIDS[@]}" aborts under `set -u` on bash < 4.4.
  for pid in ${PIDS[@]+"${PIDS[@]}"}; do
    [ -n "$pid" ] && kill "$pid" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT
# A trapped INT/TERM does not end the script by itself: after the handler
# returns, bash resumes with the next command. Convert the signal into an
# explicit exit so the EXIT trap does the (single) cleanup and the script
# stops instead of running on against a torn-down stack.
trap 'exit 130' INT
trap 'exit 143' TERM
# Custom toml: vectord persistence disabled so the candidates index
# doesn't survive the run. Without this, re-running pollutes the
# shared MinIO `_vectors/` prefix and breaks g1p_smoke's "this is
# the only persisted index" assertion (caught 2026-04-29).
cat > "$CFG" <<EOF
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""
[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
# poll_health PORT — poll http://127.0.0.1:PORT/health until curl succeeds.
# Returns 0 on the first successful request, 1 once the deadline (now + 5,
# measured in whole seconds) has passed.
poll_health() {
  local port="$1" deadline=$(($(date +%s) + 5))
  local url="http://127.0.0.1:$port/health"
  until [ "$(date +%s)" -ge "$deadline" ]; do
    curl -sS --max-time 1 "$url" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
# Start each daemon in the background, record its PID for cleanup(), then wait
# for its /health endpoint before starting the next one. On a health timeout,
# print the tail of that daemon's log and abort.
# Logs are written to fixed /tmp paths rather than under $TMP, so they are
# still there after cleanup() removes the scratch directory.
# NOTE(review): ports 3211 (storaged) and 3216 (embedd) are not set in the
# generated config — presumably the daemons' built-in default binds; confirm.
echo "[candidates-e2e] launching stack..."
./bin/storaged -config "$CFG" > /tmp/storaged.log 2>&1 & PIDS+=($!)
poll_health 3211 || { echo "storaged failed"; tail /tmp/storaged.log; exit 1; }
./bin/embedd -config "$CFG" > /tmp/embedd.log 2>&1 & PIDS+=($!)
poll_health 3216 || { echo "embedd failed"; tail /tmp/embedd.log; exit 1; }
./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!)
poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!)
poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!)
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
echo "[candidates-e2e] stack up; running ingest + reality test query..."
echo
# The driver runs in the foreground; its exit status becomes the script's.
./bin/staffing_candidates -query "$QUERY"

View File

@ -1,159 +0,0 @@
#!/usr/bin/env bash
# Downgrade smoke — strong-model auto-downgrade gate via matrixd.
# All assertions go through gateway :3110 → /v1/matrix/downgrade.
#
# Validates the 5-row truth table from mode.rs::execute pass5 (rows 1-5), plus one negative-path check (row 6):
# 1. Lakehouse + strong + no force → DOWNGRADE
# 2. Lakehouse + strong + forced_mode=true → keep
# 3. Lakehouse + strong + force_full_override → keep
# 4. Lakehouse + weak (qwen3.5:latest) → keep
# 5. Non-lakehouse mode → gate not applicable
# 6. Negative path: empty mode → 400
# Abort on any failing command, unset variable, or failed pipeline stage.
set -euo pipefail
# Run from the repository root (the parent of this script's directory).
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"
echo "[downgrade-smoke] building matrixd + vectord + gateway..."
go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway
# Kill daemons left over from an earlier run. pkill exits non-zero when nothing
# matched, hence `|| true` under `set -e`.
pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true
# NOTE(review): presumably gives the killed processes time to release their
# ports before the new stack binds them — confirm.
sleep 0.3
# PIDs of the daemons launched below; cleanup() kills each of them.
PIDS=()
# Scratch directory holding the generated config; removed by cleanup().
TMP="$(mktemp -d)"
CFG="$TMP/downgrade.toml"
# cleanup — kill every daemon recorded in PIDS and remove the scratch dir.
# Runs from the EXIT trap, so it fires exactly once on any exit path.
cleanup() {
  echo "[downgrade-smoke] cleanup"
  local pid
  # ${PIDS[@]+...} expands to nothing when PIDS is empty; a plain
  # "${PIDS[@]}" aborts under `set -u` on bash < 4.4.
  for pid in ${PIDS[@]+"${PIDS[@]}"}; do
    [ -n "$pid" ] && kill "$pid" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT
# A trapped INT/TERM does not end the script by itself: after the handler
# returns, bash resumes with the next command. Convert the signal into an
# explicit exit so the EXIT trap does the (single) cleanup and the script
# stops instead of running on against a torn-down stack.
trap 'exit 130' INT
trap 'exit 143' TERM
# Generate a throwaway config for this run. Only vectord, matrixd and the
# gateway are started by this script; the other gateway upstream URLs are
# still listed but nothing is launched behind them here.
# NOTE(review): the sibling smoke scripts describe the empty vectord
# storaged_url as "persistence disabled" — presumably the same intent here;
# confirm.
cat > "$CFG" <<EOF
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""
[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
# poll_health PORT — poll http://127.0.0.1:PORT/health until curl succeeds.
# Returns 0 on the first successful request, 1 once the deadline (now + 5,
# measured in whole seconds) has passed.
poll_health() {
  local port="$1" deadline=$(($(date +%s) + 5))
  local url="http://127.0.0.1:$port/health"
  until [ "$(date +%s)" -ge "$deadline" ]; do
    curl -sS --max-time 1 "$url" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
# Start vectord, matrixd and the gateway in that order. Each daemon runs in the
# background with its PID recorded for cleanup(), and must answer /health
# before the next one starts. On a health timeout, print the tail of that
# daemon's log before aborting so the failure is diagnosable from the smoke
# output alone (same behaviour as the matrix and candidates scripts).
echo "[downgrade-smoke] launching vectord → matrixd → gateway..."
./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 &
PIDS+=($!)
poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 &
PIDS+=($!)
poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
PIDS+=($!)
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
# FAILED flips to 1 when any case below fails; it decides the final verdict.
FAILED=0
URL=http://127.0.0.1:3110/v1/matrix/downgrade
# post JSON_BODY — POST the body to $URL as application/json and write the
# response body to stdout. Callers extract .mode / .downgraded_from with jq.
post() {
  curl --silent --show-error \
    --request POST \
    --header 'Content-Type: application/json' \
    --data "$1" \
    "$URL"
}
# ── 1. Downgrade fires ───────────────────────────────────────────
# Strong model on a lakehouse mode with no force flag: the gate must rewrite
# the mode to its isolation variant and report the original mode in
# downgraded_from.
echo "[downgrade-smoke] strong model + no force → downgrade fires:"
RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast"}')"
M="$(echo "$RESP" | jq -r '.mode')"
D="$(echo "$RESP" | jq -r '.downgraded_from')"
if [ "$M" = "codereview_isolation" ] && [ "$D" = "codereview_lakehouse" ]; then
  # Report the value that was actually asserted (the full mode name).
  echo " ✓ codereview_lakehouse → codereview_isolation (downgraded_from=codereview_lakehouse)"
else
  echo " ✗ mode=$M downgraded_from=$D"; FAILED=1
fi
# Cases 2–5 expect NO downgrade. `.downgraded_from // ""` maps a missing or
# null field to the empty string, so the comparison against "" works (plain
# `jq -r` would print the literal text "null").
# ── 2. Forced mode bypasses ──────────────────────────────────────
echo "[downgrade-smoke] forced_mode=true bypasses:"
RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast","forced_mode":true}')"
M="$(echo "$RESP" | jq -r '.mode')"
D="$(echo "$RESP" | jq -r '.downgraded_from // ""')"
if [ "$M" = "codereview_lakehouse" ] && [ "$D" = "" ]; then
echo " ✓ caller-forced mode preserved, no downgrade"
else
echo " ✗ mode=$M downgraded_from=$D"; FAILED=1
fi
# ── 3. force_full_override bypasses ──────────────────────────────
echo "[downgrade-smoke] force_full_override=true bypasses:"
RESP="$(post '{"mode":"codereview_lakehouse","model":"x-ai/grok-4.1-fast","force_full_override":true}')"
M="$(echo "$RESP" | jq -r '.mode')"
D="$(echo "$RESP" | jq -r '.downgraded_from // ""')"
if [ "$M" = "codereview_lakehouse" ] && [ "$D" = "" ]; then
echo " ✓ env-override bypass, no downgrade"
else
echo " ✗ mode=$M downgraded_from=$D"; FAILED=1
fi
# ── 4. Weak model bypasses ───────────────────────────────────────
echo "[downgrade-smoke] weak model (qwen3.5:latest) bypasses:"
RESP="$(post '{"mode":"codereview_lakehouse","model":"qwen3.5:latest"}')"
M="$(echo "$RESP" | jq -r '.mode')"
D="$(echo "$RESP" | jq -r '.downgraded_from // ""')"
if [ "$M" = "codereview_lakehouse" ] && [ "$D" = "" ]; then
echo " ✓ weak model keeps lakehouse"
else
echo " ✗ mode=$M downgraded_from=$D"; FAILED=1
fi
# ── 5. Non-lakehouse mode → gate not applicable ──────────────────
# Besides the unchanged mode, this case also requires the response's .reason
# to contain the text "not applicable".
echo "[downgrade-smoke] non-lakehouse mode → gate not applicable:"
RESP="$(post '{"mode":"codereview_isolation","model":"x-ai/grok-4.1-fast"}')"
M="$(echo "$RESP" | jq -r '.mode')"
D="$(echo "$RESP" | jq -r '.downgraded_from // ""')"
R="$(echo "$RESP" | jq -r '.reason')"
if [ "$M" = "codereview_isolation" ] && [ "$D" = "" ] && echo "$R" | grep -q "not applicable"; then
echo " ✓ codereview_isolation passes through unchanged"
else
echo " ✗ mode=$M downgraded_from=$D reason='$R'"; FAILED=1
fi
# ── 6. Negative: empty mode → 400 ────────────────────────────────
# Only the HTTP status matters here: the body is discarded (-o /dev/null) and
# curl prints just the status code via -w '%{http_code}'.
echo "[downgrade-smoke] empty mode → 400:"
HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST "$URL" \
-H 'Content-Type: application/json' -d '{"mode":"","model":"x"}')"
if [ "$HTTP" = "400" ]; then
echo " ✓ empty mode → 400"
else
echo " ✗ got $HTTP"; FAILED=1
fi
# Final verdict: exit 0 only when every case above passed.
if [ "$FAILED" -eq 0 ]; then
echo "[downgrade-smoke] Downgrade gate acceptance: PASSED"
exit 0
else
echo "[downgrade-smoke] Downgrade gate acceptance: FAILED"
exit 1
fi

View File

@ -1,230 +0,0 @@
#!/usr/bin/env bash
# Matrix smoke — multi-corpus retrieve+merge via matrixd (SPEC §3.4).
# All assertions go through gateway :3110.
#
# Validates:
# - Multi-corpus search returns hits from BOTH corpora
# - Each result carries its corpus attribution (load-bearing — losing
# it defeats the matrix's purpose)
# - Merged top-k is ordered by distance across corpora
# - /matrix/corpora lists known indexes
# - Empty corpora list → 400
# - Bad corpus name → 502 (matrix bubbles vectord's 404 as upstream error)
#
# Uses query_vector (not query_text) to skip the embedd dependency so
# this smoke runs without Ollama. End-to-end embed→matrix→search has
# its own integration test (next commit).
#
# Usage: ./scripts/matrix_smoke.sh
# Abort on any failing command, unset variable, or failed pipeline stage.
set -euo pipefail
# Run from the repository root (the parent of this script's directory).
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"
echo "[matrix-smoke] building matrixd + vectord + gateway..."
go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway
# Kill daemons left over from an earlier run. pkill exits non-zero when nothing
# matched, hence `|| true` under `set -e`.
pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true
# NOTE(review): presumably gives the killed processes time to release their
# ports before the new stack binds them — confirm.
sleep 0.3
# PIDs of the daemons launched below; cleanup() kills each of them.
PIDS=()
# Scratch directory holding the generated config; removed by cleanup().
TMP="$(mktemp -d)"
CFG="$TMP/matrix.toml"
# cleanup — trap handler for EXIT/INT/TERM: stops every background service
# whose PID was appended to PIDS and removes the scratch directory in $TMP.
cleanup() {
  echo "[matrix-smoke] cleanup"
  local p
  # "${PIDS[@]:-}" instead of "${PIDS[@]}": under `set -u`, bash releases
  # before 4.4 treat an empty array expansion as an unbound variable, which
  # would abort this handler before the rm below. The fallback expands to a
  # single empty word, which the -n guard skips.
  for p in "${PIDS[@]:-}"; do
    # Best effort: the process may already have exited.
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT INT TERM
# Custom toml: vectord persistence disabled (don't pollute storaged
# state with the test corpora).
cat > "$CFG" <<EOF
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""
[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
# poll_health PORT — wait for http://127.0.0.1:PORT/health to answer.
# Retries about every 50 ms for up to ~5 s of wall-clock time.
# Returns 0 as soon as curl succeeds, 1 if the deadline passes first.
# curl runs without --fail, so any HTTP response counts as "up".
poll_health() {
  local port="$1"
  local url="http://127.0.0.1:$port/health"
  local deadline
  deadline=$(( $(date +%s) + 5 ))
  while (( $(date +%s) < deadline )); do
    curl -sS --max-time 1 "$url" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
echo "[matrix-smoke] launching vectord → matrixd → gateway..."
./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 &
PIDS+=($!)
poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 &
PIDS+=($!)
poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
PIDS+=($!)
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
FAILED=0
DIM=4
# Create two corpora — corpus_a and corpus_b — each with a few
# vectors at known distances from a chosen query vector.
echo "[matrix-smoke] create two corpora:"
# Track creation separately from the global FAILED flag so the success line
# is only printed when both creates actually returned 201.
CREATE_OK=1
for c in corpus_a corpus_b; do
  HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/vectors/index \
    -H 'Content-Type: application/json' \
    -d "{\"name\":\"$c\",\"dimension\":$DIM,\"distance\":\"euclidean\"}")"
  if [ "$HTTP" != "201" ]; then
    # Separate the corpus name from the status code so the log is readable.
    echo " ✗ create $c → HTTP $HTTP"
    FAILED=1
    CREATE_OK=0
  fi
done
if [ "$CREATE_OK" -eq 1 ]; then
  echo " ✓ corpus_a and corpus_b created"
fi
# Add vectors. Use euclidean distance for predictable arithmetic.
# Query vector will be [1,0,0,0]. Distances from it:
# corpus_a/a-near : [1.1, 0, 0, 0] ≈ 0.1
# corpus_a/a-mid : [1, 0.5, 0, 0] ≈ 0.5
# corpus_a/a-far : [3, 0, 0, 0] ≈ 2.0
# corpus_b/b-near : [1.05, 0, 0, 0] ≈ 0.05 (closest globally)
# corpus_b/b-mid : [1, 0.7, 0, 0] ≈ 0.7
# corpus_b/b-far : [4, 0, 0, 0] ≈ 3.0
echo "[matrix-smoke] add vectors to both corpora:"
# Capture the HTTP status of each add instead of discarding it, so a rejected
# batch is reported here rather than showing up later as a confusing search
# mismatch. The exact success code of the add route is not pinned by this
# script, so any 2xx is accepted.
ADD_OK=1
HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST "http://127.0.0.1:3110/v1/vectors/index/corpus_a/add" \
  -H 'Content-Type: application/json' \
  -d '{"items":[
{"id":"a-near","vector":[1.1,0,0,0],"metadata":{"label":"a near"}},
{"id":"a-mid","vector":[1,0.5,0,0],"metadata":{"label":"a mid"}},
{"id":"a-far","vector":[3,0,0,0],"metadata":{"label":"a far"}}
]}')"
case "$HTTP" in
  2??) ;;
  *) echo " ✗ add corpus_a → HTTP $HTTP"; FAILED=1; ADD_OK=0 ;;
esac
HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST "http://127.0.0.1:3110/v1/vectors/index/corpus_b/add" \
  -H 'Content-Type: application/json' \
  -d '{"items":[
{"id":"b-near","vector":[1.05,0,0,0],"metadata":{"label":"b near"}},
{"id":"b-mid","vector":[1,0.7,0,0],"metadata":{"label":"b mid"}},
{"id":"b-far","vector":[4,0,0,0],"metadata":{"label":"b far"}}
]}')"
case "$HTTP" in
  2??) ;;
  *) echo " ✗ add corpus_b → HTTP $HTTP"; FAILED=1; ADD_OK=0 ;;
esac
if [ "$ADD_OK" -eq 1 ]; then
  echo " ✓ 3 + 3 vectors loaded"
fi
# ── 1. /matrix/corpora lists both ─────────────────────────────────
echo "[matrix-smoke] /matrix/corpora lists both:"
RESP="$(curl -sS http://127.0.0.1:3110/v1/matrix/corpora)"
COUNT="$(echo "$RESP" | jq -r '.count')"
HAS_A="$(echo "$RESP" | jq -r '.corpora | index("corpus_a") != null')"
HAS_B="$(echo "$RESP" | jq -r '.corpora | index("corpus_b") != null')"
if [ "$COUNT" = "2" ] && [ "$HAS_A" = "true" ] && [ "$HAS_B" = "true" ]; then
echo " ✓ count=2, both corpora listed"
else
echo " ✗ resp: $RESP"; FAILED=1
fi
# ── 2. multi-corpus search returns hits from BOTH ─────────────────
echo "[matrix-smoke] /matrix/search multi-corpus retrieve+merge:"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-H 'Content-Type: application/json' \
-d '{"query_vector":[1,0,0,0],"corpora":["corpus_a","corpus_b"],"k":4,"per_corpus_k":3}')"
RESULTS_LEN="$(echo "$RESP" | jq -r '.results | length')"
A_COUNT="$(echo "$RESP" | jq -r '.per_corpus_counts.corpus_a')"
B_COUNT="$(echo "$RESP" | jq -r '.per_corpus_counts.corpus_b')"
HAS_A_RESULT="$(echo "$RESP" | jq -r '[.results[] | select(.corpus=="corpus_a")] | length > 0')"
HAS_B_RESULT="$(echo "$RESP" | jq -r '[.results[] | select(.corpus=="corpus_b")] | length > 0')"
if [ "$RESULTS_LEN" = "4" ] && [ "$A_COUNT" = "3" ] && [ "$B_COUNT" = "3" ] && [ "$HAS_A_RESULT" = "true" ] && [ "$HAS_B_RESULT" = "true" ]; then
echo " ✓ 4 merged results · 3+3 per-corpus · both corpora represented"
else
echo " ✗ len=$RESULTS_LEN per_corpus={a:$A_COUNT b:$B_COUNT} a_hit=$HAS_A_RESULT b_hit=$HAS_B_RESULT"
echo " full: $RESP"
FAILED=1
fi
# ── 3. distance-merged top-k correct across corpora ───────────────
echo "[matrix-smoke] top hit comes from corpus_b (b-near is globally closest):"
TOP_ID="$(echo "$RESP" | jq -r '.results[0].id')"
TOP_CORPUS="$(echo "$RESP" | jq -r '.results[0].corpus')"
if [ "$TOP_ID" = "b-near" ] && [ "$TOP_CORPUS" = "corpus_b" ]; then
echo " ✓ top hit: id=b-near corpus=corpus_b (closer than corpus_a's a-near)"
else
echo " ✗ top: id=$TOP_ID corpus=$TOP_CORPUS (expected b-near/corpus_b)"
FAILED=1
fi
# ── 4. corpus attribution preserved in metadata ───────────────────
echo "[matrix-smoke] metadata preserved on merged results:"
TOP_LABEL="$(echo "$RESP" | jq -r '.results[0].metadata.label')"
if [ "$TOP_LABEL" = "b near" ]; then
echo " ✓ metadata.label round-trips through matrix"
else
echo " ✗ label=$TOP_LABEL"; FAILED=1
fi
# ── 5. distances ascending in result list ─────────────────────────
echo "[matrix-smoke] results sorted by distance ascending:"
ASCENDING="$(echo "$RESP" | jq -r '[.results[].distance] | . == (sort)')"
if [ "$ASCENDING" = "true" ]; then
echo " ✓ distances ascending"
else
echo " ✗ distances not sorted: $(echo "$RESP" | jq -c '[.results[].distance]')"
FAILED=1
fi
# ── 6. negative paths ─────────────────────────────────────────────
echo "[matrix-smoke] empty corpora → 400:"
HTTP_400="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/search \
-H 'Content-Type: application/json' \
-d '{"query_vector":[1,0,0,0],"corpora":[],"k":4}')"
echo "[matrix-smoke] missing corpus name → 502:"
HTTP_502="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/search \
-H 'Content-Type: application/json' \
-d '{"query_vector":[1,0,0,0],"corpora":["does_not_exist"],"k":4}')"
echo "[matrix-smoke] no query (empty text and vector) → 400:"
HTTP_400b="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/search \
-H 'Content-Type: application/json' \
-d '{"corpora":["corpus_a"],"k":4}')"
if [ "$HTTP_400" = "400" ] && [ "$HTTP_502" = "502" ] && [ "$HTTP_400b" = "400" ]; then
echo " ✓ empty=400, missing-corpus=502, no-query=400"
else
echo " ✗ empty=$HTTP_400 missing=$HTTP_502 noquery=$HTTP_400b"
FAILED=1
fi
# ── 7. metadata filter (component B — staffing-side structured gate)
echo "[matrix-smoke] metadata_filter drops non-matching results:"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-H 'Content-Type: application/json' \
-d '{"query_vector":[1,0,0,0],"corpora":["corpus_a","corpus_b"],"k":4,"per_corpus_k":3,
"metadata_filter":{"label":["a near","b near"]}}')"
RESULTS_LEN="$(echo "$RESP" | jq -r '.results | length')"
DROPPED="$(echo "$RESP" | jq -r '.metadata_filter_dropped')"
KEPT_LABELS="$(echo "$RESP" | jq -r '[.results[].metadata.label] | sort | join(",")')"
if [ "$RESULTS_LEN" = "2" ] && [ "$DROPPED" = "4" ] && [ "$KEPT_LABELS" = "a near,b near" ]; then
echo " ✓ filter kept 2 ('a near' + 'b near'), dropped 4 mid/far entries"
else
echo " ✗ len=$RESULTS_LEN dropped=$DROPPED labels=$KEPT_LABELS"
echo " full: $RESP"
FAILED=1
fi
if [ "$FAILED" -eq 0 ]; then
echo "[matrix-smoke] Matrix acceptance gate: PASSED"
exit 0
else
echo "[matrix-smoke] Matrix acceptance gate: FAILED"
exit 1
fi

View File

@ -1,132 +0,0 @@
#!/usr/bin/env bash
# Multi-corpus reality test — first deep-field test with TWO real
# staffing corpora composed via /v1/matrix/search.
#
# Pipeline:
# - Bring up the Go stack (storaged, embedd, vectord, matrixd, gateway)
# - Ingest workers (5000 rows from workers_500k.parquet)
# - Ingest candidates (1000 rows from candidates.parquet)
# - Run a real query through /v1/matrix/search with both corpora
# - Print the merged top-k with corpus attribution
#
# Headline assertion: results include hits from BOTH corpora (the
# whole point of multi-corpus matrix retrieval).
#
# Requires: Ollama on :11434 with nomic-embed-text loaded. Skips
# (exit 0) when Ollama is absent.
#
# Usage: ./scripts/multi_corpus_e2e.sh
# ./scripts/multi_corpus_e2e.sh "your custom query"
set -euo pipefail
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"
QUERY="${1:-Forklift operator with OSHA-30 certification, warehouse experience}"
WORKERS_LIMIT="${WORKERS_LIMIT:-5000}"
if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
echo "[multi-corpus-e2e] Ollama not reachable on :11434 — skipping"
exit 0
fi
echo "[multi-corpus-e2e] building binaries..."
go build -o bin/ ./cmd/storaged ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway \
./scripts/staffing_workers ./scripts/staffing_candidates
pkill -f "bin/(storaged|embedd|vectord|matrixd|gateway)" 2>/dev/null || true
sleep 0.3
PIDS=()
TMP="$(mktemp -d)"
CFG="$TMP/e2e.toml"
# cleanup — trap handler for EXIT/INT/TERM: stops every background service
# whose PID was appended to PIDS and removes the scratch directory in $TMP.
cleanup() {
  echo "[multi-corpus-e2e] cleanup"
  local p
  # "${PIDS[@]:-}" instead of "${PIDS[@]}": under `set -u`, bash releases
  # before 4.4 treat an empty array expansion as an unbound variable, which
  # would abort this handler before the rm below. The fallback expands to a
  # single empty word, which the -n guard skips.
  for p in "${PIDS[@]:-}"; do
    # Best effort: the process may already have exited.
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT INT TERM
# Ephemeral mode (vectord storaged_url=""); same rationale as
# candidates_e2e — don't pollute MinIO _vectors/ between runs.
cat > "$CFG" <<EOF
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""
[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
# poll_health PORT — wait for http://127.0.0.1:PORT/health to answer.
# Retries about every 50 ms for up to ~5 s of wall-clock time.
# Returns 0 as soon as curl succeeds, 1 if the deadline passes first.
# curl runs without --fail, so any HTTP response counts as "up".
poll_health() {
  local port="$1"
  local url="http://127.0.0.1:$port/health"
  local deadline
  deadline=$(( $(date +%s) + 5 ))
  while (( $(date +%s) < deadline )); do
    curl -sS --max-time 1 "$url" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
echo "[multi-corpus-e2e] launching stack..."
./bin/storaged -config "$CFG" > /tmp/storaged.log 2>&1 & PIDS+=($!)
poll_health 3211 || { echo "storaged failed"; exit 1; }
./bin/embedd -config "$CFG" > /tmp/embedd.log 2>&1 & PIDS+=($!)
poll_health 3216 || { echo "embedd failed"; exit 1; }
./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!)
poll_health 3215 || { echo "vectord failed"; exit 1; }
./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!)
poll_health 3218 || { echo "matrixd failed"; exit 1; }
./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!)
poll_health 3110 || { echo "gateway failed"; exit 1; }
echo
echo "[multi-corpus-e2e] ingest workers (limit=$WORKERS_LIMIT)..."
./bin/staffing_workers -limit "$WORKERS_LIMIT"
echo
echo "[multi-corpus-e2e] ingest candidates..."
./bin/staffing_candidates -skip-populate=false -query "$QUERY" 2>&1 | grep -v "^\[candidates\]\(matrix\|reality\)" || true
echo
echo "[multi-corpus-e2e] /matrix/corpora — confirm both registered:"
curl -sS http://127.0.0.1:3110/v1/matrix/corpora | jq -c
echo
echo "[multi-corpus-e2e] multi-corpus query: $QUERY"
# $QUERY may come straight from the command line ($1). Build the request body
# with jq so quotes, backslashes, or newlines in the query are JSON-escaped
# instead of being spliced raw into a hand-written JSON string, which would
# produce an invalid (or altered) request.
REQ_BODY="$(jq -n --arg q "$QUERY" \
  '{query_text:$q, corpora:["workers","candidates"], k:8, per_corpus_k:6}')"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
  -H 'Content-Type: application/json' \
  -d "$REQ_BODY")"
# Sanity / headline assertions
WORKER_HITS="$(echo "$RESP" | jq -r '[.results[] | select(.corpus=="workers")] | length')"
CAND_HITS="$(echo "$RESP" | jq -r '[.results[] | select(.corpus=="candidates")] | length')"
TOTAL="$(echo "$RESP" | jq -r '.results | length')"
echo
echo "[multi-corpus-e2e] merged top-$TOTAL: workers=$WORKER_HITS candidates=$CAND_HITS"
echo "$RESP" | jq -r '.results[] | " \(.corpus | .[0:1]) d=\(.distance | tostring | .[0:6]) \(.id) \(.metadata.role // .metadata.skills // "n/a")"'
if [ "$WORKER_HITS" -gt 0 ] && [ "$CAND_HITS" -gt 0 ]; then
echo
echo "[multi-corpus-e2e] PASS: both corpora represented in merged top-$TOTAL"
exit 0
else
echo
echo "[multi-corpus-e2e] FAIL: corpus mix was workers=$WORKER_HITS candidates=$CAND_HITS"
exit 1
fi

View File

@ -1,142 +0,0 @@
#!/usr/bin/env bash
# Observer smoke — autonomous-iteration witness service end-to-end.
# All assertions go through gateway :3110.
#
# Validates:
# - POST /observer/event records an op (success path + scenario source)
# - GET /observer/stats aggregates by source + counts successes/failures
# - Stats.recent_scenario_ops surfaces scenario digests
# - Validation: empty endpoint → 400
# - Persistence: kill+restart observerd preserves ops via JSONL replay
set -euo pipefail
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"
echo "[observer-smoke] building observerd + gateway..."
go build -o bin/ ./cmd/observerd ./cmd/gateway
pkill -f "bin/(observerd|gateway)" 2>/dev/null || true
sleep 0.3
PIDS=()
TMP="$(mktemp -d)"
PERSIST="$TMP/ops.jsonl"
CFG="$TMP/observer.toml"
# cleanup — trap handler for EXIT/INT/TERM: stops every background service
# whose PID was appended to PIDS and removes the scratch directory in $TMP
# (which also holds the observerd persistence file).
cleanup() {
  echo "[observer-smoke] cleanup"
  local p
  # "${PIDS[@]:-}" instead of "${PIDS[@]}": under `set -u`, bash releases
  # before 4.4 treat an empty array expansion as an unbound variable, which
  # would abort this handler before the rm below. The fallback expands to a
  # single empty word, which the -n guard skips.
  for p in "${PIDS[@]:-}"; do
    # Best effort: the process may already have exited (e.g. the observerd
    # instance that the restart step killed on purpose).
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT INT TERM
cat > "$CFG" <<EOF
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
observerd_url = "http://127.0.0.1:3219"
[observerd]
bind = "127.0.0.1:3219"
persist_path = "$PERSIST"
EOF
# poll_health PORT — wait for http://127.0.0.1:PORT/health to answer.
# Retries about every 50 ms for up to ~5 s of wall-clock time.
# Returns 0 as soon as curl succeeds, 1 if the deadline passes first.
# curl runs without --fail, so any HTTP response counts as "up".
poll_health() {
  local port="$1"
  local url="http://127.0.0.1:$port/health"
  local deadline
  deadline=$(( $(date +%s) + 5 ))
  while (( $(date +%s) < deadline )); do
    curl -sS --max-time 1 "$url" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
# launch_observerd — start observerd in the background with the temp config,
# sending its output to /tmp/observerd.log. Records the PID in OBSERVERD_PID
# (used by the restart step) and in PIDS (used by cleanup), then waits for
# the health endpoint on :3219. Returns 1, after printing the log tail, if
# the service does not come up in time.
launch_observerd() {
  ./bin/observerd -config "$CFG" > /tmp/observerd.log 2>&1 &
  OBSERVERD_PID=$!
  PIDS+=("$OBSERVERD_PID")
  if ! poll_health 3219; then
    echo "observerd failed"
    tail /tmp/observerd.log
    return 1
  fi
}
echo "[observer-smoke] launching observerd → gateway..."
launch_observerd
./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
PIDS+=($!)
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
FAILED=0
# ── 1. Record 5 ops: 3 success + 2 fail across 2 sources ─────────
echo "[observer-smoke] record 5 ops:"
for i in 1 2 3; do
curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/observer/event \
-H 'Content-Type: application/json' \
-d "{\"endpoint\":\"/v1/test\",\"input_summary\":\"ok-$i\",\"success\":true,\"duration_ms\":10,\"output_summary\":\"ok\",\"source\":\"mcp\"}"
done
for i in 1 2; do
curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/observer/event \
-H 'Content-Type: application/json' \
-d "{\"endpoint\":\"/v1/test\",\"input_summary\":\"fail-$i\",\"success\":false,\"duration_ms\":10,\"output_summary\":\"err\",\"error\":\"boom\",\"source\":\"scenario\",\"staffer_id\":\"st-$i\",\"event_kind\":\"fill\",\"role\":\"Forklift\"}"
done
echo " ✓ 5 events posted"
# ── 2. Stats aggregation ─────────────────────────────────────────
echo "[observer-smoke] /observer/stats aggregates correctly:"
STATS="$(curl -sS http://127.0.0.1:3110/v1/observer/stats)"
TOT="$(echo "$STATS" | jq -r '.total')"
OK="$(echo "$STATS" | jq -r '.successes')"
ERR="$(echo "$STATS" | jq -r '.failures')"
MCP="$(echo "$STATS" | jq -r '.by_source.mcp')"
SCEN="$(echo "$STATS" | jq -r '.by_source.scenario')"
RECENT_LEN="$(echo "$STATS" | jq -r '.recent_scenario_ops | length')"
if [ "$TOT" = "5" ] && [ "$OK" = "3" ] && [ "$ERR" = "2" ] && [ "$MCP" = "3" ] && [ "$SCEN" = "2" ] && [ "$RECENT_LEN" = "2" ]; then
echo " ✓ total=5 (3 ok + 2 fail) · by_source: mcp=3 scenario=2 · 2 scenario digests"
else
echo " ✗ total=$TOT ok=$OK err=$ERR mcp=$MCP scen=$SCEN recent=$RECENT_LEN"
echo " full: $STATS"
FAILED=1
fi
# ── 3. Validation: empty endpoint → 400 ──────────────────────────
echo "[observer-smoke] empty endpoint → 400:"
HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/observer/event \
-H 'Content-Type: application/json' \
-d '{"endpoint":"","input_summary":"x","success":true,"duration_ms":1,"output_summary":"x"}')"
if [ "$HTTP" = "400" ]; then
echo " ✓ empty endpoint rejected"
else
echo " ✗ got $HTTP"; FAILED=1
fi
# ── 4. Persistence: kill + restart preserves ops ─────────────────
echo "[observer-smoke] kill + restart observerd → ops survive:"
kill $OBSERVERD_PID 2>/dev/null || true
wait $OBSERVERD_PID 2>/dev/null || true
sleep 0.3
launch_observerd
sleep 0.2
STATS2="$(curl -sS http://127.0.0.1:3110/v1/observer/stats)"
TOT2="$(echo "$STATS2" | jq -r '.total')"
OK2="$(echo "$STATS2" | jq -r '.successes')"
ERR2="$(echo "$STATS2" | jq -r '.failures')"
if [ "$TOT2" = "5" ] && [ "$OK2" = "3" ] && [ "$ERR2" = "2" ]; then
echo " ✓ total=5 ok=3 err=2 preserved through restart"
else
echo " ✗ post-restart total=$TOT2 ok=$OK2 err=$ERR2"; FAILED=1
fi
if [ "$FAILED" -eq 0 ]; then
echo "[observer-smoke] Observer acceptance gate: PASSED"
exit 0
else
echo "[observer-smoke] Observer acceptance gate: FAILED"
exit 1
fi

View File

@ -1,248 +0,0 @@
#!/usr/bin/env bash
# Pathway smoke — pathwayd Mem0-style versioned trace memory (ADR-004).
# All assertions go through gateway :3110.
#
# Validates:
# - All 9 HTTP routes (add, add_idempotent, update, revise, retire,
# get, history, search, stats)
# - Revise creates a predecessor link; History walks the chain
# backward (the audit-trail property pathway memory exists for)
# - Retire excludes from Search default; still accessible via Get
# - AddIdempotent on existing UID bumps replay_count, doesn't replace
# - Negative paths: 404 on unknown UIDs, 404 on missing predecessor,
# 400 on invalid content
# - Persistence: kill + restart pathwayd → all traces survive
#
# Usage: ./scripts/pathway_smoke.sh
set -euo pipefail
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"
echo "[pathway-smoke] building pathwayd + gateway..."
go build -o bin/ ./cmd/pathwayd ./cmd/gateway
pkill -f "bin/(pathwayd|gateway)" 2>/dev/null || true
sleep 0.3
PIDS=()
TMP="$(mktemp -d)"
PERSIST="$TMP/pathway.jsonl"
CFG="$TMP/pathwayd.toml"
# cleanup — trap handler for EXIT/INT/TERM: stops every background service
# whose PID was appended to PIDS and removes the scratch directory in $TMP
# (which also holds the pathwayd persistence file).
cleanup() {
  echo "[pathway-smoke] cleanup"
  local p
  # "${PIDS[@]:-}" instead of "${PIDS[@]}": under `set -u`, bash releases
  # before 4.4 treat an empty array expansion as an unbound variable, which
  # would abort this handler before the rm below. The fallback expands to a
  # single empty word, which the -n guard skips.
  for p in "${PIDS[@]:-}"; do
    # Best effort: the process may already have exited (e.g. the pathwayd
    # instance that the restart step killed on purpose).
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT INT TERM
# Custom toml — same defaults as lakehouse.toml but with persist_path
# pointing at the temp file so kill+restart actually rehydrates.
cat > "$CFG" <<EOF
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
[pathwayd]
bind = "127.0.0.1:3217"
persist_path = "$PERSIST"
EOF
# poll_health PORT — wait for http://127.0.0.1:PORT/health to answer.
# Retries about every 50 ms for up to ~5 s of wall-clock time.
# Returns 0 as soon as curl succeeds, 1 if the deadline passes first.
# curl runs without --fail, so any HTTP response counts as "up".
poll_health() {
  local port="$1"
  local url="http://127.0.0.1:$port/health"
  local deadline
  deadline=$(( $(date +%s) + 5 ))
  while (( $(date +%s) < deadline )); do
    curl -sS --max-time 1 "$url" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
# launch_pathwayd — start pathwayd in the background with the temp config,
# sending its output to /tmp/pathwayd.log. Records the PID in PATHWAYD_PID
# (used by the restart step) and in PIDS (used by cleanup), then waits for
# the health endpoint on :3217. Returns 1, after printing the log tail, if
# the service does not come up in time.
launch_pathwayd() {
  ./bin/pathwayd -config "$CFG" > /tmp/pathwayd.log 2>&1 &
  PATHWAYD_PID=$!
  PIDS+=("$PATHWAYD_PID")
  if ! poll_health 3217; then
    echo "pathwayd failed"
    tail /tmp/pathwayd.log
    return 1
  fi
}
# launch_gateway — start the gateway in the background with the temp config,
# sending its output to /tmp/gateway.log. Registers the PID in PIDS for
# cleanup, then waits for the health endpoint on :3110. Returns 1, after
# printing the log tail, if the gateway does not come up in time.
launch_gateway() {
  ./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
  PIDS+=($!)
  if ! poll_health 3110; then
    echo "gateway failed"
    tail /tmp/gateway.log
    return 1
  fi
}
echo "[pathway-smoke] launching pathwayd → gateway..."
launch_pathwayd
launch_gateway
FAILED=0
# ── 1. Add ────────────────────────────────────────────────────────
echo "[pathway-smoke] Add → fresh UID + replay_count=1:"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/add \
-H 'Content-Type: application/json' \
-d '{"content":{"approach":"forklift-OSHA-30","outcome":"hired"},"tags":["staffing","fill"]}')"
UID_A="$(echo "$RESP" | jq -r '.uid')"
RC_A="$(echo "$RESP" | jq -r '.replay_count')"
if [ -n "$UID_A" ] && [ "$UID_A" != "null" ] && [ "$RC_A" = "1" ]; then
echo " ✓ uid=$UID_A replay_count=1"
else
echo " ✗ resp: $RESP"; FAILED=1
fi
# ── 2. Get ────────────────────────────────────────────────────────
echo "[pathway-smoke] Get → returns same trace:"
RESP="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_A")"
APPROACH="$(echo "$RESP" | jq -r '.content.approach')"
if [ "$APPROACH" = "forklift-OSHA-30" ]; then
echo " ✓ content.approach round-trips"
else
echo " ✗ resp: $RESP"; FAILED=1
fi
# ── 3. AddIdempotent (replay) ─────────────────────────────────────
echo "[pathway-smoke] AddIdempotent same UID → replay_count++:"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/add_idempotent \
-H 'Content-Type: application/json' \
-d "{\"uid\":\"$UID_A\",\"content\":{\"approach\":\"forklift-OSHA-30\",\"outcome\":\"hired\"}}")"
RC_REPLAY="$(echo "$RESP" | jq -r '.replay_count')"
if [ "$RC_REPLAY" = "2" ]; then
echo " ✓ replay_count bumped to 2"
else
echo " ✗ replay_count=$RC_REPLAY"; FAILED=1
fi
# ── 4. Update ─────────────────────────────────────────────────────
echo "[pathway-smoke] Update → in-place content replace:"
HTTP="$(curl -sS -o "$TMP/upd.json" -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/update \
-H 'Content-Type: application/json' \
-d "{\"uid\":\"$UID_A\",\"content\":{\"approach\":\"forklift-OSHA-30\",\"outcome\":\"hired\",\"note\":\"cert verified\"}}")"
if [ "$HTTP" = "200" ]; then
NOTE="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_A" | jq -r '.content.note')"
if [ "$NOTE" = "cert verified" ]; then
echo " ✓ Update applied and persisted"
else
echo " ✗ note=$NOTE after update"; FAILED=1
fi
else
echo " ✗ Update HTTP=$HTTP"; FAILED=1
fi
# ── 5. Revise → predecessor link ──────────────────────────────────
echo "[pathway-smoke] Revise → new UID with predecessor link:"
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/revise \
-H 'Content-Type: application/json' \
-d "{\"predecessor_uid\":\"$UID_A\",\"content\":{\"approach\":\"forklift-OSHA-30+CDL\",\"outcome\":\"upgraded\"},\"tags\":[\"staffing\",\"revision\"]}")"
UID_B="$(echo "$RESP" | jq -r '.uid')"
PRED="$(echo "$RESP" | jq -r '.predecessor_uid')"
if [ "$UID_B" != "$UID_A" ] && [ "$PRED" = "$UID_A" ]; then
echo " ✓ revision uid=$UID_B predecessor=$UID_A"
else
echo " ✗ uid=$UID_B pred=$PRED"; FAILED=1
fi
# ── 6. History → 2-trace chain ────────────────────────────────────
echo "[pathway-smoke] History → walks chain backward:"
RESP="$(curl -sS "http://127.0.0.1:3110/v1/pathway/history/$UID_B")"
LEN="$(echo "$RESP" | jq -r '.length')"
HEAD="$(echo "$RESP" | jq -r '.chain[0].uid')"
TAIL="$(echo "$RESP" | jq -r '.chain[1].uid')"
if [ "$LEN" = "2" ] && [ "$HEAD" = "$UID_B" ] && [ "$TAIL" = "$UID_A" ]; then
echo " ✓ chain length=2, [0]=$UID_B [1]=$UID_A"
else
echo " ✗ len=$LEN head=$HEAD tail=$TAIL"; FAILED=1
fi
# ── 7. Search by tag ──────────────────────────────────────────────
echo "[pathway-smoke] Search tag=staffing → finds both traces:"
COUNT="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/search \
-H 'Content-Type: application/json' -d '{"tag":"staffing"}' | jq -r '.count')"
if [ "$COUNT" = "2" ]; then
echo " ✓ tag search count=2"
else
echo " ✗ count=$COUNT"; FAILED=1
fi
# ── 8. Retire → excluded from search default, still in Get ────────
echo "[pathway-smoke] Retire → excluded from Search but Get-able:"
HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/retire \
-H 'Content-Type: application/json' -d "{\"uid\":\"$UID_A\"}")"
if [ "$HTTP" != "204" ]; then echo " ✗ retire HTTP=$HTTP"; FAILED=1; fi
# Default search excludes retired → only revision (UID_B) remains
COUNT_DEFAULT="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/search \
-H 'Content-Type: application/json' -d '{"tag":"staffing"}' | jq -r '.count')"
# IncludeRetired=true brings UID_A back
COUNT_ALL="$(curl -sS -X POST http://127.0.0.1:3110/v1/pathway/search \
-H 'Content-Type: application/json' -d '{"tag":"staffing","include_retired":true}' | jq -r '.count')"
# Get on retired UID still returns the trace (audit trail intact)
RETIRED_FLAG="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_A" | jq -r '.retired')"
if [ "$COUNT_DEFAULT" = "1" ] && [ "$COUNT_ALL" = "2" ] && [ "$RETIRED_FLAG" = "true" ]; then
echo " ✓ retired excluded from default Search, included with flag, still Get-able"
else
echo " ✗ default=$COUNT_DEFAULT all=$COUNT_ALL retired=$RETIRED_FLAG"; FAILED=1
fi
# ── 9. Stats ──────────────────────────────────────────────────────
echo "[pathway-smoke] Stats → total/active/retired counters:"
STATS="$(curl -sS http://127.0.0.1:3110/v1/pathway/stats)"
T="$(echo "$STATS" | jq -r '.Total')"
A="$(echo "$STATS" | jq -r '.Active')"
R="$(echo "$STATS" | jq -r '.Retired')"
if [ "$T" = "2" ] && [ "$A" = "1" ] && [ "$R" = "1" ]; then
echo " ✓ total=2 active=1 retired=1"
else
echo " ✗ total=$T active=$A retired=$R"; FAILED=1
fi
# ── 10. Negative paths ────────────────────────────────────────────
echo "[pathway-smoke] Negative paths → 4xx semantics:"
GET_404="$(curl -sS -o /dev/null -w '%{http_code}' http://127.0.0.1:3110/v1/pathway/get/no-such-uid)"
UPD_404="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/update \
-H 'Content-Type: application/json' -d '{"uid":"no-such-uid","content":{}}')"
REV_404="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/revise \
-H 'Content-Type: application/json' -d '{"predecessor_uid":"no-such-uid","content":{}}')"
RET_404="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/retire \
-H 'Content-Type: application/json' -d '{"uid":"no-such-uid"}')"
ADD_400="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/pathway/add \
-H 'Content-Type: application/json' -d '{"content":not-json}')"
if [ "$GET_404" = "404" ] && [ "$UPD_404" = "404" ] && [ "$REV_404" = "404" ] && [ "$RET_404" = "404" ] && [ "$ADD_400" = "400" ]; then
echo " ✓ get/update/revise/retire on unknown → 404; bad content → 400"
else
echo " ✗ get=$GET_404 upd=$UPD_404 rev=$REV_404 ret=$RET_404 add=$ADD_400"; FAILED=1
fi
# ── 11. Persistence → kill + restart preserves all traces ─────────
echo "[pathway-smoke] kill + restart pathwayd → state survives:"
kill $PATHWAYD_PID 2>/dev/null || true
wait $PATHWAYD_PID 2>/dev/null || true
sleep 0.3
launch_pathwayd
sleep 0.2
# Both traces should reappear, retired flag preserved, replay_count preserved
RESP_A="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_A")"
RESP_B="$(curl -sS "http://127.0.0.1:3110/v1/pathway/get/$UID_B")"
RC_AFTER="$(echo "$RESP_A" | jq -r '.replay_count')"
RETIRED_AFTER="$(echo "$RESP_A" | jq -r '.retired')"
PRED_AFTER="$(echo "$RESP_B" | jq -r '.predecessor_uid')"
if [ "$RC_AFTER" = "2" ] && [ "$RETIRED_AFTER" = "true" ] && [ "$PRED_AFTER" = "$UID_A" ]; then
echo " ✓ replay_count, retired flag, predecessor link all preserved"
else
echo " ✗ replay_count=$RC_AFTER retired=$RETIRED_AFTER pred=$PRED_AFTER"; FAILED=1
fi
if [ "$FAILED" -eq 0 ]; then
echo "[pathway-smoke] Pathway acceptance gate: PASSED"
exit 0
else
echo "[pathway-smoke] Pathway acceptance gate: FAILED"
exit 1
fi

View File

@ -1,198 +0,0 @@
#!/usr/bin/env bash
# Playbook smoke — learning-loop integration end-to-end.
# All assertions go through gateway :3110.
#
# Validates the full boost cycle:
# 1. Build a test corpus with 3 items
# 2. Query → get baseline ranking
# 3. Record a playbook: query → bottom-ranked answer with score=1.0
# 4. Re-query with use_playbook=true
# 5. Assert: the recorded answer's distance ≈ 0.5 × baseline (boost
# math: distance' = distance × (1 - 0.5×score))
# 6. Assert: PlaybookBoosted >= 1 in the response
#
# Requires Ollama on :11434 with nomic-embed-text loaded — Record
# embeds the query_text. Skips (exit 0) when Ollama is absent.
set -euo pipefail
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"
if ! curl -sS --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
echo "[playbook-smoke] Ollama not reachable on :11434 — skipping"
exit 0
fi
echo "[playbook-smoke] building stack..."
go build -o bin/ ./cmd/embedd ./cmd/vectord ./cmd/matrixd ./cmd/gateway
pkill -f "bin/(embedd|vectord|matrixd|gateway)" 2>/dev/null || true
sleep 0.3
PIDS=()
TMP="$(mktemp -d)"
CFG="$TMP/playbook.toml"
# cleanup — trap handler for EXIT/INT/TERM: stops every background service
# whose PID was appended to PIDS and removes the scratch directory in $TMP.
cleanup() {
  echo "[playbook-smoke] cleanup"
  local p
  # "${PIDS[@]:-}" instead of "${PIDS[@]}": under `set -u`, bash releases
  # before 4.4 treat an empty array expansion as an unbound variable, which
  # would abort this handler before the rm below. The fallback expands to a
  # single empty word, which the -n guard skips.
  for p in "${PIDS[@]:-}"; do
    # Best effort: the process may already have exited.
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
trap cleanup EXIT INT TERM
cat > "$CFG" <<EOF
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""
[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
# poll_health PORT — wait for http://127.0.0.1:PORT/health to answer.
# Retries about every 50 ms for up to ~5 s of wall-clock time.
# Returns 0 as soon as curl succeeds, 1 if the deadline passes first.
# curl runs without --fail, so any HTTP response counts as "up".
poll_health() {
  local port="$1"
  local url="http://127.0.0.1:$port/health"
  local deadline
  deadline=$(( $(date +%s) + 5 ))
  while (( $(date +%s) < deadline )); do
    curl -sS --max-time 1 "$url" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
echo "[playbook-smoke] launching embedd → vectord → matrixd → gateway..."
./bin/embedd -config "$CFG" > /tmp/embedd.log 2>&1 & PIDS+=($!)
poll_health 3216 || { echo "embedd failed"; tail /tmp/embedd.log; exit 1; }
./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 & PIDS+=($!)
poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 & PIDS+=($!)
poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 & PIDS+=($!)
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
FAILED=0
# Embed three corpus items + the query, all via /v1/embed.
echo "[playbook-smoke] embedding 3 corpus items + query..."
EMBEDS="$(curl -sS -X POST http://127.0.0.1:3110/v1/embed \
-H 'Content-Type: application/json' \
-d '{"texts":["alpha staffing query test","bravo distinct content","charlie unrelated topic","alpha staffing query test full prompt"]}')"
V_A="$(echo "$EMBEDS" | jq -c '.vectors[0]')"
V_B="$(echo "$EMBEDS" | jq -c '.vectors[1]')"
V_C="$(echo "$EMBEDS" | jq -c '.vectors[2]')"
V_Q="$(echo "$EMBEDS" | jq -c '.vectors[3]')"
# Build corpus
echo "[playbook-smoke] create corpus widgets + add 3 items..."
curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index \
-H 'Content-Type: application/json' \
-d '{"name":"widgets","dimension":768,"distance":"cosine"}'
curl -sS -o /dev/null -X POST http://127.0.0.1:3110/v1/vectors/index/widgets/add \
-H 'Content-Type: application/json' \
-d "$(jq -n --argjson va "$V_A" --argjson vb "$V_B" --argjson vc "$V_C" \
'{items:[
{id:"widget-a", vector:$va, metadata:{label:"a"}},
{id:"widget-b", vector:$vb, metadata:{label:"b"}},
{id:"widget-c", vector:$vc, metadata:{label:"c"}}
]}')"
# Baseline matrix search (no playbook) — using query_vector to skip
# embedd round-trip and keep the test deterministic on the geometry
# we know.
echo "[playbook-smoke] baseline search (no playbook):"
BASELINE="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-H 'Content-Type: application/json' \
-d "$(jq -n --argjson v "$V_Q" '{query_vector:$v, corpora:["widgets"], k:3}')")"
BASE_ORDER="$(echo "$BASELINE" | jq -r '[.results[].id] | join(",")')"
BASE_C_DIST="$(echo "$BASELINE" | jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1')"
echo " baseline order: $BASE_ORDER widget-c distance=$BASE_C_DIST"
# Record a playbook entry for the query → widget-c (use the same
# query_text that the playbook will be re-queried by, exact match).
QUERY_TEXT="alpha staffing query test full prompt"
echo "[playbook-smoke] record playbook: ($QUERY_TEXT) → widget-c score=1.0"
RECORD_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/record \
-H 'Content-Type: application/json' \
-d "$(jq -n --arg q "$QUERY_TEXT" \
'{query_text:$q, answer_id:"widget-c", answer_corpus:"widgets", score:1.0, tags:["smoke"]}')")"
PB_ID="$(echo "$RECORD_RESP" | jq -r '.playbook_id // empty')"
if [ -z "$PB_ID" ]; then
echo " ✗ no playbook_id in response: $RECORD_RESP"; FAILED=1
else
echo " ✓ playbook_id=$PB_ID"
fi
# Re-search with use_playbook=true. Use query_text so matrixd embeds
# it again (proves end-to-end). The newly-recorded playbook entry has
# the SAME query_text → cosine distance ~0 → boost applies to widget-c.
echo "[playbook-smoke] boosted search (use_playbook=true):"
BOOSTED="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/search \
-H 'Content-Type: application/json' \
-d "$(jq -n --arg q "$QUERY_TEXT" \
'{query_text:$q, corpora:["widgets"], k:3, use_playbook:true, playbook_max_distance:0.5}')")"
BOOST_ORDER="$(echo "$BOOSTED" | jq -r '[.results[].id] | join(",")')"
BOOST_C_DIST="$(echo "$BOOSTED" | jq -r '[.results[] | select(.id=="widget-c")] | .[0].distance // -1')"
PB_BOOSTED="$(echo "$BOOSTED" | jq -r '.playbook_boosted // 0')"
echo " boosted order: $BOOST_ORDER widget-c distance=$BOOST_C_DIST playbook_boosted=$PB_BOOSTED"
# ── Assertion 1: PlaybookBoosted >= 1 ────────────────────────────
if [ "$PB_BOOSTED" -ge 1 ]; then
echo " ✓ playbook_boosted=$PB_BOOSTED ≥ 1"
else
echo " ✗ playbook_boosted=$PB_BOOSTED (expected ≥ 1)"; FAILED=1
fi
# ── Assertion 2: widget-c distance halved (score=1.0 → 0.5× factor)
# Allow some tolerance because the query and recorded query may not
# be byte-identical depending on Ollama's tokenization stability.
RATIO="$(awk -v b="$BASE_C_DIST" -v c="$BOOST_C_DIST" 'BEGIN{ if (b<=0) print -1; else print c/b }')"
echo " widget-c distance ratio (boosted/baseline) = $RATIO (expect ≈ 0.5)"
WITHIN="$(awk -v r="$RATIO" 'BEGIN{ print (r>=0.40 && r<=0.60) ? "true" : "false" }')"
if [ "$WITHIN" = "true" ]; then
echo " ✓ ratio in [0.40, 0.60] — boost applied correctly"
else
echo " ✗ ratio out of band: $RATIO"; FAILED=1
fi
# ── 4. /matrix/playbooks/bulk — component C (operational rating wiring)
echo "[playbook-smoke] bulk record 3 entries:"
BULK_RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/playbooks/bulk \
-H 'Content-Type: application/json' \
-d "$(jq -n '{
entries: [
{query_text: "alpha test query", answer_id: "widget-a", answer_corpus: "widgets", score: 0.9},
{query_text: "bravo test query", answer_id: "widget-b", answer_corpus: "widgets", score: 0.8},
{query_text: "", answer_id: "x", answer_corpus: "widgets", score: 0.5}
]
}')")"
RECORDED="$(echo "$BULK_RESP" | jq -r '.recorded')"
FAIL="$(echo "$BULK_RESP" | jq -r '.failed')"
GOT_PB_A="$(echo "$BULK_RESP" | jq -r '.results[0].playbook_id // empty')"
ERR_BAD="$(echo "$BULK_RESP" | jq -r '.results[2].error // empty')"
if [ "$RECORDED" = "2" ] && [ "$FAIL" = "1" ] && [ -n "$GOT_PB_A" ] && [ -n "$ERR_BAD" ]; then
echo " ✓ 2 recorded, 1 failed (empty query_text caught), per-entry IDs/errors returned"
else
echo " ✗ recorded=$RECORDED failed=$FAIL pb_a=$GOT_PB_A err=$ERR_BAD"
echo " full: $BULK_RESP"
FAILED=1
fi
if [ "$FAILED" -eq 0 ]; then
echo "[playbook-smoke] Playbook acceptance gate: PASSED"
exit 0
else
echo "[playbook-smoke] Playbook acceptance gate: FAILED"
exit 1
fi

View File

@ -1,156 +0,0 @@
#!/usr/bin/env bash
# Relevance smoke — code-relevance filter via matrixd /matrix/relevance.
# All assertions go through gateway :3110.
#
# Validates the headline adjacency-pollution scenario:
# Focus: crates/queryd/src/db.go which defines Connector.
# Chunk A is about Connector → kept (defined_match).
# Chunk B is about catalogd::Registry which db.go imports → outranked
# by Chunk A.
# Chunk C is unrelated → dropped (no signals fire).
#
# Plus negative paths:
# - Empty chunks → 400
# - Threshold honored when set explicitly
# Strict mode: abort on any failing command, unset variable, or failed
# pipeline stage.
set -euo pipefail
# Run from the directory above this script so ./cmd and bin/ resolve.
cd "$(dirname "$0")/.."
export PATH="$PATH:/usr/local/go/bin"
echo "[relevance-smoke] building matrixd + vectord + gateway..."
go build -o bin/ ./cmd/matrixd ./cmd/vectord ./cmd/gateway
# Kill any process whose command line matches one of the three binaries;
# "|| true" keeps strict mode from aborting when nothing matched.
pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || true
sleep 0.3
# PIDS collects the background services started below; TMP holds the
# generated config file. Both are consumed by cleanup().
PIDS=()
TMP="$(mktemp -d)"
CFG="$TMP/relevance.toml"
# cleanup stops every service recorded in PIDS and removes the scratch
# directory that holds the generated config.
cleanup() {
  echo "[relevance-smoke] cleanup"
  local p
  # ${PIDS[@]+...} expands to nothing while PIDS is still empty. A bare
  # "${PIDS[@]}" on an empty array is an "unbound variable" error under
  # `set -u` on bash older than 4.4.
  for p in ${PIDS[@]+"${PIDS[@]}"}; do
    [ -n "$p" ] && kill "$p" 2>/dev/null || true
  done
  rm -rf "$TMP"
}
# cleanup runs exactly once, from the EXIT trap. INT/TERM only turn the
# signal into an exit: a signal handler that returns lets bash resume the
# script after the interrupted command, and EXIT would then run cleanup
# a second time.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# Custom toml: vectord persistence disabled. /matrix/relevance doesn't
# touch vectord at all, but matrixd config requires the URL anyway.
cat > "$CFG" <<EOF
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""
[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
# poll_health PORT
# Polls http://127.0.0.1:PORT/health every 50ms for up to ~5 seconds.
# Returns 0 as soon as curl succeeds, 1 once the deadline has passed.
poll_health() {
  local target_port="$1"
  local give_up_at=$(( $(date +%s) + 5 ))
  local health_url="http://127.0.0.1:${target_port}/health"
  while (( $(date +%s) < give_up_at )); do
    curl -sS --max-time 1 "$health_url" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
# Start the three services in the background, one at a time. Each PID is
# recorded for cleanup(), and the script waits on that service's /health
# endpoint (ports match the bind addresses in the generated config)
# before starting the next. On a failed health wait, the tail of that
# service's log is printed and the script exits 1.
echo "[relevance-smoke] launching vectord → matrixd → gateway..."
./bin/vectord -config "$CFG" > /tmp/vectord.log 2>&1 &
PIDS+=($!)
poll_health 3215 || { echo "vectord failed"; tail /tmp/vectord.log; exit 1; }
./bin/matrixd -config "$CFG" > /tmp/matrixd.log 2>&1 &
PIDS+=($!)
poll_health 3218 || { echo "matrixd failed"; tail /tmp/matrixd.log; exit 1; }
./bin/gateway -config "$CFG" > /tmp/gateway.log 2>&1 &
PIDS+=($!)
poll_health 3110 || { echo "gateway failed"; tail /tmp/gateway.log; exit 1; }
# FAILED is set to 1 by any scenario below whose assertion does not hold.
FAILED=0
# ── 1. Adjacency-pollution scenario ──────────────────────────────
# Posts one focus file plus three chunks (Connector, Registry, and an
# unrelated "Thing") with an explicit threshold of 0.3, then checks the
# kept/dropped buckets and relative relevance in the response.
echo "[relevance-smoke] adjacency-pollution: Connector outranks Registry, junk dropped:"
PAYLOAD='{
"focus": {
"Path": "crates/queryd/src/db.go",
"Content": "pub struct Connector {}\npub fn open_connector() *Connector { return nil }\nuse catalogd::Registry;"
},
"chunks": [
{"source":"lakehouse_symbols_v1","doc_id":"symbol:queryd::struct::Connector","text":"Connector wraps the DuckDB handle. open_connector creates one.","score":0.9},
{"source":"lakehouse_symbols_v1","doc_id":"symbol:catalogd::struct::Registry","text":"Registry stores manifests. Used by ingestd.","score":0.85},
{"source":"lakehouse_symbols_v1","doc_id":"symbol:totally_other::Thing","text":"completely unrelated text about something else entirely","score":0.7}
],
"threshold": 0.3
}'
RESP="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/relevance -H 'Content-Type: application/json' -d "$PAYLOAD")"
# Connector chunk should be in kept
CONNECTOR_KEPT="$(echo "$RESP" | jq -r '[.kept[] | select(.doc_id | contains("Connector"))] | length')"
# The unrelated junk chunk should be in dropped
JUNK_DROPPED="$(echo "$RESP" | jq -r '[.dropped[] | select(.doc_id | contains("Thing"))] | length')"
# Connector should outrank Registry (whichever bucket they end up in)
# -999 is the fallback when a chunk is missing or has no .relevance.
CONN_REL="$(echo "$RESP" | jq -r '[.kept[], .dropped[] | select(.doc_id | contains("Connector"))] | .[0].relevance // -999')"
REG_REL="$(echo "$RESP" | jq -r '[.kept[], .dropped[] | select(.doc_id | contains("Registry"))] | .[0].relevance // -999')"
TOTAL_IN="$(echo "$RESP" | jq -r '.total_in')"
# awk does the floating-point comparison that [ ] cannot.
CONN_OUTRANKS_REG="$(awk -v a="$CONN_REL" -v b="$REG_REL" 'BEGIN{print (a>b)?"true":"false"}')"
if [ "$CONNECTOR_KEPT" = "1" ] && [ "$JUNK_DROPPED" = "1" ] && [ "$CONN_OUTRANKS_REG" = "true" ] && [ "$TOTAL_IN" = "3" ]; then
echo " ✓ Connector kept, junk dropped, Connector ($CONN_REL) > Registry ($REG_REL)"
else
echo " ✗ kept_connector=$CONNECTOR_KEPT dropped_junk=$JUNK_DROPPED conn=$CONN_REL reg=$REG_REL total=$TOTAL_IN"
echo " full: $RESP"
FAILED=1
fi
# ── 2. Empty chunks → 400 ────────────────────────────────────────
# Negative path: a request with an empty "chunks" array. Only the HTTP
# status code is captured (-w '%{http_code}'); the body is discarded.
echo "[relevance-smoke] empty chunks → 400:"
HTTP="$(curl -sS -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/relevance \
-H 'Content-Type: application/json' \
-d '{"focus":{"Path":"x"},"chunks":[]}')"
if [ "$HTTP" = "400" ]; then
echo " ✓ 400 on empty chunks"
else
echo " ✗ got $HTTP"; FAILED=1
fi
# ── 3. Threshold honored ─────────────────────────────────────────
# Sends a single chunk that mentions the focus file's defined symbol,
# with threshold 10, and expects it in "dropped" rather than "kept".
echo "[relevance-smoke] threshold=10 (impossibly high) drops everything:"
PAYLOAD2='{
"focus": {"Path": "x.go", "Content": "pub fn known() {}", "DefinedSymbols": ["known"]},
"chunks": [
{"source":"s","doc_id":"d1","text":"known appears here","score":0.9}
],
"threshold": 10
}'
RESP2="$(curl -sS -X POST http://127.0.0.1:3110/v1/matrix/relevance -H 'Content-Type: application/json' -d "$PAYLOAD2")"
KEPT_COUNT="$(echo "$RESP2" | jq -r '.kept | length')"
DROP_COUNT="$(echo "$RESP2" | jq -r '.dropped | length')"
if [ "$KEPT_COUNT" = "0" ] && [ "$DROP_COUNT" = "1" ]; then
echo " ✓ threshold=10 drops everything (0 kept / 1 dropped)"
else
echo " ✗ kept=$KEPT_COUNT dropped=$DROP_COUNT"; FAILED=1
fi
# Final verdict: exit 0 only if no scenario above set FAILED=1.
if [ "$FAILED" -eq 0 ]; then
echo "[relevance-smoke] Relevance acceptance gate: PASSED"
exit 0
else
echo "[relevance-smoke] Relevance acceptance gate: FAILED"
exit 1
fi

View File

@ -1,14 +1,13 @@
// Staffing co-pilot scale test driver — workers_500k corpus.
// Staffing co-pilot scale test driver.
//
// Pipeline: workers_500k.csv → /v1/embed → /v1/vectors/index/workers_500k/add.
// The pipeline itself lives in internal/corpusingest; this driver
// provides the CSV → Row mapping and the post-ingest semantic queries
// that are the human-readable check ("does forklift OSHA-30 actually
// retrieve forklift workers?").
// Pipeline: workers_500k.csv → /v1/embed (batched, parallel) →
// /v1/vectors/index/workers_500k/add (batched). Then runs a handful
// of semantic queries against the populated index and prints the
// top hits — the human-readable check that "find workers like X"
// actually returns relevant workers.
//
// Designed to be re-run safely; index gets DELETEd at the start
// when -drop is set so leftover state doesn't bias recall.
// Designed to be re-run; index gets DELETEd at the start so leftover
// state from prior runs doesn't bias recall.
package main
import (
@ -16,138 +15,69 @@ import (
"context"
"encoding/csv"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"strings"
"sync"
"sync/atomic"
"time"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/corpusingest"
)
const (
indexName = "workers_500k"
dim = 768
// Column indexes in workers_500k.csv. Stable contract; if the CSV
// schema changes these need updating.
colWorkerID = 0
colName = 1
colRole = 2
colCity = 5
colState = 6
colSkills = 8
colCerts = 9
colResume = 17
embedConcurrency = 8 // matches Ollama-on-A4000 sweet spot
embedBatchSize = 16 // texts per /v1/embed call
addBatchSize = 1000 // items per /v1/vectors/index/add call
maxColPhone = 4
maxColCity = 5
maxColState = 6
maxColRole = 2
maxColSkills = 8
maxColCerts = 9
maxColResume = 17
colWorkerID = 0
colName = 1
)
// workersCSV adapts a csv.Reader positioned after the header row to
// corpusingest.Source. It owns only the reader and the row → Row
// mapping; the embed/add pipeline is generic and lives elsewhere.
type workersCSV struct {
	cr *csv.Reader
}

// Next returns the next usable worker record as a corpusingest.Row.
// Records too short to contain the resume column are skipped silently.
// Any reader error is returned unchanged together with a zero Row.
func (s *workersCSV) Next() (corpusingest.Row, error) {
	rec, err := s.cr.Read()
	for err == nil && len(rec) <= colResume {
		// Malformed (short) row: drop it and pull the next one.
		rec, err = s.cr.Read()
	}
	if err != nil {
		return corpusingest.Row{}, err
	}

	meta := map[string]any{
		"name":  rec[colName],
		"role":  rec[colRole],
		"city":  rec[colCity],
		"state": rec[colState],
	}
	return corpusingest.Row{
		ID:       "w-" + strings.TrimSpace(rec[colWorkerID]),
		Text:     buildWorkerText(rec),
		Metadata: meta,
	}, nil
}
// buildWorkerText concatenates staffing-relevant columns into the
// embed-text. Order: role first (most semantically dense), then
// location, skills, certs, prose resume. Embedding models weight
// earlier tokens slightly more, so the front matter matters.
func buildWorkerText(row []string) string {
var b strings.Builder
b.WriteString(row[colRole])
b.WriteString(" in ")
b.WriteString(row[colCity])
b.WriteString(", ")
b.WriteString(row[colState])
b.WriteString(". Skills: ")
b.WriteString(row[colSkills])
b.WriteString(". Certifications: ")
b.WriteString(row[colCerts])
b.WriteString(". ")
b.WriteString(row[colResume])
return b.String()
}
func main() {
var (
gateway = flag.String("gateway", "http://127.0.0.1:3110", "gateway base URL")
csvPath = flag.String("csv", "/tmp/rs/workers_500k.csv", "path to workers CSV")
limit = flag.Int("limit", 0, "limit rows (0 = all)")
queries = flag.String("queries", "default", "default | <semicolon-separated query strings>")
skipPop = flag.Bool("skip-populate", false, "skip embed+add, only run queries")
drop = flag.Bool("drop", true, "DELETE index before populate (default true for clean recall)")
gateway = flag.String("gateway", "http://127.0.0.1:3110", "gateway base URL")
csvPath = flag.String("csv", "/tmp/rs/workers_500k.csv", "path to workers CSV")
limit = flag.Int("limit", 0, "limit rows (0 = all)")
queries = flag.String("queries", "default", "default | <semicolon-separated query strings>")
skipPop = flag.Bool("skip-populate", false, "skip embed+add, only run queries")
)
flag.Parse()
hc := &http.Client{Timeout: 5 * time.Minute}
ctx := context.Background()
if !*skipPop {
f, err := os.Open(*csvPath)
if err != nil {
log.Fatalf("open csv: %v", err)
}
defer f.Close()
cr := csv.NewReader(f)
cr.FieldsPerRecord = -1
if _, err := cr.Read(); err != nil { // skip header
log.Fatalf("read header: %v", err)
}
// Tear down any prior index so recall is on a fresh build.
fmt.Printf("[sc] DELETE %s/v1/vectors/index/%s (idempotent cleanup)\n", *gateway, indexName)
_ = httpDelete(hc, *gateway+"/v1/vectors/index/"+indexName)
stats, err := corpusingest.Run(ctx, corpusingest.Config{
GatewayURL: *gateway,
IndexName: indexName,
Dimension: dim,
Distance: "cosine",
EmbedBatch: 16, // matches Ollama-on-A4000 sweet spot
EmbedWorkers: 8, // matches Ollama-on-A4000 sweet spot
AddBatch: 1000, // empirically fine; vectord BatchAdd lock-amortized at f1c1883
Limit: *limit,
DropExisting: *drop,
HTTPClient: hc,
LogProgress: 10 * time.Second,
}, &workersCSV{cr: cr})
if err != nil {
// ErrPartialFailure means SOME batches failed but we still
// have a corpus to query. Report and continue rather than
// nuking the run for transient Ollama hiccups.
if errors.Is(err, corpusingest.ErrPartialFailure) {
fmt.Printf("[sc] WARN partial failure: %v\n", err)
} else {
log.Fatalf("ingest: %v", err)
}
// Create the index.
body := map[string]any{"name": indexName, "dimension": dim, "distance": "cosine"}
if code, msg := httpPostJSON(hc, *gateway+"/v1/vectors/index", body); code != 201 {
log.Fatalf("create index: %d %s", code, msg)
}
fmt.Printf("[sc] populate done: scanned=%d embedded=%d added=%d failed=%d wall=%v\n",
stats.Scanned, stats.Embedded, stats.Added, stats.FailedBatches,
stats.Wall.Round(time.Millisecond))
fmt.Println("[sc] created index workers_500k dim=768 cosine")
t0 := time.Now()
if err := populate(hc, *gateway, *csvPath, *limit); err != nil {
log.Fatal(err)
}
fmt.Printf("[sc] populate complete in %v\n", time.Since(t0))
}
// Validate semantic queries against the populated index.
// Validate semantic queries.
qs := defaultQueries()
if *queries != "default" {
qs = strings.Split(*queries, ";")
@ -167,35 +97,196 @@ func defaultQueries() []string {
}
}
// runQuery embeds a query, searches the index, prints top hits.
// Stays in this driver (not corpusingest) — query validation is
// per-corpus concern, not part of the ingest pipeline.
func runQuery(hc *http.Client, gateway, q string) {
t0 := time.Now()
body, _ := json.Marshal(map[string]any{"texts": []string{q}})
req, _ := http.NewRequest(http.MethodPost, gateway+"/v1/embed", bytes.NewReader(body))
// populate streams the workers CSV at csvPath through the gateway. Rows
// are grouped into batches of embedBatchSize; each batch is embedded via
// embedBatch and then stored via addBatch by one of embedConcurrency
// worker goroutines.
//
// limit caps the number of accepted rows (0 means no cap). Rows too
// short to contain the resume column are skipped and do not count
// toward limit.
//
// A batch that fails to embed or add is logged and dropped; it does not
// fail the run. The returned error is non-nil only when the CSV cannot
// be opened or read. On every path populate returns only after the
// workers and the progress reporter it started have exited.
func populate(hc *http.Client, gateway, csvPath string, limit int) error {
	f, err := os.Open(csvPath)
	if err != nil {
		return fmt.Errorf("open csv: %w", err)
	}
	defer f.Close()

	cr := csv.NewReader(f)
	cr.FieldsPerRecord = -1              // accept ragged rows; short ones are skipped below
	if _, err := cr.Read(); err != nil { // header
		return fmt.Errorf("read header: %w", err)
	}

	type job struct {
		ids   []string
		texts []string
		metas []json.RawMessage
	}
	jobs := make(chan job, embedConcurrency*2)

	var (
		wg            sync.WaitGroup
		totalEmbedded int64
		totalAdded    int64
	)
	for i := 0; i < embedConcurrency; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := range jobs {
				vecs, err := embedBatch(hc, gateway, j.texts)
				if err != nil {
					log.Printf("embed batch (%d items): %v", len(j.texts), err)
					continue
				}
				// A short or long reply cannot be paired with ids; treat
				// it as a failed batch instead of indexing past the end.
				if len(vecs) != len(j.ids) {
					log.Printf("embed batch (%d items): got %d vectors", len(j.ids), len(vecs))
					continue
				}
				atomic.AddInt64(&totalEmbedded, int64(len(vecs)))
				if err := addBatch(hc, gateway, j.ids, vecs, j.metas); err != nil {
					log.Printf("add batch (%d items): %v", len(j.ids), err)
					continue
				}
				atomic.AddInt64(&totalAdded, int64(len(j.ids)))
			}
		}()
	}

	// Progress reporter. Ticker.Stop does not close the ticker channel,
	// so the goroutine needs its own stop signal to terminate.
	stopProgress := make(chan struct{})
	progressStopped := make(chan struct{})
	go func() {
		defer close(progressStopped)
		ticker := time.NewTicker(10 * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-stopProgress:
				return
			case <-ticker.C:
				fmt.Printf("[sc] progress: embedded=%d added=%d\n",
					atomic.LoadInt64(&totalEmbedded), atomic.LoadInt64(&totalAdded))
			}
		}
	}()
	defer func() {
		close(stopProgress)
		<-progressStopped
	}()

	rows := 0
	// scan reads the CSV and feeds batches to the workers. It is a
	// closure so the channel close and worker wait below run on the
	// error path as well as the success path.
	scan := func() error {
		curIDs := make([]string, 0, embedBatchSize)
		curTexts := make([]string, 0, embedBatchSize)
		curMetas := make([]json.RawMessage, 0, embedBatchSize)
		for {
			row, err := cr.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				return fmt.Errorf("csv read row %d: %w", rows, err)
			}
			if len(row) <= maxColResume {
				continue
			}
			id := strings.TrimSpace(row[colWorkerID])
			// Marshal cannot fail here: the map holds only strings.
			meta, _ := json.Marshal(map[string]any{
				"name":  row[colName],
				"role":  row[maxColRole],
				"city":  row[maxColCity],
				"state": row[maxColState],
			})
			curIDs = append(curIDs, "w-"+id)
			curTexts = append(curTexts, buildSearchText(row))
			curMetas = append(curMetas, meta)
			if len(curIDs) >= embedBatchSize {
				jobs <- job{ids: curIDs, texts: curTexts, metas: curMetas}
				// Fresh slices: the submitted ones now belong to a worker.
				curIDs = make([]string, 0, embedBatchSize)
				curTexts = make([]string, 0, embedBatchSize)
				curMetas = make([]json.RawMessage, 0, embedBatchSize)
			}
			rows++
			if limit > 0 && rows >= limit {
				break
			}
		}
		if len(curIDs) > 0 {
			jobs <- job{ids: curIDs, texts: curTexts, metas: curMetas}
		}
		return nil
	}

	scanErr := scan()
	close(jobs) // lets every worker's range loop end
	wg.Wait()
	if scanErr != nil {
		return scanErr
	}

	fmt.Printf("[sc] final: scanned=%d embedded=%d added=%d\n",
		rows, atomic.LoadInt64(&totalEmbedded), atomic.LoadInt64(&totalAdded))
	return nil
}
// buildSearchText renders one CSV record as the text that gets embedded:
//
//	<role> in <city>, <state>. Skills: <skills>. Certifications: <certs>. <resume>
//
// Role comes first (most semantically dense), then location, skills,
// certifications, and finally the prose resume text. Embedding models
// weight earlier tokens slightly more. The caller must pass a record
// long enough to contain the resume column.
func buildSearchText(row []string) string {
	pieces := []string{
		row[maxColRole],
		" in ",
		row[maxColCity],
		", ",
		row[maxColState],
		". Skills: ",
		row[maxColSkills],
		". Certifications: ",
		row[maxColCerts],
		". ",
		row[maxColResume],
	}
	return strings.Join(pieces, "")
}
func embedBatch(hc *http.Client, gateway string, texts []string) ([][]float32, error) {
body := map[string]any{"texts": texts}
bs, _ := json.Marshal(body)
req, _ := http.NewRequest(http.MethodPost, gateway+"/v1/embed", bytes.NewReader(bs))
req.Header.Set("Content-Type", "application/json")
resp, err := hc.Do(req)
if err != nil {
fmt.Printf("[sc] query %q: embed err: %v\n", q, err)
return
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
preview, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
return nil, fmt.Errorf("embed status %d: %s", resp.StatusCode, string(preview))
}
var er struct {
Vectors [][]float32 `json:"vectors"`
}
if err := json.NewDecoder(resp.Body).Decode(&er); err != nil || len(er.Vectors) == 0 {
fmt.Printf("[sc] query %q: embed decode err: %v\n", q, err)
if err := json.NewDecoder(resp.Body).Decode(&er); err != nil {
return nil, err
}
return er.Vectors, nil
}
// addItem is one element of the "items" array posted to the index add
// endpoint.
type addItem struct {
	ID       string          `json:"id"`
	Vector   []float32       `json:"vector"`
	Metadata json.RawMessage `json:"metadata"`
}

// addBatch posts ids, vecs and metas (parallel slices, one entry per
// item) to gateway's /v1/vectors/index/<indexName>/add endpoint.
//
// It returns an error when the slices differ in length, when the request
// cannot be encoded or built, when the HTTP call fails, or when the
// response status is not 200. For a non-200 status the error carries up
// to 256 bytes of the response body.
func addBatch(hc *http.Client, gateway string, ids []string, vecs [][]float32, metas []json.RawMessage) error {
	// Pairing is positional; a length mismatch would otherwise index out
	// of range below and take down the whole process.
	if len(vecs) != len(ids) || len(metas) != len(ids) {
		return fmt.Errorf("add batch: mismatched lengths ids=%d vecs=%d metas=%d",
			len(ids), len(vecs), len(metas))
	}

	items := make([]addItem, len(ids))
	for i := range ids {
		items[i] = addItem{ID: ids[i], Vector: vecs[i], Metadata: metas[i]}
	}

	// Marshal can fail, e.g. on a NaN/Inf vector component.
	bs, err := json.Marshal(map[string]any{"items": items})
	if err != nil {
		return fmt.Errorf("encode add request: %w", err)
	}
	req, err := http.NewRequest(http.MethodPost,
		gateway+"/v1/vectors/index/"+indexName+"/add", bytes.NewReader(bs))
	if err != nil {
		// A malformed gateway URL yields a nil request; using it would panic.
		return fmt.Errorf("build add request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := hc.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		preview, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
		return fmt.Errorf("add status %d: %s", resp.StatusCode, string(preview))
	}
	return nil
}
func runQuery(hc *http.Client, gateway, q string) {
t0 := time.Now()
// 1. Embed the query.
vecs, err := embedBatch(hc, gateway, []string{q})
if err != nil || len(vecs) == 0 {
fmt.Printf("[sc] query %q: embed err: %v\n", q, err)
return
}
embedDur := time.Since(t0)
t1 := time.Now()
body, _ = json.Marshal(map[string]any{"vector": er.Vectors[0], "k": 5})
req, _ = http.NewRequest(http.MethodPost,
gateway+"/v1/vectors/index/"+indexName+"/search", bytes.NewReader(body))
// 2. Search.
body := map[string]any{"vector": vecs[0], "k": 5}
bs, _ := json.Marshal(body)
req, _ := http.NewRequest(http.MethodPost,
gateway+"/v1/vectors/index/"+indexName+"/search", bytes.NewReader(bs))
req.Header.Set("Content-Type", "application/json")
resp, err = hc.Do(req)
resp, err := hc.Do(req)
if err != nil {
fmt.Printf("[sc] query %q: search err: %v\n", q, err)
return
@ -219,3 +310,29 @@ func runQuery(hc *http.Client, gateway, q string) {
}
}
// httpPostJSON encodes body as JSON and POSTs it to url with a JSON
// Content-Type.
//
// On a completed round trip it returns the response status code and up
// to 256 bytes of the response body. If the body cannot be encoded, the
// request cannot be built (e.g. a malformed url), or the round trip
// fails, it returns status 0 and the error text.
func httpPostJSON(hc *http.Client, url string, body any) (int, string) {
	bs, err := json.Marshal(body)
	if err != nil {
		return 0, err.Error()
	}
	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(bs))
	if err != nil {
		// req is nil here; touching it would be a nil-pointer panic.
		return 0, err.Error()
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := hc.Do(req)
	if err != nil {
		return 0, err.Error()
	}
	defer resp.Body.Close()

	// Best-effort preview: a read error just shortens the preview.
	preview, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
	return resp.StatusCode, string(preview)
}
// httpDelete issues a DELETE to url and drains the response body. The
// response status is not inspected: any completed round trip returns
// nil. It returns an error only when the request cannot be built (e.g.
// a malformed url) or the round trip itself fails.
func httpDelete(hc *http.Client, url string) error {
	req, err := http.NewRequest(http.MethodDelete, url, nil)
	if err != nil {
		// req is nil here; passing it to Do would be a nil-pointer panic.
		return fmt.Errorf("build DELETE request: %w", err)
	}
	resp, err := hc.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	// Drain so the transport can reuse the connection; a drain failure
	// does not change the outcome of the delete.
	_, _ = io.Copy(io.Discard, resp.Body)
	return nil
}
// keep context.Background reachable in case future paths use it
var _ = context.Background

View File

@ -1,303 +0,0 @@
// Staffing candidates corpus driver — second corpus on the Go side
// after workers_500k. Validates the corpusingest substrate against
// real production-shape parquet data and gives the matrix indexer a
// second corpus to compose against.
//
// Source: /home/profit/lakehouse/data/datasets/candidates.parquet
// (1000 candidates, 11 columns including skills + status + years).
//
// IDs are prefixed "c-" so merged matrix results across corpora
// stay unambiguous (workers use "w-").
//
// Post-ingest: runs a real staffing query through /v1/matrix/search
// against just the candidates corpus — first deep-field reality test
// using the new pipeline.
package main
import (
"bytes"
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"strings"
"time"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/apache/arrow-go/v18/parquet/file"
"github.com/apache/arrow-go/v18/parquet/pqarrow"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/corpusingest"
)
const (
indexName = "candidates"
dim = 768
)
// candidatesSource is a corpusingest.Source backed by typed column
// arrays taken from an arrow table read out of candidates.parquet. The
// whole table is held in memory (the dataset is about 1000 rows); a
// chunked record-batch reader would be the next step for much larger
// files.
type candidatesSource struct {
	// cols holds one typed array per parquet column used by Next.
	cols struct {
		id        *array.String
		firstName *array.String
		lastName  *array.String
		email     *array.String
		phone     *array.String
		city      *array.String
		state     *array.String
		skills    *array.String
		status    *array.String
		years     *array.Int64
		rate      *array.Int64
	}
	n   int // total rows in the table
	cur int // index of the next row Next will return
}
// newCandidatesSource opens the parquet file at path, reads it fully
// into an arrow table, and binds the columns Next needs.
//
// It returns the source plus a cleanup func that releases the table and
// closes the parquet reader and file; the caller must invoke cleanup
// once done with the source. On error both the source and cleanup are
// nil and everything opened so far has already been closed.
//
// Every bound column must exist, be non-empty, consist of exactly one
// chunk, and have the expected arrow type (String or Int64).
func newCandidatesSource(path string) (*candidatesSource, func(), error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, nil, fmt.Errorf("open parquet: %w", err)
	}
	pf, err := file.NewParquetReader(f)
	if err != nil {
		f.Close()
		return nil, nil, fmt.Errorf("parquet reader: %w", err)
	}
	fr, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
	if err != nil {
		pf.Close()
		f.Close()
		return nil, nil, fmt.Errorf("arrow reader: %w", err)
	}
	table, err := fr.ReadTable(context.Background())
	if err != nil {
		pf.Close()
		f.Close()
		return nil, nil, fmt.Errorf("read table: %w", err)
	}

	src := &candidatesSource{n: int(table.NumRows())}
	schema := table.Schema()

	// singleChunk resolves a column by name and returns its only chunk.
	// The same guards apply to string and int64 columns: Next indexes
	// chunk 0 with global row numbers, so a missing chunk would panic
	// and extra chunks would be ignored.
	singleChunk := func(name string) (any, error) {
		idx := schema.FieldIndices(name)
		if len(idx) == 0 {
			return nil, fmt.Errorf("column %q not found", name)
		}
		ch := table.Column(idx[0]).Data()
		if ch.Len() == 0 {
			return nil, fmt.Errorf("column %q empty", name)
		}
		// Single-chunk assumption. If parquets get larger, switch to a
		// RecordReader and iterate chunks.
		if n := len(ch.Chunks()); n != 1 {
			return nil, fmt.Errorf("column %q has %d chunks; only 1 supported here", name, n)
		}
		return ch.Chunk(0), nil
	}
	stringColByName := func(name string) (*array.String, error) {
		chunk, err := singleChunk(name)
		if err != nil {
			return nil, err
		}
		s, ok := chunk.(*array.String)
		if !ok {
			return nil, fmt.Errorf("column %q is %T, want *array.String", name, chunk)
		}
		return s, nil
	}
	int64ColByName := func(name string) (*array.Int64, error) {
		chunk, err := singleChunk(name)
		if err != nil {
			return nil, err
		}
		i, ok := chunk.(*array.Int64)
		if !ok {
			return nil, fmt.Errorf("column %q is %T, want *array.Int64", name, chunk)
		}
		return i, nil
	}

	cleanup := func() {
		table.Release()
		pf.Close()
		f.Close()
	}

	for _, t := range []struct {
		name string
		dst  **array.String
	}{
		{"candidate_id", &src.cols.id},
		{"first_name", &src.cols.firstName},
		{"last_name", &src.cols.lastName},
		{"email", &src.cols.email},
		{"phone", &src.cols.phone},
		{"city", &src.cols.city},
		{"state", &src.cols.state},
		{"skills", &src.cols.skills},
		{"status", &src.cols.status},
	} {
		col, err := stringColByName(t.name)
		if err != nil {
			cleanup()
			return nil, nil, err
		}
		*t.dst = col
	}
	for _, t := range []struct {
		name string
		dst  **array.Int64
	}{
		{"years_experience", &src.cols.years},
		{"hourly_rate_usd", &src.cols.rate},
	} {
		col, err := int64ColByName(t.name)
		if err != nil {
			cleanup()
			return nil, nil, err
		}
		*t.dst = col
	}
	return src, cleanup, nil
}
// Next returns the candidate at the cursor as a corpusingest.Row and
// advances the cursor. It returns io.EOF once every row has been handed
// out.
//
// The embed text puts skills first, then location, experience, status,
// and finally the name: embedding models weight earlier tokens slightly
// more, so the role-relevant signal leads. The row ID is the candidate
// ID prefixed with "c-".
func (s *candidatesSource) Next() (corpusingest.Row, error) {
	if s.cur >= s.n {
		return corpusingest.Row{}, io.EOF
	}
	idx := s.cur
	s.cur++

	c := &s.cols
	var (
		candidateID = c.id.Value(idx)
		firstName   = c.firstName.Value(idx)
		lastName    = c.lastName.Value(idx)
		city        = c.city.Value(idx)
		state       = c.state.Value(idx)
		skills      = c.skills.Value(idx)
		status      = c.status.Value(idx)
		years       = c.years.Value(idx)
		rate        = c.rate.Value(idx)
	)

	text := fmt.Sprintf(
		"Candidate skills: %s. Based in %s, %s. %d years experience. Status: %s. %s %s.",
		skills, city, state, years, status, firstName, lastName,
	)

	meta := map[string]any{
		"candidate_id":     candidateID,
		"first_name":       firstName,
		"last_name":        lastName,
		"email":            c.email.Value(idx),
		"phone":            c.phone.Value(idx),
		"city":             city,
		"state":            state,
		"skills":           skills,
		"status":           status,
		"years_experience": years,
		"hourly_rate_usd":  rate,
	}
	return corpusingest.Row{ID: "c-" + candidateID, Text: text, Metadata: meta}, nil
}
// main optionally ingests candidates.parquet into the "candidates"
// index via corpusingest.Run, then runs one query through
// /v1/matrix/search restricted to that corpus. A partial ingest failure
// is reported as a warning and the query still runs; any other ingest
// error is fatal.
func main() {
	var (
		gatewayURL   string
		parquetPath  string
		realityQuery string
		rowLimit     int
		dropFirst    bool
		queryOnly    bool
	)
	flag.StringVar(&gatewayURL, "gateway", "http://127.0.0.1:3110", "gateway base URL")
	flag.StringVar(&parquetPath, "parquet", "/home/profit/lakehouse/data/datasets/candidates.parquet", "candidates parquet")
	flag.IntVar(&rowLimit, "limit", 0, "limit rows (0 = all 1000)")
	flag.StringVar(&realityQuery, "query", "Python AWS Docker engineer in Chicago available now", "post-ingest reality-test query")
	flag.BoolVar(&dropFirst, "drop", true, "DELETE candidates index before populate")
	flag.BoolVar(&queryOnly, "skip-populate", false, "skip ingest, only run query")
	flag.Parse()

	client := &http.Client{Timeout: 5 * time.Minute}

	if !queryOnly {
		src, release, err := newCandidatesSource(parquetPath)
		if err != nil {
			log.Fatalf("open candidates source: %v", err)
		}
		defer release()

		cfg := corpusingest.Config{
			GatewayURL:   gatewayURL,
			IndexName:    indexName,
			Dimension:    dim,
			Distance:     "cosine",
			EmbedBatch:   16,
			EmbedWorkers: 8,
			AddBatch:     500, // 1000 candidates → 2 add calls; small batches keep memory bounded
			Limit:        rowLimit,
			DropExisting: dropFirst,
			HTTPClient:   client,
			LogProgress:  5 * time.Second,
		}
		stats, err := corpusingest.Run(context.Background(), cfg, src)
		switch {
		case err == nil:
			// clean ingest
		case errors.Is(err, corpusingest.ErrPartialFailure):
			fmt.Printf("[candidates] WARN partial failure: %v\n", err)
		default:
			log.Fatalf("ingest: %v", err)
		}
		fmt.Printf("[candidates] populate: scanned=%d embedded=%d added=%d failed=%d wall=%v\n",
			stats.Scanned, stats.Embedded, stats.Added, stats.FailedBatches,
			stats.Wall.Round(time.Millisecond))
	}

	// Reality test: one staffing query against just the candidates
	// corpus. Multi-corpus retrieval against workers + candidates is the
	// next step.
	fmt.Printf("\n[candidates] reality test query: %q\n", realityQuery)
	runMatrixQuery(client, gatewayURL, realityQuery)
}
// runMatrixQuery posts query to gateway's /v1/matrix/search endpoint,
// restricted to the indexName corpus (k=5, per_corpus_k=10), and prints
// each hit's ID, distance, corpus and raw metadata.
//
// Any failure is fatal via log.Fatalf: building the request, the HTTP
// call, a non-200 status (reported with up to 512 bytes of the body), or
// decoding the response. The printed duration covers the time until
// response headers arrive, not the body read.
func runMatrixQuery(hc *http.Client, gateway, query string) {
	body, err := json.Marshal(map[string]any{
		"query_text":   query,
		"corpora":      []string{indexName},
		"k":            5,
		"per_corpus_k": 10,
	})
	if err != nil {
		log.Fatalf("matrix search: encode request: %v", err)
	}
	req, err := http.NewRequest(http.MethodPost, gateway+"/v1/matrix/search", bytes.NewReader(body))
	if err != nil {
		// A malformed -gateway value yields a nil request; using it
		// would be a nil-pointer panic rather than a readable error.
		log.Fatalf("matrix search: build request: %v", err)
	}
	req.Header.Set("Content-Type", "application/json")

	t0 := time.Now()
	resp, err := hc.Do(req)
	if err != nil {
		log.Fatalf("matrix search: %v", err)
	}
	defer resp.Body.Close()
	dur := time.Since(t0)

	if resp.StatusCode != 200 {
		preview, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
		log.Fatalf("matrix search %d: %s", resp.StatusCode, preview)
	}

	var sr struct {
		Results []struct {
			ID       string          `json:"id"`
			Distance float32         `json:"distance"`
			Corpus   string          `json:"corpus"`
			Metadata json.RawMessage `json:"metadata"`
		} `json:"results"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&sr); err != nil {
		log.Fatalf("decode: %v", err)
	}

	fmt.Printf("[candidates] matrix returned %d hits in %v:\n", len(sr.Results), dur.Round(time.Millisecond))
	for i, r := range sr.Results {
		fmt.Printf(" %d. %s d=%.4f corpus=%s\n %s\n",
			i+1, r.ID, r.Distance, r.Corpus, string(r.Metadata))
	}
}

View File

@ -1,308 +0,0 @@
// Staffing workers corpus driver — second-of-two corpora that proves
// the multi-corpus matrix indexer end-to-end. Mirrors the candidates
// driver's parquet pattern but handles multi-chunk arrow tables
// (workers_500k.parquet has multiple row groups, candidates fits in
// one).
//
// Source: /home/profit/lakehouse/data/datasets/workers_500k.parquet
// (500000 rows, 18 cols including role + skills + certifications +
// archetype + reliability scores + resume_text).
//
// IDs prefixed "w-" so multi-corpus matrix queries returning workers
// alongside candidates ("c-") stay unambiguous in merged results.
//
// Default -limit 5000 because the goal of this driver is multi-corpus
// reality testing, not the 500K stress test (separate concern, see
// project_golang_lakehouse.md scale framing).
package main
import (
"context"
"errors"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"strings"
"time"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/apache/arrow-go/v18/parquet/file"
"github.com/apache/arrow-go/v18/parquet/pqarrow"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/corpusingest"
)
const (
indexName = "workers"
dim = 768
)
// workersSource is a corpusingest.Source backed by an arrow table read
// from workers_500k.parquet and held in memory. Columns are wrapped in
// chunk-aware accessors because a parquet of this size can come back as
// several chunks per column, unlike the single-chunk candidates file.
type workersSource struct {
	// cols holds one chunk-aware accessor per bound parquet column.
	cols struct {
		workerID  *chunkedInt64
		name      *chunkedString
		role      *chunkedString
		city      *chunkedString
		state     *chunkedString
		skills    *chunkedString
		certs     *chunkedString
		archetype *chunkedString
		resume    *chunkedString
		comm      *chunkedString
	}
	n   int64 // total rows in the table
	cur int64 // index of the next row Next will return
}
// chunkedString gives per-row access to a string column regardless of
// how many chunks the table was read back with. It keeps each chunk
// alongside its length so a global row index can be mapped to a chunk
// and a local offset.
type chunkedString struct {
	chunks []*array.String
	sizes  []int64
}

// newChunkedString wraps every chunk of col. It fails if any chunk is
// not a *array.String, reporting the chunk's position and actual type.
func newChunkedString(col *arrow.Chunked) (*chunkedString, error) {
	out := &chunkedString{}
	for pos, raw := range col.Chunks() {
		typed, ok := raw.(*array.String)
		if !ok {
			return nil, fmt.Errorf("chunk %d is %T, want *array.String", pos, raw)
		}
		out.chunks = append(out.chunks, typed)
		out.sizes = append(out.sizes, int64(typed.Len()))
	}
	return out, nil
}

// At returns the value at the global row index, or "" when row lies
// past the last chunk. Each call walks the chunk list from the start,
// so cost grows with the number of chunks; that is fine for this
// driver's scale (≤5000 rows × ~5 chunks).
func (c *chunkedString) At(row int64) string {
	remaining := row
	for pos, chunk := range c.chunks {
		if remaining < c.sizes[pos] {
			return chunk.Value(int(remaining))
		}
		remaining -= c.sizes[pos]
	}
	return ""
}
// chunkedInt64 is the int64 counterpart of chunkedString: per-row
// access to an int64 column that may span several chunks.
type chunkedInt64 struct {
	chunks []*array.Int64
	sizes  []int64
}

// newChunkedInt64 wraps every chunk of col. It fails if any chunk is
// not a *array.Int64, reporting the chunk's position and actual type.
func newChunkedInt64(col *arrow.Chunked) (*chunkedInt64, error) {
	out := &chunkedInt64{}
	for pos, raw := range col.Chunks() {
		typed, ok := raw.(*array.Int64)
		if !ok {
			return nil, fmt.Errorf("chunk %d is %T, want *array.Int64", pos, raw)
		}
		out.chunks = append(out.chunks, typed)
		out.sizes = append(out.sizes, int64(typed.Len()))
	}
	return out, nil
}

// At returns the value at the global row index, or 0 when row lies past
// the last chunk. Each call walks the chunk list from the start.
func (c *chunkedInt64) At(row int64) int64 {
	remaining := row
	for pos, chunk := range c.chunks {
		if remaining < c.sizes[pos] {
			return chunk.Value(int(remaining))
		}
		remaining -= c.sizes[pos]
	}
	return 0
}
// newWorkersSource opens the parquet file at path, reads it fully into
// an arrow table, and binds the worker_id column plus the string
// columns used by the driver.
//
// It returns the source and a cleanup func that releases the table and
// closes the parquet reader and file. On error both are nil and
// everything opened so far has already been closed.
func newWorkersSource(path string) (*workersSource, func(), error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, nil, fmt.Errorf("open parquet: %w", err)
	}
	pf, err := file.NewParquetReader(f)
	if err != nil {
		f.Close()
		return nil, nil, fmt.Errorf("parquet reader: %w", err)
	}
	fr, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
	if err != nil {
		pf.Close()
		f.Close()
		return nil, nil, fmt.Errorf("arrow reader: %w", err)
	}
	tbl, err := fr.ReadTable(context.Background())
	if err != nil {
		pf.Close()
		f.Close()
		return nil, nil, fmt.Errorf("read table: %w", err)
	}

	release := func() {
		tbl.Release()
		pf.Close()
		f.Close()
	}
	// fail tears everything down and forwards the error.
	fail := func(err error) (*workersSource, func(), error) {
		release()
		return nil, nil, err
	}

	fields := tbl.Schema()
	// columnData resolves a column name to its chunked data.
	columnData := func(name string) (*arrow.Chunked, error) {
		hits := fields.FieldIndices(name)
		if len(hits) == 0 {
			return nil, fmt.Errorf("column %q not found", name)
		}
		return tbl.Column(hits[0]).Data(), nil
	}

	src := &workersSource{n: tbl.NumRows()}

	idData, err := columnData("worker_id")
	if err != nil {
		return fail(err)
	}
	ids, err := newChunkedInt64(idData)
	if err != nil {
		return fail(err)
	}
	src.cols.workerID = ids

	bindings := []struct {
		name string
		dst  **chunkedString
	}{
		{"name", &src.cols.name},
		{"role", &src.cols.role},
		{"city", &src.cols.city},
		{"state", &src.cols.state},
		{"skills", &src.cols.skills},
		{"certifications", &src.cols.certs},
		{"archetype", &src.cols.archetype},
		{"resume_text", &src.cols.resume},
		{"communications", &src.cols.comm},
	}
	for _, b := range bindings {
		data, err := columnData(b.name)
		if err != nil {
			return fail(err)
		}
		wrapped, err := newChunkedString(data)
		if err != nil {
			return fail(err)
		}
		*b.dst = wrapped
	}
	return src, release, nil
}
// Next yields the row at the current cursor and advances the cursor by one.
// Once the cursor reaches the table's row count it returns an empty Row and
// io.EOF.
//
// The Row's ID is "w-<worker_id>", its Text is the structured embed text
// described below, and its Metadata carries the worker's id, name, role,
// city, state, skills, certifications and archetype.
func (s *workersSource) Next() (corpusingest.Row, error) {
	if s.cur >= s.n {
		return corpusingest.Row{}, io.EOF
	}
	row := s.cur
	s.cur++

	var (
		workerID  = s.cols.workerID.At(row)
		name      = s.cols.name.At(row)
		role      = s.cols.role.At(row)
		city      = s.cols.city.At(row)
		state     = s.cols.state.At(row)
		skills    = s.cols.skills.At(row)
		certs     = s.cols.certs.At(row)
		archetype = s.cols.archetype.At(row)
		resume    = s.cols.resume.At(row)
	)

	// Embed text — restored to V0 after the 2026-04-29 D experiment.
	// Three variants were tested with the query "Forklift operator with
	// OSHA-30 certification, warehouse experience":
	//   V0 (this): structured "Worker role: ... Skills: ... <resume_text>"
	//     → 6 workers in top-8, 0 Forklift, top dist 0.327
	//   V4a (drop): drop labels + resume + archetype, double the role
	//     → 6 workers in top-8, 0 Forklift, top dist 0.254
	//   V4b (resume only): just resume_text, no structured prefix
	//     → 4 workers in top-8 (worse mix), 0 Forklift, top 0.379
	// All three ranked Production Workers / Machine Operators / Line Leads
	// above actual Forklift Operators. Conclusion: the bottleneck is
	// nomic-embed-text 137M's geometry, not text design. Real fixes belong
	// elsewhere — hybrid SQL+semantic (B in the next-step menu) or playbook
	// boost (component 5, already shipped). V0 keeps the best
	// worker/candidate mix.
	text := strings.Join([]string{
		"Worker role: ", role,
		". Skills: ", skills,
		". Certifications: ", certs,
		". Based in ", city, ", ", state,
		". Archetype: ", archetype,
		". ", resume,
	}, "")

	meta := map[string]any{
		"worker_id":      workerID,
		"name":           name,
		"role":           role,
		"city":           city,
		"state":          state,
		"skills":         skills,
		"certifications": certs,
		"archetype":      archetype,
	}

	return corpusingest.Row{
		ID:       fmt.Sprintf("w-%d", workerID),
		Text:     text,
		Metadata: meta,
	}, nil
}
// main populates the workers index: it opens the workers parquet as a row
// source and hands it to corpusingest.Run against the gateway.
//
// Flags:
//
//	-gateway  gateway base URL
//	-parquet  path to the workers parquet file
//	-limit    maximum rows to ingest (0 = all)
//	-drop     delete the existing workers index before populating
//
// A partial failure is reported as a warning and the summary is still
// printed; any other ingest error is fatal.
func main() {
	gatewayURL := flag.String("gateway", "http://127.0.0.1:3110", "gateway base URL")
	parquetPath := flag.String("parquet", "/home/profit/lakehouse/data/datasets/workers_500k.parquet", "workers parquet")
	rowLimit := flag.Int("limit", 5000, "limit rows (0 = all 500K — usually not what you want here)")
	dropFirst := flag.Bool("drop", true, "DELETE workers index before populate")
	flag.Parse()

	src, cleanup, err := newWorkersSource(*parquetPath)
	if err != nil {
		log.Fatalf("open workers source: %v", err)
	}
	defer cleanup()

	cfg := corpusingest.Config{
		GatewayURL:   *gatewayURL,
		IndexName:    indexName,
		Dimension:    dim,
		Distance:     "cosine",
		EmbedBatch:   16,
		EmbedWorkers: 8,
		AddBatch:     500,
		Limit:        *rowLimit,
		DropExisting: *dropFirst,
		HTTPClient:   &http.Client{Timeout: 5 * time.Minute},
		LogProgress:  10 * time.Second,
	}

	stats, err := corpusingest.Run(context.Background(), cfg, src)
	switch {
	case err == nil:
		// Clean run; fall through to the summary.
	case errors.Is(err, corpusingest.ErrPartialFailure):
		fmt.Printf("[workers] WARN partial failure: %v\n", err)
	default:
		log.Fatalf("ingest: %v", err)
	}

	fmt.Printf("[workers] populate: scanned=%d embedded=%d added=%d failed=%d wall=%v\n",
		stats.Scanned, stats.Embedded, stats.Added, stats.FailedBatches,
		stats.Wall.Round(time.Millisecond))
}

View File

@ -1,193 +0,0 @@
#!/usr/bin/env bash
# Workflow smoke — Observer-KB workflow runner end-to-end (SPEC §3.8
# first slice). All assertions go through gateway :3110.
#
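# Validates:
# - GET /observer/workflow/modes lists fixture.echo + fixture.upper
#   plus the real modes (matrix.relevance, matrix.downgrade,
#   matrix.search, distillation.score, drift.scorer)
# - POST /observer/workflow/run executes a 3-node DAG with $-ref
#   substitution: shape (uppercase) → weakness → improvement
# - Each node's execution lands an ObservedOp via the observer
#   ring (visible in /observer/stats with source="workflow")
# - Aborting case: unknown mode → 400 with helpful error
# - Real-mode chain: matrix.downgrade → distillation.score compose
#   through the workflow runner
#
# Not exercised by this script yet:
# - Skip cascade: node with failed dep gets skipped, independent
#   siblings still run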
# Abort on any failing command, unset variable, or failed pipeline stage.
set -euo pipefail
# Run from the parent of this script's directory so ./cmd and bin/ resolve
# (presumably the repository root).
cd "$(dirname "$0")/.."
# Make the Go toolchain reachable when it is installed under /usr/local/go.
export PATH="$PATH:/usr/local/go/bin"
echo "[workflow-smoke] building observerd + gateway..."
go build -o bin/ ./cmd/observerd ./cmd/gateway
# Kill any observerd/gateway left from an earlier run; "no match" is fine.
# Presumably this frees the fixed ports used below.
pkill -f "bin/(observerd|gateway)" 2>/dev/null || true
# Short pause, presumably to let the killed processes exit.
sleep 0.3
# PIDs of the services this script starts; cleanup() kills them.
PIDS=()
# Scratch directory holding the generated config; cleanup() removes it.
TMP="$(mktemp -d)"
CFG="$TMP/workflow.toml"
# cleanup stops every service recorded in PIDS and removes the scratch
# directory $TMP. It is installed as the EXIT/INT/TERM trap, so it can run
# before any PID has been recorded.
cleanup() {
  local p
  echo "[workflow-smoke] cleanup"
  # "${PIDS[@]:-}" rather than "${PIDS[@]}": under `set -u`, bash < 4.4 treats
  # expanding an empty array as an unbound-variable error, which would abort
  # cleanup before the rm below. The :- form yields a single empty word when
  # the array is empty, and the -n guard skips it.
  for p in "${PIDS[@]:-}"; do [ -n "$p" ] && kill "$p" 2>/dev/null || true; done
  rm -rf "$TMP"
}
# Tear down on normal exit as well as on SIGINT / SIGTERM.
trap cleanup EXIT INT TERM

# Write the config both binaries are started with. Only observerd (:3219)
# and the gateway (:3110) are launched by this script; the remaining upstream
# URLs point at ports nothing here starts. The delimiter is quoted because
# the body contains nothing to expand.
cat <<'EOF' > "$CFG"
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
observerd_url = "http://127.0.0.1:3219"
[observerd]
bind = "127.0.0.1:3219"
EOF
# poll_health PORT
# Polls http://127.0.0.1:PORT/health every 50 ms until it answers or about
# five seconds (whole-second clock) have elapsed.
# Returns 0 as soon as a request succeeds, 1 if the deadline passes first.
poll_health() {
  local port="$1"
  local give_up_at=$(( $(date +%s) + 5 ))
  until [ "$(date +%s)" -ge "$give_up_at" ]; do
    curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1 && return 0
    sleep 0.05
  done
  return 1
}
echo "[workflow-smoke] launching observerd → gateway..."

# observerd goes first; the gateway is started only once observerd is healthy.
./bin/observerd -config "$CFG" >/tmp/observerd.log 2>&1 &
PIDS+=("$!")
if ! poll_health 3219; then
  echo "observerd failed"
  tail /tmp/observerd.log
  exit 1
fi

./bin/gateway -config "$CFG" >/tmp/gateway.log 2>&1 &
PIDS+=("$!")
if ! poll_health 3110; then
  echo "gateway failed"
  tail /tmp/gateway.log
  exit 1
fi

# Set to 1 by any failing check below; decides the final exit status.
FAILED=0
# ── 1. /observer/workflow/modes lists registered modes ────────────
# Every name in EXPECTED must appear in the response's .modes array;
# absent names are collected in MISSING for the failure message.
echo "[workflow-smoke] /observer/workflow/modes lists fixtures + real modes:"
RESP="$(curl -sS http://127.0.0.1:3110/v1/observer/workflow/modes)"
EXPECTED=(
  "fixture.echo" "fixture.upper"
  "matrix.relevance" "matrix.downgrade"
  "distillation.score" "drift.scorer"
  "matrix.search"
)
MISSING=""
for m in "${EXPECTED[@]}"; do
  [ "$(jq -r --arg m "$m" '.modes | index($m) != null' <<<"$RESP")" = "true" ] || MISSING="$MISSING $m"
done
if [ -n "$MISSING" ]; then
  echo " ✗ missing modes:$MISSING"
  FAILED=1
else
  echo " ✓ all 7 expected modes registered (fixtures + 4 pure + matrix.search HTTP)"
fi
# ── 2. 3-node DAG with $-ref substitution ─────────────────────────
# shape uppercases its prompt; weakness embeds shape's output via
# $shape.output.upper; improvement embeds weakness's prompt. The uppercased
# text must therefore show up in all three node outputs.
echo "[workflow-smoke] 3-node DAG: shape (upper) → weakness → improvement"
WORKFLOW='{
"workflow": {
"name": "smoke-chain",
"description": "DAG ref substitution test",
"nodes": [
{"id":"shape", "mode":"fixture.upper", "prompt":"hello world"},
{"id":"weakness", "mode":"fixture.echo",
"prompt":"observed shape: $shape.output.upper",
"depends_on":["shape"]},
{"id":"improvement", "mode":"fixture.echo",
"prompt":"based on $weakness.output.prompt do better",
"depends_on":["weakness"]}
]
}
}'
RUN="$(curl -sS -X POST http://127.0.0.1:3110/v1/observer/workflow/run \
  -H 'Content-Type: application/json' \
  -d "$WORKFLOW")"
STATUS="$(jq -r '.status' <<<"$RUN")"
SHAPE_UPPER="$(jq -r '.nodes[0].output.upper' <<<"$RUN")"
WEAK_PROMPT="$(jq -r '.nodes[1].output.prompt' <<<"$RUN")"
IMP_PROMPT="$(jq -r '.nodes[2].output.prompt' <<<"$RUN")"
if [ "$STATUS" != "succeeded" ] || [ "$SHAPE_UPPER" != "HELLO WORLD" ] \
  || [[ "$WEAK_PROMPT" != *"HELLO WORLD"* ]] \
  || [[ "$IMP_PROMPT" != *"HELLO WORLD"* ]]; then
  echo " ✗ status=$STATUS shape=$SHAPE_UPPER weak=$WEAK_PROMPT imp=$IMP_PROMPT"
  echo " full: $RUN"
  FAILED=1
else
  echo " ✓ status=succeeded · shape=HELLO WORLD · refs propagated through 3-node chain"
fi
# ── 3. Per-node provenance recorded as ObservedOps ────────────────
# Only the 3-node workflow above has run at this point, so both the
# workflow-sourced count and the overall total are expected to be 3.
echo "[workflow-smoke] /observer/stats reflects workflow ops:"
STATS="$(curl -sS http://127.0.0.1:3110/v1/observer/stats)"
WORKFLOW_OPS="$(jq -r '.by_source.workflow // 0' <<<"$STATS")"
TOTAL="$(jq -r '.total' <<<"$STATS")"
if [ "$WORKFLOW_OPS" != "3" ] || [ "$TOTAL" != "3" ]; then
  echo " ✗ workflow=$WORKFLOW_OPS total=$TOTAL"
  echo " full: $STATS"
  FAILED=1
else
  echo " ✓ 3 workflow ops recorded (one per node), total=3"
fi
# ── 4. Unknown mode → 400 ─────────────────────────────────────────
# Posts a single-node workflow whose mode is not registered. Expects HTTP 400
# and an .error field mentioning "unknown mode" (case-insensitive).
echo "[workflow-smoke] unknown mode → 400:"
HTTP="$(curl -sS -o /tmp/wf_bad.json -w '%{http_code}' -X POST \
  http://127.0.0.1:3110/v1/observer/workflow/run \
  -H 'Content-Type: application/json' \
  -d '{"workflow":{"name":"bad","nodes":[{"id":"a","mode":"does.not.exist"}]}}')"
# `|| true` keeps a non-JSON body from aborting the script: under `set -e` a
# failing jq in a plain assignment would exit here silently (its stderr is
# discarded), and the ✗ diagnostic below would never be printed.
ERR="$(jq -r '.error' < /tmp/wf_bad.json 2>/dev/null || true)"
if [ "$HTTP" = "400" ] && echo "$ERR" | grep -qi "unknown mode"; then
  echo " ✓ unknown mode aborts with 400 + helpful error"
else
  echo " ✗ http=$HTTP err=$ERR"; FAILED=1
fi
# ── 5. Real-mode chain: matrix.downgrade → distillation.score ─────
# This proves the §3.4 components compose through the workflow runner.
# Two pure modes, no external service deps, deterministic input/output.
# Expected: the gate node reports mode codereview_isolation downgraded from
# codereview_lakehouse, and the score node reports category "accepted".
echo "[workflow-smoke] real-mode chain: downgrade → distillation.score"
REAL_WORKFLOW='{
"workflow": {
"name": "real-mode-chain",
"nodes": [
{"id":"gate", "mode":"matrix.downgrade",
"inputs":{"mode":"codereview_lakehouse", "model":"x-ai/grok-4.1-fast"}},
{"id":"score", "mode":"distillation.score",
"inputs":{"record":{
"run_id":"r-1", "task_id":"t-1",
"timestamp":"2026-04-29T12:00:00Z", "schema_version":1,
"provenance":{"source_file":"data/_kb/scrum_reviews.jsonl",
"sig_hash":"x", "recorded_at":"2026-04-29T12:00:01Z"},
"success_markers":["accepted_on_attempt_1"]
}}}
]
}
}'
RUN="$(curl -sS -X POST http://127.0.0.1:3110/v1/observer/workflow/run \
  -H 'Content-Type: application/json' \
  -d "$REAL_WORKFLOW")"
STATUS="$(jq -r '.status' <<<"$RUN")"
GATE_MODE="$(jq -r '.nodes[0].output.mode' <<<"$RUN")"
GATE_FROM="$(jq -r '.nodes[0].output.downgraded_from' <<<"$RUN")"
SCORE_CAT="$(jq -r '.nodes[1].output.category' <<<"$RUN")"
if [ "$STATUS" != "succeeded" ] \
  || [ "$GATE_MODE" != "codereview_isolation" ] \
  || [ "$GATE_FROM" != "codereview_lakehouse" ] \
  || [ "$SCORE_CAT" != "accepted" ]; then
  echo " ✗ status=$STATUS gate=$GATE_MODE from=$GATE_FROM score=$SCORE_CAT"
  echo " full: $RUN"
  FAILED=1
else
  echo " ✓ downgrade flipped lakehouse→isolation; scorer rated scrum_review attempt_1=accepted"
fi
# Final verdict: exit non-zero if any check above set FAILED.
if [ "$FAILED" -ne 0 ]; then
  echo "[workflow-smoke] Workflow runner acceptance: FAILED"
  exit 1
fi
echo "[workflow-smoke] Workflow runner acceptance: PASSED"
exit 0