root b2e45f7f26 playbook_lift: harness expansion + reality test #001 (7/8 lift, 87.5%)
The 5-loop substrate's load-bearing gate is verified — playbook +
matrix indexer give the results we're looking for. Per the report's
rubric, lift ≥ 50% of discoveries means matrix is doing real work;
7/8 = 87.5% blew through that.

The harness was structurally hiding bugs behind a stripped-down 5-daemon boot.
Expanding to the full 10-daemon prod stack surfaced 7 fixes in cascade:

1. driver→matrixd: {"query": ...} → {"query_text": ...} field name
2. harness temp toml missing [s3] → wrong default bucket → catalogd
   rehydrate 500 on first call
3. harness→queryd SQL probe: {"q": ...} → {"sql": ...} field name
4. expand boot from 5 → 10 daemons in dep-ordered launch
5. add SQL surface probe (3-row CSV ingest → COUNT(*)=3 assertion)
6. candidates corpus was synthetic SWE-tech (Swift/iOS, Scala/Spark) —
   wrong domain for staffing queries; replaced with ethereal_workers
   (10K rows, real staffing schema, "e-" id prefix to avoid collision
   with workers' "w-"). staffing_workers driver gains -index-name +
   -id-prefix flags so the same binary serves both corpora
7. local_judge qwen3.5:latest is a vision-SSM 256K-ctx build running
   ~30s per judge call against the lift loop; reverted to
   qwen2.5:latest (~1s/call, 30× faster, and the lift theory still held)

Each contract drift (1, 3) is now locked into a cmd/<bin>/main_test.go
so future drift fires in `go test`, not in a reality run. R-005 closed:

- cmd/matrixd/main_test.go (new) — playbook record drift detector +
  score bounds + 6 routes mounted
- cmd/queryd/main_test.go — wrong-field-name drift detector
- cmd/pathwayd/main_test.go (new) — 9 routes + add round-trip + retire
- cmd/observerd/main_test.go (new) — 4 routes + invalid-op + unknown-mode

`go test ./cmd/{matrixd,queryd,pathwayd,observerd}` all green.

Reality test results (reports/reality-tests/playbook_lift_001.{json,md}):
  Queries              21 (staffing-domain, 7 categories)
  Discoveries          8 (judge ≠ cosine top-1)
  Lifts                7/8 (87.5%)
  Boosts triggered     9
  Mean Δ distance      -0.053 (warm closer than cold)
  OOD honesty          dental/RN/SWE rated 1, no fake matches
  Cross-corpus boosts  confirmed (e- ↔ w- swaps in lifts)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 06:22:21 -05:00

140 lines
5.1 KiB
Go

package main
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/go-chi/chi/v5"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/matrix"
)
// newTestRouter returns a chi router with the matrixd handlers registered
// on it. The Retriever behind those handlers is constructed from two
// 127.0.0.1:0 URLs, which are not expected to be reachable: the tests in
// this file either assert on responses produced before the retriever is
// consulted, or only require that a request got past validation.
func newTestRouter(t *testing.T) http.Handler {
	t.Helper()

	// Port 0 on loopback stands in for "no upstream available".
	const unreachable = "http://127.0.0.1:0"

	hs := &handlers{r: matrix.New(unreachable, unreachable)}
	mux := chi.NewRouter()
	hs.register(mux)
	return mux
}
// TestPlaybookRecord_OldFieldNameRejected is the regression guard for the
// 2026-04-30 driver/matrixd contract drift, when the playbook_lift driver
// briefly sent `{"query": ...}` while matrixd parsed `{"query_text": ...}`.
//
// It posts a payload that uses the legacy "query" key and asserts that the
// record endpoint answers 400 with a body containing "query_text required",
// the same rejection the harness observed. Renaming the JSON tag on the
// server side should make this test fail before a reality run does.
func TestPlaybookRecord_OldFieldNameRejected(t *testing.T) {
	// Legacy shape: "query" instead of "query_text".
	const legacyPayload = `{"query":"x","answer_id":"y","answer_corpus":"z","score":1.0}`

	req := httptest.NewRequest(http.MethodPost, "/matrix/playbooks/record", bytes.NewReader([]byte(legacyPayload)))
	req.Header.Set("Content-Type", "application/json")

	rec := httptest.NewRecorder()
	newTestRouter(t).ServeHTTP(rec, req)

	got := rec.Body.String()
	// A non-400 makes the message check meaningless, so stop here.
	if rec.Code != http.StatusBadRequest {
		t.Fatalf("expected 400 for old field name, got %d (body=%s)", rec.Code, got)
	}
	if !strings.Contains(got, "query_text required") {
		t.Errorf("expected validation error to mention query_text, got %q", got)
	}
}
// TestPlaybookRecord_CurrentFieldName is the counterpart to the old-field
// drift detector: it posts a payload that uses the current "query_text" key
// and asserts that the record endpoint does NOT answer 400.
//
// A 200 cannot be asserted because the test router has no live retriever
// behind it; "anything but 400" is taken as evidence that the request
// cleared the validation step.
func TestPlaybookRecord_CurrentFieldName(t *testing.T) {
	r := newTestRouter(t)

	body, err := json.Marshal(map[string]any{
		"query_text":    "forklift operator OSHA-30",
		"answer_id":     "worker_42",
		"answer_corpus": "workers",
		"score":         1.0,
		"tags":          []string{"reality-test"},
	})
	if err != nil {
		// A broken fixture must fail loudly instead of posting an empty
		// body and tripping the 400 check below for the wrong reason.
		t.Fatalf("marshal request body: %v", err)
	}

	req := httptest.NewRequest("POST", "/matrix/playbooks/record", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	w := httptest.NewRecorder()
	r.ServeHTTP(w, req)

	// The retriever call is expected to fail against the unreachable
	// upstream. NOTE(review): the original author expected 502 (upstream
	// HTTP error) or 500 (network error) here; that mapping is not visible
	// from this file, so only the "not 400" property is asserted.
	if w.Code == http.StatusBadRequest {
		t.Errorf("valid request rejected at validation step: %d %s", w.Code, w.Body.String())
	}
}
// TestPlaybookRecord_ScoreOutOfRange locks the score-bounds invariant that
// the original note attributes to internal/matrix/playbook.go: a negative
// score or a score above 1.0 must be rejected with a 400.
//
// Each out-of-range value runs as its own subtest so a single failing case
// can be targeted with `go test -run`.
//
// NOTE(review): a 400 raised by any other validation rule would also
// satisfy this check; consider asserting on the error text once the
// score-bounds message is confirmed.
func TestPlaybookRecord_ScoreOutOfRange(t *testing.T) {
	r := newTestRouter(t)

	cases := []struct {
		name  string
		score float64
	}{
		{"negative", -0.1},
		{"just above one", 1.1},
		{"far above one", 99},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			body, err := json.Marshal(map[string]any{
				"query_text":    "x",
				"answer_id":     "y",
				"answer_corpus": "z",
				"score":         tc.score,
			})
			if err != nil {
				t.Fatalf("marshal request body: %v", err)
			}

			req := httptest.NewRequest("POST", "/matrix/playbooks/record", bytes.NewReader(body))
			req.Header.Set("Content-Type", "application/json")
			w := httptest.NewRecorder()
			r.ServeHTTP(w, req)

			if w.Code != http.StatusBadRequest {
				// Include the body so an unexpected status is diagnosable
				// from the test log alone.
				t.Errorf("score=%v should be rejected, got %d (body=%s)", tc.score, w.Code, w.Body.String())
			}
		})
	}
}
// TestRelevance_EmptyChunks pins the explicit 400 that POST
// /matrix/relevance returns for a request whose "chunks" array is empty
// (handleRelevance, per the original note). The point is that a malformed
// request is rejected loudly rather than answered with an empty result.
func TestRelevance_EmptyChunks(t *testing.T) {
	// Present but empty focus and chunks.
	const emptyChunks = `{"focus":{},"chunks":[]}`

	req := httptest.NewRequest(http.MethodPost, "/matrix/relevance", bytes.NewReader([]byte(emptyChunks)))
	req.Header.Set("Content-Type", "application/json")

	rec := httptest.NewRecorder()
	newTestRouter(t).ServeHTTP(rec, req)

	if got := rec.Code; got != http.StatusBadRequest {
		t.Errorf("expected 400 on empty chunks, got %d (body=%s)", got, rec.Body.String())
	}
}
// TestRoutesMounted is the router-level wiring check for matrixd (closes
// R-005 for this binary). For each method/path pair in the table it sends
// a request with a `{}` JSON body and fails if the router answers 404
// (route not mounted) or 405 (mounted under a different method). Any other
// status is accepted, because it shows that a handler was reached.
//
// The table is expected to mirror the routes wired up by
// handlers.register; keep the two in sync when routes are added or removed.
func TestRoutesMounted(t *testing.T) {
	router := newTestRouter(t)

	routes := []struct {
		method, path string
	}{
		{http.MethodPost, "/matrix/search"},
		{http.MethodGet, "/matrix/corpora"},
		{http.MethodPost, "/matrix/relevance"},
		{http.MethodPost, "/matrix/downgrade"},
		{http.MethodPost, "/matrix/playbooks/record"},
		{http.MethodPost, "/matrix/playbooks/bulk"},
	}

	for _, route := range routes {
		t.Run(route.method+" "+route.path, func(t *testing.T) {
			req := httptest.NewRequest(route.method, route.path, bytes.NewReader([]byte(`{}`)))
			req.Header.Set("Content-Type", "application/json")

			rec := httptest.NewRecorder()
			router.ServeHTTP(rec, req)

			// Only the two router-generated statuses count as failures.
			switch rec.Code {
			case http.StatusNotFound:
				t.Errorf("%s %s returned 404 — route not mounted", route.method, route.path)
			case http.StatusMethodNotAllowed:
				t.Errorf("%s %s returned 405 — wrong method registered", route.method, route.path)
			}
		})
	}
}