golangLAKEHOUSE/cmd/vectord/main_test.go
root b216b7e5b6 fix the other 4: close all OPEN-list items in one wave
Substantial wave addressing all 4 prior OPEN items. Three closed in
full, one partially (the speculative half deliberately deferred).

OPEN #1 — Periodic fresh→main index merge (FULL):
- POST /v1/vectors/index/{src}/merge with {dest, clear_source}
- Idempotent on re-runs (existing-in-dest items skipped)
- internal/vectord/index.go: new Index.IDs() snapshot method +
  i.ids tracker field as canonical ID set, independent of meta
  map's nil-vs-{} sparseness (was a real bug — IDs() backed by meta
  alone missed items added with nil metadata)
- 4 cmd-level integration tests (happy path drain+clear, dim
  mismatch, dest not found, self-merge rejection) + 1 unit test
- DecodeIndex backward-compat: old envelopes restore i.ids from
  meta keys (best effort; new items going forward use the tracker)

OPEN #2 — Distillation SFT export (SUBSTRATE):
- internal/distillation/sft_export.go ports the load-bearing half:
  IsSftNever predicate + ListScoredRunFiles (data/scored-runs/YYYY/
  MM/DD walk) + LoadScoredRunsFromFile + partial ExportSft.
- Synthesis (instruction/input/response generation) deferred to a
  separate wave — too big for this session, but the substrate
  makes the next wave a port-not-design exercise.
- TestSftNever_PinsExpectedSet locks the contamination firewall
  set: if a future commit adds/removes from SftNever, this test
  fails — forcing the change through review.
- 5 new tests; firewall fires end-to-end through the partial port.

OPEN #3 — Distribution drift via PSI (FULL):
- internal/drift/drift.go: ComputeDistributionDrift via Population
  Stability Index. Standard finance/risk metric, well-defined
  verdict tiers (stable < 0.10, minor 0.10–0.25, major ≥ 0.25).
- Equal-width bucketing over combined min/max so neither dist
  falls outside; epsilon-clamping for empty buckets so log doesn't
  blow up. Per-bucket breakdown for drilldown.
- Pairs with the existing ComputeScorerDrift: scorer drift is
  categorical, distribution drift is continuous. Different shapes,
  same package.
- 7 new tests covering identical-is-stable, hard-shift-is-major,
  moderate-detected-not-stable, empty-inputs-safe, all-identical-
  safe, bucket-counts-conserved, num-buckets-clamping.

OPEN #4 — Ops nice-to-haves (PARTIAL — wall-clock done, others
deferred):
- (a) Real-time wall-clock for stress harness: per-phase elapsed
  time logged to stdout as it runs (`[stress] phase NAME starting
  (T+12.3s)` + `[stress] phase NAME done — 8.5s (T+20.8s)`).
  Output.PhaseTimings + Output.TotalElapsedMs in JSON.
- (b) chatd fixture-mode S3 mock + (c) liberal-paraphrase
  calibration: not actioned — no fired trigger, would be
  speculative. Documented as deferred-until-need rather than
  ignored. Per the project's discipline ("don't add features
  beyond what the task requires").

OPEN list now empty / steady-state. Future items will land as
production triggers fire.

Build + vet + tests green; 18 new tests across the 4 closures.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 23:42:11 -05:00

420 lines
12 KiB
Go

package main
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"strconv"
"strings"
"testing"
"github.com/go-chi/chi/v5"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/vectord"
)
// Closes R-005 for vectord: cmd-level tests for the 7 routes.
// Persistence-disabled mode (h.persist == nil) is the test config —
// keeps tests pure-in-memory; persistence is covered by g1p_smoke +
// proof GOLAKE-070.
// mountedRouter returns a new chi router with the vectord handlers
// registered on it. The handlers value is built with only its registry
// set (a fresh vectord.NewRegistry()), so every call yields an isolated,
// empty router for one test.
func mountedRouter() chi.Router {
	router := chi.NewRouter()
	(&handlers{reg: vectord.NewRegistry()}).register(router)
	return router
}
// TestRoutesMounted walks the router produced by mountedRouter and checks
// that every expected "METHOD pattern" pair is registered. Routes that are
// mounted but not listed in want are ignored.
func TestRoutesMounted(t *testing.T) {
	r := mountedRouter()
	want := map[string]bool{
		"POST /vectors/index":               false,
		"GET /vectors/index":                false,
		"GET /vectors/index/{name}":         false,
		"DELETE /vectors/index/{name}":      false,
		"POST /vectors/index/{name}/add":    false,
		"POST /vectors/index/{name}/search": false,
		"POST /vectors/index/{name}/merge":  false,
	}

	// Flip the flag for each expected signature the walker reports.
	mark := func(method, route string, _ http.Handler, _ ...func(http.Handler) http.Handler) error {
		key := method + " " + route
		if _, ok := want[key]; ok {
			want[key] = true
		}
		return nil
	}
	// A failed walk would otherwise surface only as a list of "missing"
	// routes, so report it directly instead of dropping the error.
	if err := chi.Walk(r, mark); err != nil {
		t.Fatalf("chi.Walk: %v", err)
	}

	for sig, found := range want {
		if !found {
			t.Errorf("expected route %q mounted", sig)
		}
	}
}
// TestHandleCreate_HappyPath_201 posts a create request carrying both a
// name and a dimension and expects 201 Created.
func TestHandleCreate_HappyPath_201(t *testing.T) {
	server := httptest.NewServer(mountedRouter())
	defer server.Close()

	payload := strings.NewReader(`{"name":"test_idx","dimension":4}`)
	res, err := http.Post(server.URL+"/vectors/index", "application/json", payload)
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusCreated {
		t.Errorf("expected 201 on create, got %d", got)
	}
}
// TestHandleCreate_MissingDim_400 posts a create request that has a name
// but no dimension and expects 400 Bad Request.
func TestHandleCreate_MissingDim_400(t *testing.T) {
	server := httptest.NewServer(mountedRouter())
	defer server.Close()

	const payload = `{"name":"missing_dim"}`
	res, err := http.Post(server.URL+"/vectors/index", "application/json", strings.NewReader(payload))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusBadRequest {
		t.Errorf("expected 400 on missing dim, got %d", got)
	}
}
// TestHandleCreate_MalformedJSON_400 posts a body that is not JSON to the
// create route and expects 400 Bad Request.
func TestHandleCreate_MalformedJSON_400(t *testing.T) {
	server := httptest.NewServer(mountedRouter())
	defer server.Close()

	garbage := strings.NewReader("not json")
	res, err := http.Post(server.URL+"/vectors/index", "application/json", garbage)
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusBadRequest {
		t.Errorf("expected 400 on malformed, got %d", got)
	}
}
// TestHandleCreate_BodyTooLarge posts a body 1 MiB larger than
// maxRequestBytes to the create route and accepts any 4xx status.
//
// The range check is deliberate — see embedd's TestHandleEmbed_BodyTooLarge
// for the 413-vs-400 unwrap nuance. Contract is "client error, fails loud."
func TestHandleCreate_BodyTooLarge(t *testing.T) {
	server := httptest.NewServer(mountedRouter())
	defer server.Close()

	oversized := bytes.Repeat([]byte("x"), maxRequestBytes+(1<<20))
	res, err := http.Post(server.URL+"/vectors/index", "application/json", bytes.NewReader(oversized))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer res.Body.Close()

	code := res.StatusCode
	if isClientError := code >= 400 && code < 500; !isClientError {
		t.Errorf("expected 4xx on oversize, got %d", code)
	}
}
// TestHandleGetIndex_NotFound_404 fetches an index name that was never
// created and expects 404 Not Found.
func TestHandleGetIndex_NotFound_404(t *testing.T) {
	server := httptest.NewServer(mountedRouter())
	defer server.Close()

	target := server.URL + "/vectors/index/nonexistent"
	res, err := http.Get(target)
	if err != nil {
		t.Fatalf("GET: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusNotFound {
		t.Errorf("expected 404, got %d", got)
	}
}
// TestHandleAdd_IndexNotFound_404 posts one item to the add route of an
// index that was never created and expects 404 Not Found.
func TestHandleAdd_IndexNotFound_404(t *testing.T) {
	server := httptest.NewServer(mountedRouter())
	defer server.Close()

	const payload = `{"items":[{"id":"v1","vector":[1,2,3,4]}]}`
	res, err := http.Post(server.URL+"/vectors/index/missing/add", "application/json", strings.NewReader(payload))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusNotFound {
		t.Errorf("expected 404 on add to missing index, got %d", got)
	}
}
// TestHandleAdd_EmptyItems_400 creates a 4-dimension index, then posts an
// add request whose items array is empty and expects 400 Bad Request.
func TestHandleAdd_EmptyItems_400(t *testing.T) {
	r := mountedRouter()
	srv := httptest.NewServer(r)
	defer srv.Close()

	// Create the index first. The setup call is checked and its body is
	// closed so that a failed create is reported as such rather than as a
	// misleading status on the add below, and so the response is not leaked.
	created, err := http.Post(srv.URL+"/vectors/index", "application/json",
		strings.NewReader(`{"name":"empty_test","dimension":4}`))
	if err != nil {
		t.Fatalf("create index: %v", err)
	}
	created.Body.Close()
	if created.StatusCode != http.StatusCreated {
		t.Fatalf("create index: expected 201, got %d", created.StatusCode)
	}

	resp, err := http.Post(srv.URL+"/vectors/index/empty_test/add", "application/json",
		strings.NewReader(`{"items":[]}`))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusBadRequest {
		t.Errorf("expected 400 on empty items, got %d", resp.StatusCode)
	}
}
// TestHandleAdd_DimMismatch_400 creates a 3-dimension index, then adds an
// item carrying a 4-element vector and expects 400 Bad Request.
func TestHandleAdd_DimMismatch_400(t *testing.T) {
	r := mountedRouter()
	srv := httptest.NewServer(r)
	defer srv.Close()

	// Setup: the index must exist, otherwise the add below would not reach
	// the dimension check at all. Check the call and close its body instead
	// of discarding both.
	created, err := http.Post(srv.URL+"/vectors/index", "application/json",
		strings.NewReader(`{"name":"dim_test","dimension":3}`))
	if err != nil {
		t.Fatalf("create index: %v", err)
	}
	created.Body.Close()
	if created.StatusCode != http.StatusCreated {
		t.Fatalf("create index: expected 201, got %d", created.StatusCode)
	}

	resp, err := http.Post(srv.URL+"/vectors/index/dim_test/add", "application/json",
		strings.NewReader(`{"items":[{"id":"x","vector":[1,2,3,4]}]}`))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusBadRequest {
		t.Errorf("expected 400 on dim mismatch, got %d", resp.StatusCode)
	}
}
// TestHandleAdd_EmptyID_400 creates a 4-dimension index, then adds an item
// whose id is the empty string and expects 400 Bad Request.
func TestHandleAdd_EmptyID_400(t *testing.T) {
	r := mountedRouter()
	srv := httptest.NewServer(r)
	defer srv.Close()

	// Setup: create the target index, checking the call and closing its
	// body rather than discarding both.
	created, err := http.Post(srv.URL+"/vectors/index", "application/json",
		strings.NewReader(`{"name":"id_test","dimension":4}`))
	if err != nil {
		t.Fatalf("create index: %v", err)
	}
	created.Body.Close()
	if created.StatusCode != http.StatusCreated {
		t.Fatalf("create index: expected 201, got %d", created.StatusCode)
	}

	resp, err := http.Post(srv.URL+"/vectors/index/id_test/add", "application/json",
		strings.NewReader(`{"items":[{"id":"","vector":[1,2,3,4]}]}`))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusBadRequest {
		t.Errorf("expected 400 on empty id, got %d", resp.StatusCode)
	}
}
// TestHandleSearch_IndexNotFound_404 posts a search (vector plus k) to an
// index that was never created and expects 404 Not Found.
func TestHandleSearch_IndexNotFound_404(t *testing.T) {
	server := httptest.NewServer(mountedRouter())
	defer server.Close()

	const query = `{"vector":[1,2,3,4],"k":5}`
	res, err := http.Post(server.URL+"/vectors/index/missing/search", "application/json", strings.NewReader(query))
	if err != nil {
		t.Fatalf("POST: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusNotFound {
		t.Errorf("expected 404 on search of missing index, got %d", got)
	}
}
// TestHandleDelete_NotFound_404 issues a DELETE for an index that was never
// created and expects 404 Not Found.
func TestHandleDelete_NotFound_404(t *testing.T) {
	r := mountedRouter()
	srv := httptest.NewServer(r)
	defer srv.Close()

	// http.Post has no DELETE counterpart, so build the request by hand.
	// The construction error is checked rather than discarded so that a bad
	// URL fails here instead of as a nil-request panic inside Do.
	req, err := http.NewRequest(http.MethodDelete, srv.URL+"/vectors/index/missing", nil)
	if err != nil {
		t.Fatalf("build DELETE request: %v", err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("DELETE: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusNotFound {
		t.Errorf("expected 404 deleting missing index, got %d", resp.StatusCode)
	}
}
// TestHandleList_EmptyShape lists indexes on a router with an empty
// registry and expects 200 OK.
//
// NOTE(review): despite the name, only the status code is asserted; the
// response body's shape is not inspected here.
func TestHandleList_EmptyShape(t *testing.T) {
	server := httptest.NewServer(mountedRouter())
	defer server.Close()

	res, err := http.Get(server.URL + "/vectors/index")
	if err != nil {
		t.Fatalf("GET: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusOK {
		t.Errorf("expected 200, got %d", got)
	}
}
// TestHandleMerge_HappyPath_DrainAndClear exercises the merge route
// end-to-end against an in-process httptest server (no external HTTP):
// create source + dest indexes, populate source, merge with
// clear_source=true, assert dest gained the items, source emptied.
// Closes OPEN #1 — locks the merge contract at unit level so a
// future regression on the IDs/Lookup/Add/Delete chain fails here
// before any operator hits "merge again" and silently moves nothing.
func TestHandleMerge_HappyPath_DrainAndClear(t *testing.T) {
	h := &handlers{reg: vectord.NewRegistry()}
	r := chi.NewRouter()
	h.register(r)
	srv := httptest.NewServer(r)
	defer srv.Close()

	// mustPost performs one setup POST and aborts the test on a transport
	// error or an unexpected status. The error is checked BEFORE resp is
	// touched: on a failed request resp is nil, so formatting
	// resp.StatusCode in the failure message would panic instead of
	// reporting the error. The body is always closed.
	mustPost := func(step, path, body string, wantStatus int) {
		t.Helper()
		resp, err := http.Post(srv.URL+path, "application/json", strings.NewReader(body))
		if err != nil {
			t.Fatalf("%s: %v", step, err)
		}
		defer resp.Body.Close()
		if resp.StatusCode != wantStatus {
			t.Fatalf("%s: status=%d, want %d", step, resp.StatusCode, wantStatus)
		}
	}

	// Create both indexes (4-d for test simplicity).
	for _, name := range []string{"fresh_test", "main_test"} {
		body := `{"name":"` + name + `","dimension":4,"distance":"cosine"}`
		mustPost("create "+name, "/vectors/index", body, http.StatusCreated)
	}

	// Populate fresh_test with 3 items.
	addBody := `{"items":[
		{"id":"f-1","vector":[1,0,0,0],"metadata":{"name":"fresh-001"}},
		{"id":"f-2","vector":[0,1,0,0],"metadata":{"name":"fresh-002"}},
		{"id":"f-3","vector":[0,0,1,0],"metadata":{"name":"fresh-003"}}
	]}`
	mustPost("add to fresh_test", "/vectors/index/fresh_test/add", addBody, http.StatusOK)

	// Pre-seed main_test with one item that ALSO exists in fresh
	// (collision) so we exercise the skipped_already_present path.
	preBody := `{"items":[{"id":"f-1","vector":[1,0,0,0],"metadata":{"name":"main-collision"}}]}`
	mustPost("add collision to main_test", "/vectors/index/main_test/add", preBody, http.StatusOK)

	// Merge fresh_test → main_test, clearing source.
	mergeBody := `{"dest":"main_test","clear_source":true}`
	resp, err := http.Post(srv.URL+"/vectors/index/fresh_test/merge", "application/json", strings.NewReader(mergeBody))
	if err != nil {
		t.Fatalf("merge: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		t.Errorf("expected 200 on merge, got %d", resp.StatusCode)
	}

	var out mergeResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		t.Fatalf("decode merge resp: %v", err)
	}
	if out.Merged != 2 {
		t.Errorf("expected 2 merged (f-2 + f-3), got %d", out.Merged)
	}
	if out.SkippedAlreadyPresent != 1 {
		t.Errorf("expected 1 skipped (f-1 collision), got %d", out.SkippedAlreadyPresent)
	}
	if out.LengthSource != 0 {
		t.Errorf("expected source emptied, got len=%d", out.LengthSource)
	}
	if out.LengthDest != 3 {
		t.Errorf("expected dest len=3 after merge, got %d", out.LengthDest)
	}
}
// TestHandleMerge_DimensionMismatch_400 creates a 4-dimension source and an
// 8-dimension destination, asks to merge the former into the latter, and
// expects 400 Bad Request.
func TestHandleMerge_DimensionMismatch_400(t *testing.T) {
	hs := &handlers{reg: vectord.NewRegistry()}
	router := chi.NewRouter()
	hs.register(router)
	server := httptest.NewServer(router)
	defer server.Close()

	type indexSpec struct {
		name string
		dim  int
	}
	specs := []indexSpec{
		{"src_4d", 4},
		{"dst_8d", 8},
	}
	for i := range specs {
		spec := specs[i]
		payload := `{"name":"` + spec.name + `","dimension":` + strconv.Itoa(spec.dim) + `,"distance":"cosine"}`
		created, err := http.Post(server.URL+"/vectors/index", "application/json", strings.NewReader(payload))
		if err != nil {
			t.Fatalf("create %s: %v", spec.name, err)
		}
		created.Body.Close()
	}

	const mergeReq = `{"dest":"dst_8d"}`
	res, err := http.Post(server.URL+"/vectors/index/src_4d/merge", "application/json", strings.NewReader(mergeReq))
	if err != nil {
		t.Fatalf("merge: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusBadRequest {
		t.Errorf("expected 400 on dim mismatch, got %d", got)
	}
}
// TestHandleMerge_DestNotFound_404 creates only the source index, then
// merges it into a destination name that was never created and expects
// 404 Not Found.
func TestHandleMerge_DestNotFound_404(t *testing.T) {
	hs := &handlers{reg: vectord.NewRegistry()}
	router := chi.NewRouter()
	hs.register(router)
	server := httptest.NewServer(router)
	defer server.Close()

	const createReq = `{"name":"only_src","dimension":4}`
	created, err := http.Post(server.URL+"/vectors/index", "application/json", strings.NewReader(createReq))
	if err != nil {
		t.Fatalf("create: %v", err)
	}
	created.Body.Close()

	const mergeReq = `{"dest":"missing_dest"}`
	merged, err := http.Post(server.URL+"/vectors/index/only_src/merge", "application/json", strings.NewReader(mergeReq))
	if err != nil {
		t.Fatalf("merge: %v", err)
	}
	defer merged.Body.Close()

	if got := merged.StatusCode; got != http.StatusNotFound {
		t.Errorf("expected 404 for missing dest, got %d", got)
	}
}
// TestHandleMerge_SameSourceDest_400 creates one index and asks to merge it
// into itself; the request is expected to be rejected with 400 Bad Request.
func TestHandleMerge_SameSourceDest_400(t *testing.T) {
	hs := &handlers{reg: vectord.NewRegistry()}
	router := chi.NewRouter()
	hs.register(router)
	server := httptest.NewServer(router)
	defer server.Close()

	const createReq = `{"name":"self","dimension":4}`
	created, err := http.Post(server.URL+"/vectors/index", "application/json", strings.NewReader(createReq))
	if err != nil {
		t.Fatalf("create: %v", err)
	}
	created.Body.Close()

	const mergeReq = `{"dest":"self"}`
	merged, err := http.Post(server.URL+"/vectors/index/self/merge", "application/json", strings.NewReader(mergeReq))
	if err != nil {
		t.Fatalf("merge: %v", err)
	}
	defer merged.Body.Close()

	if got := merged.StatusCode; got != http.StatusBadRequest {
		t.Errorf("expected 400 for self-merge, got %d", got)
	}
}
// TestSearchK_DefaultsAndMax sanity-checks the package's search-k limits:
// defaultK must be positive, maxK must not be below defaultK, and maxK must
// not exceed 100,000.
func TestSearchK_DefaultsAndMax(t *testing.T) {
	// Upper sanity bound for maxK.
	const ceiling = 100_000

	if !(defaultK > 0) {
		t.Errorf("defaultK = %d, must be > 0", defaultK)
	}
	if defaultK > maxK {
		t.Errorf("maxK=%d < defaultK=%d", maxK, defaultK)
	}
	if maxK > ceiling {
		t.Errorf("maxK=%d unreasonably large", maxK)
	}
}