Substantial wave addressing all 4 prior OPEN items. Three closed in full, one partially (the speculative half deliberately deferred). OPEN #1 — Periodic fresh→main index merge (FULL): - POST /v1/vectors/index/{src}/merge with {dest, clear_source} - Idempotent on re-runs (existing-in-dest items skipped) - internal/vectord/index.go: new Index.IDs() snapshot method + i.ids tracker field as canonical ID set, independent of meta map's nil-vs-{} sparseness (was a real bug — IDs() backed by meta alone missed items added with nil metadata) - 4 cmd-level integration tests (happy path drain+clear, dim mismatch, dest not found, self-merge rejection) + 1 unit test - DecodeIndex backward-compat: old envelopes restore i.ids from meta keys (best effort; new items going forward use the tracker) OPEN #2 — Distillation SFT export (SUBSTRATE): - internal/distillation/sft_export.go ports the load-bearing half: IsSftNever predicate + ListScoredRunFiles (data/scored-runs/YYYY/ MM/DD walk) + LoadScoredRunsFromFile + partial ExportSft. - Synthesis (instruction/input/response generation) deferred to a separate wave — too big for this session, but the substrate makes the next wave a port-not-design exercise. - TestSftNever_PinsExpectedSet locks the contamination firewall set: if a future commit adds/removes from SftNever, this test fails — forcing the change through review. - 5 new tests; firewall fires end-to-end through the partial port. OPEN #3 — Distribution drift via PSI (FULL): - internal/drift/drift.go: ComputeDistributionDrift via Population Stability Index. Standard finance/risk metric, well-defined verdict tiers (stable < 0.10, minor 0.10–0.25, major ≥ 0.25). - Equal-width bucketing over combined min/max so neither dist falls outside; epsilon-clamping for empty buckets so log doesn't blow up. Per-bucket breakdown for drilldown. - Pairs with the existing ComputeScorerDrift: scorer drift is categorical, distribution drift is continuous. Different shapes, same package. 
- 7 new tests covering identical-is-stable, hard-shift-is-major, moderate-detected-not-stable, empty-inputs-safe, all-identical- safe, bucket-counts-conserved, num-buckets-clamping. OPEN #4 — Ops nice-to-haves (PARTIAL — wall-clock done, others deferred): - (a) Real-time wall-clock for stress harness: per-phase elapsed time logged to stdout as it runs (`[stress] phase NAME starting (T+12.3s)` + `[stress] phase NAME done — 8.5s (T+20.8s)`). Output.PhaseTimings + Output.TotalElapsedMs in JSON. - (b) chatd fixture-mode S3 mock + (c) liberal-paraphrase calibration: not actioned — no fired trigger, would be speculative. Documented as deferred-until-need rather than ignored. Per the project's discipline ("don't add features beyond what the task requires"). OPEN list now empty / steady-state. Future items will land as production triggers fire. Build + vet + tests green; 18 new tests across the 4 closures. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
420 lines
12 KiB
Go
420 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strconv"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
|
|
"git.agentview.dev/profit/golangLAKEHOUSE/internal/vectord"
|
|
)
|
|
|
|
// Closes R-005 for vectord: cmd-level tests for the 7 routes.
|
|
// Persistence-disabled mode (h.persist == nil) is the test config —
|
|
// keeps tests pure-in-memory; persistence is covered by g1p_smoke +
|
|
// proof GOLAKE-070.
|
|
|
|
func mountedRouter() chi.Router {
|
|
h := &handlers{reg: vectord.NewRegistry()}
|
|
r := chi.NewRouter()
|
|
h.register(r)
|
|
return r
|
|
}
|
|
|
|
func TestRoutesMounted(t *testing.T) {
|
|
r := mountedRouter()
|
|
want := map[string]bool{
|
|
"POST /vectors/index": false,
|
|
"GET /vectors/index": false,
|
|
"GET /vectors/index/{name}": false,
|
|
"DELETE /vectors/index/{name}": false,
|
|
"POST /vectors/index/{name}/add": false,
|
|
"POST /vectors/index/{name}/search": false,
|
|
"POST /vectors/index/{name}/merge": false,
|
|
}
|
|
chi.Walk(r, func(method, route string, _ http.Handler, _ ...func(http.Handler) http.Handler) error {
|
|
key := method + " " + route
|
|
if _, ok := want[key]; ok {
|
|
want[key] = true
|
|
}
|
|
return nil
|
|
})
|
|
for sig, found := range want {
|
|
if !found {
|
|
t.Errorf("expected route %q mounted", sig)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestHandleCreate_HappyPath_201(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index", "application/json",
|
|
strings.NewReader(`{"name":"test_idx","dimension":4}`))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusCreated {
|
|
t.Errorf("expected 201 on create, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleCreate_MissingDim_400(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index", "application/json",
|
|
strings.NewReader(`{"name":"missing_dim"}`))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Errorf("expected 400 on missing dim, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleCreate_MalformedJSON_400(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index", "application/json",
|
|
strings.NewReader("not json"))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Errorf("expected 400 on malformed, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleCreate_BodyTooLarge(t *testing.T) {
|
|
// 4xx range — see embedd's TestHandleEmbed_BodyTooLarge for the
|
|
// 413-vs-400 unwrap nuance. Contract is "client error, fails loud."
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
big := bytes.Repeat([]byte("x"), maxRequestBytes+(1<<20))
|
|
resp, err := http.Post(srv.URL+"/vectors/index", "application/json", bytes.NewReader(big))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode < 400 || resp.StatusCode >= 500 {
|
|
t.Errorf("expected 4xx on oversize, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleGetIndex_NotFound_404(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Get(srv.URL + "/vectors/index/nonexistent")
|
|
if err != nil {
|
|
t.Fatalf("GET: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusNotFound {
|
|
t.Errorf("expected 404, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleAdd_IndexNotFound_404(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index/missing/add", "application/json",
|
|
strings.NewReader(`{"items":[{"id":"v1","vector":[1,2,3,4]}]}`))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusNotFound {
|
|
t.Errorf("expected 404 on add to missing index, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleAdd_EmptyItems_400(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
// Create index first.
|
|
http.Post(srv.URL+"/vectors/index", "application/json",
|
|
strings.NewReader(`{"name":"empty_test","dimension":4}`))
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index/empty_test/add", "application/json",
|
|
strings.NewReader(`{"items":[]}`))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Errorf("expected 400 on empty items, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleAdd_DimMismatch_400(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
http.Post(srv.URL+"/vectors/index", "application/json",
|
|
strings.NewReader(`{"name":"dim_test","dimension":3}`))
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index/dim_test/add", "application/json",
|
|
strings.NewReader(`{"items":[{"id":"x","vector":[1,2,3,4]}]}`))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Errorf("expected 400 on dim mismatch, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleAdd_EmptyID_400(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
http.Post(srv.URL+"/vectors/index", "application/json",
|
|
strings.NewReader(`{"name":"id_test","dimension":4}`))
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index/id_test/add", "application/json",
|
|
strings.NewReader(`{"items":[{"id":"","vector":[1,2,3,4]}]}`))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Errorf("expected 400 on empty id, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleSearch_IndexNotFound_404(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index/missing/search", "application/json",
|
|
strings.NewReader(`{"vector":[1,2,3,4],"k":5}`))
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusNotFound {
|
|
t.Errorf("expected 404 on search of missing index, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleDelete_NotFound_404(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
req, _ := http.NewRequest(http.MethodDelete, srv.URL+"/vectors/index/missing", nil)
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
t.Fatalf("DELETE: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusNotFound {
|
|
t.Errorf("expected 404 deleting missing index, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleList_EmptyShape(t *testing.T) {
|
|
r := mountedRouter()
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Get(srv.URL + "/vectors/index")
|
|
if err != nil {
|
|
t.Fatalf("GET: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Errorf("expected 200, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
// TestHandleMerge end-to-end via mountedRouter (no external HTTP):
|
|
// create source + dest indexes, populate source, merge with
|
|
// clear_source=true, assert dest gained the items, source emptied.
|
|
// Closes OPEN #1 — locks the merge contract at unit level so a
|
|
// future regression on the IDs/Lookup/Add/Delete chain fails here
|
|
// before any operator hits "merge again" and silently moves nothing.
|
|
func TestHandleMerge_HappyPath_DrainAndClear(t *testing.T) {
|
|
h := &handlers{reg: vectord.NewRegistry()}
|
|
r := chi.NewRouter()
|
|
h.register(r)
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
// Create both indexes (4-d for test simplicity).
|
|
for _, name := range []string{"fresh_test", "main_test"} {
|
|
body := `{"name":"` + name + `","dimension":4,"distance":"cosine"}`
|
|
resp, err := http.Post(srv.URL+"/vectors/index", "application/json", strings.NewReader(body))
|
|
if err != nil {
|
|
t.Fatalf("create %s: %v", name, err)
|
|
}
|
|
resp.Body.Close()
|
|
}
|
|
|
|
// Populate fresh_test with 3 items.
|
|
addBody := `{"items":[
|
|
{"id":"f-1","vector":[1,0,0,0],"metadata":{"name":"fresh-001"}},
|
|
{"id":"f-2","vector":[0,1,0,0],"metadata":{"name":"fresh-002"}},
|
|
{"id":"f-3","vector":[0,0,1,0],"metadata":{"name":"fresh-003"}}
|
|
]}`
|
|
resp, err := http.Post(srv.URL+"/vectors/index/fresh_test/add", "application/json", strings.NewReader(addBody))
|
|
if err != nil || resp.StatusCode != http.StatusOK {
|
|
t.Fatalf("add to fresh_test: status=%d err=%v", resp.StatusCode, err)
|
|
}
|
|
resp.Body.Close()
|
|
|
|
// Pre-seed main_test with one item that ALSO exists in fresh
|
|
// (collision) so we exercise the skipped_already_present path.
|
|
preBody := `{"items":[{"id":"f-1","vector":[1,0,0,0],"metadata":{"name":"main-collision"}}]}`
|
|
resp, err = http.Post(srv.URL+"/vectors/index/main_test/add", "application/json", strings.NewReader(preBody))
|
|
if err != nil || resp.StatusCode != http.StatusOK {
|
|
t.Fatalf("add collision to main_test: status=%d err=%v", resp.StatusCode, err)
|
|
}
|
|
resp.Body.Close()
|
|
|
|
// Merge fresh_test → main_test, clearing source.
|
|
mergeBody := `{"dest":"main_test","clear_source":true}`
|
|
resp, err = http.Post(srv.URL+"/vectors/index/fresh_test/merge", "application/json", strings.NewReader(mergeBody))
|
|
if err != nil {
|
|
t.Fatalf("merge: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Errorf("expected 200 on merge, got %d", resp.StatusCode)
|
|
}
|
|
var out mergeResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
|
|
t.Fatalf("decode merge resp: %v", err)
|
|
}
|
|
if out.Merged != 2 {
|
|
t.Errorf("expected 2 merged (f-2 + f-3), got %d", out.Merged)
|
|
}
|
|
if out.SkippedAlreadyPresent != 1 {
|
|
t.Errorf("expected 1 skipped (f-1 collision), got %d", out.SkippedAlreadyPresent)
|
|
}
|
|
if out.LengthSource != 0 {
|
|
t.Errorf("expected source emptied, got len=%d", out.LengthSource)
|
|
}
|
|
if out.LengthDest != 3 {
|
|
t.Errorf("expected dest len=3 after merge, got %d", out.LengthDest)
|
|
}
|
|
}
|
|
|
|
func TestHandleMerge_DimensionMismatch_400(t *testing.T) {
|
|
h := &handlers{reg: vectord.NewRegistry()}
|
|
r := chi.NewRouter()
|
|
h.register(r)
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
for _, c := range []struct{ name string; dim int }{
|
|
{"src_4d", 4},
|
|
{"dst_8d", 8},
|
|
} {
|
|
body := `{"name":"` + c.name + `","dimension":` + strconv.Itoa(c.dim) + `,"distance":"cosine"}`
|
|
resp, err := http.Post(srv.URL+"/vectors/index", "application/json", strings.NewReader(body))
|
|
if err != nil {
|
|
t.Fatalf("create %s: %v", c.name, err)
|
|
}
|
|
resp.Body.Close()
|
|
}
|
|
|
|
resp, err := http.Post(srv.URL+"/vectors/index/src_4d/merge", "application/json",
|
|
strings.NewReader(`{"dest":"dst_8d"}`))
|
|
if err != nil {
|
|
t.Fatalf("merge: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Errorf("expected 400 on dim mismatch, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleMerge_DestNotFound_404(t *testing.T) {
|
|
h := &handlers{reg: vectord.NewRegistry()}
|
|
r := chi.NewRouter()
|
|
h.register(r)
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
body := `{"name":"only_src","dimension":4}`
|
|
resp, err := http.Post(srv.URL+"/vectors/index", "application/json", strings.NewReader(body))
|
|
if err != nil {
|
|
t.Fatalf("create: %v", err)
|
|
}
|
|
resp.Body.Close()
|
|
|
|
resp, err = http.Post(srv.URL+"/vectors/index/only_src/merge", "application/json",
|
|
strings.NewReader(`{"dest":"missing_dest"}`))
|
|
if err != nil {
|
|
t.Fatalf("merge: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusNotFound {
|
|
t.Errorf("expected 404 for missing dest, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestHandleMerge_SameSourceDest_400(t *testing.T) {
|
|
h := &handlers{reg: vectord.NewRegistry()}
|
|
r := chi.NewRouter()
|
|
h.register(r)
|
|
srv := httptest.NewServer(r)
|
|
defer srv.Close()
|
|
|
|
body := `{"name":"self","dimension":4}`
|
|
resp, err := http.Post(srv.URL+"/vectors/index", "application/json", strings.NewReader(body))
|
|
if err != nil {
|
|
t.Fatalf("create: %v", err)
|
|
}
|
|
resp.Body.Close()
|
|
|
|
resp, err = http.Post(srv.URL+"/vectors/index/self/merge", "application/json",
|
|
strings.NewReader(`{"dest":"self"}`))
|
|
if err != nil {
|
|
t.Fatalf("merge: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusBadRequest {
|
|
t.Errorf("expected 400 for self-merge, got %d", resp.StatusCode)
|
|
}
|
|
}
|
|
|
|
func TestSearchK_DefaultsAndMax(t *testing.T) {
|
|
if defaultK <= 0 {
|
|
t.Errorf("defaultK = %d, must be > 0", defaultK)
|
|
}
|
|
if maxK < defaultK {
|
|
t.Errorf("maxK=%d < defaultK=%d", maxK, defaultK)
|
|
}
|
|
// Sanity bounds.
|
|
if maxK > 100_000 {
|
|
t.Errorf("maxK=%d unreasonably large", maxK)
|
|
}
|
|
}
|