// golangLAKEHOUSE/internal/vectord/batch_bench_test.go

package vectord

import (
	"fmt"
	"math/rand"
	"testing"
)

// BenchmarkSingleAdd and BenchmarkBatchAdd form a matched pair that
// quantifies the lock-amortization win for the HTTP-batch shape. Both
// insert the same N deterministic 768-dimensional vectors into a fresh
// index on every iteration; one path takes the lock N times (one Add
// call per item), the other takes it once (a single BatchAdd call).
//
// Fixture generation is excluded from the timed region via
// b.ResetTimer; otherwise it would be counted in the single measured
// iteration of the recommended invocation below. Index construction
// stays inside the loop in both benchmarks, so it contributes equally
// to each side of the comparison. Run with:
//
//	go test ./internal/vectord/ -bench=. -benchmem -benchtime=1x
func BenchmarkSingleAdd(b *testing.B) {
	for _, n := range []int{16, 128, 1024} {
		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
			items := makeBatch(n, 768)
			b.ResetTimer() // keep vector generation out of the measurement
			for i := 0; i < b.N; i++ {
				// A fresh index per iteration so every Add is a first insert.
				idx := mustIndex(b)
				for _, it := range items {
					if err := idx.Add(it.ID, it.Vector, it.Metadata); err != nil {
						b.Fatalf("Add: %v", err)
					}
				}
			}
		})
	}
}

// BenchmarkBatchAdd is the batch-path counterpart of BenchmarkSingleAdd:
// same sizes, same vectors, same timing boundaries, but all items are
// inserted through one BatchAdd call per iteration.
func BenchmarkBatchAdd(b *testing.B) {
	for _, n := range []int{16, 128, 1024} {
		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
			items := makeBatch(n, 768)
			b.ResetTimer() // keep vector generation out of the measurement
			for i := 0; i < b.N; i++ {
				// A fresh index per iteration, mirroring BenchmarkSingleAdd.
				idx := mustIndex(b)
				if err := idx.BatchAdd(items); err != nil {
					b.Fatalf("BatchAdd: %v", err)
				}
			}
		})
	}
}

// TestBatchAdd_IntraBatchDedup is the regression guard for the
// 2026-04-29 scrum BLOCK: when one batch carries the same ID twice and
// no dedup happens, coder/hnsw's "node not added" length-invariant
// panics. The expected semantics are last-write-wins, i.e. the later
// vector for a repeated ID replaces the earlier one.
func TestBatchAdd_IntraBatchDedup(t *testing.T) {
	index := mustIndex(t)

	// "a" appears twice; the second occurrence (marker value 99) must win.
	err := index.BatchAdd([]BatchItem{
		{ID: "a", Vector: makeVec(768, 1)},
		{ID: "b", Vector: makeVec(768, 2)},
		{ID: "a", Vector: makeVec(768, 99)},
	})
	if err != nil {
		t.Fatalf("BatchAdd: %v", err)
	}

	// Two distinct IDs went in, so exactly two entries should exist.
	if got := index.Len(); got != 2 {
		t.Errorf("Len: want 2, got %d", got)
	}

	// The stored vector for "a" must be the later one, not the first.
	stored, _, found := index.Lookup("a")
	if !found {
		t.Fatal("a not found")
	}
	if first := stored[0]; first != 99 {
		t.Errorf("last-write-wins: want vec[0]=99, got %v", first)
	}
}

// makeVec returns a dim-length vector whose first component is val and
// whose second component is 1; every remaining component is zero. The
// fixed 1 keeps the norm non-zero under cosine distance even when val
// is 0.
//
// Dimensions below 2 are handled without panicking: dim == 1 yields
// [val] (zero norm when val is 0) and dim == 0 yields an empty vector.
func makeVec(dim int, val float32) []float32 {
	v := make([]float32, dim)
	if dim > 0 {
		v[0] = val
	}
	if dim > 1 {
		v[1] = 1 // non-zero-norm under cosine
	}
	return v
}

// mustIndex builds the 768-dimensional cosine index named "bench" that
// the tests and benchmarks in this file share, using the package
// defaults for M and EfSearch. A construction error aborts the calling
// test or benchmark through tb.Fatalf.
func mustIndex(tb testing.TB) *Index {
	tb.Helper()

	params := IndexParams{
		Name:      "bench",
		Dimension: 768,
		M:         DefaultM,
		EfSearch:  DefaultEfSearch,
		Distance:  DistanceCosine,
	}

	index, err := NewIndex(params)
	if err != nil {
		tb.Fatalf("NewIndex: %v", err)
	}
	return index
}

// makeBatch produces n batch items with IDs "k-000000", "k-000001", …
// and dim-length vectors whose components are drawn uniformly from
// [-1, 1). The generator is seeded with n, so a given n always yields
// the same batch.
func makeBatch(n, dim int) []BatchItem {
	src := rand.New(rand.NewSource(int64(n)))
	batch := make([]BatchItem, 0, n)
	for i := 0; i < n; i++ {
		vec := make([]float32, dim)
		for j := 0; j < dim; j++ {
			// Float32 is in [0, 1); scale and shift it onto [-1, 1).
			vec[j] = src.Float32()*2 - 1
		}
		batch = append(batch, BatchItem{
			ID:     fmt.Sprintf("k-%06d", i),
			Vector: vec,
		})
	}
	return batch
}