Replaces the per-item Add loop in the HTTP handler with one call to Index.BatchAdd, which acquires the write-lock once and pushes the whole batch through coder/hnsw's variadic Graph.Add. Pre-validation stays in the handler so per-item error messages keep their item-index precision. Microbench (internal/vectord/batch_bench_test.go) at d=768 cosine: N=16 SingleAdd 283µs/op → BatchAdd 170µs/op 1.66× N=128 SingleAdd 7.9ms/op → BatchAdd 7.5ms/op 1.05× N=1024 SingleAdd 87.5ms/op → BatchAdd 83.4ms/op 1.05× Win is biggest at staffing-driven batch sizes (N=16) where per-call lock + validation overhead is a meaningful fraction. At larger N the inner HNSW neighborhood search per insert dominates, which is the load-bearing finding for Option B (sharded indexes): the throughput ceiling lives inside the library, not at the lock, so sharding to N parallel Graphs is the only path to true concurrent-Add throughput. g1, g1p, g2 smokes all PASS post-change. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
70 lines
1.6 KiB
Go
70 lines
1.6 KiB
Go
package vectord
|
|
|
|
import (
|
|
"fmt"
|
|
"math/rand"
|
|
"testing"
|
|
)
|
|
|
|
// BenchmarkSingleAdd vs BenchmarkBatchAdd quantifies the lock-amortization
|
|
// win for the HTTP-batch shape. Same N items, same vectors; one path
|
|
// takes the lock N times, the other takes it once. Run with:
|
|
// go test ./internal/vectord/ -bench=. -benchmem -benchtime=1x
|
|
func BenchmarkSingleAdd(b *testing.B) {
|
|
for _, n := range []int{16, 128, 1024} {
|
|
b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
|
|
items := makeBatch(n, 768)
|
|
for i := 0; i < b.N; i++ {
|
|
idx := mustIndex(b)
|
|
for _, it := range items {
|
|
if err := idx.Add(it.ID, it.Vector, it.Metadata); err != nil {
|
|
b.Fatalf("Add: %v", err)
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkBatchAdd(b *testing.B) {
|
|
for _, n := range []int{16, 128, 1024} {
|
|
b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
|
|
items := makeBatch(n, 768)
|
|
for i := 0; i < b.N; i++ {
|
|
idx := mustIndex(b)
|
|
if err := idx.BatchAdd(items); err != nil {
|
|
b.Fatalf("BatchAdd: %v", err)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func mustIndex(tb testing.TB) *Index {
|
|
tb.Helper()
|
|
idx, err := NewIndex(IndexParams{
|
|
Name: "bench",
|
|
Dimension: 768,
|
|
M: DefaultM,
|
|
EfSearch: DefaultEfSearch,
|
|
Distance: DistanceCosine,
|
|
})
|
|
if err != nil {
|
|
tb.Fatalf("NewIndex: %v", err)
|
|
}
|
|
return idx
|
|
}
|
|
|
|
func makeBatch(n, dim int) []BatchItem {
|
|
rng := rand.New(rand.NewSource(int64(n)))
|
|
out := make([]BatchItem, n)
|
|
for i := range out {
|
|
v := make([]float32, dim)
|
|
for j := range v {
|
|
v[j] = rng.Float32()*2 - 1
|
|
}
|
|
out[i] = BatchItem{ID: fmt.Sprintf("k-%06d", i), Vector: v}
|
|
}
|
|
return out
|
|
}
|