diff --git a/crates/lance-bench/src/main.rs b/crates/lance-bench/src/main.rs index 7216d37..91af48d 100644 --- a/crates/lance-bench/src/main.rs +++ b/crates/lance-bench/src/main.rs @@ -456,6 +456,26 @@ async fn build_lance_vector_index(path: &str, _dims: usize) -> Result<()> { .await .context("create_index")?; + // Also build the scalar btree on doc_id. This bench's + // measure_random_access_lance uses take(row_position) which doesn't + // need the btree, but the dataset this bench writes is also queried + // downstream by /vectors/lance/doc// (the production + // lookup path) — without this index that path falls back to a full + // table scan. Cheap to build (~1.2s on 10M rows) and matches the + // gateway's lance_migrate handler behavior so bench-produced datasets + // are immediately production-shape. + use lance_index::scalar::ScalarIndexParams; + dataset + .create_index( + &["doc_id"], + IndexType::Scalar, + Some("doc_id_btree".into()), + &ScalarIndexParams::default(), + true, + ) + .await + .context("create_index doc_id btree")?; + Ok(()) }