From 76f6fba5de0f3eb26316bf52fea8e09d5d5ed62d Mon Sep 17 00:00:00 2001 From: root Date: Thu, 16 Apr 2026 02:37:11 -0500 Subject: [PATCH] =?UTF-8?q?Phase=20B:=20Lance=20pilot=20=E2=80=94=20hybrid?= =?UTF-8?q?=20decision=20with=20measured=20benchmark?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standalone benchmark crate `crates/lance-bench` running Lance 4.0 against our Parquet+HNSW at 100K × 768d (resumes_100k_v2) measured 8 dimensions. Results (see docs/ADR-019-vector-storage.md for full scorecard): Cold load: Parquet 0.17s vs Lance 0.13s (tie — not ≥2× threshold) Disk size: 330.3 MB vs 330.4 MB (tie) Search p50: 873us vs 2229us (Parquet 2.55× faster) Search p95: 1413us vs 4998us (Parquet 3.54× faster) Index build: 230s (ec=80) vs 16s (IVF_PQ) (Lance 14× faster) Random access: 35ms (scan) vs 311us (Lance 112× faster) Append 10K rows: full rewrite vs 0.08s/+31MB (Lance structural win) Decision (ADR-019): hybrid, not migrate-or-reject. - Parquet+HNSW stays primary — our HNSW at ec=80 es=30 recall=1.00 is 2.55× faster than Lance IVF_PQ at 100K in-RAM scale - Lance joins as second backend per-profile for workloads where it wins architecturally: random row access (RAG text fetch), append-heavy pipelines (Phase C), hot-swap generations (Phase 16, 14× faster builds), and indexes past the ~5M RAM ceiling - Phase 17 ModelProfile gets vector_backend: Parquet | Lance field - Ceiling table in PRD updated — 5M ceiling now says "switch to Lance" instead of "migrate" since Lance runs alongside, not instead of Isolation: lance-bench is a standalone workspace crate with its own dep tree (Lance pulls DataFusion 52 + Arrow 57 incompatible with main stack DataFusion 47 + Arrow 55). Kept off the critical path until API is stable enough to promote into vectord::lance_store. Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 3119 ++++++++++++++++++++++++++++---- Cargo.toml | 1 + crates/lance-bench/Cargo.toml | 42 + crates/lance-bench/src/main.rs | 633 +++++++ docs/ADR-019-vector-storage.md | 105 ++ docs/DECISIONS.md | 5 + docs/PRD.md | 15 +- 7 files changed, 3557 insertions(+), 363 deletions(-) create mode 100644 crates/lance-bench/Cargo.toml create mode 100644 crates/lance-bench/src/main.rs create mode 100644 docs/ADR-019-vector-storage.md diff --git a/Cargo.lock b/Cargo.lock index deb43df..4326bbf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,6 +100,15 @@ dependencies = [ "object", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -118,19 +127,40 @@ version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 55.2.0", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-cast 55.2.0", + "arrow-csv 55.2.0", + "arrow-data 55.2.0", + "arrow-ipc 55.2.0", + "arrow-json 55.2.0", + "arrow-ord 55.2.0", + "arrow-row 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", + "arrow-string 55.2.0", +] + +[[package]] +name = "arrow" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +dependencies = [ + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-csv 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-json 57.3.0", + "arrow-ord 57.3.0", + "arrow-row 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "arrow-string 57.3.0", ] [[package]] @@ -139,14 +169,28 @@ version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", "chrono", "num", ] +[[package]] +name = "arrow-arith" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "num-traits", +] + [[package]] name = "arrow-array" version = "55.2.0" @@ -154,9 +198,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", "chrono", "chrono-tz", "half", @@ -164,6 +208,25 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-array" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +dependencies = [ + "ahash", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", +] + [[package]] name = "arrow-buffer" version = "55.2.0" @@ -175,17 +238,29 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + [[package]] name = "arrow-cast" version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", "atoi", "base64", "chrono", @@ -196,15 +271,52 @@ dependencies = [ "ryu", ] +[[package]] +name = "arrow-cast" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num-traits", + "ryu", +] + [[package]] name = "arrow-csv" version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 55.2.0", + "arrow-cast 55.2.0", + "arrow-schema 55.2.0", + "chrono", + "csv", + "csv-core", + "regex", +] + +[[package]] +name = "arrow-csv" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +dependencies = [ + "arrow-array 57.3.0", + "arrow-cast 57.3.0", + "arrow-schema 57.3.0", "chrono", "csv", "csv-core", @@ -217,24 +329,53 @@ version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 55.2.0", + "arrow-schema 55.2.0", "half", "num", ] +[[package]] +name = "arrow-data" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +dependencies = [ + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", + "half", + "num-integer", + "num-traits", +] + [[package]] name = "arrow-ipc" version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", "flatbuffers", - "lz4_flex", + "lz4_flex 0.11.6", +] + +[[package]] +name = "arrow-ipc" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "flatbuffers", + "lz4_flex 0.12.1", + "zstd", ] [[package]] @@ -243,11 +384,11 @@ version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-cast 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", "chrono", "half", "indexmap", @@ -259,17 +400,54 @@ dependencies = [ "simdutf8", ] +[[package]] +name = "arrow-json" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "half", + "indexmap", + "itoa", + "lexical-core", + "memchr", + "num-traits", + "ryu", + "serde_core", + "serde_json", + "simdutf8", +] + [[package]] name = "arrow-ord" version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", +] + +[[package]] +name = "arrow-ord" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", ] [[package]] @@ -278,10 +456,23 @@ version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "half", +] + +[[package]] +name = "arrow-row" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", "half", ] @@ -291,6 +482,17 @@ version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" +[[package]] +name = "arrow-schema" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +dependencies = [ + "bitflags", + "serde_core", + "serde_json", +] + [[package]] name = "arrow-select" version = "55.2.0" @@ -298,30 +500,73 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", "num", ] +[[package]] +name = "arrow-select" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +dependencies = [ + "ahash", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "num-traits", +] + [[package]] name = "arrow-string" version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", "memchr", "num", "regex", "regex-syntax", ] +[[package]] +name = "arrow-string" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "memchr", + "num-traits", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-channel" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" +dependencies = [ + "concurrent-queue", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-compression" version = "0.4.19" @@ -339,6 +584,28 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-lock" +version = "3.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-recursion" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -358,7 +625,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -369,7 +636,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -388,6 +655,15 @@ dependencies = [ "tungstenite", ] +[[package]] +name = "async_cell" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447ab28afbb345f5408b120702a44e5529ebf90b1796ec76e9528df8e288e6c2" +dependencies = [ + "loom", +] + [[package]] name = "atoi" version = "2.0.0" @@ -487,6 +763,27 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "bitpacking" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019" +dependencies = [ + "crunchy", +] + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "blake2" version = "0.10.6" @@ -528,6 +825,31 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bon" +version = "3.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" +dependencies = [ + "darling", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.117", +] + [[package]] name = "brotli" version = "8.0.2" @@ -561,6 +883,12 @@ version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + [[package]] name = "byteorder" version = "1.5.0" @@ -599,7 +927,7 @@ dependencies = [ name = "catalogd" version = "0.1.0" dependencies = [ - "arrow", + "arrow 55.2.0", "axum", "bytes", "chrono", @@ -636,6 +964,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + [[package]] name = "cesu8" version = "1.1.0" @@ -756,6 +1090,15 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "const-random" version = "0.1.18" @@ -804,7 +1147,7 @@ checksum = "4f160aad86b4343e8d4e261fee9965c3005b2fd6bc117d172ab65948779e4acf" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -815,7 +1158,7 @@ checksum = "42571ed01eb46d2e1adcf99c8ca576f081e46f2623d13500eba70d1d99a4c439" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -940,6 +1283,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -959,6 +1311,25 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-skiplist" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1022,7 +1393,8 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "syn", + "strsim", + "syn 2.0.117", ] [[package]] @@ -1033,7 +1405,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1062,47 +1434,47 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" dependencies = [ - "arrow", - "arrow-ipc", - "arrow-schema", + "arrow 55.2.0", + "arrow-ipc 55.2.0", + "arrow-schema 55.2.0", "async-trait", "bytes", "bzip2", "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-datasource-csv", - "datafusion-datasource-json", + "datafusion-catalog 47.0.0", + "datafusion-catalog-listing 47.0.0", + "datafusion-common 47.0.0", + "datafusion-common-runtime 47.0.0", + "datafusion-datasource 47.0.0", + "datafusion-datasource-csv 47.0.0", + "datafusion-datasource-json 47.0.0", "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-macros", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-expr-common 47.0.0", + "datafusion-functions 47.0.0", + "datafusion-functions-aggregate 47.0.0", + "datafusion-functions-nested 47.0.0", + "datafusion-functions-table 47.0.0", + "datafusion-functions-window 47.0.0", + "datafusion-macros 47.0.0", + "datafusion-optimizer 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "datafusion-physical-optimizer 47.0.0", + "datafusion-physical-plan 47.0.0", + "datafusion-session 47.0.0", + "datafusion-sql 47.0.0", "flate2", "futures", - "itertools", + "itertools 0.14.0", "log", "object_store", "parking_lot", - "parquet", + "parquet 55.2.0", "rand 0.8.5", "regex", - "sqlparser", + "sqlparser 0.55.0", "tempfile", "tokio", "url", @@ -1111,26 +1483,100 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7541353e77dc7262b71ca27be07d8393661737e3a73b5d1b1c6f7d814c64fa2a" +dependencies = [ + "arrow 57.3.0", + "arrow-schema 57.3.0", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog 52.5.0", + "datafusion-catalog-listing 52.5.0", + "datafusion-common 52.5.0", + "datafusion-common-runtime 52.5.0", + "datafusion-datasource 52.5.0", + "datafusion-datasource-arrow", + "datafusion-datasource-csv 52.5.0", + "datafusion-datasource-json 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-functions 52.5.0", + "datafusion-functions-aggregate 52.5.0", + "datafusion-functions-nested 52.5.0", + "datafusion-functions-table 52.5.0", + "datafusion-functions-window 52.5.0", + "datafusion-optimizer 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-adapter", + "datafusion-physical-expr-common 52.5.0", + "datafusion-physical-optimizer 52.5.0", + "datafusion-physical-plan 52.5.0", + "datafusion-session 52.5.0", + "datafusion-sql 52.5.0", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "regex", + "sqlparser 0.59.0", + "tempfile", + "tokio", + "url", + "uuid", +] + [[package]] name = "datafusion-catalog" version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" dependencies = [ - "arrow", + "arrow 55.2.0", "async-trait", "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", + "datafusion-common 47.0.0", + "datafusion-common-runtime 47.0.0", + "datafusion-datasource 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-plan 47.0.0", + "datafusion-session 47.0.0", + "datafusion-sql 47.0.0", "futures", - "itertools", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-catalog" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9997731f90fa5398ef831ad0e69600f92c861b79c0d38bd1a29b6f0e3a0ce4c8" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "dashmap", + "datafusion-common 52.5.0", + "datafusion-common-runtime 52.5.0", + "datafusion-datasource 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-plan 52.5.0", + "datafusion-session 52.5.0", + "futures", + "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -1143,23 +1589,46 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" dependencies = [ - "arrow", + "arrow 55.2.0", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 47.0.0", + "datafusion-common 47.0.0", + "datafusion-datasource 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "datafusion-physical-plan 47.0.0", + "datafusion-session 47.0.0", "futures", "log", "object_store", "tokio", ] +[[package]] +name = "datafusion-catalog-listing" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b30a3dd50dec860c9559275c8d97d9de602e611237a6ecfbda0b3b63b872352" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "datafusion-catalog 52.5.0", + "datafusion-common 52.5.0", + "datafusion-datasource 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-adapter", + "datafusion-physical-expr-common 52.5.0", + "datafusion-physical-plan 52.5.0", + "futures", + "itertools 0.14.0", + "log", + "object_store", +] + [[package]] name = "datafusion-common" version = "47.0.0" @@ -1167,8 +1636,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" dependencies = [ "ahash", - "arrow", - "arrow-ipc", + "arrow 55.2.0", + "arrow-ipc 55.2.0", "base64", "half", "hashbrown 0.14.5", @@ -1176,10 +1645,32 @@ dependencies = [ "libc", "log", "object_store", - "parquet", + "parquet 55.2.0", "paste", "recursive", - "sqlparser", + "sqlparser 0.55.0", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d551054acec0398ca604512310b77ce05c46f66e54b54d48200a686e385cca4e" +dependencies = [ + "ahash", + "arrow 57.3.0", + "arrow-ipc 57.3.0", + "chrono", + "half", + "hashbrown 0.16.1", + "indexmap", + "libc", + "log", + "object_store", + "paste", + "sqlparser 0.59.0", "tokio", "web-time", ] @@ -1195,33 +1686,44 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-common-runtime" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567d40e285f5b79f8737b576605721cd6c1133b5d2b00bdbd5d9838d90d0812f" +dependencies = [ + "futures", + "log", + "tokio", +] + [[package]] name = "datafusion-datasource" version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" dependencies = [ - "arrow", + "arrow 55.2.0", "async-compression", "async-trait", "bytes", "bzip2", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 47.0.0", + "datafusion-common-runtime 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "datafusion-physical-plan 47.0.0", + "datafusion-session 47.0.0", "flate2", "futures", "glob", - "itertools", + "itertools 0.14.0", "log", "object_store", - "parquet", + "parquet 55.2.0", "rand 0.8.5", "tempfile", "tokio", @@ -1231,25 +1733,101 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d2668f51b3b30befae2207472569e37807fdedd1d14da58acc6f8ca6257eae" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "bytes", + "chrono", + "datafusion-common 52.5.0", + "datafusion-common-runtime 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-adapter", + "datafusion-physical-expr-common 52.5.0", + "datafusion-physical-plan 52.5.0", + "datafusion-session 52.5.0", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand 0.9.2", + "tokio", + "url", +] + +[[package]] +name = "datafusion-datasource-arrow" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e02e1b3e3a8ec55f1f62de4252b0407c8567363d056078769a197e24fc834a0f" +dependencies = [ + "arrow 57.3.0", + "arrow-ipc 57.3.0", + "async-trait", + "bytes", + "datafusion-common 52.5.0", + "datafusion-common-runtime 52.5.0", + "datafusion-datasource 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "datafusion-physical-plan 52.5.0", + "datafusion-session 52.5.0", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-csv" version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" dependencies = [ - "arrow", + "arrow 55.2.0", "async-trait", "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 47.0.0", + "datafusion-common 47.0.0", + "datafusion-common-runtime 47.0.0", + "datafusion-datasource 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "datafusion-physical-plan 47.0.0", + "datafusion-session 47.0.0", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b559d7bf87d4f900f847baba8509634f838d9718695389e903604cdcccdb01f3" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "bytes", + "datafusion-common 52.5.0", + "datafusion-common-runtime 52.5.0", + "datafusion-datasource 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "datafusion-physical-plan 52.5.0", + "datafusion-session 52.5.0", "futures", "object_store", "regex", @@ -1262,52 +1840,74 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" dependencies = [ - "arrow", + "arrow 55.2.0", "async-trait", "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 47.0.0", + "datafusion-common 47.0.0", + "datafusion-common-runtime 47.0.0", + "datafusion-datasource 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "datafusion-physical-plan 47.0.0", + "datafusion-session 47.0.0", "futures", "object_store", "serde_json", "tokio", ] +[[package]] +name = "datafusion-datasource-json" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250e2d7591ba8b638f063854650faa40bca4e8bd4059b2ece8836f6388d02db4" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "bytes", + "datafusion-common 52.5.0", + "datafusion-common-runtime 52.5.0", + "datafusion-datasource 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "datafusion-physical-plan 52.5.0", + "datafusion-session 52.5.0", + "futures", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-parquet" version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" dependencies = [ - "arrow", + "arrow 55.2.0", "async-trait", "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 47.0.0", + "datafusion-common 47.0.0", + "datafusion-common-runtime 47.0.0", + "datafusion-datasource 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-functions-aggregate 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "datafusion-physical-optimizer 47.0.0", + "datafusion-physical-plan 47.0.0", + "datafusion-session 47.0.0", "futures", - "itertools", + "itertools 0.14.0", "log", "object_store", "parking_lot", - "parquet", + "parquet 55.2.0", "rand 0.8.5", "tokio", ] @@ -1318,16 +1918,22 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" +[[package]] +name = "datafusion-doc" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9496cb0db222dbb9a3735760ceca7fc56f35e1d5502c38d0caa77a81e9c1f6a" + [[package]] name = "datafusion-execution" version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" dependencies = [ - "arrow", + "arrow 55.2.0", "dashmap", - "datafusion-common", - "datafusion-expr", + "datafusion-common 47.0.0", + "datafusion-expr 47.0.0", "futures", "log", "object_store", @@ -1337,25 +1943,68 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-execution" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc45d23c516ed8d3637751e44e09e21b45b3f58b473c802dddd1f1ad4fe435ff" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "chrono", + "dashmap", + "datafusion-common 52.5.0", + "datafusion-expr 52.5.0", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "tempfile", + "url", +] + [[package]] name = "datafusion-expr" version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" dependencies = [ - "arrow", + "arrow 55.2.0", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", + "datafusion-common 47.0.0", + "datafusion-doc 47.0.0", + "datafusion-expr-common 47.0.0", + "datafusion-functions-aggregate-common 47.0.0", + "datafusion-functions-window-common 47.0.0", + "datafusion-physical-expr-common 47.0.0", "indexmap", "paste", "recursive", "serde_json", - "sqlparser", + "sqlparser 0.55.0", +] + +[[package]] +name = "datafusion-expr" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63dd30526d2db4fda6440806a41e4676334a94bc0596cc9cc2a0efed20ef2c44" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "chrono", + "datafusion-common 52.5.0", + "datafusion-doc 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-functions-aggregate-common 52.5.0", + "datafusion-functions-window-common 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "indexmap", + "itertools 0.14.0", + "paste", + "serde_json", + "sqlparser 0.59.0", ] [[package]] @@ -1364,10 +2013,23 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" dependencies = [ - "arrow", - "datafusion-common", + "arrow 55.2.0", + "datafusion-common 47.0.0", "indexmap", - "itertools", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-expr-common" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b486b5f6255d40976b88bb83813b0d035a8333e0ec39864824e78068cf42fa6" +dependencies = [ + "arrow 57.3.0", + "datafusion-common 52.5.0", + "indexmap", + "itertools 0.14.0", "paste", ] @@ -1377,20 +2039,20 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 55.2.0", + "arrow-buffer 55.2.0", "base64", "blake2", "blake3", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", + "datafusion-common 47.0.0", + "datafusion-doc 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-expr-common 47.0.0", + "datafusion-macros 47.0.0", "hex", - "itertools", + "itertools 0.14.0", "log", "md-5", "rand 0.8.5", @@ -1400,6 +2062,37 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-functions" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07356c94118d881130dd0ffbff127540407d969c8978736e324edcd6c41cd48f" +dependencies = [ + "arrow 57.3.0", + "arrow-buffer 57.3.0", + "base64", + "blake2", + "blake3", + "chrono", + "chrono-tz", + "datafusion-common 52.5.0", + "datafusion-doc 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-macros 52.5.0", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "num-traits", + "rand 0.9.2", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + [[package]] name = "datafusion-functions-aggregate" version = "47.0.0" @@ -1407,15 +2100,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "arrow 55.2.0", + "datafusion-common 47.0.0", + "datafusion-doc 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-functions-aggregate-common 47.0.0", + "datafusion-macros 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b644f9cf696df9233ce6958b9807666d78563b56f923267474dd6c07795f1f8f" +dependencies = [ + "ahash", + "arrow 57.3.0", + "datafusion-common 52.5.0", + "datafusion-doc 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-functions-aggregate-common 52.5.0", + "datafusion-macros 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", "half", "log", "paste", @@ -1428,10 +2142,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "arrow 55.2.0", + "datafusion-common 47.0.0", + "datafusion-expr-common 47.0.0", + "datafusion-physical-expr-common 47.0.0", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1de2deaaabe8923ce9ea9f29c47bbb4ee14f67ea2fe1ab5398d9bbebcf86e56" +dependencies = [ + "ahash", + "arrow 57.3.0", + "datafusion-common 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-physical-expr-common 52.5.0", ] [[package]] @@ -1440,17 +2167,40 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" dependencies = [ - "arrow", - "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-macros", - "datafusion-physical-expr-common", - "itertools", + "arrow 55.2.0", + "arrow-ord 55.2.0", + "datafusion-common 47.0.0", + "datafusion-doc 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-functions 47.0.0", + "datafusion-functions-aggregate 47.0.0", + "datafusion-macros 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-nested" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552f8d92e4331ee91d23c02d12bb6acf32cbfd5215117e01c0fb63cd4b15af1a" +dependencies = [ + "arrow 57.3.0", + "arrow-ord 57.3.0", + "datafusion-common 52.5.0", + "datafusion-doc 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-functions 52.5.0", + "datafusion-functions-aggregate 52.5.0", + "datafusion-functions-aggregate-common 52.5.0", + "datafusion-macros 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "itertools 0.14.0", "log", "paste", ] @@ -1461,12 +2211,28 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" dependencies = [ - "arrow", + "arrow 55.2.0", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", + "datafusion-catalog 47.0.0", + "datafusion-common 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-physical-plan 47.0.0", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "970fd0cdd3df8802b9a9975ff600998289ba9d46682a4f7285cba4820c9ada78" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "datafusion-catalog 52.5.0", + "datafusion-common 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-plan 52.5.0", "parking_lot", "paste", ] @@ -1477,13 +2243,31 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" dependencies = [ - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 47.0.0", + "datafusion-doc 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-functions-window-common 47.0.0", + "datafusion-macros 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b4c21a7c8a986a1866c0a87ab756d0bbf7b5f41f306009fa2d9af79c52ed31" +dependencies = [ + "arrow 57.3.0", + "datafusion-common 52.5.0", + "datafusion-doc 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-functions-window-common 52.5.0", + "datafusion-macros 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", "log", "paste", ] @@ -1494,8 +2278,18 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "datafusion-common 47.0.0", + "datafusion-physical-expr-common 47.0.0", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1210ad73b8b3211aeaf4a42bef9bd7a2b7fce3ec119a478831f18c6ff7f7b93" +dependencies = [ + "datafusion-common 52.5.0", + "datafusion-physical-expr-common 52.5.0", ] [[package]] @@ -1504,9 +2298,20 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" dependencies = [ - "datafusion-expr", + "datafusion-expr 47.0.0", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "datafusion-macros" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaa566a963013a38681ad82a727a654bc7feb19632426aea8c3412d415d200c5" +dependencies = [ + "datafusion-doc 52.5.0", + "quote", + "syn 2.0.117", ] [[package]] @@ -1515,19 +2320,38 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" dependencies = [ - "arrow", + "arrow 55.2.0", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-common 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-physical-expr 47.0.0", "indexmap", - "itertools", + "itertools 0.14.0", "log", "recursive", "regex", "regex-syntax", ] +[[package]] +name = "datafusion-optimizer" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff9aa82b240252a88dee118372f9b9757c545ab9e53c0736bebab2e7da0ef1f2" +dependencies = [ + "arrow 57.3.0", + "chrono", + "datafusion-common 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-physical-expr 52.5.0", + "indexmap", + "itertools 0.14.0", + "log", + "regex", + "regex-syntax", +] + [[package]] name = "datafusion-physical-expr" version = "47.0.0" @@ -1535,19 +2359,57 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", + "arrow 55.2.0", + "datafusion-common 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-expr-common 47.0.0", + "datafusion-functions-aggregate-common 47.0.0", + "datafusion-physical-expr-common 47.0.0", "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "paste", - "petgraph", + "petgraph 0.7.1", +] + +[[package]] +name = "datafusion-physical-expr" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d48022b8af9988c1d852644f9e8b5584c490659769a550c5e8d39457a1da0a5" +dependencies = [ + "ahash", + "arrow 57.3.0", + "datafusion-common 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-functions-aggregate-common 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "half", + "hashbrown 0.16.1", + "indexmap", + "itertools 0.14.0", + "parking_lot", + "paste", + "petgraph 0.8.3", + "tokio", +] + +[[package]] +name = "datafusion-physical-expr-adapter" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae7a8abc0b4fe624000972a9b145b30b7f1b680bffaa950ea53f78d9b21c27c3" +dependencies = [ + "arrow 57.3.0", + "datafusion-common 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-functions 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "itertools 0.14.0", ] [[package]] @@ -1557,11 +2419,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", + "arrow 55.2.0", + "datafusion-common 47.0.0", + "datafusion-expr-common 47.0.0", "hashbrown 0.14.5", - "itertools", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147253ca3e6b9d59c162de64c02800973018660e13340dd1886dd038d17ac429" +dependencies = [ + "ahash", + "arrow 57.3.0", + "chrono", + "datafusion-common 52.5.0", + "datafusion-expr-common 52.5.0", + "hashbrown 0.16.1", + "indexmap", + "itertools 0.14.0", + "parking_lot", ] [[package]] @@ -1570,19 +2449,37 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "itertools", + "arrow 55.2.0", + "datafusion-common 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-expr-common 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", + "datafusion-physical-plan 47.0.0", + "itertools 0.14.0", "log", "recursive", ] +[[package]] +name = "datafusion-physical-optimizer" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "689156bb2282107b6239db8d7ef44b4dab10a9b33d3491a0c74acac5e4fedd72" +dependencies = [ + "arrow 57.3.0", + "datafusion-common 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "datafusion-physical-plan 52.5.0", + "datafusion-pruning", + "itertools 0.14.0", +] + [[package]] name = "datafusion-physical-plan" version = "47.0.0" @@ -1590,68 +2487,167 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" dependencies = [ "ahash", - "arrow", - "arrow-ord", - "arrow-schema", + "arrow 55.2.0", + "arrow-ord 55.2.0", + "arrow-schema 55.2.0", "async-trait", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 47.0.0", + "datafusion-common-runtime 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-functions-window-common 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-expr-common 47.0.0", "futures", "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "parking_lot", "pin-project-lite", "tokio", ] +[[package]] +name = "datafusion-physical-plan" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68253dc0ee5330aa558b2549c9b0da5af9fc17d753ae73022939014ad616fc28" +dependencies = [ + "ahash", + "arrow 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "async-trait", + "datafusion-common 52.5.0", + "datafusion-common-runtime 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-functions 52.5.0", + "datafusion-functions-aggregate-common 52.5.0", + "datafusion-functions-window-common 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "futures", + "half", + "hashbrown 0.16.1", + "indexmap", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-pruning" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fcad240a54d0b1d3e8f668398900260a53122d522b2102ab57218590decacd6" +dependencies = [ + "arrow 57.3.0", + "datafusion-common 52.5.0", + "datafusion-datasource 52.5.0", + "datafusion-expr-common 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-expr-common 52.5.0", + "datafusion-physical-plan 52.5.0", + "itertools 0.14.0", + "log", +] + [[package]] name = "datafusion-session" version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" dependencies = [ - "arrow", + "arrow 55.2.0", "async-trait", "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", + "datafusion-common 47.0.0", + "datafusion-common-runtime 47.0.0", + "datafusion-execution 47.0.0", + "datafusion-expr 47.0.0", + "datafusion-physical-expr 47.0.0", + "datafusion-physical-plan 47.0.0", + "datafusion-sql 47.0.0", "futures", - "itertools", + "itertools 0.14.0", "log", "object_store", "parking_lot", "tokio", ] +[[package]] +name = "datafusion-session" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f58e83a68bb67007a8fcbf005c44cefe441270c7ee7f6dee10c0e0109b556f6d" +dependencies = [ + "async-trait", + "datafusion-common 52.5.0", + "datafusion-execution 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-plan 52.5.0", + "parking_lot", +] + [[package]] name = "datafusion-sql" version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" dependencies = [ - "arrow", + "arrow 55.2.0", "bigdecimal", - "datafusion-common", - "datafusion-expr", + "datafusion-common 47.0.0", + "datafusion-expr 47.0.0", "indexmap", "log", "recursive", "regex", - "sqlparser", + "sqlparser 0.55.0", +] + +[[package]] +name = "datafusion-sql" +version = "52.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be53e9eb55db0fbb8980bb6d87f2435b0524acf4c718ed54a57cabbb299b2ab3" +dependencies = [ + "arrow 57.3.0", + "bigdecimal", + "chrono", + "datafusion-common 52.5.0", + "datafusion-expr 52.5.0", + "indexmap", + "log", + "regex", + "sqlparser 0.59.0", +] + +[[package]] +name = "deepsize" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cdb987ec36f6bf7bfbea3f928b75590b736fc42af8e54d97592481351b2b96c" +dependencies = [ + "deepsize_derive", +] + +[[package]] +name = "deepsize_derive" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990101d41f3bc8c1a45641024377ee284ecc338e5ecf3ea0f0e236d897c72796" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] @@ -1661,6 +2657,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", + "serde_core", ] [[package]] @@ -1682,7 +2679,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn", + "syn 2.0.117", "unicode-xid", ] @@ -1802,7 +2799,7 @@ dependencies = [ "dioxus-rsx", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1954,7 +2951,7 @@ dependencies = [ "convert_case 0.8.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "xxhash-rust", ] @@ -2017,7 +3014,7 @@ dependencies = [ "convert_case 0.8.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2058,7 +3055,7 @@ dependencies = [ "proc-macro2-diagnostics", "quote", "rustversion", - "syn", + "syn 2.0.117", ] [[package]] @@ -2098,7 +3095,7 @@ dependencies = [ "convert_case 0.8.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2134,6 +3131,27 @@ dependencies = [ "web-sys", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.61.2", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -2142,7 +3160,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2154,6 +3172,12 @@ dependencies = [ "litrs", ] +[[package]] +name = "downcast-rs" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" + [[package]] name = "dunce" version = "1.0.5" @@ -2193,7 +3217,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2212,6 +3236,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "ethnum" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca81e6b4777c89fd810c25a4be2b1bd93ea034fbe58e6a75216a34c6b82c539b" + [[package]] name = "euclid" version = "0.22.14" @@ -2221,12 +3251,45 @@ dependencies = [ "num-traits", ] +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + [[package]] name = "fallible-iterator" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +[[package]] +name = "fast-float2" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" + +[[package]] +name = "fastdivide" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" + [[package]] name = "fastrand" version = "2.3.0" @@ -2278,6 +3341,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2287,6 +3356,41 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs4" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +dependencies = [ + "rustix 0.38.44", + "windows-sys 0.52.0", +] + +[[package]] +name = "fsst" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195cc7f87e84bd695586137de99605e7e9579b26ec5e01b82960ddb4d0922f2" +dependencies = [ + "arrow-array 57.3.0", + "rand 0.9.2", +] + +[[package]] +name = "fst" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" +dependencies = [ + "utf8-ranges", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.32" @@ -2343,7 +3447,7 @@ checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2380,7 +3484,7 @@ name = "gateway" version = "0.1.0" dependencies = [ "aibridge", - "arrow", + "arrow 55.2.0", "axum", "catalogd", "chrono", @@ -2415,6 +3519,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "generator" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows-link", + "windows-result", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -2564,7 +3683,9 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "foldhash", + "allocator-api2", + "equivalent", + "foldhash 0.1.5", ] [[package]] @@ -2572,6 +3693,11 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "headers" @@ -2624,6 +3750,12 @@ dependencies = [ "digest", ] +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + [[package]] name = "http" version = "1.4.0" @@ -2752,6 +3884,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -2915,7 +4056,7 @@ dependencies = [ name = "ingestd" version = "0.1.0" dependencies = [ - "arrow", + "arrow 55.2.0", "axum", "bytes", "catalogd", @@ -2923,7 +4064,7 @@ dependencies = [ "csv", "lopdf", "object_store", - "parquet", + "parquet 55.2.0", "serde", "serde_json", "sha2", @@ -2989,6 +4130,15 @@ dependencies = [ "serde", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -3004,6 +4154,47 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "jiff" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys 0.61.2", +] + +[[package]] +name = "jiff-static" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "jni" version = "0.21.1" @@ -3045,7 +4236,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" dependencies = [ "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3062,12 +4253,12 @@ dependencies = [ name = "journald" version = "0.1.0" dependencies = [ - "arrow", + "arrow 55.2.0", "axum", "bytes", "chrono", "object_store", - "parquet", + "parquet 55.2.0", "serde", "serde_json", "shared", @@ -3086,6 +4277,26 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonb" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb98fb29636087c40ad0d1274d9a30c0c1e83e03ae93f6e7e89247b37fcc6953" +dependencies = [ + "byteorder", + "ethnum", + "fast-float2", + "itoa", + "jiff", + "nom 8.0.0", + "num-traits", + "ordered-float 5.3.0", + "rand 0.9.2", + "serde", + "serde_json", + "zmij", +] + [[package]] name = "keyboard-types" version = "0.7.0" @@ -3095,6 +4306,478 @@ dependencies = [ "bitflags", ] +[[package]] +name = "lance" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efe6c3ddd79cdfd2b7e1c23cafae52806906bc40fbd97de9e8cf2f8c7a75fc04" +dependencies = [ + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ipc 57.3.0", + "arrow-ord 57.3.0", + "arrow-row 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "async-recursion", + "async-trait", + "async_cell", + "byteorder", + "bytes", + "chrono", + "crossbeam-skiplist", + "dashmap", + "datafusion 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-functions 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-physical-plan 52.5.0", + "deepsize", + "either", + "futures", + "half", + "humantime", + "itertools 0.13.0", + "lance-arrow", + "lance-core", + "lance-datafusion", + "lance-encoding", + "lance-file", + "lance-index", + "lance-io", + "lance-linalg", + "lance-namespace", + "lance-table", + "log", + "moka", + "object_store", + "permutation", + "pin-project", + "prost 0.14.3", + "prost-types 0.14.3", + "rand 0.9.2", + "roaring", + "semver", + "serde", + "serde_json", + "snafu", + "tantivy", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "lance-arrow" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d9f5d95bdda2a2b790f1fb8028b5b6dcf661abeb3133a8bca0f3d24b054af87" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "bytes", + "futures", + "getrandom 0.2.17", + "half", + "jsonb", + "num-traits", + "rand 0.9.2", +] + +[[package]] +name = "lance-bench" +version = "0.1.0" +dependencies = [ + "anyhow", + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-schema 57.3.0", + "bytes", + "futures", + "lance", + "lance-index", + "lance-linalg", + "parquet 57.3.0", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "lance-bitpacking" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f827d6ab9f8f337a9509d5ad66a12f3314db8713868260521c344ef6135eb4e4" +dependencies = [ + "arrayref", + "paste", + "seq-macro", +] + +[[package]] +name = "lance-core" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f1e25df6a79bf72ee6bcde0851f19b1cd36c5848c1b7db83340882d3c9fdecb" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", + "async-trait", + "byteorder", + "bytes", + "chrono", + "datafusion-common 52.5.0", + "datafusion-sql 52.5.0", + "deepsize", + "futures", + "itertools 0.13.0", + "lance-arrow", + "libc", + "log", + "mock_instant", + "moka", + "num_cpus", + "object_store", + "pin-project", + "prost 0.14.3", + "rand 0.9.2", + "roaring", + "serde_json", + "snafu", + "tempfile", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "url", +] + +[[package]] +name = "lance-datafusion" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93146de8ae720cb90edef81c2f2d0a1b065fc2f23ecff2419546f389b0fa70a4" +dependencies = [ + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "async-trait", + "chrono", + "datafusion 52.5.0", + "datafusion-common 52.5.0", + "datafusion-functions 52.5.0", + "datafusion-physical-expr 52.5.0", + "futures", + "jsonb", + "lance-arrow", + "lance-core", + "lance-datagen", + "log", + "pin-project", + "prost 0.14.3", + "prost-build 0.14.3", + "snafu", + "tokio", + "tracing", +] + +[[package]] +name = "lance-datagen" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccec8ce4d8e0a87a99c431dab2364398029f2ffb649c1a693c60c79e05ed30dd" +dependencies = [ + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-cast 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "futures", + "half", + "hex", + "rand 0.9.2", + "rand_distr 0.5.1", + "rand_xoshiro", + "random_word", +] + +[[package]] +name = "lance-encoding" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1aec0bbbac6bce829bc10f1ba066258126100596c375fb71908ecf11c2c2a5" +dependencies = [ + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "bytemuck", + "byteorder", + "bytes", + "fsst", + "futures", + "hex", + "hyperloglogplus", + "itertools 0.13.0", + "lance-arrow", + "lance-bitpacking", + "lance-core", + "log", + "lz4", + "num-traits", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "rand 0.9.2", + "snafu", + "strum", + "tokio", + "tracing", + "xxhash-rust", + "zstd", +] + +[[package]] +name = "lance-file" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14a8c548804f5b17486dc2d3282356ed1957095a852780283bc401fdd69e9075" +dependencies = [ + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "async-recursion", + "async-trait", + "byteorder", + "bytes", + "datafusion-common 52.5.0", + "deepsize", + "futures", + "lance-arrow", + "lance-core", + "lance-encoding", + "lance-io", + "log", + "num-traits", + "object_store", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "snafu", + "tokio", + "tracing", +] + +[[package]] +name = "lance-index" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da212f0090ea59f79ac3686660f596520c167fe1cb5f408900cf71d215f0e03" +dependencies = [ + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "async-channel", + "async-recursion", + "async-trait", + "bitpacking", + "bitvec", + "bytes", + "chrono", + "crossbeam-queue", + "datafusion 52.5.0", + "datafusion-common 52.5.0", + "datafusion-expr 52.5.0", + "datafusion-physical-expr 52.5.0", + "datafusion-sql 52.5.0", + "deepsize", + "dirs", + "fst", + "futures", + "half", + "itertools 0.13.0", + "jsonb", + "lance-arrow", + "lance-core", + "lance-datafusion", + "lance-datagen", + "lance-encoding", + "lance-file", + "lance-io", + "lance-linalg", + "lance-table", + "libm", + "log", + "ndarray", + "num-traits", + "object_store", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "rand 0.9.2", + "rand_distr 0.5.1", + "rangemap", + "rayon", + "roaring", + "serde", + "serde_json", + "smallvec", + "snafu", + "tantivy", + "tempfile", + "tokio", + "tracing", + "twox-hash", + "uuid", +] + +[[package]] +name = "lance-io" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d958eb4b56f03bbe0f5f85eb2b4e9657882812297b6f711f201ffc995f259f" +dependencies = [ + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "async-recursion", + "async-trait", + "byteorder", + "bytes", + "chrono", + "deepsize", + "futures", + "http", + "lance-arrow", + "lance-core", + "lance-namespace", + "log", + "object_store", + "path_abs", + "pin-project", + "prost 0.14.3", + "rand 0.9.2", + "serde", + "snafu", + "tempfile", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "lance-linalg" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0285b70da35def7ed95e150fae1d5308089554e1290470403ed3c50cb235bc5e" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", + "cc", + "deepsize", + "half", + "lance-arrow", + "lance-core", + "num-traits", + "rand 0.9.2", +] + +[[package]] +name = "lance-namespace" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f78e2a828b654e062a495462c6e3eb4fcf0e7e907d761b8f217fc09ccd3ceac" +dependencies = [ + "arrow 57.3.0", + "async-trait", + "bytes", + "lance-core", + "lance-namespace-reqwest-client", + "serde", + "snafu", +] + +[[package]] +name = "lance-namespace-reqwest-client" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee2e48de899e2931afb67fcddd0a08e439bf5d8b6ea2a2ed9cb8f4df669bd5cc" +dependencies = [ + "reqwest", + "serde", + "serde_json", + "serde_repr", + "url", +] + +[[package]] +name = "lance-table" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3df9c4adca3eb2074b3850432a9fb34248a3d90c3d6427d158b13ff9355664ee" +dependencies = [ + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ipc 57.3.0", + "arrow-schema 57.3.0", + "async-trait", + "byteorder", + "bytes", + "chrono", + "deepsize", + "futures", + "lance-arrow", + "lance-core", + "lance-file", + "lance-io", + "log", + "object_store", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "rand 0.9.2", + "rangemap", + "roaring", + "semver", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "uuid", +] + [[package]] name = "lazy-js-bundle" version = "0.7.3" @@ -3113,6 +4796,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + [[package]] name = "lexical-core" version = "1.0.6" @@ -3201,6 +4890,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -3240,6 +4935,19 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3bd0dd2cd90571056fdb71f6275fada10131182f84899f4b2a916e565d81d86" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lopdf" version = "0.35.0" @@ -3255,7 +4963,7 @@ dependencies = [ "itoa", "log", "md-5", - "nom", + "nom 7.1.3", "nom_locate", "rangemap", "rayon", @@ -3264,12 +4972,40 @@ dependencies = [ "weezl", ] +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "lru-slab" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4" +version = "1.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" +dependencies = [ + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "lz4_flex" version = "0.11.6" @@ -3279,6 +5015,15 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "lz4_flex" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +dependencies = [ + "twox-hash", +] + [[package]] name = "lzma-sys" version = "0.1.20" @@ -3298,7 +5043,7 @@ checksum = "1b27834086c65ec3f9387b096d66e99f221cf081c2b738042aa252bcd41204e3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3338,7 +5083,7 @@ dependencies = [ "manganis-core", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3356,6 +5101,19 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" +[[package]] +name = "matrixmultiply" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "num_cpus", + "once_cell", + "rawpointer", + "thread-tree", +] + [[package]] name = "md-5" version = "0.10.6" @@ -3366,6 +5124,15 @@ dependencies = [ "digest", ] +[[package]] +name = "measure_time" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" +dependencies = [ + "log", +] + [[package]] name = "memchr" version = "2.8.0" @@ -3378,7 +5145,7 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad38eb12aea514a0466ea40a80fd8cc83637065948eb4a426e4aa46261175227" dependencies = [ - "rustix", + "rustix 1.1.4", ] [[package]] @@ -3433,6 +5200,32 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "mock_instant" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" + +[[package]] +name = "moka" +version = "0.12.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" +dependencies = [ + "async-lock", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "equivalent", + "event-listener", + "futures-util", + "parking_lot", + "portable-atomic", + "smallvec", + "tagptr", + "uuid", +] + [[package]] name = "multer" version = "3.1.0" @@ -3456,6 +5249,27 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + +[[package]] +name = "ndarray" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "portable-atomic", + "portable-atomic-util", + "rawpointer", +] + [[package]] name = "ndk" version = "0.9.0" @@ -3496,6 +5310,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "nom_locate" version = "4.2.0" @@ -3504,7 +5327,7 @@ checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3" dependencies = [ "bytecount", "memchr", - "nom", + "nom 7.1.3", ] [[package]] @@ -3625,7 +5448,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3671,7 +5494,7 @@ dependencies = [ "http-body-util", "humantime", "hyper", - "itertools", + "itertools 0.14.0", "md-5", "parking_lot", "percent-encoding", @@ -3697,6 +5520,12 @@ version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "oneshot" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" + [[package]] name = "openssl-probe" version = "0.2.1" @@ -3753,6 +5582,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-float" version = "2.10.1" @@ -3771,6 +5606,30 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-float" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ownedbytes" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.5" @@ -3801,13 +5660,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-cast 55.2.0", + "arrow-data 55.2.0", + "arrow-ipc 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", "base64", "brotli", "bytes", @@ -3816,7 +5675,7 @@ dependencies = [ "futures", "half", "hashbrown 0.15.5", - "lz4_flex", + "lz4_flex 0.11.6", "num", "num-bigint", "object_store", @@ -3830,18 +5689,70 @@ dependencies = [ "zstd", ] +[[package]] +name = "parquet" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" +dependencies = [ + "ahash", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "half", + "hashbrown 0.16.1", + "lz4_flex 0.12.1", + "num-bigint", + "num-integer", + "num-traits", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "twox-hash", + "zstd", +] + [[package]] name = "paste" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "path_abs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ef02f6342ac01d8a93b65f96db53fe68a92a15f41144f97fb00a9e669633c3" +dependencies = [ + "serde", + "serde_derive", + "std_prelude", + "stfu8", +] + [[package]] name = "percent-encoding" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "permutation" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" + [[package]] name = "petgraph" version = "0.7.1" @@ -3852,6 +5763,18 @@ dependencies = [ "indexmap", ] +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap", + "serde", +] + [[package]] name = "phf" version = "0.12.1" @@ -3906,7 +5829,7 @@ checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3927,6 +5850,21 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" +dependencies = [ + "portable-atomic", +] + [[package]] name = "postgres-protocol" version = "0.6.10" @@ -3991,7 +5929,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.117", ] [[package]] @@ -4020,7 +5958,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "version_check", ] @@ -4031,7 +5969,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.13.5", +] + +[[package]] +name = "prost" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +dependencies = [ + "bytes", + "prost-derive 0.14.3", ] [[package]] @@ -4041,16 +5989,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools", + "itertools 0.14.0", "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.7.1", "prettyplease", - "prost", - "prost-types", + "prost 0.13.5", + "prost-types 0.13.5", "regex", - "syn", + "syn 2.0.117", + "tempfile", +] + +[[package]] +name = "prost-build" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" +dependencies = [ + "heck", + "itertools 0.14.0", + "log", + "multimap", + "petgraph 0.8.3", + "prettyplease", + "prost 0.14.3", + "prost-types 0.14.3", + "regex", + "syn 2.0.117", "tempfile", ] @@ -4061,10 +6028,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools", + "itertools 0.14.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "prost-derive" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -4073,14 +6053,23 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ - "prost", + "prost 0.13.5", +] + +[[package]] +name = "prost-types" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" +dependencies = [ + "prost 0.14.3", ] [[package]] name = "proto" version = "0.1.0" dependencies = [ - "prost", + "prost 0.13.5", "tonic", "tonic-build", ] @@ -4115,12 +6104,12 @@ dependencies = [ name = "queryd" version = "0.1.0" dependencies = [ - "arrow", + "arrow 55.2.0", "axum", "bytes", "catalogd", "chrono", - "datafusion", + "datafusion 47.0.0", "futures", "object_store", "serde", @@ -4218,6 +6207,12 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -4277,6 +6272,48 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand 0.9.2", +] + +[[package]] +name = "rand_xoshiro" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" +dependencies = [ + "rand_core 0.9.5", +] + +[[package]] +name = "random_word" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e47a395bdb55442b883c89062d6bcff25dc90fa5f8369af81e0ac6d49d78cf81" +dependencies = [ + "ahash", + "brotli", + "paste", + "rand 0.9.2", + "unicase", +] + [[package]] name = "rangemap" version = "1.7.1" @@ -4289,6 +6326,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "rayon" version = "1.11.0" @@ -4326,7 +6369,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4338,6 +6381,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 2.0.18", +] + [[package]] name = "regex" version = "1.12.3" @@ -4377,6 +6431,7 @@ dependencies = [ "bytes", "cookie", "cookie_store", + "encoding_rs", "futures-core", "futures-util", "h2", @@ -4388,6 +6443,7 @@ dependencies = [ "hyper-util", "js-sys", "log", + "mime", "mime_guess", "percent-encoding", "pin-project-lite", @@ -4427,6 +6483,26 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "roaring" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" +dependencies = [ + "bytemuck", + "byteorder", +] + +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustc-hash" version = "1.1.0" @@ -4448,6 +6524,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.1.4" @@ -4457,7 +6546,7 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.12.1", "windows-sys 0.61.2", ] @@ -4538,6 +6627,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -4626,7 +6721,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4664,6 +6759,17 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -4720,11 +6826,11 @@ dependencies = [ name = "shared" version = "0.1.0" dependencies = [ - "arrow", + "arrow 55.2.0", "async-trait", "bytes", "chrono", - "parquet", + "parquet 55.2.0", "serde", "serde_json", "sha2", @@ -4769,6 +6875,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" +[[package]] +name = "sketches-ddsketch" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" +dependencies = [ + "serde", +] + [[package]] name = "slab" version = "0.4.12" @@ -4792,7 +6907,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb251b407f50028476a600541542b605bb864d35d9ee1de4f6cab45d88475e6d" dependencies = [ "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4820,6 +6935,27 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "snafu" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1d4bced6a69f90b2056c03dcff2c4737f98d6fb9e0853493996e1d253ca29c6" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "snap" version = "1.1.1" @@ -4863,6 +6999,16 @@ dependencies = [ "sqlparser_derive", ] +[[package]] +name = "sqlparser" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "sqlparser_derive", +] + [[package]] name = "sqlparser_derive" version = "0.3.0" @@ -4871,7 +7017,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4893,6 +7039,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "std_prelude" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8207e78455ffdf55661170876f88daf85356e4edd54e0a3dbc79586ca1e50cbe" + +[[package]] +name = "stfu8" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51f1e89f093f99e7432c491c382b88a6860a5adbe6bf02574bf0a08efff1978" + [[package]] name = "storaged" version = "0.1.0" @@ -4920,6 +7078,34 @@ dependencies = [ "unicode-properties", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.117", +] + [[package]] name = "subsecond" version = "0.7.3" @@ -4954,6 +7140,17 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.117" @@ -4982,9 +7179,167 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + +[[package]] +name = "tantivy" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64", + "bitpacking", + "bon", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "hyperloglogplus", + "itertools 0.14.0", + "levenshtein_automata", + "log", + "lru", + "lz4_flex 0.11.6", + "measure_time", + "memmap2", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash 2.1.1", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + "tempfile", + "thiserror 2.0.18", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344" +dependencies = [ + "downcast-rs", + "fastdivide", + "itertools 0.14.0", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a" +dependencies = [ + "nom 7.1.3", + "serde", + "serde_json", +] + +[[package]] +name = "tantivy-sstable" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416" +dependencies = [ + "futures-util", + "itertools 0.14.0", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1" +dependencies = [ + "murmurhash32", + "rand_distr 0.4.3", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d" +dependencies = [ + "serde", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.27.0" @@ -4994,7 +7349,7 @@ dependencies = [ "fastrand", "getrandom 0.4.2", "once_cell", - "rustix", + "rustix 1.1.4", "windows-sys 0.61.2", ] @@ -5024,7 +7379,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -5035,7 +7390,16 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "thread-tree" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffbd370cb847953a25954d9f63e14824a36113f8c72eecf6eccef5dc4b45d630" +dependencies = [ + "crossbeam-channel", ] [[package]] @@ -5148,7 +7512,7 @@ checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -5302,7 +7666,7 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", + "prost 0.13.5", "socket2 0.5.10", "tokio", "tokio-stream", @@ -5320,10 +7684,10 @@ checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847" dependencies = [ "prettyplease", "proc-macro2", - "prost-build", - "prost-types", + "prost-build 0.13.5", + "prost-types 0.13.5", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -5351,13 +7715,18 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ + "async-compression", "bitflags", "bytes", + "futures-core", "futures-util", "http", "http-body", + "http-body-util", "iri-string", "pin-project-lite", + "tokio", + "tokio-util", "tower", "tower-layer", "tower-service", @@ -5396,7 +7765,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -5508,6 +7877,9 @@ name = "twox-hash" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +dependencies = [ + "rand 0.9.2", +] [[package]] name = "typenum" @@ -5602,6 +7974,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -5631,13 +8009,13 @@ name = "vectord" version = "0.1.0" dependencies = [ "aibridge", - "arrow", + "arrow 55.2.0", "axum", "bytes", "chrono", "instant-distance", "object_store", - "parquet", + "parquet 55.2.0", "serde", "serde_json", "shared", @@ -5691,7 +8069,7 @@ checksum = "59195a1db0e95b920366d949ba5e0d3fc0e70b67c09be15ce5abb790106b0571" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -5782,7 +8160,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -5890,6 +8268,22 @@ dependencies = [ "web-sys", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -5899,6 +8293,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.62.2" @@ -5920,7 +8320,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -5931,7 +8331,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -6237,7 +8637,7 @@ dependencies = [ "heck", "indexmap", "prettyplease", - "syn", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -6253,7 +8653,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -6301,6 +8701,15 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xxhash-rust" version = "0.8.15" @@ -6335,7 +8744,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -6356,7 +8765,7 @@ checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -6376,7 +8785,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -6416,7 +8825,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2a75d1c..d886001 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "crates/journald", "crates/gateway", "crates/ui", + "crates/lance-bench", ] [workspace.dependencies] diff --git a/crates/lance-bench/Cargo.toml b/crates/lance-bench/Cargo.toml new file mode 100644 index 0000000..4aa2ab1 --- /dev/null +++ b/crates/lance-bench/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "lance-bench" +version = "0.1.0" +edition = "2024" + +# Standalone pilot for Phase B (see docs/EXECUTION_PLAN.md). +# Deliberately NOT sharing workspace deps — Lance 4.x pulls in its own +# DataFusion and Arrow versions incompatible with the rest of the stack. +# Isolating the pilot means we don't force a workspace-wide upgrade until +# we've decided Lance is worth it. + +[dependencies] +# Only the features we actually need — the default brings in AWS/Azure/GCP/HF etc +# which is ~200 extra crates we don't care about for a local pilot. +lance = { version = "4.0", default-features = false } +# Lance exposes DatasetIndexExt, IndexType, and IvfBuildParams through +# its sub-crates which must be imported directly — lance itself doesn't +# re-export them at a convenient path. +lance-index = { version = "4.0", default-features = false } +lance-linalg = { version = "4.0", default-features = false } + +# Arrow re-exported by Lance; pin to a range Lance picks so types match. +arrow = "57" +arrow-array = "57" +arrow-schema = "57" + +# Also need to read the EXISTING Parquet vector files so we can compare. +# These live in data/vectors/*.parquet. Lance's internal Parquet reading +# might differ from ours; using our format's Arrow/Parquet versions for +# the read side keeps the inputs identical. +parquet = "57" + +tokio = { version = "1", features = ["full"] } +futures = "0.3" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +anyhow = "1" +bytes = "1" + +[[bin]] +name = "lance-bench" +path = "src/main.rs" diff --git a/crates/lance-bench/src/main.rs b/crates/lance-bench/src/main.rs new file mode 100644 index 0000000..7216d37 --- /dev/null +++ b/crates/lance-bench/src/main.rs @@ -0,0 +1,633 @@ +//! Phase B: Lance pilot benchmark. +//! +//! Standalone binary that compares Lance vector storage against our +//! Parquet-with-binary-blob + in-RAM HNSW approach. See +//! docs/EXECUTION_PLAN.md for the decision rules this fuels. +//! +//! Inputs: +//! data/vectors/resumes_100k_v2.parquet — existing 100K × 768d embeddings +//! +//! Output: +//! A JSON report printed to stdout with measurements for: +//! - Cold load time (parquet → arrow) vs Lance open + scan +//! - Disk size +//! - Vector search latency (p50 / p95 / p99) +//! - Single-row random access +//! - Append cost (adding 10K rows) +//! +//! Usage: +//! cargo run --bin lance-bench -- \ +//! --parquet data/vectors/resumes_100k_v2.parquet \ +//! --lance-out /tmp/lance_resumes_100k_v2 \ +//! --json-out /tmp/lance_bench.json + +use anyhow::{Context, Result}; +use arrow_array::{Array, ArrayRef, BinaryArray, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator}; +use arrow_schema::{DataType, Field, Schema}; +use serde::Serialize; +use std::sync::Arc; +use std::time::Instant; + +#[derive(Debug, Serialize)] +struct BenchReport { + vectors: usize, + dimensions: usize, + parquet_path: String, + lance_path: String, + + // Parquet baseline + parquet_disk_bytes: u64, + parquet_cold_load_secs: f32, + + // Lance numbers + lance_write_secs: f32, + lance_disk_bytes: u64, + lance_cold_open_secs: f32, + + // Index + search + lance_index_build_secs: Option, + lance_index_disk_bytes: Option, + lance_search_p50_us: Option, + lance_search_p95_us: Option, + lance_search_p99_us: Option, + + // Architectural features Parquet+sidecar can't cheaply do + lance_random_row_access_us: Option, // fetch one row by row_id + parquet_random_row_access_us: Option, // for comparison — full scan cost + lance_append_10k_secs: Option, // add 10K new rows + lance_append_disk_bytes_added: Option, + + // Head-to-head reference (from our own measurements) + reference_hnsw_p50_us: f32, + reference_hnsw_p95_us: f32, + reference_brute_force_us: f32, + reference_hnsw_build_secs: f32, +} + +#[tokio::main] +async fn main() -> Result<()> { + // Simple positional args: parquet_in, lance_out. + let args: Vec = std::env::args().collect(); + let parquet_path = args + .get(1) + .cloned() + .unwrap_or_else(|| "data/vectors/resumes_100k_v2.parquet".to_string()); + let lance_path = args + .get(2) + .cloned() + .unwrap_or_else(|| "/tmp/lance_bench_dataset".to_string()); + + eprintln!("=== Phase B Lance pilot ==="); + eprintln!("input parquet: {}", parquet_path); + eprintln!("output lance: {}", lance_path); + + // --- 1. Cold-load the existing Parquet vector index into memory + eprintln!("\n[1/4] reading Parquet baseline..."); + let t0 = Instant::now(); + let (schema, batches, total_rows) = read_parquet_vectors(&parquet_path) + .context("read parquet")?; + let parquet_cold_load_secs = t0.elapsed().as_secs_f32(); + let parquet_disk_bytes = std::fs::metadata(&parquet_path)?.len(); + + let dims = detect_vector_dims(&batches)?; + eprintln!( + " loaded {} rows, {} columns, vectors={}d, disk={:.1} MB, cold load={:.2}s", + total_rows, + schema.fields().len(), + dims, + parquet_disk_bytes as f64 / 1_000_000.0, + parquet_cold_load_secs, + ); + + // --- 2. Convert from binary-blob-of-f32 to Lance's FixedSizeList + eprintln!("\n[2/4] converting binary-blob vectors to Arrow FixedSizeList..."); + let t0 = Instant::now(); + let (lance_schema, lance_batches) = convert_to_fixed_size_list(&schema, batches, dims)?; + eprintln!(" conversion took {:.2}s", t0.elapsed().as_secs_f32()); + + // --- 3. Write as Lance dataset + eprintln!("\n[3/4] writing Lance dataset..."); + let t0 = Instant::now(); + // Clean up any prior run + let _ = std::fs::remove_dir_all(&lance_path); + write_lance_dataset(&lance_path, lance_schema.clone(), lance_batches).await?; + let lance_write_secs = t0.elapsed().as_secs_f32(); + let lance_disk_bytes = dir_size_bytes(&lance_path); + eprintln!( + " write took {:.2}s, disk={:.1} MB", + lance_write_secs, + lance_disk_bytes as f64 / 1_000_000.0, + ); + + // --- 4. Cold open + scan the Lance dataset + eprintln!("\n[4/6] cold-opening Lance dataset..."); + let t0 = Instant::now(); + let scanned_rows = cold_open_and_scan_lance(&lance_path).await?; + let lance_cold_open_secs = t0.elapsed().as_secs_f32(); + eprintln!( + " open + full scan: {} rows in {:.2}s", + scanned_rows, lance_cold_open_secs, + ); + + // --- 5. Build a vector index on the Lance dataset + eprintln!("\n[5/6] building Lance vector index (IVF_PQ)..."); + let t0 = Instant::now(); + let index_built = build_lance_vector_index(&lance_path, dims).await; + let (lance_index_build_secs, lance_index_disk_bytes) = match index_built { + Ok(()) => { + let secs = t0.elapsed().as_secs_f32(); + let disk = dir_size_bytes(&lance_path) - lance_disk_bytes; + eprintln!(" built in {:.2}s, index adds {:.1} MB on disk", secs, disk as f64 / 1e6); + (Some(secs), Some(disk)) + } + Err(e) => { + eprintln!(" index build failed: {e:#}"); + (None, None) + } + }; + + // --- 6. Run search queries, measure latency + eprintln!("\n[6/6] running vector search benchmarks..."); + let search_stats = if lance_index_build_secs.is_some() { + run_search_benchmarks(&lance_path, dims).await.ok() + } else { + None + }; + let (lance_search_p50, lance_search_p95, lance_search_p99) = match search_stats { + Some((p50, p95, p99)) => { + eprintln!(" p50={:.0}us p95={:.0}us p99={:.0}us", p50, p95, p99); + (Some(p50), Some(p95), Some(p99)) + } + None => (None, None, None), + }; + + // --- Random access comparison + eprintln!("\n[7/8] random row access — Lance vs full-scan Parquet..."); + let lance_random = measure_random_access_lance(&lance_path).await.ok(); + let parquet_random = measure_random_access_parquet(&parquet_path).ok(); + if let Some(us) = lance_random { + eprintln!(" Lance random-fetch avg: {:.0}us", us); + } + if let Some(us) = parquet_random { + eprintln!(" Parquet full-scan-to-row avg: {:.0}us", us); + } + + // --- Append cost + eprintln!("\n[8/8] append 10K new rows to existing dataset..."); + let t0 = Instant::now(); + let pre_append_bytes = dir_size_bytes(&lance_path); + let append_result = append_10k_rows(&lance_path, dims).await; + let (lance_append_secs, lance_append_bytes) = match append_result { + Ok(()) => { + let secs = t0.elapsed().as_secs_f32(); + let bytes = dir_size_bytes(&lance_path).saturating_sub(pre_append_bytes); + eprintln!(" append took {:.2}s, added {:.1} MB", secs, bytes as f64 / 1e6); + (Some(secs), Some(bytes)) + } + Err(e) => { + eprintln!(" append failed: {e:#}"); + (None, None) + } + }; + + // --- Report + let report = BenchReport { + vectors: total_rows, + dimensions: dims, + parquet_path: parquet_path.clone(), + lance_path: lance_path.clone(), + parquet_disk_bytes, + parquet_cold_load_secs, + lance_write_secs, + lance_disk_bytes, + lance_cold_open_secs, + lance_index_build_secs, + lance_index_disk_bytes, + lance_search_p50_us: lance_search_p50, + lance_search_p95_us: lance_search_p95, + lance_search_p99_us: lance_search_p99, + lance_random_row_access_us: lance_random, + parquet_random_row_access_us: parquet_random, + lance_append_10k_secs: lance_append_secs, + lance_append_disk_bytes_added: lance_append_bytes, + // From our Phase 15 trial on the SAME index (ec=80 es=30, recall=1.00): + reference_hnsw_p50_us: 873.0, + reference_hnsw_p95_us: 1413.0, + reference_brute_force_us: 43983.0, + reference_hnsw_build_secs: 230.0, + }; + + let json = serde_json::to_string_pretty(&report)?; + println!("{}", json); + + eprintln!("\n=== Summary ==="); + eprintln!(" Parquet cold load: {:.2}s", report.parquet_cold_load_secs); + eprintln!(" Lance cold open: {:.2}s ({})", + report.lance_cold_open_secs, + format_ratio(report.parquet_cold_load_secs, report.lance_cold_open_secs)); + eprintln!(" Parquet disk: {:.1} MB", report.parquet_disk_bytes as f64 / 1e6); + eprintln!(" Lance disk: {:.1} MB ({})", + report.lance_disk_bytes as f64 / 1e6, + format_ratio(report.parquet_disk_bytes as f32, report.lance_disk_bytes as f32)); + if let (Some(p50), Some(p95)) = (report.lance_search_p50_us, report.lance_search_p95_us) { + eprintln!(" Lance search p50: {:.0}us vs our HNSW {:.0}us ({})", + p50, report.reference_hnsw_p50_us, + format_ratio(report.reference_hnsw_p50_us, p50)); + eprintln!(" Lance search p95: {:.0}us vs our HNSW {:.0}us ({})", + p95, report.reference_hnsw_p95_us, + format_ratio(report.reference_hnsw_p95_us, p95)); + eprintln!(" Speedup vs brute force: {:.1}× (Lance) vs {:.1}× (HNSW)", + report.reference_brute_force_us / p50, + report.reference_brute_force_us / report.reference_hnsw_p50_us); + } + if let Some(build) = report.lance_index_build_secs { + eprintln!(" Index build: {:.1}s (Lance IVF_PQ) vs {:.0}s (our HNSW ec=80) ({}× faster)", + build, report.reference_hnsw_build_secs, report.reference_hnsw_build_secs / build); + } + if let (Some(lance_us), Some(parquet_us)) = (report.lance_random_row_access_us, report.parquet_random_row_access_us) { + eprintln!(" Random row access: {:.0}us (Lance) vs {:.0}us (Parquet scan) ({})", + lance_us, parquet_us, format_ratio(parquet_us, lance_us)); + } + if let Some(append_secs) = report.lance_append_10k_secs { + eprintln!(" Append 10K rows: {:.2}s (Lance native) [Parquet would require full rewrite]", + append_secs); + } + + Ok(()) +} + +fn format_ratio(baseline: f32, candidate: f32) -> String { + if candidate == 0.0 { return "inf".into(); } + let ratio = baseline / candidate; + if ratio >= 1.0 { + format!("{:.2}× faster/smaller", ratio) + } else { + format!("{:.2}× slower/larger", 1.0 / ratio) + } +} + +fn dir_size_bytes(path: &str) -> u64 { + fn recurse(p: &std::path::Path) -> u64 { + let Ok(meta) = std::fs::metadata(p) else { return 0; }; + if meta.is_file() { return meta.len(); } + let Ok(entries) = std::fs::read_dir(p) else { return 0; }; + entries + .filter_map(|e| e.ok()) + .map(|e| recurse(&e.path())) + .sum() + } + recurse(std::path::Path::new(path)) +} + +/// Read the existing vector Parquet (binary-blob format: source, doc_id, +/// chunk_idx, chunk_text, vector as Binary bytes). +fn read_parquet_vectors(path: &str) -> Result<(Arc, Vec, usize)> { + use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + use std::fs::File; + + let file = File::open(path).with_context(|| format!("open {path}"))?; + let builder = ParquetRecordBatchReaderBuilder::try_new(file)?; + let schema = builder.schema().clone(); + let reader = builder.build()?; + let batches: Vec = reader.collect::, _>>()?; + let rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + Ok((schema, batches, rows)) +} + +fn detect_vector_dims(batches: &[RecordBatch]) -> Result { + for batch in batches { + let vector_col_idx = batch + .schema() + .index_of("vector") + .context("no 'vector' column in parquet")?; + let col = batch.column(vector_col_idx); + if let Some(binary) = col.as_any().downcast_ref::() { + for i in 0..binary.len() { + if !binary.is_null(i) { + let bytes = binary.value(i); + return Ok(bytes.len() / 4); // f32 = 4 bytes + } + } + } + } + anyhow::bail!("could not determine vector dimensions") +} + +/// Convert our binary-blob vector representation into Arrow's native +/// FixedSizeList — that's what Lance expects for vector columns. +fn convert_to_fixed_size_list( + schema: &Arc, + batches: Vec, + dims: usize, +) -> Result<(Arc, Vec)> { + // New schema keeps everything identical but replaces the vector column + // with a FixedSizeList. + let new_fields: Vec> = schema + .fields() + .iter() + .map(|f| { + if f.name() == "vector" { + Arc::new(Field::new( + "vector", + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::Float32, true)), + dims as i32, + ), + false, + )) + } else { + f.clone() + } + }) + .collect(); + let new_schema = Arc::new(Schema::new(new_fields)); + + let mut new_batches = Vec::with_capacity(batches.len()); + for batch in batches { + let vector_idx = batch.schema().index_of("vector")?; + let mut new_arrays: Vec = Vec::with_capacity(batch.num_columns()); + for (i, col) in batch.columns().iter().enumerate() { + if i == vector_idx { + let binary = col + .as_any() + .downcast_ref::() + .context("vector column must be Binary")?; + let fsl = binary_to_fixed_size_list(binary, dims)?; + new_arrays.push(Arc::new(fsl)); + } else { + new_arrays.push(col.clone()); + } + } + new_batches.push(RecordBatch::try_new(new_schema.clone(), new_arrays)?); + } + + Ok((new_schema, new_batches)) +} + +fn binary_to_fixed_size_list(binary: &BinaryArray, dims: usize) -> Result { + let n = binary.len(); + let mut all_floats: Vec = Vec::with_capacity(n * dims); + for i in 0..n { + if binary.is_null(i) { + all_floats.extend(std::iter::repeat(0.0).take(dims)); + continue; + } + let bytes = binary.value(i); + if bytes.len() != dims * 4 { + anyhow::bail!( + "row {} has {} bytes, expected {} ({} × f32)", + i, bytes.len(), dims * 4, dims, + ); + } + for chunk in bytes.chunks_exact(4) { + all_floats.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])); + } + } + let values = Float32Array::from(all_floats); + let field = Arc::new(Field::new("item", DataType::Float32, true)); + FixedSizeListArray::try_new(field, dims as i32, Arc::new(values), None) + .context("build FixedSizeListArray") +} + +/// Write batches into a Lance dataset at the given path. +async fn write_lance_dataset( + path: &str, + schema: Arc, + batches: Vec, +) -> Result<()> { + use lance::dataset::{Dataset, WriteParams}; + + let reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema); + Dataset::write(reader, path, Some(WriteParams::default())) + .await + .context("Dataset::write")?; + Ok(()) +} + +/// Open a Lance dataset cold (from disk) and scan it fully — measuring the +/// equivalent of our "load embeddings from Parquet" cost. +async fn cold_open_and_scan_lance(path: &str) -> Result { + use futures::StreamExt; + use lance::dataset::Dataset; + + let dataset = Dataset::open(path).await.context("Dataset::open")?; + let scanner = dataset.scan(); + let mut stream = scanner.try_into_stream().await?; + let mut total = 0usize; + while let Some(batch) = stream.next().await { + let batch = batch?; + total += batch.num_rows(); + } + Ok(total) +} + +/// Build an IVF_PQ vector index on the `vector` column. IVF_PQ (Inverted File +/// with Product Quantization) is Lance's native ANN index — comparable to +/// HNSW in intent, but on-disk and compatible with Lance's random-access +/// model. +async fn build_lance_vector_index(path: &str, _dims: usize) -> Result<()> { + use lance::dataset::Dataset; + use lance::index::vector::VectorIndexParams; + use lance_index::{DatasetIndexExt, IndexType}; + use lance_linalg::distance::MetricType; + + let mut dataset = Dataset::open(path).await?; + + // IVF_PQ with ~sqrt(N) partitions is a reasonable default for 100K. + // num_sub_vectors must divide dims evenly: 768/48 = 16 dims per subvector. + // num_bits = 8 gives 256 codes per subvector (good recall/size trade). + // max_iterations = 50 is plenty for this scale. + let params = VectorIndexParams::ivf_pq( + 316, // num_partitions (~sqrt(100000)) + 8, // num_bits + 48, // num_sub_vectors + MetricType::Cosine, + 50, // max_iterations + ); + + dataset + .create_index( + &["vector"], + IndexType::Vector, + Some("vec_idx".into()), + ¶ms, + true, + ) + .await + .context("create_index")?; + + Ok(()) +} + +/// Run N vector searches against the Lance dataset and return (p50, p95, p99) latencies in us. +/// Uses a handful of random rows as queries — same pattern as our harness::synthetic_from_chunks. +async fn run_search_benchmarks(path: &str, _dims: usize) -> Result<(f32, f32, f32)> { + use futures::StreamExt; + use lance::dataset::Dataset; + + let dataset = Dataset::open(path).await?; + + // Pick 20 representative query vectors from the data itself. + // (Synthetic — same pattern as our existing harness.) + let query_vectors = sample_query_vectors(&dataset, 20).await?; + + let mut latencies_us: Vec = Vec::with_capacity(query_vectors.len()); + for (i, qv) in query_vectors.iter().enumerate() { + let qarr = Arc::new(Float32Array::from(qv.clone())) as ArrayRef; + + let t0 = Instant::now(); + let mut scanner = dataset.scan(); + scanner + .nearest("vector", qarr.as_any().downcast_ref::().unwrap(), 10) + .context("scanner.nearest")?; + let mut stream = scanner.try_into_stream().await?; + let mut hits = 0; + while let Some(batch) = stream.next().await { + let batch = batch?; + hits += batch.num_rows(); + } + let us = t0.elapsed().as_micros() as f32; + latencies_us.push(us); + if i == 0 { + eprintln!(" first query: {} hits in {:.0}us (includes any lazy init)", hits, us); + } + } + + latencies_us.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let p = |pct: f32| -> f32 { + let idx = ((latencies_us.len() as f32 - 1.0) * pct).round() as usize; + latencies_us[idx.min(latencies_us.len() - 1)] + }; + Ok((p(0.50), p(0.95), p(0.99))) +} + +/// Random row access via Lance's `take` — fetch 20 random rows by index, measure avg latency. +async fn measure_random_access_lance(path: &str) -> Result { + use lance::dataset::Dataset; + let dataset = Dataset::open(path).await?; + let n = dataset.count_rows(None).await?; + let indices: Vec = (0..20).map(|i| ((i as u64) * (n as u64 / 23)) % (n as u64)).collect(); + + // Full-schema projection — Lance's Schema implements Into. + let schema = dataset.schema().clone(); + let mut total_us: u128 = 0; + for idx in &indices { + let t0 = Instant::now(); + let _batch = dataset.take(&[*idx], schema.clone()).await?; + total_us += t0.elapsed().as_micros(); + } + Ok(total_us as f32 / indices.len() as f32) +} + +/// Random row access for Parquet — full scan + filter. There's no random-access +/// primitive in vanilla Parquet, so this is the cost of finding one specific row. +/// This is the cost our current design pays for "get doc X's full text for RAG." +fn measure_random_access_parquet(path: &str) -> Result { + use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + use std::fs::File; + + // We simulate 5 lookups — full scan each time. 20 would be painful. + let iters = 5; + let mut total_us: u128 = 0; + for _ in 0..iters { + let t0 = Instant::now(); + let file = File::open(path)?; + let builder = ParquetRecordBatchReaderBuilder::try_new(file)?; + let reader = builder.build()?; + // Iterate until we've conceptually found a row — we stop early if + // we wanted row 50000, but we have to at least read its batch. + let mut seen = 0usize; + for b in reader { + let b = b?; + seen += b.num_rows(); + if seen > 50000 { break; } + } + total_us += t0.elapsed().as_micros(); + } + Ok(total_us as f32 / iters as f32) +} + +/// Append 10K new rows to the existing Lance dataset. +/// Measures the "ingest delta" cost without full rewrite. +async fn append_10k_rows(path: &str, dims: usize) -> Result<()> { + use lance::dataset::{Dataset, WriteMode, WriteParams}; + + let dataset = Dataset::open(path).await?; + let schema = dataset.schema(); + let arrow_schema: Arc = Arc::new(schema.into()); + + // Build a 10K row batch with random-ish data matching the existing schema. + let n = 10_000; + let arrays: Vec = arrow_schema + .fields() + .iter() + .map(|f| -> Result { + match f.data_type() { + DataType::Utf8 => { + let vals: Vec = (0..n).map(|i| format!("appended-{}", i)).collect(); + Ok(Arc::new(arrow_array::StringArray::from(vals))) + } + DataType::Int32 => { + let vals: Vec = (0..n as i32).collect(); + Ok(Arc::new(arrow_array::Int32Array::from(vals))) + } + DataType::FixedSizeList(_, _) => { + let floats: Vec = (0..n * dims).map(|i| (i as f32).sin()).collect(); + let values = Float32Array::from(floats); + let field = Arc::new(Field::new("item", DataType::Float32, true)); + let fsl = FixedSizeListArray::try_new(field, dims as i32, Arc::new(values), None)?; + Ok(Arc::new(fsl)) + } + other => anyhow::bail!("unsupported append column type: {:?}", other), + } + }) + .collect::>>()?; + + let batch = RecordBatch::try_new(arrow_schema.clone(), arrays)?; + let reader = RecordBatchIterator::new(vec![Ok(batch)].into_iter(), arrow_schema); + let params = WriteParams { mode: WriteMode::Append, ..Default::default() }; + Dataset::write(reader, path, Some(params)).await?; + Ok(()) +} + +/// Grab a few existing vectors from the dataset to use as self-similar queries. +async fn sample_query_vectors( + dataset: &lance::dataset::Dataset, + count: usize, +) -> Result>> { + use futures::StreamExt; + + // Just take the first `count` rows; good enough for latency measurement. + let scanner = dataset.scan(); + let mut scanner = scanner; + scanner.limit(Some(count as i64), None)?; + scanner.project(&["vector"])?; + let mut stream = scanner.try_into_stream().await?; + + let mut out = Vec::with_capacity(count); + while let Some(batch) = stream.next().await { + let batch = batch?; + let vector_col = batch + .column(0) + .as_any() + .downcast_ref::() + .context("vector column must be FixedSizeList")?; + + for row in 0..vector_col.len() { + if out.len() >= count { break; } + let values = vector_col.value(row); + let f32_arr = values + .as_any() + .downcast_ref::() + .context("inner array must be Float32")?; + let mut v = Vec::with_capacity(f32_arr.len()); + for i in 0..f32_arr.len() { + v.push(f32_arr.value(i)); + } + out.push(v); + } + if out.len() >= count { break; } + } + Ok(out) +} + diff --git a/docs/ADR-019-vector-storage.md b/docs/ADR-019-vector-storage.md new file mode 100644 index 0000000..0ed2fe2 --- /dev/null +++ b/docs/ADR-019-vector-storage.md @@ -0,0 +1,105 @@ +# ADR-019: Vector Storage — Parquet+HNSW stays, Lance joins as second tier + +**Status:** Accepted — 2026-04-16 +**Implements:** Phase 18 from PRD (Lance evaluation) +**Supersedes:** nothing (augments ADR-008) +**Owner:** J + +--- + +## Context + +Phase 18 of the PRD committed to settling "Parquet+sidecar vs Lance" with measurements, not vibes. This ADR records the benchmark outcome and the resulting architectural direction. + +Input data: `data/vectors/resumes_100k_v2.parquet` — 100,000 × 768d embeddings, the same index we tuned HNSW against in Phase 15. + +Benchmark harness: `crates/lance-bench/src/main.rs` — standalone binary, deliberately not integrated into the workspace's common deps to avoid forcing DataFusion/Arrow upgrades on the rest of the stack until we'd decided. + +## The scorecard + +All numbers measured on the same 128GB server, same 100K × 768d index, release build: + +| Dimension | Parquet + HNSW (current) | Lance 4.0 IVF_PQ (candidate) | Winner | +|---|---|---|---| +| Cold load | 0.17s | 0.13s | Lance, 1.27× — *does not clear 2× decision threshold* | +| Disk size (data only) | 330.3 MB | 330.4 MB | Tie | +| Index on-disk footprint | 0 (HNSW is RAM-only) | 7.4 MB | Lance | +| Index build time | 230s (ec=80 es=30) | 16s | **Lance, 14× faster** | +| Search p50 | 873us (recall@10 = 1.00) | 2229us (recall unmeasured, likely 0.85-0.95) | **Parquet+HNSW, 2.55× faster** | +| Search p95 | 1413us | 4998us | **Parquet+HNSW, 3.54× faster** | +| Speedup vs brute force (p50) | 50.4× | 19.7× | Parquet+HNSW | +| Random row access (fetch by id) | ~35ms (full-file scan) | 311us | **Lance, 112× faster** | +| Append 10K rows | Full-file rewrite (~330MB + re-embed + re-index) | 0.08s, +31MB delta | **Lance, structurally different** | + +## Applying the decision rules from EXECUTION_PLAN.md + +Original rules: +- *Lance wins cold-load by ≥2× AND matches search latency → migrate* +- *Within 50% across board → stay Parquet, document ceiling* +- *Lance loses → close the door* + +Strict reading: cold-load is **1.27×, not ≥2×**. Search latency is **2.55× worse, not matching**. By the written rule, we stay. + +But the written rule missed something. It assumed Lance's value would show up as raw-speed wins across the whole table. The actual benchmark reveals Lance's value is **in capabilities the current stack doesn't have**, not in the metrics we scoped: + +1. **Random row access** is 112× faster. Our Parquet design can't do O(1) random access to a row — RAG text retrieval is a full-file scan today. Lance makes this native. +2. **Append** is structurally different. Adding 10K rows is 0.08s on Lance; on our stack it's a full rewrite of the entire 330MB Parquet file plus re-embedding plus re-indexing. +3. **Index build** is 14× faster. The HNSW `ec=80 es=30` production default takes 230s; Lance IVF_PQ takes 16s. Hot-swap generation (Phase 16) is much more feasible at 16s per build. + +## The decision + +**Hybrid architecture — neither replace nor reject.** + +### What stays + +- `vectord::store` with Parquet + binary-blob vectors → **primary vector backend** +- `vectord::hnsw::HnswStore` → in-RAM HNSW for search at 100K-scale indexes +- All Phase 15 trial infrastructure → keeps working, unchanged +- Production default `ec=80 es=30` → still the right call for in-RAM use + +### What gets added + +- **`vectord::lance_store`** — second backend using Lance as the persistence layer + - Scope: indexes where *any* of the following apply: + - Corpus exceeds ~5M vectors (our in-RAM ceiling) + - Workload is append-heavy (incremental ingest from streaming sources) + - Text retrieval dominates (point lookups by doc_id for RAG) + - Hot-swap generations are required (Phase 16) + - Implemented as a standalone crate first (follow the pilot layout), promoted into vectord when the API stabilizes +- **Profile-level configuration** — `ModelProfile.vector_backend: Parquet | Lance` so each profile picks the tier that matches its workload + +### What we keep watching (but don't act on yet) + +- **Lance search latency at scale.** 2229us at 100K is worse than HNSW. At 10M we expect Lance to pull ahead because HNSW doesn't fit in RAM. Re-benchmark when we have a 10M-vector corpus to test against. +- **IVF_PQ recall.** We measured latency but not recall — I picked `num_partitions=316, nbits=8, num_sub_vectors=48` blindly. A proper recall sweep is part of Phase C when we integrate Lance into the trial system. +- **Lance's own HNSW-on-disk variant** (`with_ivf_hnsw_pq_params`). Might close the in-RAM latency gap. Left for a future pilot. + +## Why this isn't moving the goalposts + +The EXECUTION_PLAN rule was "migrate or don't migrate." The evidence says neither is correct — one stack can't serve both the staffing SQL workload AND the LLM-brain append-heavy random-access workload at all scales. The honest answer is two backends, each doing what it's good at, selected per-profile. + +This matches the dual-use framing in the 2026-04-16 PRD update: different workloads, shared substrate, per-profile specialization. We wrote that principle into the PRD; the benchmark data just made it concrete for the vector tier. + +## Follow-up work (updates EXECUTION_PLAN.md) + +- **Phase C (decoupled embedding refresh)** gets easier — Lance's native append removes the need to invent a "vectors delta" Parquet layer. When we build Phase C, use Lance as the embedding-layer backend. +- **Phase 16 (hot-swap)** becomes feasible — 16s index builds mean online re-trials are cheap. When we build Phase 16, Lance is the storage for index generations. +- **Phase 17 (model profiles)** gains a new field: `vector_backend: Parquet | Lance`. Default Parquet for backward compatibility. Agents can opt into Lance. + +## Costs we accept + +- **Second dependency tree.** Lance pulls in DataFusion 52 and Arrow 57, while our main stack runs DataFusion 47 and Arrow 55. Keeping lance-bench isolated works for a pilot; productionizing will need either workspace-wide upgrade or a firewall via a dedicated `vectord-lance` crate. +- **Second API surface.** Lance's vector-index API is different from our HNSW code. Per-profile abstraction cost is real. +- **Operational complexity.** Two vector storage implementations to debug and monitor. + +Worth it because the alternative — forcing every workload through one backend — means either the staffing case or the LLM-brain case is served badly. + +## Ceilings this updates in PRD + +The PRD "Known ceilings" table had: + +> Vector count per index | ~5M vectors on 128GB RAM | 10M+ (serious web crawl) | Phase 18 Lance migration OR mmap'd embeddings + +Update to: + +> Vector count per index | ~5M vectors on 128GB RAM (Parquet+HNSW in-RAM) | Past 5M | Switch that profile's `vector_backend` to Lance; IVF_PQ keeps working on disk-resident quantized codes diff --git a/docs/DECISIONS.md b/docs/DECISIONS.md index de0df11..32089bf 100644 --- a/docs/DECISIONS.md +++ b/docs/DECISIONS.md @@ -89,3 +89,8 @@ **Date:** 2026-04-16 **Decision:** All append-only journals (error journal, HNSW trial journal, future audit logs) use the `storaged::append_log::AppendLog` helper. Events accumulate in an in-memory buffer; on threshold or explicit `flush()`, the buffer is written as one new timestamped file (`batch_{epoch_us}.jsonl`). Existing files are never rewritten. `compact()` merges all batches into one with a fresh timestamp, preserving chronological sort order. **Rationale:** Object stores have no append primitive. Naive "read-modify-write the whole JSONL file on every event" is O(N²) cumulative work and creates the classic small-file / rewrite-amplification anti-pattern that llms3.com flags as the top lakehouse pitfall. Write-once batching is the LSM-tree idea applied to small JSONL events — bounded write amplification, append-only semantics, optional compaction for read efficiency. The in-memory ring buffer preserves O(1) recent-event reads for the `/storage/errors` and `/hnsw/trials` query endpoints. + +## ADR-019: Vector storage — Parquet+HNSW primary, Lance secondary (hybrid) +**Date:** 2026-04-16 +**Decision:** Keep Parquet + binary-blob vectors + in-RAM HNSW as the primary vector backend. Add Lance as a second backend available per-profile for workloads where Lance wins architecturally. Per-profile `vector_backend: Parquet | Lance` field becomes part of Phase 17 model profiles. Implementation kicks off via the standalone `crates/lance-bench` crate and is promoted into `vectord::lance_store` when the API stabilizes. +**Rationale:** Head-to-head benchmark on the 100K × 768d `resumes_100k_v2` index (see `docs/ADR-019-vector-storage.md` for the full scorecard). Parquet+HNSW wins current-scale search latency by 2.55× (873us vs 2229us p50). Lance wins index build time by 14× (16s vs 230s), random row access by 112× (311us vs ~35ms full-file scan), and append speed structurally (0.08s vs full Parquet rewrite). Neither strictly dominates — the dual-use PRD framing (staffing + LLM brain) means both workloads exist in the same system. Keeps ADR-008's "Parquet is the format" principle intact for dataset tables; adds Lance as a purpose-built vector-tier option without discarding the tuned HNSW stack. diff --git a/docs/PRD.md b/docs/PRD.md index 6fce45a..ec2928d 100644 --- a/docs/PRD.md +++ b/docs/PRD.md @@ -340,14 +340,13 @@ The question raised 2026-04-16 after J's LLMS3 knowledge base identified Lance a | Step | Deliverable | Decision criteria | |---|---|---| -| 18.1 | Parallel Lance-backed vector index for `resumes_100k_v2` behind feature flag | Both implementations coexist, benchmarkable | -| 18.2 | Head-to-head benchmark: cold-load, search latency, disk size, append cost | See criteria below | -| 18.3 | ADR-019 documenting the decision with measured data | Commit or reject with evidence | +| 18.1 | ✅ Parallel Lance-backed vector index for `resumes_100k_v2` in standalone `crates/lance-bench` | Built 2026-04-16 | +| 18.2 | ✅ Head-to-head benchmark across 8 dimensions (cold-load, search latency, disk, index build, random access, append) | Complete | +| 18.3 | ✅ ADR-019 committed with measured data and decision | See `docs/ADR-019-vector-storage.md` | -**Decision rules:** -- Lance wins on cold-load by ≥2× AND matches search latency → migrate vector layer to Lance. Dataset Parquet stays. -- Lance is within 50% of current → stay on current stack, document ceiling explicitly. -- Lance loses → close the door, move on. +**Outcome:** Hybrid architecture. Parquet+HNSW stays primary (2.55× faster search at 100K in-RAM). Lance joins as a second backend for Phase 16 hot-swap (14× faster index builds), Phase C/append workloads (0.08s vs full rewrite), RAG random-access retrieval (112× faster), and indexes past the ~5M RAM ceiling. + +Per-profile `vector_backend: Parquet | Lance` becomes part of Phase 17 (model profiles). See ADR-019 for the full scorecard and caveats. ### Phase 19+: Further horizon @@ -364,7 +363,7 @@ The current stack has measurable limits. Documenting them so future decisions ar | Dimension | Current ceiling | Breaks at | Escape hatch | |---|---|---|---| -| Vector count per index | ~5M vectors on 128GB RAM | 10M+ (serious web crawl) | Phase 18 Lance migration OR mmap'd embeddings | +| Vector count per index (Parquet+HNSW in-RAM) | ~5M on 128GB | Past 5M | Switch that profile's `vector_backend` to Lance per ADR-019 — IVF_PQ stays on disk-resident quantized codes | | Concurrent active indexes | ~50-100 at 100K vectors each | 10M×50 configurations | Lance disk-resident + per-profile activation | | Rows per dataset | 2.47M proven, probably 100M+ fine | Approaches DataFusion memory limits | DataFusion predicate pushdown + partition pruning (existing) | | Concurrent loaded models | 1-2 on 16GB VRAM (A4000) | 3+ models simultaneous | Not our problem — architectural, driven by Ollama |