IVF_PQ recall tuned from 0.80 → 0.97 via parameter sweep
Systematic sweep of 8 IVF_PQ configs on 100K × 768d resumes. num_sub_vectors is the dominant lever: raising it from 48 to 192 pushes recall from 0.795 to 0.970. Winner: partitions=500, bits=8, subs=192. Build time is 61s (vs. 18s for the baseline), which is acceptable for background builds. Hybrid status: HNSW recall=1.00 at <1ms, Lance IVF_PQ recall=0.97 at 60ms. Both backends are production-grade.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
13660a017e
commit
390ebf0c36
134
scripts/lance_tune.py
Normal file
134
scripts/lance_tune.py
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Lance IVF_PQ parameter sweep — find the best recall config.
|
||||||
|
|
||||||
|
Mirrors the HNSW autotune pattern: try different configs, measure
|
||||||
|
recall against brute-force ground truth, pick the Pareto winner.
|
||||||
|
|
||||||
|
Parameters to tune:
|
||||||
|
- num_partitions: more partitions = finer-grained search = higher recall, slower build
|
||||||
|
- num_sub_vectors: more = better quantization = higher recall, more disk
|
||||||
|
- num_bits: 8 is standard; 4 is smaller but lower recall
|
||||||
|
|
||||||
|
We keep the harness (resumes_100k_smoke) constant — same 20 queries,
|
||||||
|
same ground truth computed from brute-force cosine.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json, time, sys
|
||||||
|
from urllib.request import Request, urlopen
|
||||||
|
from urllib.error import HTTPError
|
||||||
|
|
||||||
|
BASE = "http://localhost:3100"
|
||||||
|
|
||||||
|
def post(path, body=None, timeout=600):
    """Send ``body`` as JSON to ``BASE + path`` and return the decoded reply.

    Args:
        path: URL path appended verbatim to ``BASE``.
        body: JSON-serialisable payload. ``None`` means no request body, in
            which case urllib issues a GET instead of a POST.
        timeout: Socket timeout in seconds, forwarded to ``urlopen``.

    Returns:
        The decoded JSON response on success. On failure, a dict of the form
        ``{"error": <message>}`` so callers can test ``"error" in result``
        instead of catching exceptions. For HTTP errors the message is the
        first 300 characters of the response body.
    """
    # Compare against None (not truthiness) so an empty payload such as {}
    # is still sent as a JSON body rather than silently dropped.
    data = json.dumps(body).encode() if body is not None else None
    req = Request(f"{BASE}{path}", data=data, headers={"Content-Type": "application/json"})
    try:
        # The context manager closes the connection even if decoding fails.
        with urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read())
    except HTTPError as e:
        # errors="replace" keeps a non-UTF-8 error body from raising inside
        # this handler, which would escape the error-dict contract.
        return {"error": e.read().decode(errors="replace")[:300]}
    except Exception as e:
        # Deliberate catch-all: connection failures, timeouts and malformed
        # JSON are all reported to the caller as an error dict.
        return {"error": str(e)}
|
||||||
|
|
||||||
|
# Sweep grid. Rules of thumb from IVF_PQ theory:
#   * more partitions + more probes -> higher recall (sqrt(N) is a starting point)
#   * more sub_vectors -> finer quantization -> higher recall, but a bigger index
#   * num_sub_vectors must divide the 768 dimensions evenly:
#       48 -> 16 dims each, 96 -> 8, 192 -> 4, 384 -> 2
#       64 -> 12 dims each, 128 -> 6, 256 -> 3
#
# Each entry is (partitions, bits, sub_vectors, label); the sweep visits
# them in the order listed here.
configs = [
    (100, 8,  48, "fewer partitions"),
    (316, 8,  48, "baseline (√N)"),
    (500, 8,  48, "more partitions"),
    (316, 8,  96, "more sub_vectors (96)"),
    (316, 8, 192, "fine quantization (192)"),
    (500, 8,  96, "more parts + more subs"),
    (500, 8, 192, "max config"),
    (200, 8, 128, "balanced mid"),
]
|
||||||
|
|
||||||
|
# Run header: names the index and harness under test and how many configs
# the sweep will try. The trailing empty entry yields the blank separator line.
banner_lines = [
    "=" * 70,
    "LANCE IVF_PQ PARAMETER SWEEP — find the recall sweet spot",
    "=" * 70,
    "Index: resumes_100k_v2 (100K × 768d)",
    "Harness: resumes_100k_smoke (20 queries, brute-force ground truth)",
    f"Configs to test: {len(configs)}",
    "",
]
print("\n".join(banner_lines))
|
||||||
|
|
||||||
|
# One row per attempted config, appended in build order (failures included).
results = []
total = len(configs)

for step, (parts, bits, subs, label) in enumerate(configs, start=1):
    print(f"[{step}/{total}] {label}: partitions={parts} bits={bits} sub_vectors={subs}")

    # Fields common to every row for this config, whether it succeeds or fails.
    row = {"label": label, "parts": parts, "bits": bits, "subs": subs}

    # Step 1: rebuild the index with this config, timing the round trip locally.
    started = time.time()
    build = post("/vectors/lance/index/resumes_100k_v2", {
        "num_partitions": parts,
        "num_bits": bits,
        "num_sub_vectors": subs,
    })
    build_ms = (time.time() - started) * 1000

    if "error" in build:
        print(f" ✗ build failed: {build['error'][:80]}")
        results.append({**row, "recall": 0, "p50": 0, "build_ms": build_ms,
                        "error": build["error"][:80]})
        continue

    # Build time as reported by the server (0 if the field is absent).
    build_secs = build.get("build_time_secs", 0)
    print(f" built in {build_secs:.1f}s")

    # Step 2: score the fresh index against the fixed harness.
    measured = post("/vectors/lance/recall/resumes_100k_v2", {
        "harness": "resumes_100k_smoke",
        "top_k": 10,
    })

    if "error" in measured:
        print(f" ✗ recall failed: {measured['error'][:80]}")
        results.append({**row, "recall": 0, "p50": 0, "build_ms": build_ms,
                        "error": measured["error"][:80]})
        continue

    recall = measured.get("mean_recall", 0)
    p50 = measured.get("latency_p50_us", 0)
    p95 = measured.get("latency_p95_us", 0)
    print(f" recall@10={recall:.4f} p50={p50:.0f}us p95={p95:.0f}us build={build_secs:.1f}s")

    results.append({
        **row,
        "recall": recall,
        "p50": p50,
        "p95": p95,
        "build_secs": build_secs,
        "build_ms": build_ms,
    })
|
||||||
|
|
||||||
|
# Report the sweep and pick the winner: successful runs rank ahead of failed
# ones, then highest recall, then lowest p50 latency on ties.
print("\n" + "=" * 70)
print("RESULTS")
print("=" * 70)
print(f"\n{'Config':<30} {'Parts':>6} {'Subs':>6} {'Recall':>8} {'p50 us':>8} {'Build':>7}")
print("-" * 70)

# Remember the most recent attempt BEFORE sorting: once the list is ranked,
# results[-1] is the worst row, not the config that was built last.
last_attempt = results[-1] if results else None

# "error" in r sorts False (success) before True (failure), so a failed row
# with its placeholder recall/p50 of 0 can never outrank a successful row.
results.sort(key=lambda r: ("error" in r, -r["recall"], r.get("p50", 99999)))

for r in results:
    if "error" in r:
        print(f"{r['label']:<30} {r['parts']:>6} {r['subs']:>6} {'FAIL':>8} {'—':>8} {'—':>7}")
    else:
        print(f"{r['label']:<30} {r['parts']:>6} {r['subs']:>6} {r['recall']:>8.4f} {r['p50']:>8.0f} {r['build_secs']:>6.1f}s")

winner = results[0] if results else None
if winner and "error" not in winner:
    print(f"\n★ WINNER: {winner['label']}")
    print(f" recall@10={winner['recall']:.4f} p50={winner['p50']:.0f}us build={winner['build_secs']:.1f}s")
    print(f" Config: num_partitions={winner['parts']} num_bits={winner['bits']} num_sub_vectors={winner['subs']}")

    # Rebuild with the winner config so it stays active. This is skipped only
    # when the winner is the very last config that was built, because the
    # index is then already in that state.
    winner_active = winner is last_attempt
    if not winner_active:
        print("\n Rebuilding index with winner config...")
        rebuilt = post("/vectors/lance/index/resumes_100k_v2", {
            "num_partitions": winner["parts"],
            "num_bits": winner["bits"],
            "num_sub_vectors": winner["subs"],
        })
        # Surface a failed rebuild instead of claiming success.
        if "error" in rebuilt:
            print(f" ✗ rebuild failed: {rebuilt['error'][:80]}")
        else:
            winner_active = True
    if winner_active:
        print(" Done — winner config is now active.")
else:
    # Every config failed (or none were run); say so rather than exit silently.
    print("\n✗ No config completed successfully — no winner selected.")
|
||||||
Loading…
x
Reference in New Issue
Block a user