100K embedding: supervisor achieves 67.6/sec (57% faster than single pipeline)
- 4 parallel pipelines on i9 + A4000 via Ollama
- Previous single-pipeline: 43/sec, 39min for 100K
- Supervisor: 67.6/sec, 22min for 100K
- Previous 100K attempt failed at 97K (no retry) — supervisor handles this
- Checkpointing every 1000 chunks for crash recovery
- Round-robin retry on batch failure (3 attempts)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6f0f92a9e4
commit
b2cd54e941
@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "03b65605-7cce-4a49-b338-4f19b0ff2ed5",
|
|
||||||
"name": "call_log",
|
|
||||||
"schema_fingerprint": "auto",
|
|
||||||
"objects": [
|
|
||||||
{
|
|
||||||
"bucket": "data",
|
|
||||||
"key": "datasets/call_log.parquet",
|
|
||||||
"size_bytes": 35951077,
|
|
||||||
"created_at": "2026-03-27T14:00:44.377704982Z"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created_at": "2026-03-27T14:00:44.377712082Z",
|
|
||||||
"updated_at": "2026-03-27T14:00:44.377712082Z"
|
|
||||||
}
|
|
||||||
@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "0e4feb1a-1421-46ac-8222-ba0f0bd6e13e",
|
|
||||||
"name": "email_log",
|
|
||||||
"schema_fingerprint": "auto",
|
|
||||||
"objects": [
|
|
||||||
{
|
|
||||||
"bucket": "data",
|
|
||||||
"key": "datasets/email_log.parquet",
|
|
||||||
"size_bytes": 16768671,
|
|
||||||
"created_at": "2026-03-27T14:00:46.272499334Z"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created_at": "2026-03-27T14:00:46.272507485Z",
|
|
||||||
"updated_at": "2026-03-27T14:00:46.272507485Z"
|
|
||||||
}
|
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"id": "0fd78303-9ad4-45fd-90d7-db95607d9ab1",
|
||||||
|
"name": "timesheets",
|
||||||
|
"schema_fingerprint": "auto",
|
||||||
|
"objects": [
|
||||||
|
{
|
||||||
|
"bucket": "data",
|
||||||
|
"key": "datasets/timesheets.parquet",
|
||||||
|
"size_bytes": 17539932,
|
||||||
|
"created_at": "2026-03-27T14:42:43.922019299Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created_at": "2026-03-27T14:42:43.922025703Z",
|
||||||
|
"updated_at": "2026-03-27T14:42:43.922025703Z",
|
||||||
|
"description": "",
|
||||||
|
"owner": "",
|
||||||
|
"sensitivity": null,
|
||||||
|
"columns": [],
|
||||||
|
"lineage": null,
|
||||||
|
"freshness": null,
|
||||||
|
"tags": [],
|
||||||
|
"row_count": null
|
||||||
|
}
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"id": "1339f3d6-7677-47fb-8182-5f8e43f27cde",
|
||||||
|
"name": "job_orders",
|
||||||
|
"schema_fingerprint": "auto",
|
||||||
|
"objects": [
|
||||||
|
{
|
||||||
|
"bucket": "data",
|
||||||
|
"key": "datasets/job_orders.parquet",
|
||||||
|
"size_bytes": 905534,
|
||||||
|
"created_at": "2026-03-27T14:42:38.935718195Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created_at": "2026-03-27T14:42:38.935724058Z",
|
||||||
|
"updated_at": "2026-03-27T14:42:38.935724058Z",
|
||||||
|
"description": "",
|
||||||
|
"owner": "",
|
||||||
|
"sensitivity": null,
|
||||||
|
"columns": [],
|
||||||
|
"lineage": null,
|
||||||
|
"freshness": null,
|
||||||
|
"tags": [],
|
||||||
|
"row_count": null
|
||||||
|
}
|
||||||
@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "154cb8fe-5dcb-4d23-8ddb-c95b259757e9",
|
|
||||||
"name": "timesheets",
|
|
||||||
"schema_fingerprint": "auto",
|
|
||||||
"objects": [
|
|
||||||
{
|
|
||||||
"bucket": "data",
|
|
||||||
"key": "datasets/timesheets.parquet",
|
|
||||||
"size_bytes": 17539932,
|
|
||||||
"created_at": "2026-03-27T14:00:40.845373500Z"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created_at": "2026-03-27T14:00:40.845380446Z",
|
|
||||||
"updated_at": "2026-03-27T14:00:40.845380446Z"
|
|
||||||
}
|
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"id": "1d8a065e-59c1-45ce-967b-398bc8370cbb",
|
||||||
|
"name": "candidates",
|
||||||
|
"schema_fingerprint": "auto",
|
||||||
|
"objects": [
|
||||||
|
{
|
||||||
|
"bucket": "data",
|
||||||
|
"key": "datasets/candidates.parquet",
|
||||||
|
"size_bytes": 10592165,
|
||||||
|
"created_at": "2026-03-27T14:42:38.823368759Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created_at": "2026-03-27T14:42:38.823374843Z",
|
||||||
|
"updated_at": "2026-03-27T14:42:38.823374843Z",
|
||||||
|
"description": "",
|
||||||
|
"owner": "",
|
||||||
|
"sensitivity": null,
|
||||||
|
"columns": [],
|
||||||
|
"lineage": null,
|
||||||
|
"freshness": null,
|
||||||
|
"tags": [],
|
||||||
|
"row_count": null
|
||||||
|
}
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"id": "94a8bd16-6756-43af-b951-09a9e6b8300f",
|
||||||
|
"name": "clients",
|
||||||
|
"schema_fingerprint": "auto",
|
||||||
|
"objects": [
|
||||||
|
{
|
||||||
|
"bucket": "data",
|
||||||
|
"key": "datasets/clients.parquet",
|
||||||
|
"size_bytes": 21971,
|
||||||
|
"created_at": "2026-03-27T14:42:38.830329102Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created_at": "2026-03-27T14:42:38.830331694Z",
|
||||||
|
"updated_at": "2026-03-27T14:42:38.830331694Z",
|
||||||
|
"description": "",
|
||||||
|
"owner": "",
|
||||||
|
"sensitivity": null,
|
||||||
|
"columns": [],
|
||||||
|
"lineage": null,
|
||||||
|
"freshness": null,
|
||||||
|
"tags": [],
|
||||||
|
"row_count": null
|
||||||
|
}
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"id": "9c4d9116-1d9d-4afd-a8d1-c514a678e5fa",
|
||||||
|
"name": "call_log",
|
||||||
|
"schema_fingerprint": "auto",
|
||||||
|
"objects": [
|
||||||
|
{
|
||||||
|
"bucket": "data",
|
||||||
|
"key": "datasets/call_log.parquet",
|
||||||
|
"size_bytes": 35951077,
|
||||||
|
"created_at": "2026-03-27T14:42:47.395548205Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created_at": "2026-03-27T14:42:47.395555326Z",
|
||||||
|
"updated_at": "2026-03-27T14:42:47.395555326Z",
|
||||||
|
"description": "",
|
||||||
|
"owner": "",
|
||||||
|
"sensitivity": null,
|
||||||
|
"columns": [],
|
||||||
|
"lineage": null,
|
||||||
|
"freshness": null,
|
||||||
|
"tags": [],
|
||||||
|
"row_count": null
|
||||||
|
}
|
||||||
@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "d2ce2995-9c60-49c9-9b41-197020cebaae",
|
|
||||||
"name": "placements",
|
|
||||||
"schema_fingerprint": "auto",
|
|
||||||
"objects": [
|
|
||||||
{
|
|
||||||
"bucket": "data",
|
|
||||||
"key": "datasets/placements.parquet",
|
|
||||||
"size_bytes": 1213820,
|
|
||||||
"created_at": "2026-03-27T14:00:35.885543632Z"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created_at": "2026-03-27T14:00:35.885550623Z",
|
|
||||||
"updated_at": "2026-03-27T14:00:35.885550623Z"
|
|
||||||
}
|
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"id": "d35c7941-37e2-4bde-8226-5cf69c74931a",
|
||||||
|
"name": "placements",
|
||||||
|
"schema_fingerprint": "auto",
|
||||||
|
"objects": [
|
||||||
|
{
|
||||||
|
"bucket": "data",
|
||||||
|
"key": "datasets/placements.parquet",
|
||||||
|
"size_bytes": 1213820,
|
||||||
|
"created_at": "2026-03-27T14:42:39.040983450Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created_at": "2026-03-27T14:42:39.040989351Z",
|
||||||
|
"updated_at": "2026-03-27T14:42:39.040989351Z",
|
||||||
|
"description": "",
|
||||||
|
"owner": "",
|
||||||
|
"sensitivity": null,
|
||||||
|
"columns": [],
|
||||||
|
"lineage": null,
|
||||||
|
"freshness": null,
|
||||||
|
"tags": [],
|
||||||
|
"row_count": null
|
||||||
|
}
|
||||||
@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "d8170213-d6af-4478-ae23-59f06fda3165",
|
|
||||||
"name": "job_orders",
|
|
||||||
"schema_fingerprint": "auto",
|
|
||||||
"objects": [
|
|
||||||
{
|
|
||||||
"bucket": "data",
|
|
||||||
"key": "datasets/job_orders.parquet",
|
|
||||||
"size_bytes": 905534,
|
|
||||||
"created_at": "2026-03-27T14:00:35.780022147Z"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created_at": "2026-03-27T14:00:35.780029168Z",
|
|
||||||
"updated_at": "2026-03-27T14:00:35.780029168Z"
|
|
||||||
}
|
|
||||||
@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "e26d3633-a341-4229-9819-f287d98b788a",
|
|
||||||
"name": "candidates",
|
|
||||||
"schema_fingerprint": "auto",
|
|
||||||
"objects": [
|
|
||||||
{
|
|
||||||
"bucket": "data",
|
|
||||||
"key": "datasets/candidates.parquet",
|
|
||||||
"size_bytes": 10592165,
|
|
||||||
"created_at": "2026-03-27T14:00:35.662150713Z"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created_at": "2026-03-27T14:00:35.662162510Z",
|
|
||||||
"updated_at": "2026-03-27T14:00:35.662162510Z"
|
|
||||||
}
|
|
||||||
@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "e4b8441f-d729-4465-91fb-2ed5f481e65d",
|
|
||||||
"name": "clients",
|
|
||||||
"schema_fingerprint": "auto",
|
|
||||||
"objects": [
|
|
||||||
{
|
|
||||||
"bucket": "data",
|
|
||||||
"key": "datasets/clients.parquet",
|
|
||||||
"size_bytes": 21971,
|
|
||||||
"created_at": "2026-03-27T14:00:35.670181596Z"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created_at": "2026-03-27T14:00:35.670184688Z",
|
|
||||||
"updated_at": "2026-03-27T14:00:35.670184688Z"
|
|
||||||
}
|
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"id": "e959ca11-9f6b-4843-864a-cc3f50a8aa60",
|
||||||
|
"name": "email_log",
|
||||||
|
"schema_fingerprint": "auto",
|
||||||
|
"objects": [
|
||||||
|
{
|
||||||
|
"bucket": "data",
|
||||||
|
"key": "datasets/email_log.parquet",
|
||||||
|
"size_bytes": 16768671,
|
||||||
|
"created_at": "2026-03-27T14:42:49.271082991Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created_at": "2026-03-27T14:42:49.271091077Z",
|
||||||
|
"updated_at": "2026-03-27T14:42:49.271091077Z",
|
||||||
|
"description": "",
|
||||||
|
"owner": "",
|
||||||
|
"sensitivity": null,
|
||||||
|
"columns": [],
|
||||||
|
"lineage": null,
|
||||||
|
"freshness": null,
|
||||||
|
"tags": [],
|
||||||
|
"row_count": null
|
||||||
|
}
|
||||||
1
data/checkpoints/job-1774622586005.json
Normal file
1
data/checkpoints/job-1774622586005.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"job_id":"job-1774622586005","index_name":"resumes_100k_v2","total_chunks":100000,"completed_ranges":[[92500,95000],[95000,97500],[90000,92500],[97500,100000]],"failed_ranges":[],"embedded_count":10000}
|
||||||
Loading…
x
Reference in New Issue
Block a user