10,000 staffing worker profiles from profit/ethereal repo. Flattened JSON → CSV → Parquet. Indexed on HNSW (9.5s) + Lance IVF_PQ (7.2s). SQL hybrid verified: forklift operators in IL with reliability > 0.8 returned exact matches. Vector search alone missed the state filter — confirms the hybrid SQL+vector routing need from quality eval. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
159 lines
3.4 KiB
JSON
159 lines
3.4 KiB
JSON
{
|
|
"id": "0efdef2d-52bc-4154-82d4-056bb3a1915d",
|
|
"name": "ethereal_workers",
|
|
"schema_fingerprint": "9a1286ffada5390b459b217f56263e5ebffec1520ea29bab0be2efc6d5381adc",
|
|
"objects": [
|
|
{
|
|
"bucket": "primary",
|
|
"key": "datasets/ethereal_workers.parquet",
|
|
"size_bytes": 6991716,
|
|
"created_at": "2026-04-17T03:23:29.399966984Z"
|
|
}
|
|
],
|
|
"created_at": "2026-04-17T03:23:29.399967772Z",
|
|
"updated_at": "2026-04-17T03:23:29.400227081Z",
|
|
"description": "",
|
|
"owner": "",
|
|
"sensitivity": "pii",
|
|
"columns": [
|
|
{
|
|
"name": "worker_id",
|
|
"data_type": "Int64",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "name",
|
|
"data_type": "Utf8",
|
|
"sensitivity": "pii",
|
|
"description": "",
|
|
"is_pii": true
|
|
},
|
|
{
|
|
"name": "role",
|
|
"data_type": "Utf8",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "email",
|
|
"data_type": "Utf8",
|
|
"sensitivity": "pii",
|
|
"description": "",
|
|
"is_pii": true
|
|
},
|
|
{
|
|
"name": "phone",
|
|
"data_type": "Int64",
|
|
"sensitivity": "pii",
|
|
"description": "",
|
|
"is_pii": true
|
|
},
|
|
{
|
|
"name": "city",
|
|
"data_type": "Utf8",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "state",
|
|
"data_type": "Utf8",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "zip",
|
|
"data_type": "Int64",
|
|
"sensitivity": "pii",
|
|
"description": "",
|
|
"is_pii": true
|
|
},
|
|
{
|
|
"name": "skills",
|
|
"data_type": "Utf8",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "certifications",
|
|
"data_type": "Utf8",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "archetype",
|
|
"data_type": "Utf8",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "reliability",
|
|
"data_type": "Float64",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "responsiveness",
|
|
"data_type": "Float64",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "engagement",
|
|
"data_type": "Float64",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "compliance",
|
|
"data_type": "Float64",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "availability",
|
|
"data_type": "Float64",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "communications",
|
|
"data_type": "Utf8",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
},
|
|
{
|
|
"name": "resume_text",
|
|
"data_type": "Utf8",
|
|
"sensitivity": null,
|
|
"description": "",
|
|
"is_pii": false
|
|
}
|
|
],
|
|
"lineage": {
|
|
"source_system": "csv",
|
|
"source_file": "ethereal_workers.csv",
|
|
"ingest_job": "ingest-1776396209399",
|
|
"ingest_timestamp": "2026-04-17T03:23:29.399966984Z",
|
|
"parent_datasets": []
|
|
},
|
|
"freshness": null,
|
|
"tags": [],
|
|
"row_count": 10000,
|
|
"last_embedded_at": null,
|
|
"embedding_stale_since": null,
|
|
"embedding_refresh_policy": null
|
|
} |