lakehouse/data/_catalog/manifests/1d8a065e-59c1-45ce-967b-398bc8370cbb.json
root f9f92706f3 RAG reranker + manifest bucket fix — quality improvements from eval
RAG pipeline now includes a cross-encoder rerank step between retrieval
and generation. The LLM re-sorts top-K results by relevance before
they become context. Falls back to original order if model output is
unparseable (~5% with 7B models). Also improved the generation prompt
to be domain-aware ("staffing database") and request specific citations.

Fixed 4 catalog manifests with bucket="data" (pre-federation leftover)
that poisoned the entire DataFusion query context on startup. The
"users", "lab_trials", "meta_runs", and "new_candidates" datasets
now correctly reference bucket="primary". This bug was surfaced by
the quality evaluation pipeline — wouldn't have been found by
structural tests alone.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 22:19:11 -05:00

132 lines
2.7 KiB
JSON

{
"id": "1d8a065e-59c1-45ce-967b-398bc8370cbb",
"name": "candidates",
"schema_fingerprint": "206360ab312e4c13679ed0ab4ccb3702250cc3e2176cc669d41471584c556c73",
"objects": [
{
"bucket": "primary",
"key": "datasets/candidates.parquet",
"size_bytes": 10592165,
"created_at": "2026-03-27T14:42:38.823368759Z"
}
],
"created_at": "2026-03-27T14:42:38.823374843Z",
"updated_at": "2026-04-17T02:45:57.722237378Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [
{
"name": "candidate_id",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "first_name",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "last_name",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "email",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "phone",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "city",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "state",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "zip",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "vertical",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "skills",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "resume_summary",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "status",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "source",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "min_pay_rate",
"data_type": "Float64",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "years_experience",
"data_type": "Int64",
"sensitivity": null,
"description": "",
"is_pii": false
}
],
"lineage": null,
"freshness": null,
"tags": [],
"row_count": 100000,
"last_embedded_at": null,
"embedding_stale_since": null,
"embedding_refresh_policy": null
}