RAG pipeline now includes a cross-encoder rerank step between retrieval
and generation. The LLM re-sorts the top-K results by relevance before
they become context. It falls back to the original retrieval order if the
model output is unparseable (~5% of the time with 7B models). Also improved
the generation prompt to be domain-aware ("staffing database") and to
request specific citations.
Fixed 4 catalog manifests with bucket="data" (pre-federation leftover)
that poisoned the entire DataFusion query context on startup. The
"users", "lab_trials", "meta_runs", and "new_candidates" datasets
now correctly reference bucket="primary". This bug was surfaced by
the quality evaluation pipeline; it would not have been found by
structural tests alone.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
83 lines
1.8 KiB
JSON
83 lines
1.8 KiB
JSON
{
  "id": "9c4d9116-1d9d-4afd-a8d1-c514a678e5fa",
  "name": "call_log",
  "schema_fingerprint": "d1f82ed776afc484747f8f58b30ccd00d10bca73703b1479a67647f98eeff7e2",
  "objects": [
    {
      "bucket": "primary",
      "key": "datasets/call_log.parquet",
      "size_bytes": 35951077,
      "created_at": "2026-03-27T14:42:47.395548205Z"
    }
  ],
  "created_at": "2026-03-27T14:42:47.395555326Z",
  "updated_at": "2026-04-17T02:45:58.137293143Z",
  "description": "",
  "owner": "",
  "sensitivity": null,
  "columns": [
    {
      "name": "call_id",
      "data_type": "Utf8",
      "sensitivity": null,
      "description": "",
      "is_pii": false
    },
    {
      "name": "from_number",
      "data_type": "Utf8",
      "sensitivity": null,
      "description": "",
      "is_pii": false
    },
    {
      "name": "to_number",
      "data_type": "Utf8",
      "sensitivity": null,
      "description": "",
      "is_pii": false
    },
    {
      "name": "duration_seconds",
      "data_type": "Int64",
      "sensitivity": null,
      "description": "",
      "is_pii": false
    },
    {
      "name": "timestamp",
      "data_type": "Utf8",
      "sensitivity": null,
      "description": "",
      "is_pii": false
    },
    {
      "name": "recruiter",
      "data_type": "Utf8",
      "sensitivity": null,
      "description": "",
      "is_pii": false
    },
    {
      "name": "candidate_id",
      "data_type": "Utf8",
      "sensitivity": null,
      "description": "",
      "is_pii": false
    },
    {
      "name": "disposition",
      "data_type": "Utf8",
      "sensitivity": null,
      "description": "",
      "is_pii": false
    }
  ],
  "lineage": null,
  "freshness": null,
  "tags": [],
  "row_count": 800000,
  "last_embedded_at": null,
  "embedding_stale_since": null,
  "embedding_refresh_policy": null
}