lakehouse/data/_catalog/manifests/3c2579b4-f3f3-4875-95fa-58d8b49ad94c.json
root 0bd753294b Robust SQL extraction: handles explanations, markdown, prefixes
clean_sql now uses 3 strategies in priority order:
1. Extract from ```sql...``` markdown blocks
2. Find first SELECT/WITH/INSERT statement in text
3. Strip leading "sql" keyword fallback

Tested against 5 real model output patterns:
- Clean SQL ✓
- "sql" prefixed ✓
- Markdown fenced ✓
- Explanation before ```sql block ✓
- Explanation with SELECT buried in text ✓

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 20:42:11 -05:00

93 lines
2.1 KiB
JSON

{
"id": "3c2579b4-f3f3-4875-95fa-58d8b49ad94c",
"name": "meta_runs",
"schema_fingerprint": "68f2c0d7a3ceb0aaa3c17c64900704519c72d213161bc9e5179c42ee53f6d0df",
"objects": [
{
"bucket": "data",
"key": "datasets/meta_runs.parquet",
"size_bytes": 729773,
"created_at": "2026-03-28T01:38:57.380576453Z"
}
],
"created_at": "2026-03-28T01:38:57.380577270Z",
"updated_at": "2026-03-28T01:38:57.380846224Z",
"description": "",
"owner": "",
"sensitivity": null,
"columns": [
{
"name": "id",
"data_type": "Int32",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "pipeline_id",
"data_type": "Int32",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "iteration",
"data_type": "Int32",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "stage_results",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "final_output",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "score",
"data_type": "Float64",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "model_config",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "duration_ms",
"data_type": "Int32",
"sensitivity": null,
"description": "",
"is_pii": false
},
{
"name": "created_at",
"data_type": "Utf8",
"sensitivity": null,
"description": "",
"is_pii": false
}
],
"lineage": {
"source_system": "postgresql",
"source_file": "127.0.0.1:5432/knowledge_base.meta_runs",
"ingest_job": "pg-import-1774661937380",
"ingest_timestamp": "2026-03-28T01:38:57.380576453Z",
"parent_datasets": []
},
"freshness": null,
"tags": [],
"row_count": 38
}