/// Clean AI-generated SQL: extract only the SQL query, strip everything else.
///
/// Three strategies are tried in order:
///
/// 1. Fenced code block: the first closed fence tagged `sql` (any case) wins;
///    otherwise the first closed fence of any kind is used as-is.
/// 2. First statement: the earliest whole-word `SELECT`/`WITH`/`INSERT`/
///    `UPDATE`/`DELETE`, cut after the first semicolon that is not inside a
///    quoted literal (or at the end of the text if there is none).
/// 3. Fallback: the trimmed input, minus a leading line that is just `sql`.
///
/// Always returns an owned, trimmed string; empty input yields an empty string.
fn clean_sql(raw: &str) -> String {
    let s = raw.trim();

    if let Some(sql) = fenced_block(s) {
        return sql.to_string();
    }
    if let Some(sql) = first_statement(s) {
        return sql.to_string();
    }

    let mut lines = s.lines();
    match lines.next() {
        Some(first) if first.trim().eq_ignore_ascii_case("sql") => {
            lines.collect::<Vec<_>>().join("\n").trim().to_string()
        }
        _ => s.to_string(),
    }
}

/// Returns the trimmed contents of the first closed fence tagged `sql`, or of
/// the first closed fence of any kind when none carries that tag.
fn fenced_block(s: &str) -> Option<&str> {
    const FENCE: &str = "```";

    let mut untagged: Option<&str> = None;
    let mut rest = s;
    while let Some(open) = rest.find(FENCE) {
        let after = &rest[open + FENCE.len()..];
        let close = match after.find(FENCE) {
            Some(close) => close,
            // An unclosed fence is not a block; let the other strategies run.
            None => break,
        };
        let inner = &after[..close];
        // Accept the tag right after the fence or on the block's first line.
        if let Some(body) = strip_sql_tag(inner.trim_start()) {
            return Some(body.trim());
        }
        if untagged.is_none() {
            untagged = Some(inner.trim());
        }
        rest = &after[close + FENCE.len()..];
    }
    untagged
}

/// Strips a leading `sql` language tag (any ASCII case), but only when it is a
/// whole token, so that text such as `sqlite_master` is left untouched.
fn strip_sql_tag(text: &str) -> Option<&str> {
    let head = text.get(..3)?;
    if !head.eq_ignore_ascii_case("sql") {
        return None;
    }
    let rest = &text[3..];
    match rest.chars().next() {
        None => Some(rest),
        Some(c) if c.is_whitespace() => Some(rest),
        Some(_) => None,
    }
}

/// Returns the first SQL statement found in free-form text, trimmed.
fn first_statement(s: &str) -> Option<&str> {
    // Prefer keywords written in upper case so that ordinary prose ("with",
    // "select") in front of the query is not mistaken for its start. The
    // fallback uses ASCII-only case folding, which keeps byte offsets identical
    // to `s`; full Unicode upper-casing can change byte lengths.
    let start = earliest_keyword(s).or_else(|| earliest_keyword(&s.to_ascii_uppercase()))?;
    let stmt = &s[start..];
    Some(stmt[..statement_end(stmt)].trim())
}

/// Byte offset of the earliest statement keyword that stands as a whole word.
///
/// Taking the earliest position (rather than a fixed keyword priority) keeps
/// `WITH ... (SELECT ...)` and `INSERT ... SELECT ...` statements intact.
fn earliest_keyword(haystack: &str) -> Option<usize> {
    const KEYWORDS: [&str; 5] = ["SELECT", "WITH", "INSERT", "UPDATE", "DELETE"];

    let bytes = haystack.as_bytes();
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    KEYWORDS
        .iter()
        .filter_map(|&kw| {
            haystack.match_indices(kw).map(|(i, _)| i).find(|&i| {
                let starts_word = i == 0 || !is_ident(bytes[i - 1]);
                let ends_word = bytes.get(i + kw.len()).map_or(true, |&b| !is_ident(b));
                starts_word && ends_word
            })
        })
        .min()
}

/// Length of `sql` up to and including the first semicolon that is outside a
/// single- or double-quoted literal, or the full length if there is none.
fn statement_end(sql: &str) -> usize {
    let mut quote: Option<u8> = None;
    for (i, b) in sql.bytes().enumerate() {
        match quote {
            // A doubled quote ('') closes and immediately reopens the literal,
            // which leaves the state correct without special handling.
            Some(q) => {
                if b == q {
                    quote = None;
                }
            }
            None => match b {
                b'\'' | b'"' => quote = Some(b),
                b';' => return i + 1,
                _ => {}
            },
        }
    }
    sql.len()
}
"freshness": null, + "tags": [], + "row_count": 1 +} \ No newline at end of file diff --git a/data/_catalog/manifests/3c2579b4-f3f3-4875-95fa-58d8b49ad94c.json b/data/_catalog/manifests/3c2579b4-f3f3-4875-95fa-58d8b49ad94c.json new file mode 100644 index 0000000..af6411b --- /dev/null +++ b/data/_catalog/manifests/3c2579b4-f3f3-4875-95fa-58d8b49ad94c.json @@ -0,0 +1,93 @@ +{ + "id": "3c2579b4-f3f3-4875-95fa-58d8b49ad94c", + "name": "meta_runs", + "schema_fingerprint": "68f2c0d7a3ceb0aaa3c17c64900704519c72d213161bc9e5179c42ee53f6d0df", + "objects": [ + { + "bucket": "data", + "key": "datasets/meta_runs.parquet", + "size_bytes": 729773, + "created_at": "2026-03-28T01:38:57.380576453Z" + } + ], + "created_at": "2026-03-28T01:38:57.380577270Z", + "updated_at": "2026-03-28T01:38:57.380846224Z", + "description": "", + "owner": "", + "sensitivity": null, + "columns": [ + { + "name": "id", + "data_type": "Int32", + "sensitivity": null, + "description": "", + "is_pii": false + }, + { + "name": "pipeline_id", + "data_type": "Int32", + "sensitivity": null, + "description": "", + "is_pii": false + }, + { + "name": "iteration", + "data_type": "Int32", + "sensitivity": null, + "description": "", + "is_pii": false + }, + { + "name": "stage_results", + "data_type": "Utf8", + "sensitivity": null, + "description": "", + "is_pii": false + }, + { + "name": "final_output", + "data_type": "Utf8", + "sensitivity": null, + "description": "", + "is_pii": false + }, + { + "name": "score", + "data_type": "Float64", + "sensitivity": null, + "description": "", + "is_pii": false + }, + { + "name": "model_config", + "data_type": "Utf8", + "sensitivity": null, + "description": "", + "is_pii": false + }, + { + "name": "duration_ms", + "data_type": "Int32", + "sensitivity": null, + "description": "", + "is_pii": false + }, + { + "name": "created_at", + "data_type": "Utf8", + "sensitivity": null, + "description": "", + "is_pii": false + } + ], + "lineage": { + "source_system": "postgresql", + 
"source_file": "127.0.0.1:5432/knowledge_base.meta_runs", + "ingest_job": "pg-import-1774661937380", + "ingest_timestamp": "2026-03-28T01:38:57.380576453Z", + "parent_datasets": [] + }, + "freshness": null, + "tags": [], + "row_count": 38 +} \ No newline at end of file diff --git a/data/datasets/meta_runs.parquet b/data/datasets/meta_runs.parquet new file mode 100644 index 0000000..63626e7 Binary files /dev/null and b/data/datasets/meta_runs.parquet differ diff --git a/data/datasets/users.parquet b/data/datasets/users.parquet new file mode 100644 index 0000000..45f3be7 Binary files /dev/null and b/data/datasets/users.parquet differ