7 changed files with 49 additions and 432 deletions
--- a/crates/catalogd/src/registry.rs
+++ b/crates/catalogd/src/registry.rs
@ -1,7 +1,4 @@
-use shared::types::{
-    DatasetId, DatasetManifest, ObjectRef, SchemaFingerprint,
-    ColumnMeta, Lineage, FreshnessContract, Sensitivity,
-};
+use shared::types::{DatasetId, DatasetManifest, ObjectRef, SchemaFingerprint};
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::RwLock;
@ -9,19 +6,6 @@ use tokio::sync::RwLock;
 use storaged::ops;
 use object_store::ObjectStore;

-/// Partial metadata update — only set fields are applied.
-#[derive(Debug, Clone, Default, serde::Deserialize)]
-pub struct MetadataUpdate {
-    pub description: Option<String>,
-    pub owner: Option<String>,
-    pub sensitivity: Option<Sensitivity>,
-    pub tags: Option<Vec<String>>,
-    pub columns: Option<Vec<ColumnMeta>>,
-    pub lineage: Option<Lineage>,
-    pub freshness: Option<FreshnessContract>,
-    pub row_count: Option<u64>,
-}
-
 const MANIFEST_PREFIX: &str = "_catalog/manifests";

 /// In-memory dataset registry backed by manifest persistence in object storage.
@ -70,14 +54,6 @@ impl Registry {
            objects,
            created_at: now,
            updated_at: now,
-            description: String::new(),
-            owner: String::new(),
-            sensitivity: None,
-            columns: vec![],
-            lineage: None,
-            freshness: None,
-            tags: vec![],
-            row_count: None,
        };

        // Write-ahead: persist before in-memory update
@ -92,36 +68,6 @@ impl Registry {
        Ok(manifest)
    }

-    /// Update metadata on an existing dataset (owner, description, tags, sensitivity, etc.)
-    pub async fn update_metadata(
-        &self,
-        name: &str,
-        updates: MetadataUpdate,
-    ) -> Result<DatasetManifest, String> {
-        let mut datasets = self.datasets.write().await;
-        let manifest = datasets.values_mut()
-            .find(|d| d.name == name)
-            .ok_or_else(|| format!("dataset not found: {name}"))?;
-
-        if let Some(desc) = updates.description { manifest.description = desc; }
-        if let Some(owner) = updates.owner { manifest.owner = owner; }
-        if let Some(sens) = updates.sensitivity { manifest.sensitivity = Some(sens); }
-        if let Some(tags) = updates.tags { manifest.tags = tags; }
-        if let Some(cols) = updates.columns { manifest.columns = cols; }
-        if let Some(lineage) = updates.lineage { manifest.lineage = Some(lineage); }
-        if let Some(freshness) = updates.freshness { manifest.freshness = Some(freshness); }
-        if let Some(count) = updates.row_count { manifest.row_count = Some(count); }
-        manifest.updated_at = chrono::Utc::now();
-
-        // Persist
-        let manifest_key = format!("{MANIFEST_PREFIX}/{}.json", manifest.id);
-        let json = serde_json::to_vec_pretty(manifest).map_err(|e| e.to_string())?;
-        ops::put(&self.store, &manifest_key, json.into()).await?;
-
-        let result = manifest.clone();
-        Ok(result)
-    }
-
    /// Get a dataset by ID.
    pub async fn get(&self, id: &DatasetId) -> Option<DatasetManifest> {
        let datasets = self.datasets.read().await;
--- a/crates/catalogd/src/service.rs
+++ b/crates/catalogd/src/service.rs
@ -18,7 +18,6 @@ pub fn router(registry: Registry) -> Router {
        .route("/datasets", get(list_datasets))
        .route("/datasets/{id}", get(get_dataset))
        .route("/datasets/by-name/{name}", get(get_dataset_by_name))
-        .route("/datasets/by-name/{name}/metadata", post(update_metadata))
        .with_state(registry)
 }

@ -48,15 +47,6 @@ struct DatasetResponse {
    objects: Vec<ObjectRefResponse>,
    created_at: String,
    updated_at: String,
-    // Rich metadata
-    description: String,
-    owner: String,
-    sensitivity: Option<shared::types::Sensitivity>,
-    columns: Vec<shared::types::ColumnMeta>,
-    lineage: Option<shared::types::Lineage>,
-    freshness: Option<shared::types::FreshnessContract>,
-    tags: Vec<String>,
-    row_count: Option<u64>,
 }

 #[derive(Serialize)]
@ -81,14 +71,6 @@ impl From<&shared::types::DatasetManifest> for DatasetResponse {
            }).collect(),
            created_at: m.created_at.to_rfc3339(),
            updated_at: m.updated_at.to_rfc3339(),
-            description: m.description.clone(),
-            owner: m.owner.clone(),
-            sensitivity: m.sensitivity.clone(),
-            columns: m.columns.clone(),
-            lineage: m.lineage.clone(),
-            freshness: m.freshness.clone(),
-            tags: m.tags.clone(),
-            row_count: m.row_count,
        }
    }
 }
@ -141,14 +123,3 @@ async fn get_dataset_by_name(
        None => Err((StatusCode::NOT_FOUND, format!("dataset not found: {name}"))),
    }
 }
-
-async fn update_metadata(
-    State(registry): State<Registry>,
-    Path(name): Path<String>,
-    Json(updates): Json<crate::registry::MetadataUpdate>,
-) -> impl IntoResponse {
-    match registry.update_metadata(&name, updates).await {
-        Ok(manifest) => Ok(Json(DatasetResponse::from(&manifest))),
-        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
-    }
-}
--- a/crates/ingestd/src/pipeline.rs
+++ b/crates/ingestd/src/pipeline.rs
@ -94,7 +94,7 @@ pub async fn ingest_file(
    ops::put(store, &storage_key, parquet_bytes).await?;
    tracing::info!("stored {} as {} ({} bytes)", filename, storage_key, parquet_size);

-    // 7. Register in catalog with rich metadata
+    // 7. Register in catalog
    let schema_fp = fingerprint_schema(&schema);
    let now = chrono::Utc::now();
    let obj_ref = ObjectRef {
@ -104,49 +104,13 @@ pub async fn ingest_file(
        created_at: now,
    };

-    // Register base dataset
+    // Use content hash as fingerprint for dedup tracking
    registry.register(
        safe_name.clone(),
        SchemaFingerprint(hash.clone()),
        vec![obj_ref],
    ).await?;

-    // Auto-populate metadata: PII detection, column info, lineage, row count
-    let col_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
-    let dataset_sensitivity = shared::pii::detect_dataset_sensitivity(&col_names);
-
-    let column_meta: Vec<shared::types::ColumnMeta> = schema.fields().iter().map(|f| {
-        let sens = shared::pii::detect_sensitivity(f.name());
-        shared::types::ColumnMeta {
-            name: f.name().clone(),
-            data_type: f.data_type().to_string(),
-            sensitivity: sens.clone(),
-            description: String::new(),
-            is_pii: matches!(sens, Some(shared::types::Sensitivity::Pii)),
-        }
-    }).collect();
-
-    let lineage = shared::types::Lineage {
-        source_system: format!("{:?}", file_type).to_lowercase(),
-        source_file: filename.to_string(),
-        ingest_job: format!("ingest-{}", now.timestamp_millis()),
-        ingest_timestamp: now,
-        parent_datasets: vec![],
-    };
-
-    let pii_count = column_meta.iter().filter(|c| c.is_pii).count();
-    if pii_count > 0 {
-        tracing::info!("auto-detected {} PII columns in '{}'", pii_count, safe_name);
-    }
-
-    let _ = registry.update_metadata(&safe_name, catalogd::registry::MetadataUpdate {
-        sensitivity: dataset_sensitivity,
-        columns: Some(column_meta),
-        lineage: Some(lineage),
-        row_count: Some(total_rows as u64),
-        ..Default::default()
-    }).await;
-
    Ok(IngestResult {
        dataset_name: safe_name,
        file_type: format!("{:?}", file_type),
--- a/crates/shared/src/lib.rs
+++ b/crates/shared/src/lib.rs
@ -2,4 +2,3 @@ pub mod types;
 pub mod errors;
 pub mod arrow_helpers;
 pub mod config;
-pub mod pii;
--- a/crates/shared/src/pii.rs
+++ b/crates/shared/src/pii.rs
@ -1,116 +0,0 @@
-/// Auto-detect PII columns by name patterns.
-/// Conservative: flags likely PII, doesn't miss obvious cases.
-
-use crate::types::Sensitivity;
-
-/// Check if a column name suggests PII content.
-pub fn detect_sensitivity(column_name: &str) -> Option<Sensitivity> {
-    let lower = column_name.to_lowercase();
-
-    // Direct PII identifiers
-    if matches!(lower.as_str(),
-        "ssn" | "social_security" | "social_security_number" |
-        "sin" | "national_id" | "passport" | "passport_number" |
-        "drivers_license" | "driver_license" | "dl_number"
-    ) {
-        return Some(Sensitivity::Pii);
-    }
-
-    // Names
-    if lower.contains("first_name") || lower.contains("last_name") ||
-       lower.contains("full_name") || lower.contains("middle_name") ||
-       lower == "name" || lower == "fname" || lower == "lname" {
-        return Some(Sensitivity::Pii);
-    }
-
-    // Contact info
-    if lower.contains("email") || lower.contains("e_mail") ||
-       lower.contains("phone") || lower.contains("mobile") || lower.contains("cell") ||
-       lower.contains("fax") || lower == "tel" || lower == "telephone" {
-        return Some(Sensitivity::Pii);
-    }
-
-    // Address
-    if lower.contains("address") || lower.contains("street") ||
-       (lower.contains("zip") && !lower.contains("unzip")) ||
-       lower == "postal_code" || lower == "postcode" {
-        return Some(Sensitivity::Pii);
-    }
-
-    // Financial
-    if lower.contains("salary") || lower.contains("wage") ||
-       lower.contains("pay_rate") || lower.contains("bill_rate") ||
-       lower.contains("compensation") || lower.contains("revenue") ||
-       lower.contains("bank_account") || lower.contains("routing_number") ||
-       lower.contains("credit_card") {
-        return Some(Sensitivity::Financial);
-    }
-
-    // Health
-    if lower.contains("diagnosis") || lower.contains("medication") ||
-       lower.contains("medical") || lower.contains("health") ||
-       lower.contains("patient_id") || lower.contains("mrn") {
-        return Some(Sensitivity::Phi);
-    }
-
-    // Date of birth
-    if lower == "dob" || lower == "date_of_birth" || lower == "birthdate" || lower == "birth_date" {
-        return Some(Sensitivity::Pii);
-    }
-
-    None
-}
-
-/// Classify all columns and return the highest sensitivity found.
-pub fn detect_dataset_sensitivity(column_names: &[&str]) -> Option<Sensitivity> {
-    let mut highest: Option<Sensitivity> = None;
-
-    for name in column_names {
-        if let Some(sens) = detect_sensitivity(name) {
-            highest = Some(match (&highest, &sens) {
-                (None, s) => s.clone(),
-                (Some(Sensitivity::Public), s) => s.clone(),
-                (Some(Sensitivity::Internal), s) if !matches!(s, Sensitivity::Public | Sensitivity::Internal) => s.clone(),
-                (Some(Sensitivity::Financial), Sensitivity::Pii | Sensitivity::Phi) => sens.clone(),
-                (Some(Sensitivity::Pii), Sensitivity::Phi) => Sensitivity::Phi,
-                (existing, _) => existing.clone().unwrap(),
-            });
-        }
-    }
-
-    highest
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn detects_email_as_pii() {
-        assert_eq!(detect_sensitivity("email"), Some(Sensitivity::Pii));
-        assert_eq!(detect_sensitivity("contact_email"), Some(Sensitivity::Pii));
-    }
-
-    #[test]
-    fn detects_salary_as_financial() {
-        assert_eq!(detect_sensitivity("salary"), Some(Sensitivity::Financial));
-        assert_eq!(detect_sensitivity("bill_rate"), Some(Sensitivity::Financial));
-    }
-
-    #[test]
-    fn detects_ssn() {
-        assert_eq!(detect_sensitivity("ssn"), Some(Sensitivity::Pii));
-    }
-
-    #[test]
-    fn non_sensitive_returns_none() {
-        assert_eq!(detect_sensitivity("status"), None);
-        assert_eq!(detect_sensitivity("created_at"), None);
-    }
-
-    #[test]
-    fn dataset_sensitivity_picks_highest() {
-        let cols = vec!["id", "name", "email", "status"];
-        assert_eq!(detect_dataset_sensitivity(&cols), Some(Sensitivity::Pii));
-    }
-}
--- a/crates/shared/src/types.rs
+++ b/crates/shared/src/types.rs
@ -30,50 +30,7 @@ pub struct ObjectRef {
 #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub struct SchemaFingerprint(pub String);

-/// Sensitivity classification for a field or dataset.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum Sensitivity {
-    Public,      // anyone can see
-    Internal,    // employees only
-    Pii,         // personally identifiable (name, email, phone, SSN)
-    Phi,         // protected health info (HIPAA)
-    Financial,   // compensation, billing, revenue
-}
-
-/// Column-level metadata.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ColumnMeta {
-    pub name: String,
-    pub data_type: String,
-    #[serde(default)]
-    pub sensitivity: Option<Sensitivity>,
-    #[serde(default)]
-    pub description: String,
-    #[serde(default)]
-    pub is_pii: bool,
-}
-
-/// Data lineage — where this dataset came from.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Lineage {
-    pub source_system: String,    // "csv_upload", "mysql_import", "api_ingest"
-    pub source_file: String,      // original filename or URI
-    pub ingest_job: String,       // job ID that created this dataset
-    pub ingest_timestamp: chrono::DateTime<chrono::Utc>,
-    #[serde(default)]
-    pub parent_datasets: Vec<String>, // derived from these datasets
-}
-
-/// Freshness contract — how often this data should be updated.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct FreshnessContract {
-    pub expected_frequency: String, // "daily", "weekly", "hourly", "manual"
-    pub stale_after_hours: u32,     // alert if not updated within this many hours
-}
-
-/// Dataset manifest — the catalog's complete view of a dataset.
-/// All new fields use #[serde(default)] for backward compatibility with existing manifests.
+/// Dataset manifest — the catalog's view of a dataset.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct DatasetManifest {
    pub id: DatasetId,
@ -82,38 +39,4 @@ pub struct DatasetManifest {
    pub objects: Vec<ObjectRef>,
    pub created_at: chrono::DateTime<chrono::Utc>,
    pub updated_at: chrono::DateTime<chrono::Utc>,
-
-    // --- Rich metadata (Phase 10) ---
-
-    /// Human-readable description (auto-generated on ingest or manual)
-    #[serde(default)]
-    pub description: String,
-
-    /// Who owns this dataset
-    #[serde(default)]
-    pub owner: String,
-
-    /// Overall sensitivity classification
-    #[serde(default)]
-    pub sensitivity: Option<Sensitivity>,
-
-    /// Column-level metadata including sensitivity and descriptions
-    #[serde(default)]
-    pub columns: Vec<ColumnMeta>,
-
-    /// Where this data came from
-    #[serde(default)]
-    pub lineage: Option<Lineage>,
-
-    /// How fresh the data should be
-    #[serde(default)]
-    pub freshness: Option<FreshnessContract>,
-
-    /// Free-form tags for discovery
-    #[serde(default)]
-    pub tags: Vec<String>,
-
-    /// Row count (updated on ingest/compact)
-    #[serde(default)]
-    pub row_count: Option<u64>,
 }
--- a/docs/PHASES.md
+++ b/docs/PHASES.md
@ -1,128 +1,58 @@
 # Phase Tracker

-## Phase 0: Bootstrap ✅
- [x] Cargo workspace with all crate stubs compiling
- [x] `shared` crate: error types, ObjectRef, DatasetId
- [x] `gateway` with Axum: GET /health → 200
- [x] tracing + tracing-subscriber wired in gateway
- [x] justfile with build, test, run recipes
- [x] docs committed to git
+## Phase 0: Bootstrap
+- [x] 0.1 — Cargo workspace with all crate stubs compiling
+- [x] 0.2 — `shared` crate: error types, ObjectRef, DatasetId
+- [x] 0.3 — `gateway` with Axum: GET /health → 200
+- [x] 0.4 — tracing + tracing-subscriber wired in gateway
+- [x] 0.5 — justfile with build, test, run recipes
+- [x] 0.6 — docs committed to git

-## Phase 1: Storage + Catalog ✅
- [x] storaged: object_store backend init (LocalFileSystem)
- [x] storaged: Axum endpoints (PUT/GET/DELETE/LIST)
- [x] shared/arrow_helpers.rs: RecordBatch ↔ Parquet + schema fingerprinting
- [x] catalogd/registry.rs: in-memory index + manifest persistence
- [x] catalogd service: POST/GET /datasets + by-name
- [x] gateway routes wired
+**Gate: PASSED** — All crates compile. Gateway runs. Logs emit. Docs committed.

-## Phase 2: Query Engine ✅
- [x] queryd: SessionContext + object_store config
- [x] queryd: ListingTable from catalog ObjectRefs
- [x] queryd service: POST /query/sql → JSON
- [x] queryd → catalogd wiring
- [x] gateway routes /query
+## Phase 1: Storage + Catalog
+- [x] 1.1 — storaged: object_store backend init (LocalFileSystem)
+- [x] 1.2 — storaged: Axum endpoints (PUT/GET/DELETE/LIST /objects/{key})
+- [x] 1.3 — shared/arrow_helpers.rs: RecordBatch ↔ Parquet + schema fingerprinting
+- [x] 1.4 — catalogd/registry.rs: in-memory index + manifest persistence to object storage
+- [x] 1.5 — catalogd/schema.rs: schema fingerprinting (merged into shared/arrow_helpers.rs)
+- [x] 1.6 — catalogd service: POST/GET /datasets + GET /datasets/by-name/{name}
+- [x] 1.7 — gateway routes to storaged + catalogd with shared state

-## Phase 3: AI Integration ✅
- [x] Python sidecar: FastAPI + Ollama (embed/generate/rerank)
- [x] Dockerfile for sidecar
- [x] aibridge/client.rs: HTTP client
- [x] aibridge service: Axum proxy endpoints
- [x] Model config via env vars
+**Gate: PASSED** — PUT object → register dataset → list → get by name. All via gateway HTTP.

-## Phase 4: Frontend ✅
- [x] Dioxus scaffold, WASM build
- [x] Ask tab: natural language → AI SQL → results
- [x] Explore tab: dataset browser + AI summary
- [x] SQL tab: raw DataFusion editor
- [x] System tab: health checks for all services
+## Phase 2: Query Engine
+- [x] 2.1 — queryd: SessionContext + object_store config (custom scheme to avoid path doubling)
+- [x] 2.2 — queryd: ListingTable from catalog ObjectRefs with schema inference
+- [x] 2.3 — queryd service: POST /query/sql → JSON (columns + rows + row_count)
+- [x] 2.4 — queryd → catalogd wiring (reads dataset list, registers as tables)
+- [x] 2.5 — gateway routes /query with QueryEngine state

-## Phase 5: Hardening ✅
- [x] Proto definitions (lakehouse.proto)
- [x] Internal gRPC: CatalogService on :3101
- [x] OpenTelemetry tracing: stdout exporter
- [x] Auth middleware: X-API-Key (toggleable)
- [x] Config-driven startup: lakehouse.toml
+**Gate: PASSED** — SELECT *, WHERE/ORDER BY, COUNT/AVG all return correct results via catalog.

-## Phase 6: Ingest Pipeline ✅
- [x] CSV ingest with auto schema detection
- [x] JSON ingest (array + newline-delimited, nested flattening)
- [x] PDF text extraction (lopdf)
- [x] Text/SMS file ingest
- [x] Content hash dedup (SHA-256)
- [x] POST /ingest/file multipart upload
- [x] 12 unit tests
+## Phase 3: AI Integration
+- [x] 3.1 — Python sidecar: FastAPI + Ollama (embed/generate/rerank) — real models, no mocks
+- [x] 3.2 — Dockerfile for sidecar
+- [x] 3.3 — aibridge/client.rs: reqwest HTTP client with 120s timeout
+- [x] 3.4 — aibridge service: Axum proxy endpoints (POST /ai/embed, /ai/generate, /ai/rerank)
+- [x] 3.5 — Model config via env vars (EMBED_MODEL, GEN_MODEL, RERANK_MODEL, SIDECAR_URL)

-## Phase 7: Vector Index + RAG ✅
- [x] chunker: configurable size + overlap, sentence-boundary aware
- [x] store: embeddings as Parquet (binary f32 vectors)
- [x] search: brute-force cosine similarity
- [x] rag: embed → search → retrieve → LLM answer with citations
- [x] POST /vectors/index, /search, /rag
- [x] Background job system with progress tracking
- [x] Dual-pipeline supervisor with checkpointing + retry
- [x] 6 unit tests
+**Gate: PASSED** — Gateway → aibridge → sidecar → Ollama → real 768d embeddings + generation.

-## Phase 8: Hot Cache + Incremental Updates ✅
- [x] MemTable hot cache: LRU, configurable max (16GB)
- [x] POST /query/cache/pin, /cache/evict, GET /cache/stats
- [x] Delta store: append-only delta Parquet files
- [x] Merge-on-read: queries combine base + deltas
- [x] Compaction: POST /query/compact
- [x] Benchmarked: 9.8x speedup (1M rows: 942ms → 96ms)
+## Phase 4: Frontend
+- [x] 4.1 — Dioxus scaffold, WASM build (dx build --platform web)
+- [x] 4.2 — Dataset browser (sidebar, click to select, refresh)
+- [x] 4.3 — Query editor + results table (Ctrl+Enter to run, column types, row count)
+- [x] 4.4 — Error display + loading states
+- [x] 4.5 — Nginx proxy (lakehouse.devop.live), same-origin API detection

-## Phase 8.5: Agent Workspaces ✅
- [x] WorkspaceManager with daily/weekly/monthly/pinned tiers
- [x] Saved searches, shortlists, activity logs per workspace
- [x] Instant zero-copy handoff between agents
- [x] Persistence to object storage, rebuild on startup
+**Gate: PASSED** — Browse datasets and query from browser at lakehouse.devop.live.

-## Phase 9: Event Journal ✅
- [x] journald crate: append-only mutation log
- [x] Event schema: entity, field, old/new value, actor, source, workspace
- [x] In-memory buffer with auto-flush to Parquet
- [x] GET /journal/history/{entity_id}, /recent, /stats
- [x] POST /journal/event, /update, /flush
+## Phase 5: Hardening
+- [x] 5.1 — Proto definitions (lakehouse.proto: CatalogService, QueryService, StorageService, AiService)
+- [x] 5.2 — Internal gRPC: CatalogService on :3101, proto crate with tonic codegen
+- [x] 5.3 — OpenTelemetry tracing: stdout exporter, configurable via lakehouse.toml
+- [x] 5.4 — Auth middleware: X-API-Key header check, toggleable via config
+- [x] 5.5 — Config-driven startup: lakehouse.toml (gateway, storage, catalog, sidecar, ai, auth, observability)

-## Phase 10: Rich Catalog v2 ✅
- [x] DatasetManifest: description, owner, sensitivity, columns, lineage, freshness, tags
- [x] PII auto-detection: email, phone, SSN, salary, address, medical
- [x] Column-level metadata with sensitivity flags
- [x] Lineage tracking: source_system → ingest_job → dataset
- [x] PATCH /catalog/datasets/by-name/{name}/metadata
- [x] Backward compatible (serde default)
- [x] 25 unit tests total
-
-## Phase 11: Embedding Versioning ⬜
- [ ] Vector index metadata: model_name, model_version, dimensions
- [ ] Multi-version indexes coexist
- [ ] Incremental re-embed on model upgrade
- [ ] A/B search comparison
-
-## Phase 12: Tool Registry ⬜
- [ ] Named business actions with parameter validation
- [ ] Read vs write tool permissions
- [ ] Audit logging per tool invocation
- [ ] MCP-compatible interface
- [ ] Rate limiting per agent/tool
-
-## Phase 13: Security & Access Control ⬜
- [ ] Field-level sensitivity enforcement
- [ ] Row-level access policies
- [ ] Column masking
- [ ] Query audit log
- [ ] Policy-as-code (TOML/YAML)
-
-## Phase 14: Schema Evolution ⬜
- [ ] Schema diff detection
- [ ] AI-generated migration rules
- [ ] Migration preview before apply
- [ ] Versioned schemas in catalog
-
-## Phase 15+: Horizon ⬜
- [ ] Federated multi-bucket query
- [ ] Database connector ingest (Postgres/MySQL)
- [ ] PDF OCR (Tesseract)
- [ ] Scheduled ingest (cron)
- [ ] Fine-tuned domain models
- [ ] Multi-node query distribution
+**Gate: PASSED** — gRPC on :3101, OTel traces, auth ready, system starts from repo + lakehouse.toml.