// lakehouse/crates/shared/src/profiles/retrieval.rs

//! RetrievalProfile — what + how the agent reaches into memory.
//!
//! Phase 41 decomposition: the old ModelProfile bundled "what dataset
//! can I read" (bound_datasets) AND "how do I rank results"
//! (hnsw_config) with the model tag. Retrieval concerns split out here
//! so a profile can swap its retrieval strategy without re-activating
//! the model.
//!
//! Fields chosen for what's actually varied per-workload today:
//!   - `top_k` / `rerank_top_k` — how many hits to fetch + rerank
//!   - `freshness_cutoff_days` — Phase 45 doc-drift uses this
//!   - `boost_playbook_memory` — Phase 19 meta-index feedback
//!   - `enforce_sensitivity_gates` — Phase 13 access-control integration
//!
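//! Every field except `id` and `created_at` carries a serde default
//! (`#[serde(default)]` or `#[serde(default = "...")]`), so loading a
//! profile file that predates Phase 41 works without migration as long
//! as those two required fields are present.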

use serde::{Deserialize, Serialize};

/// Retrieval-side settings of a profile: how many hits to fetch and
/// rerank, how stale a document may be, and which feedback / access
/// control features apply.
///
/// On deserialization only `id` and `created_at` are mandatory; every
/// other field falls back to the default noted on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalProfile {
    /// Slug-style unique identifier. Lives in its own namespace,
    /// distinct from ExecutionProfile ids. Required.
    pub id: String,
    /// Operator-facing free-text description. Empty when absent.
    #[serde(default)]
    pub description: String,
    /// Default number of hits (top-K) for /vectors/search and
    /// /vectors/hybrid. Falls back to 10 when absent.
    #[serde(default = "default_top_k")]
    pub top_k: u32,
    /// Number of the top-K hits handed to the reranker; 0 turns
    /// reranking off for this profile. Falls back to 5 when absent.
    #[serde(default = "default_rerank_top_k")]
    pub rerank_top_k: u32,
    /// Maximum age, in days, of playbooks / docs to consider. A value
    /// of 0 (also the fallback when absent) means no freshness filter.
    #[serde(default)]
    pub freshness_cutoff_days: u32,
    /// Phase 19: when set, workers/results are boosted by
    /// playbook_memory similarity. Falls back to `false` when absent.
    #[serde(default)]
    pub boost_playbook_memory: bool,
    /// Phase 13: when set, access-control masking is applied to
    /// sensitive columns. Falls back to `true` when absent, so the
    /// safe behaviour is the default.
    #[serde(default = "default_true")]
    pub enforce_sensitivity_gates: bool,
    /// UTC creation timestamp. Required — there is no serde default.
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Creator of the profile (presumably an operator or service
    /// name — confirm against callers). Empty when absent.
    #[serde(default)]
    pub created_by: String,
}

/// Serde fallback for `RetrievalProfile::top_k` when the field is
/// missing from the serialized profile.
fn default_top_k() -> u32 {
    const FALLBACK_TOP_K: u32 = 10;
    FALLBACK_TOP_K
}
/// Serde fallback for `RetrievalProfile::rerank_top_k` when the field
/// is missing from the serialized profile.
fn default_rerank_top_k() -> u32 {
    const FALLBACK_RERANK_TOP_K: u32 = 5;
    FALLBACK_RERANK_TOP_K
}
/// Serde fallback for boolean fields that should be enabled when the
/// field is missing from the serialized profile.
fn default_true() -> bool {
    true
}