Five threads of work landing as one milestone — all individually
verified end-to-end against real data, full release build clean,
46 unit tests pass.
## Phase 16.2 / 16.5 — autotune agent + ingest triggers
`vectord::agent` is a long-running tokio task that watches the trial
journal and autonomously proposes + runs new HNSW configs. Distinct
from `autotune::run_autotune` (synchronous one-shot grid). Triggered
on POST /vectors/agent/enqueue/{idx} or by the periodic wake; ingest
paths now push DatasetAppended events when an index's source dataset
gets re-ingested. Rate-limited (max_trials_per_hour) and cooldown-
gated so it can't saturate Ollama under live load.
The proposer is ε-greedy around the current champion: with probability 0.25
it samples uniformly from the full bounds, otherwise it perturbs the champion
by a small delta on both axes. Proposals are deduped against history.
Deterministic — the RNG is seeded from history.len(), so the same journal
state proposes the same next config (helps offline replay debugging).
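A sketch of that proposal loop, with hypothetical parameter names, bounds, and
a splitmix64 stand-in for the real RNG (the actual `vectord::agent` types
differ):

```rust
// Illustrative ε-greedy proposer. HnswParams, the bounds, and the delta
// ranges are assumptions for the sketch, not the real agent's values.
#[derive(Clone, Copy, Debug, PartialEq)]
struct HnswParams { m: u32, ef_construction: u32 }

const M_BOUNDS: (u32, u32) = (4, 64);
const EF_BOUNDS: (u32, u32) = (32, 512);

// Tiny deterministic PRNG (splitmix64): same seed, same sequence.
fn next(state: &mut u64) -> u64 {
    *state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
    let mut z = *state;
    z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    z ^ (z >> 31)
}

fn clamp(v: i64, (lo, hi): (u32, u32)) -> u32 {
    v.clamp(lo as i64, hi as i64) as u32
}

fn propose(champion: HnswParams, history_len: usize) -> HnswParams {
    // Seed from journal length: the same journal state proposes the same config.
    let mut rng = history_len as u64;
    if next(&mut rng) % 4 == 0 {
        // Explore (prob 0.25): uniform sample from the full bounds.
        HnswParams {
            m: M_BOUNDS.0 + (next(&mut rng) % ((M_BOUNDS.1 - M_BOUNDS.0 + 1) as u64)) as u32,
            ef_construction: EF_BOUNDS.0
                + (next(&mut rng) % ((EF_BOUNDS.1 - EF_BOUNDS.0 + 1) as u64)) as u32,
        }
    } else {
        // Exploit: perturb the champion by a small delta on both axes.
        let dm = (next(&mut rng) % 5) as i64 - 2;    // -2..=2
        let def = (next(&mut rng) % 33) as i64 - 16; // -16..=16
        HnswParams {
            m: clamp(champion.m as i64 + dm, M_BOUNDS),
            ef_construction: clamp(champion.ef_construction as i64 + def, EF_BOUNDS),
        }
    }
}
```

Seeding from the journal length rather than wall clock is what makes replay
reproducible: re-running the agent against a copied journal walks the same
proposal sequence.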
`[agent]` config section in lakehouse.toml; opt-in via enabled=true.
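The opt-in shape, with the remaining knobs spelled out at their shipped
defaults (values match the `AgentSettings` defaults in the shared config):

```toml
[agent]
enabled = true                    # off by default; the agent never runs unless set
cycle_interval_secs = 60          # periodic wake
cooldown_between_trials_secs = 30
min_recall = 0.9
max_trials_per_hour = 30          # rate limit so it can't saturate Ollama
```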
## Federation Layer 2 — runtime bucket lifecycle + per-index scoping
`BucketRegistry.buckets` moved to `std::sync::RwLock<HashMap>` so
buckets can be added/removed after startup. POST /storage/buckets
provisions at runtime; DELETE /storage/buckets/{name} unregisters
(refuses primary/rescue with 403). Local-backend buckets get their
root directory auto-created.
`IndexMeta.bucket` (default "primary" via serde) records each index's
home bucket. `TrialJournal` and `PromotionRegistry` now hold
Arc<BucketRegistry> + IndexRegistry; they resolve target store per-
index via IndexMeta.bucket. PromotionRegistry::list_all scans every
bucket and dedups by index_name. Pre-federation indexes keep working
unchanged — they just default to primary.
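The resolver pattern reduces to a lookup through the registry's lock; a
minimal sketch with simplified types (the real registry holds backend
handles behind `Arc`, not plain paths):

```rust
use std::collections::HashMap;
use std::sync::RwLock;

// Simplified stand-ins for the real vectord types.
struct IndexMeta { bucket: String }

struct BucketRegistry {
    // RwLock so buckets can be added/removed after startup.
    buckets: RwLock<HashMap<String, String /* root path or URI */>>,
}

impl BucketRegistry {
    // Resolve an index's home bucket to its store root, if registered.
    fn resolve(&self, meta: &IndexMeta) -> Option<String> {
        self.buckets.read().unwrap().get(&meta.bucket).cloned()
    }
}
```

Pre-federation indexes deserialize with `bucket = "primary"`, so they resolve
through the same path without any migration step.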
`ModelProfile.bucket: Option<String>` declares per-profile artifact
home. POST /vectors/profile/{id}/activate auto-provisions the
profile's bucket under storage.profile_root if not yet registered.
EvalSets stay primary-only for now — noted gap, low-risk to extend
later with the same resolver pattern.
## Phase 17 — VRAM-aware two-profile gate
Sidecar gains POST /admin/unload (Ollama keep_alive=0 trick — forces
immediate VRAM release), POST /admin/preload (keep_alive=5m with
empty prompt, takes the slot warm), and GET /admin/vram (combines
nvidia-smi snapshot with Ollama /api/ps). Exposed via aibridge as
unload_model / preload_model / vram_snapshot.
`VectorState.active_profile` is the GPU-slot singleton —
Arc<RwLock<Option<ActiveProfileSlot>>>. activate_profile checks for
a previous profile with a different ollama_name and unloads it
before preloading the new one; same-model reactivations skip the
unload (Ollama no-ops). New routes: POST /vectors/profile/{id}/
deactivate (unload + clear slot), GET /vectors/profile/active.
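The swap decision itself is small; a hedged sketch with illustrative types
(the real path then awaits the sidecar's unload/preload calls):

```rust
// Illustrative slot type; the actual ActiveProfileSlot carries more fields.
#[derive(Clone, Debug, PartialEq)]
struct ActiveProfileSlot { profile_id: String, ollama_name: String }

/// Returns the model to unload (if any) before preloading `next`.
fn plan_swap(current: Option<&ActiveProfileSlot>, next: &ActiveProfileSlot) -> Option<String> {
    match current {
        // A different model holds the GPU slot: unload it first.
        Some(prev) if prev.ollama_name != next.ollama_name => Some(prev.ollama_name.clone()),
        // Same model, or empty slot: skip the unload (Ollama no-ops anyway).
        _ => None,
    }
}
```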
Verified live: staffing-recruiter (qwen2.5) → docs-assistant
(mistral) swap freed qwen2.5 from VRAM and loaded mistral. nomic-
embed-text persists across swaps because both profiles use it —
free optimization that fell out of the design. Scoped search
correctly 403s cross-profile in both directions.
## MySQL streaming connector
`crates/ingestd/src/my_stream.rs` mirrors pg_stream.rs for MySQL.
Pure-Rust `mysql_async` driver (default-features=false to avoid C
deps). Same OFFSET pagination, same Parquet-streaming write shape.
Type mapping per ADR-010: int/bigint → Int32/Int64, decimal/float
→ Float64, tinyint(1)/bool → Boolean, everything else → Utf8 with
fallback parsers for date/time/json/uuid via Display.
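An illustrative sketch of that mapping, with a local enum standing in for the
arrow `DataType` so the example stays self-contained:

```rust
// Stand-in for arrow's DataType; only the variants ADR-010 targets.
#[derive(Debug, PartialEq)]
enum ColumnType { Int32, Int64, Float64, Boolean, Utf8 }

// Map a MySQL column type name to its ADR-010 target. Real code matches on
// the driver's column metadata; string matching here is for illustration.
fn map_mysql_type(ty: &str) -> ColumnType {
    match ty.to_ascii_lowercase().as_str() {
        "tinyint(1)" | "bool" => ColumnType::Boolean,
        "int" => ColumnType::Int32,
        "bigint" => ColumnType::Int64,
        "decimal" | "float" => ColumnType::Float64,
        // date/time/json/uuid and anything else: Utf8 via Display fallback.
        _ => ColumnType::Utf8,
    }
}
```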
POST /ingest/mysql parallel to /ingest/db. Same PII auto-detection,
same lineage capture (source_system="mysql"), same agent-trigger
hook. `redact_dsn` generalized — was hardcoded to "postgresql://"
length, now works for any scheme://user:pass@host/path URL (latent
PII leak fix for MySQL DSNs).
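A sketch of the generalized redaction, assuming the scheme-agnostic behavior
described above rather than the literal `redact_dsn` source: only credentials
in the authority part are masked, and DSNs without credentials pass through.

```rust
// Mask "user:pass" between "scheme://" and "@" for any URL-shaped DSN.
fn redact_dsn(dsn: &str) -> String {
    let Some(scheme_end) = dsn.find("://") else { return dsn.to_string() };
    let rest = &dsn[scheme_end + 3..];
    // Only the authority (before the first '/') may hold credentials.
    let authority_end = rest.find('/').unwrap_or(rest.len());
    match rest[..authority_end].rfind('@') {
        Some(at) => format!("{}://***@{}", &dsn[..scheme_end], &rest[at + 1..]),
        None => dsn.to_string(),
    }
}
```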
Verified live against MariaDB on localhost: 10 rows × 9 columns of
test data round-tripped through datatypes int/varchar/decimal/
tinyint/datetime/text. PII detection auto-flagged name + email.
Aggregation queries through DataFusion match the source values
exactly.
## Phase 18 — Hybrid Parquet+HNSW ⊕ Lance backend (ADR-019)
`vectord-lance` is a new firewall crate. Lance pulls Arrow 57 and
DataFusion 52 — incompatible with the rest of the workspace's
Arrow 55 / DataFusion 47. The firewall isolates that dep tree:
public API uses only std types (Vec<f32>, Vec<String>, Hit, Row,
*Stats), so no Arrow types cross the crate boundary and nothing
propagates to vectord. This is the ADR-019 path that didn't ship until now.
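The boundary can be pictured as a trait over std types only; this is an
illustrative sketch with a toy in-memory backend, not the actual
`vectord-lance` API:

```rust
// Plain std types on the public surface: nothing Arrow-shaped escapes.
pub struct Hit { pub doc_id: String, pub distance: f32 }

pub trait VectorBackend {
    fn append(&mut self, doc_id: String, vector: Vec<f32>);
    fn search(&self, query: &[f32], k: usize) -> Vec<Hit>;
}

// Toy stand-in, enough to exercise the boundary shape (squared-L2 scan).
#[derive(Default)]
pub struct MemBackend { rows: Vec<(String, Vec<f32>)> }

impl VectorBackend for MemBackend {
    fn append(&mut self, doc_id: String, vector: Vec<f32>) {
        self.rows.push((doc_id, vector));
    }
    fn search(&self, query: &[f32], k: usize) -> Vec<Hit> {
        let mut hits: Vec<Hit> = self.rows.iter().map(|(id, v)| Hit {
            doc_id: id.clone(),
            distance: v.iter().zip(query).map(|(a, b)| (a - b).powi(2)).sum::<f32>(),
        }).collect();
        hits.sort_by(|a, b| a.distance.total_cmp(&b.distance));
        hits.truncate(k);
        hits
    }
}
```

Because `Hit` and the vector arguments are plain std types, the Arrow 57 /
DataFusion 52 tree stays an implementation detail of whatever sits behind the
trait.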
`vectord::lance_backend::LanceRegistry` lazy-creates a
LanceVectorStore per index, resolving bucket → URI via the
conventional local-bucket layout. `IndexMeta.vector_backend` and
`ModelProfile.vector_backend` carry the choice (default Parquet so
existing indexes unchanged).
Six routes under /vectors/lance/*:
- migrate/{idx}: convert binary-blob Parquet → Lance FixedSizeList
- index/{idx}: build IVF_PQ
- search/{idx}: vector search (embed via sidecar)
- doc/{idx}/{doc_id}: random row fetch
- append/{idx}: native fragment append
- stats/{idx}: row count + index presence
Verified live on the real resumes_100k_v2 corpus (100K × 768d):
- Migrate: 0.57s
- Build IVF_PQ index: 16.2s (matches ADR-019 bench; 14× faster than
HNSW's 230s for the same data)
- Search end-to-end (Ollama embed + Lance scan): 23-53ms
- Random doc_id fetch: 5-7ms (filter scan; faster than Parquet's
~35ms full-file scan, slower than the bench's 311us positional
take — would close that gap with a scalar btree on doc_id)
- Append 100 rows: 3.3ms / +320KB on disk vs Parquet's required
full ~330MB rewrite — the structural win
- Index survives append; both backends coexist cleanly
## Known follow-ups not in this milestone
- ModelProfile.vector_backend doesn't yet auto-route /vectors/profile/
{id}/search to Lance; callers go through /vectors/lance/* directly
- Scalar btree on doc_id (closes the 5-7ms → ~300us gap)
- vectord-lance built default-features=false → no S3 yet
- IVF_PQ recall not measured (ADR-019 caveat) — needs a Lance-aware
variant of the eval harness
- Watcher-path ingest doesn't push agent triggers (HTTP paths do)
- EvalSets still primary-only (federation gap)
- No PATCH endpoint to move an existing index between buckets
- The pre-existing storaged::append_log doctest fails to compile
(malformed `{prefix}/` parses as code fence) — pre-existing bug,
left for a focused fix
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
use serde::Deserialize;
use std::path::Path;

#[derive(Debug, Clone, Deserialize)]
pub struct Config {
    pub gateway: GatewayConfig,
    pub storage: StorageConfig,
    #[serde(default)]
    pub catalog: CatalogConfig,
    #[serde(default)]
    pub query: QueryConfig,
    pub sidecar: SidecarConfig,
    #[serde(default)]
    pub ai: AiConfig,
    #[serde(default)]
    pub auth: AuthConfig,
    #[serde(default)]
    pub observability: ObservabilityConfig,
    #[serde(default)]
    pub agent: AgentSettings,
}

/// Phase 16.2 — background autotune agent settings.
///
/// Duplicated from `vectord::agent::AgentConfig` because `shared` can't
/// depend on `vectord` (vectord already depends on shared). The gateway
/// copies these into the vectord config at startup.
#[derive(Debug, Clone, Deserialize)]
pub struct AgentSettings {
    #[serde(default)]
    pub enabled: bool,
    #[serde(default = "default_cycle_interval_secs")]
    pub cycle_interval_secs: u64,
    #[serde(default = "default_cooldown_secs")]
    pub cooldown_between_trials_secs: u64,
    #[serde(default = "default_min_recall")]
    pub min_recall: f32,
    #[serde(default = "default_max_trials_per_hour")]
    pub max_trials_per_hour: u32,
}

impl Default for AgentSettings {
    fn default() -> Self {
        Self {
            enabled: false,
            cycle_interval_secs: default_cycle_interval_secs(),
            cooldown_between_trials_secs: default_cooldown_secs(),
            min_recall: default_min_recall(),
            max_trials_per_hour: default_max_trials_per_hour(),
        }
    }
}

fn default_cycle_interval_secs() -> u64 { 60 }
fn default_cooldown_secs() -> u64 { 30 }
fn default_min_recall() -> f32 { 0.9 }
fn default_max_trials_per_hour() -> u32 { 30 }

#[derive(Debug, Clone, Deserialize)]
pub struct GatewayConfig {
    #[serde(default = "default_host")]
    pub host: String,
    #[serde(default = "default_gateway_port")]
    pub port: u16,
}

#[derive(Debug, Clone, Deserialize)]
pub struct StorageConfig {
    /// Legacy single-backend root. If `buckets` is empty, this is used to
    /// create an implicit `primary` bucket at this path — preserves the
    /// pre-federation config shape.
    #[serde(default = "default_storage_root")]
    pub root: String,

    /// Where profile buckets are rooted when auto-provisioned.
    #[serde(default = "default_profile_root")]
    pub profile_root: String,

    /// Name of the bucket used for read fallback when a target bucket is
    /// unreachable. If `None`, no fallback — reads fail hard.
    #[serde(default)]
    pub rescue_bucket: Option<String>,

    /// Explicitly configured buckets. Empty = backward-compat single-bucket
    /// mode driven by `root`.
    #[serde(default)]
    pub buckets: Vec<BucketConfig>,
}

#[derive(Debug, Clone, Deserialize)]
pub struct BucketConfig {
    pub name: String,
    pub backend: String, // "local" | "s3"
    /// Local filesystem root (for backend = "local")
    pub root: Option<String>,
    /// S3 bucket name (for backend = "s3")
    pub bucket: Option<String>,
    pub region: Option<String>,
    pub endpoint: Option<String>,
    /// Handle for the secrets provider — never the literal credential.
    pub secret_ref: Option<String>,
}

#[derive(Debug, Clone, Deserialize, Default)]
pub struct CatalogConfig {
    #[serde(default = "default_manifest_prefix")]
    pub manifest_prefix: String,
}

#[derive(Debug, Clone, Deserialize, Default)]
pub struct QueryConfig {
    pub max_rows_per_query: Option<usize>,
}

#[derive(Debug, Clone, Deserialize)]
pub struct SidecarConfig {
    #[serde(default = "default_sidecar_url")]
    pub url: String,
}

#[derive(Debug, Clone, Deserialize, Default)]
pub struct AiConfig {
    #[serde(default = "default_embed_model")]
    pub embed_model: String,
    #[serde(default = "default_gen_model")]
    pub gen_model: String,
    #[serde(default = "default_rerank_model")]
    pub rerank_model: String,
}

#[derive(Debug, Clone, Deserialize, Default)]
pub struct AuthConfig {
    #[serde(default)]
    pub enabled: bool,
    pub api_key: Option<String>,
}

#[derive(Debug, Clone, Deserialize, Default)]
pub struct ObservabilityConfig {
    #[serde(default = "default_exporter")]
    pub exporter: String,
    #[serde(default = "default_service_name")]
    pub service_name: String,
}

// Defaults
fn default_host() -> String { "0.0.0.0".to_string() }
fn default_gateway_port() -> u16 { 3100 }
fn default_storage_root() -> String { "./data".to_string() }
fn default_profile_root() -> String { "./data/_profiles".to_string() }
fn default_manifest_prefix() -> String { "_catalog/manifests".to_string() }
fn default_sidecar_url() -> String { "http://localhost:3200".to_string() }
fn default_embed_model() -> String { "nomic-embed-text".to_string() }
fn default_gen_model() -> String { "qwen2.5".to_string() }
fn default_rerank_model() -> String { "qwen2.5".to_string() }
fn default_exporter() -> String { "stdout".to_string() }
fn default_service_name() -> String { "lakehouse".to_string() }

impl Config {
    pub fn load(path: &str) -> Result<Self, String> {
        let path = Path::new(path);
        if !path.exists() {
            return Err(format!("config file not found: {}", path.display()));
        }
        let content = std::fs::read_to_string(path)
            .map_err(|e| format!("failed to read config: {e}"))?;
        toml::from_str(&content)
            .map_err(|e| format!("failed to parse config: {e}"))
    }

    pub fn load_or_default() -> Self {
        // Try lakehouse.toml in current dir, then /etc/lakehouse/lakehouse.toml
        for path in &["lakehouse.toml", "/etc/lakehouse/lakehouse.toml"] {
            if let Ok(config) = Self::load(path) {
                tracing::info!("loaded config from {path}");
                return config;
            }
        }
        tracing::warn!("no config file found, using defaults");
        Self::default()
    }
}

impl Default for Config {
    fn default() -> Self {
        Self {
            gateway: GatewayConfig { host: default_host(), port: default_gateway_port() },
            storage: StorageConfig {
                root: default_storage_root(),
                profile_root: default_profile_root(),
                rescue_bucket: None,
                buckets: Vec::new(),
            },
            catalog: CatalogConfig::default(),
            query: QueryConfig::default(),
            sidecar: SidecarConfig { url: default_sidecar_url() },
            ai: AiConfig::default(),
            auth: AuthConfig::default(),
            observability: ObservabilityConfig::default(),
            agent: AgentSettings::default(),
        }
    }
}