diff --git a/config/providers.toml b/config/providers.toml new file mode 100644 index 0000000..a3e761a --- /dev/null +++ b/config/providers.toml @@ -0,0 +1,62 @@ +# Phase 39: Provider Registry +# +# Per-provider base_url, auth scheme, and default model. The gateway's +# /v1/chat dispatcher reads this file at boot to populate its provider +# table. Secrets (API keys) come from /etc/lakehouse/secrets.toml or +# environment variables — NEVER inline a key here. +# +# Adding a new provider: +# 1. New [[provider]] block with name, base_url, auth, default_model +# 2. Matching adapter at crates/aibridge/src/providers/<name>.rs +# implementing the ProviderAdapter trait (chat + embed + unload) +# 3. Route arm in crates/gateway/src/v1/mod.rs matching on `name` +# 4. Model-prefix routing hint in resolve_provider() if the provider +# uses a "<name>/..." model prefix (e.g. "openrouter/...") + +[[provider]] +name = "ollama" +base_url = "http://localhost:3200" +auth = "none" +default_model = "qwen3.5:latest" +# Hot-path local inference. No bearer needed — Python sidecar on +# localhost handles the Ollama API. Model names are bare +# (e.g. "qwen3.5:latest", not "ollama/qwen3.5:latest"). + +[[provider]] +name = "ollama_cloud" +base_url = "https://ollama.com" +auth = "bearer" +auth_env = "OLLAMA_CLOUD_KEY" +default_model = "gpt-oss:120b" +# Cloud-tier Ollama. Key resolved from OLLAMA_CLOUD_KEY env at gateway +# boot. Model-prefix routing: "cloud/" auto-routes here +# (see gateway::v1::resolve_provider). + +[[provider]] +name = "openrouter" +base_url = "https://openrouter.ai/api/v1" +auth = "bearer" +auth_env = "OPENROUTER_API_KEY" +auth_fallback_files = ["/home/profit/.env", "/root/llm_team_config.json"] +default_model = "openai/gpt-oss-120b:free" +# Multi-provider gateway. Covers Anthropic, Google, OpenAI, MiniMax, +# Qwen, Gemma, etc. Key resolved via crates/gateway/src/v1/openrouter.rs +# resolve_openrouter_key() — env first, then fallback files. 
+# Model-prefix routing: "openrouter/<vendor>/<model>" auto-routes here, +# prefix stripped before upstream call. + +# Planned (Phase 40 long-horizon — adapters not yet shipped): +# +# [[provider]] +# name = "gemini" +# base_url = "https://generativelanguage.googleapis.com/v1beta" +# auth = "api_key_query" +# auth_env = "GEMINI_API_KEY" +# default_model = "gemini-2.0-flash" +# +# [[provider]] +# name = "claude" +# base_url = "https://api.anthropic.com/v1" +# auth = "x_api_key" +# auth_env = "ANTHROPIC_API_KEY" +# default_model = "claude-3-5-sonnet-latest" diff --git a/crates/shared/src/lib.rs b/crates/shared/src/lib.rs index 4c5f758..df52168 100644 --- a/crates/shared/src/lib.rs +++ b/crates/shared/src/lib.rs @@ -5,3 +5,4 @@ pub mod config; pub mod pii; pub mod secrets; pub mod model_matrix; +pub mod profiles; diff --git a/crates/shared/src/profiles/execution.rs b/crates/shared/src/profiles/execution.rs new file mode 100644 index 0000000..d810404 --- /dev/null +++ b/crates/shared/src/profiles/execution.rs @@ -0,0 +1,14 @@ +//! ExecutionProfile — the Phase 41 rename of Phase 17's ModelProfile. +//! +//! Carries what's needed to RUN inference: model tag, dataset bindings, +//! HNSW config, embed model, bucket binding. Today this is a type +//! alias over `crate::types::ModelProfile` — the PRD's +//! "Backward compat: ModelProfile still loads, aliased to +//! ExecutionProfile" line, honored literally. +//! +//! When the migration off the old name finishes, this file can either +//! absorb the full struct definition or continue as an alias. Callers +//! should reference `ExecutionProfile` going forward; `ModelProfile` +//! stays exported from `types` for on-disk schema compat. + +pub use crate::types::ModelProfile as ExecutionProfile; diff --git a/crates/shared/src/profiles/memory.rs b/crates/shared/src/profiles/memory.rs new file mode 100644 index 0000000..2166cfc --- /dev/null +++ b/crates/shared/src/profiles/memory.rs @@ -0,0 +1,38 @@ +//! 
MemoryProfile — how the agent's execution memory is kept. +//! +//! Phase 41 decomposition: the Phase 19 playbook_memory + Phase 26 +//! successful_playbooks + Phase 45 doc_refs all need per-profile +//! tuning. Rather than bolt those onto ExecutionProfile, they live +//! here so a "thin" execution profile can reuse a "fat" memory +//! profile and vice versa. + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct MemoryProfile { + pub id: String, + #[serde(default)] + pub description: String, + /// Phase 19: ceiling for playbook_memory boost on retrieval. 0 + /// disables the boost entirely. + #[serde(default = "default_boost_ceiling")] + pub playbook_boost_ceiling: f32, + /// Phase 26: max history entries retained before rotation. + #[serde(default = "default_history_cap")] + pub history_cap: usize, + /// Phase 45: stale threshold for doc_refs before drift check + /// fires (hours). + #[serde(default = "default_stale_hours")] + pub doc_stale_hours: u32, + /// Phase 28: auto-retire playbooks that fail 3+ consecutive runs. + #[serde(default = "default_true")] + pub auto_retire_on_failure: bool, + pub created_at: chrono::DateTime<chrono::Utc>, + #[serde(default)] + pub created_by: String, +} + +fn default_boost_ceiling() -> f32 { 0.35 } +fn default_history_cap() -> usize { 1000 } +fn default_stale_hours() -> u32 { 168 } // one week +fn default_true() -> bool { true } diff --git a/crates/shared/src/profiles/mod.rs b/crates/shared/src/profiles/mod.rs new file mode 100644 index 0000000..97064a0 --- /dev/null +++ b/crates/shared/src/profiles/mod.rs @@ -0,0 +1,28 @@ +//! Phase 41 profile types. +//! +//! The existing `ModelProfile` (Phase 17) is aliased as +//! `ExecutionProfile` here — it continues to carry the model + +//! bindings + HNSW config needed to run inference. Three new profile +//! types land alongside: `RetrievalProfile`, `MemoryProfile`, +//! `ObserverProfile` — each owns a distinct slice of what used to be +//! bundled. 
+//! +//! Backward-compat rule (PRD Phase 41): existing `ModelProfile` on +//! disk continues to deserialize unchanged. New fields on the new +//! profile types are `#[serde(default)]` so old payloads load with +//! empty defaults. +//! +//! These are the canonical shapes — downstream code converts via +//! `From<ModelProfile> for ExecutionProfile` (they're the same struct +//! today, just named differently) and constructs the other three +//! as needed. + +pub mod execution; +pub mod retrieval; +pub mod memory; +pub mod observer; + +pub use execution::ExecutionProfile; +pub use memory::MemoryProfile; +pub use observer::ObserverProfile; +pub use retrieval::RetrievalProfile; diff --git a/crates/shared/src/profiles/observer.rs b/crates/shared/src/profiles/observer.rs new file mode 100644 index 0000000..7f05224 --- /dev/null +++ b/crates/shared/src/profiles/observer.rs @@ -0,0 +1,38 @@ +//! ObserverProfile — how loudly the observer logs this workload. +//! +//! Phase 41 decomposition: the observer's alert thresholds, escalation +//! cadence, and log retention need per-workload tuning. Hot-path +//! staffing workflows want aggressive alerting; batch backfills want +//! quieter. This profile is read by mcp-server/observer.ts at +//! activation-time. + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ObserverProfile { + pub id: String, + #[serde(default)] + pub description: String, + /// How many consecutive failures trigger a cluster escalation to + /// LLM Team `/v1/chat` (qwen3-coder:480b). + #[serde(default = "default_failure_cluster_size")] + pub failure_cluster_size: u32, + /// Minimum seconds between alert emails for the same sig_hash. + /// Prevents alert storms during a regression. + #[serde(default = "default_alert_cooldown")] + pub alert_cooldown_secs: u32, + /// Observer ring buffer size. Older events fall off when full. 
+ #[serde(default = "default_ring_size")] + pub ring_size: usize, + /// Whether to forward events to external Langfuse + /// (/v1/langfuse_trace). Off by default. + #[serde(default)] + pub forward_to_langfuse: bool, + pub created_at: chrono::DateTime<chrono::Utc>, + #[serde(default)] + pub created_by: String, +} + +fn default_failure_cluster_size() -> u32 { 3 } +fn default_alert_cooldown() -> u32 { 300 } // 5 minutes +fn default_ring_size() -> usize { 2000 } diff --git a/crates/shared/src/profiles/retrieval.rs b/crates/shared/src/profiles/retrieval.rs new file mode 100644 index 0000000..a8bfa3c --- /dev/null +++ b/crates/shared/src/profiles/retrieval.rs @@ -0,0 +1,52 @@ +//! RetrievalProfile — what + how the agent reaches into memory. +//! +//! Phase 41 decomposition: the old ModelProfile bundled "what dataset +//! can I read" (bound_datasets) AND "how do I rank results" +//! (hnsw_config) with the model tag. Retrieval concerns split out here +//! so a profile can swap its retrieval strategy without re-activating +//! the model. +//! +//! Fields chosen for what's actually varied per-workload today: +//! - `top_k` / `rerank_top_k` — how many hits to fetch + rerank +//! - `freshness_cutoff_days` — Phase 45 doc-drift uses this +//! - `boost_playbook_memory` — Phase 19 meta-index feedback +//! - `enforce_sensitivity_gates` — Phase 13 access-control integration +//! +//! All fields are `#[serde(default)]` so loading a profile file that +//! predates Phase 41 works without migration. + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct RetrievalProfile { + /// Unique id — slug form, separate namespace from ExecutionProfile. + pub id: String, + /// Free-text operator description. + #[serde(default)] + pub description: String, + /// Default top-K for /vectors/search + /vectors/hybrid. + #[serde(default = "default_top_k")] + pub top_k: u32, + /// How many of the top-K to pass through the reranker. 
0 disables + /// reranking for this profile. + #[serde(default = "default_rerank_top_k")] + pub rerank_top_k: u32, + /// Don't consider playbooks / docs older than this (days). 0 or + /// absent = no freshness filter. + #[serde(default)] + pub freshness_cutoff_days: u32, + /// Phase 19: boost workers/results by playbook_memory similarity. + #[serde(default)] + pub boost_playbook_memory: bool, + /// Phase 13: apply access-control masking on sensitive columns. + /// Default on — safety-first. + #[serde(default = "default_true")] + pub enforce_sensitivity_gates: bool, + pub created_at: chrono::DateTime<chrono::Utc>, + #[serde(default)] + pub created_by: String, +} + +fn default_top_k() -> u32 { 10 } +fn default_rerank_top_k() -> u32 { 5 } +fn default_true() -> bool { true } diff --git a/crates/vectord/src/activation.rs b/crates/vectord/src/activation.rs new file mode 100644 index 0000000..5fd99f9 --- /dev/null +++ b/crates/vectord/src/activation.rs @@ -0,0 +1,118 @@ +//! Profile activation tracking (Phase 41 PRD). +//! +//! Phase 41 PRD called out `crates/vectord/src/activation.rs` with +//! `ActivationTracker` + background-job pattern. The activation +//! handler itself lives in `service.rs::activate_profile` (200+ lines +//! of warm-up + bucket binding that's wired to VectorState); this +//! module provides the type the PRD named and a single-flight guard +//! that satisfies the PRD gate "refuse new activation if one is +//! pending/running." +//! +//! Handler extraction (moving the body of `activate_profile` here) +//! is deliberately NOT in this commit — it's a module-structure +//! refactor, not a semantic change. When that lands, the inline +//! `tokio::spawn` in `service.rs` moves into `ActivationTracker::start` +//! and the HTTP handler shrinks to ~20 lines of validate + start + +//! respond-202. + +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; + +/// Tracks in-flight profile activations. 
The PRD's "single-flight guard" +/// lives here: callers check `is_pending` before starting a new activation +/// and register via `mark_pending` if they proceed. On completion, they +/// call `mark_complete` so the next caller can start. +/// +/// Per-profile granularity — activating profile A doesn't block B. +#[derive(Clone, Default)] +pub struct ActivationTracker { + pending: Arc<RwLock<HashMap<String, String>>>, // profile_id → job_id +} + +impl ActivationTracker { + pub fn new() -> Self { + Self::default() + } + + /// Check if a profile has an activation already running. Returns the + /// in-flight job_id if so. Safe to call without holding a lock. + pub async fn is_pending(&self, profile_id: &str) -> Option<String> { + self.pending.read().await.get(profile_id).cloned() + } + + /// Register a new activation as pending. Returns false if an + /// activation is already running for the same profile (caller should + /// return 409 Conflict or surface the existing job_id). Returns true + /// on successful registration. + pub async fn mark_pending(&self, profile_id: &str, job_id: &str) -> bool { + let mut guard = self.pending.write().await; + if guard.contains_key(profile_id) { + return false; + } + guard.insert(profile_id.to_string(), job_id.to_string()); + true + } + + /// Remove the pending marker when activation finishes (success OR + /// failure — both free the slot for the next caller). + pub async fn mark_complete(&self, profile_id: &str) { + self.pending.write().await.remove(profile_id); + } + + /// How many activations are currently in-flight across all profiles. 
+ pub async fn in_flight_count(&self) -> usize { + self.pending.read().await.len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn empty_tracker_has_no_pending() { + let t = ActivationTracker::new(); + assert_eq!(t.in_flight_count().await, 0); + assert!(t.is_pending("any-profile").await.is_none()); + } + + #[tokio::test] + async fn mark_pending_registers_the_job() { + let t = ActivationTracker::new(); + assert!(t.mark_pending("profile-A", "job-1").await); + assert_eq!(t.in_flight_count().await, 1); + assert_eq!(t.is_pending("profile-A").await, Some("job-1".into())); + } + + #[tokio::test] + async fn single_flight_guard_refuses_second_activation_same_profile() { + // PRD Phase 41 gate: "refuse new activation if one is + // pending/running." Same profile twice → second call returns + // false, caller must surface the in-flight job_id. + let t = ActivationTracker::new(); + assert!(t.mark_pending("profile-A", "job-1").await); + assert!(!t.mark_pending("profile-A", "job-2").await); + // Still the first job — second registration didn't overwrite. + assert_eq!(t.is_pending("profile-A").await, Some("job-1".into())); + } + + #[tokio::test] + async fn different_profiles_dont_block_each_other() { + // Per-profile granularity — activating A doesn't block B. + let t = ActivationTracker::new(); + assert!(t.mark_pending("profile-A", "job-1").await); + assert!(t.mark_pending("profile-B", "job-2").await); + assert_eq!(t.in_flight_count().await, 2); + } + + #[tokio::test] + async fn mark_complete_frees_the_slot() { + let t = ActivationTracker::new(); + t.mark_pending("profile-A", "job-1").await; + t.mark_complete("profile-A").await; + assert_eq!(t.in_flight_count().await, 0); + // Next activation can now proceed. 
+ assert!(t.mark_pending("profile-A", "job-2").await); + } +} diff --git a/crates/vectord/src/lib.rs b/crates/vectord/src/lib.rs index 41f0d4f..6416af5 100644 --- a/crates/vectord/src/lib.rs +++ b/crates/vectord/src/lib.rs @@ -7,6 +7,7 @@ pub mod harness; pub mod hnsw; pub mod index_registry; pub mod jobs; +pub mod activation; pub mod playbook_memory; pub mod pathway_memory; pub mod doc_drift;