diff --git a/crates/gateway/src/execution_loop/mod.rs b/crates/gateway/src/execution_loop/mod.rs
index 0da0f66..aaab58d 100644
--- a/crates/gateway/src/execution_loop/mod.rs
+++ b/crates/gateway/src/execution_loop/mod.rs
@@ -378,7 +378,7 @@ impl ExecutionLoop {
             attempts = attempt + 1;
             let req = ChatRequest {
                 model: model.to_string(),
-                messages: vec![Message { role: "user".into(), content: prompt.to_string() }],
+                messages: vec![Message::new_text("user", prompt.to_string())],
                 temperature: Some(temperature),
                 max_tokens: None,
                 stream: Some(false),
@@ -389,8 +389,8 @@ impl ExecutionLoop {
                 .map_err(|e| format!("ollama_cloud: {e}"))?;
             tokens_p = tokens_p.saturating_add(resp.usage.prompt_tokens);
             tokens_c = tokens_c.saturating_add(resp.usage.completion_tokens);
-            let t = resp.choices.into_iter().next()
-                .map(|c| c.message.content).unwrap_or_default();
+            let t: String = resp.choices.into_iter().next()
+                .map(|c| c.message.text()).unwrap_or_default();
             if !t.trim().is_empty() {
                 text = t;
                 break;
@@ -428,7 +428,7 @@ impl ExecutionLoop {
             lf.emit_chat(ChatTrace {
                 provider: provider.to_string(),
                 model: model.to_string(),
-                input: vec![Message { role: "user".into(), content: prompt.to_string() }],
+                input: vec![Message::new_text("user", prompt.to_string())],
                 output: text.clone(),
                 prompt_tokens,
                 completion_tokens,
@@ -605,10 +605,7 @@ impl ExecutionLoop {
         let start_time = chrono::Utc::now();
         let chat_req = crate::v1::ChatRequest {
             model: "gpt-oss:120b".to_string(),
-            messages: vec![crate::v1::Message {
-                role: "user".into(),
-                content: prompt.clone(),
-            }],
+            messages: vec![crate::v1::Message::new_text("user", prompt.clone())],
             temperature: Some(0.1),
             max_tokens: None,
             stream: Some(false),
@@ -619,8 +616,8 @@ impl ExecutionLoop {
             .map_err(|e| format!("ollama_cloud: {e}"))?;
         let latency_ms = started.elapsed().as_millis() as u64;
         let end_time = chrono::Utc::now();
-        let correction_text = resp.choices.into_iter().next()
-            .map(|c| c.message.content).unwrap_or_default();
+        let correction_text: String = resp.choices.into_iter().next()
+            .map(|c| c.message.text()).unwrap_or_default();

         // Stamp per-task stats — cloud call counts against the same
         // usage counter so `/v1/usage` shows cloud token spend too.
@@ -638,7 +635,7 @@ impl ExecutionLoop {
             lf.emit_chat(ChatTrace {
                 provider: "ollama_cloud".into(),
                 model: "gpt-oss:120b".into(),
-                input: vec![crate::v1::Message { role: "user".into(), content: prompt.clone() }],
+                input: vec![crate::v1::Message::new_text("user", prompt.clone())],
                 output: correction_text.clone(),
                 prompt_tokens: resp.usage.prompt_tokens,
                 completion_tokens: resp.usage.completion_tokens,
diff --git a/crates/gateway/src/v1/claude.rs b/crates/gateway/src/v1/claude.rs
index ccbe8c3..a71a15f 100644
--- a/crates/gateway/src/v1/claude.rs
+++ b/crates/gateway/src/v1/claude.rs
@@ -46,12 +46,12 @@ pub async fn chat(
     let mut msgs: Vec<AnMessage> = Vec::new();
     for m in &req.messages {
         if m.role == "system" {
-            system_parts.push(m.content.clone());
+            system_parts.push(m.text());
         } else {
             // Anthropic expects strictly "user" or "assistant"; anything
             // else we normalize to "user".
let role = if m.role == "assistant" { "assistant" } else { "user" }; - msgs.push(AnMessage { role: role.to_string(), content: m.content.clone() }); + msgs.push(AnMessage { role: role.to_string(), content: m.text() }); } } let system = if system_parts.is_empty() { @@ -99,7 +99,7 @@ pub async fn chat( .unwrap_or_default(); let prompt_tokens = parsed.usage.as_ref().map(|u| u.input_tokens).unwrap_or_else(|| { - let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum(); + let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum(); ((chars + 3) / 4) as u32 }); let completion_tokens = parsed.usage.as_ref().map(|u| u.output_tokens).unwrap_or_else(|| { @@ -123,7 +123,7 @@ pub async fn chat( model, choices: vec![Choice { index: 0, - message: Message { role: "assistant".into(), content: text }, + message: Message::new_text("assistant", text), finish_reason: parsed.stop_reason.unwrap_or_else(|| "stop".into()), }], usage: UsageBlock { diff --git a/crates/gateway/src/v1/gemini.rs b/crates/gateway/src/v1/gemini.rs index 99d4c98..5ef0782 100644 --- a/crates/gateway/src/v1/gemini.rs +++ b/crates/gateway/src/v1/gemini.rs @@ -52,7 +52,7 @@ pub async fn chat( }; contents.push(GmContent { role: role.to_string(), - parts: vec![GmPart { text: m.content.clone() }], + parts: vec![GmPart { text: m.text() }], }); } @@ -98,7 +98,7 @@ pub async fn chat( let prompt_tokens = parsed.usage_metadata.as_ref() .map(|u| u.prompt_token_count) .unwrap_or_else(|| { - let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum(); + let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum(); ((chars + 3) / 4) as u32 }); let completion_tokens = parsed.usage_metadata.as_ref() @@ -122,7 +122,7 @@ pub async fn chat( model, choices: vec![Choice { index: 0, - message: Message { role: "assistant".into(), content: text }, + message: Message::new_text("assistant", text), finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()), }], usage: UsageBlock { diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs index 875a077..c455418 100644 --- a/crates/gateway/src/v1/mod.rs +++ b/crates/gateway/src/v1/mod.rs @@ -97,10 +97,50 @@ pub fn router(state: V1State) -> Router { // -- Shared types (OpenAI-compatible) -- +/// OpenAI-compatible message. `content` accepts either a plain string or +/// an array of content parts (the modern multimodal shape: +/// `[{type:"text", text:"..."}, {type:"image_url", ...}]`). We store as +/// `serde_json::Value` to preserve client shape on forward; downstream +/// providers can take it verbatim. `Message::text()` flattens for +/// places that need a plain string (Ollama prompt assembly, char +/// counts, the assistant's own response synthesis). #[derive(Serialize, Deserialize, Clone, Debug)] pub struct Message { pub role: String, - pub content: String, + pub content: serde_json::Value, +} + +impl Message { + /// Construct a plain text message — the common shape for callers + /// that don't need multimodal content. Wraps the body in + /// `serde_json::Value::String` so downstream serializers see the + /// canonical OpenAI shape. + pub fn new_text(role: impl Into, body: impl Into) -> Self { + Self { + role: role.into(), + content: serde_json::Value::String(body.into()), + } + } + /// Flatten content to a plain string. Strings pass through; content- + /// part arrays concatenate the `text` fields with newlines and skip + /// non-text parts (images etc.) 
+    /// real multimodal forwarding is queued.
+    pub fn text(&self) -> String {
+        match &self.content {
+            serde_json::Value::String(s) => s.clone(),
+            serde_json::Value::Array(parts) => {
+                let mut out = String::new();
+                for p in parts {
+                    if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
+                        if !out.is_empty() { out.push('\n'); }
+                        out.push_str(t);
+                    }
+                }
+                out
+            }
+            other => other.to_string(),
+        }
+    }
 }

 #[derive(Deserialize, Debug, Clone)]
@@ -380,7 +420,7 @@ async fn chat(
     // untouched.
     if let Some(lf) = &state.langfuse {
         let output = resp.choices.first()
-            .map(|c| c.message.content.clone())
+            .map(|c| c.message.text())
             .unwrap_or_default();
         lf.emit_chat(langfuse_trace::ChatTrace {
             provider: used_provider.clone(),
@@ -452,7 +492,7 @@ mod tests {
         assert_eq!(r.model, "qwen3.5:latest");
         assert_eq!(r.messages.len(), 2);
         assert_eq!(r.messages[0].role, "system");
-        assert_eq!(r.messages[1].content, "Hi");
+        assert_eq!(r.messages[1].text(), "Hi");
         assert_eq!(r.temperature, Some(0.2));
         assert_eq!(r.max_tokens, Some(100));
     }
diff --git a/crates/gateway/src/v1/ollama.rs b/crates/gateway/src/v1/ollama.rs
index 71ffec3..240d8da 100644
--- a/crates/gateway/src/v1/ollama.rs
+++ b/crates/gateway/src/v1/ollama.rs
@@ -60,10 +60,7 @@ pub async fn chat(client: &AiClient, req: &ChatRequest) -> Result<ChatResponse, String> {
 fn flatten_messages(messages: &[Message]) -> (String, String) {
     let mut system = String::new();
     let mut prompt = String::new();
     for m in messages {
+        let body = m.text();
         if m.role == "system" {
             if !system.is_empty() { system.push('\n'); }
-            system.push_str(&m.content);
+            system.push_str(&body);
         } else {
             prompt.push_str(&m.role);
             prompt.push_str(": ");
-            prompt.push_str(&m.content);
+            prompt.push_str(&body);
             prompt.push_str("\n\n");
         }
     }
@@ -104,7 +102,7 @@ fn flatten_messages(messages: &[Message]) -> (String, String) {
 }

 fn estimate_prompt_tokens(messages: &[Message]) -> u32 {
-    let chars: usize = messages.iter().map(|m| m.content.chars().count()).sum();
+    let chars: usize = messages.iter().map(|m| m.text().chars().count()).sum();
     ((chars + 3) / 4) as u32
 }
diff --git a/crates/gateway/src/v1/ollama_cloud.rs b/crates/gateway/src/v1/ollama_cloud.rs
index b6d089c..8c6c05e 100644
--- a/crates/gateway/src/v1/ollama_cloud.rs
+++ b/crates/gateway/src/v1/ollama_cloud.rs
@@ -88,7 +88,7 @@ pub async fn chat(
     let text = parsed.response.unwrap_or_default();

     let prompt_tokens = parsed.prompt_eval_count.unwrap_or_else(|| {
-        let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
+        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
         ((chars + 3) / 4) as u32
     });
     let completion_tokens = parsed.eval_count.unwrap_or_else(|| {
@@ -112,7 +112,7 @@ pub async fn chat(
         model: parsed.model.unwrap_or_else(|| req.model.clone()),
         choices: vec![Choice {
             index: 0,
-            message: Message { role: "assistant".into(), content: text },
+            message: Message::new_text("assistant", text),
             finish_reason: "stop".into(),
         }],
         usage: UsageBlock {
diff --git a/crates/gateway/src/v1/openrouter.rs b/crates/gateway/src/v1/openrouter.rs
index d6374db..610c5eb 100644
--- a/crates/gateway/src/v1/openrouter.rs
+++ b/crates/gateway/src/v1/openrouter.rs
@@ -59,6 +59,9 @@ pub async fn chat(
     let body = ORChatBody {
         model: model.clone(),
+        // Pass content through verbatim — preserves OpenAI's multimodal
+        // content-parts shape (`[{type:"text",text:"..."}, ...]`) so the
+        // upstream provider sees exactly what the client sent.
         messages: req.messages.iter().map(|m| ORMessage {
             role: m.role.clone(),
             content: m.content.clone(),
         }).collect(),
@@ -102,7 +105,7 @@ pub async fn chat(
     let text = choice.message.content;

     let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
-        let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
+        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
         ((chars + 3) / 4) as u32
     });
     let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
@@ -126,7 +129,7 @@ pub async fn chat(
         model,
         choices: vec![Choice {
             index: 0,
-            message: Message { role: "assistant".into(), content: text },
+            message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
             finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
         }],
         usage: UsageBlock {
@@ -149,7 +152,7 @@ struct ORChatBody {
 }

 #[derive(Serialize)]
-struct ORMessage { role: String, content: String }
+struct ORMessage { role: String, content: serde_json::Value }

 #[derive(Deserialize)]
 struct ORChatResponse {
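
For reviewers, a minimal usage sketch of the new content handling. This is not part of the patch; it assumes the `Message` type from `crates/gateway/src/v1/mod.rs` is in scope, and the `json!` inputs are made-up examples, not taken from the repo's tests.

// Hypothetical round-trip check (illustration only): exercises
// Message::new_text and Message::text() against both content shapes.
use serde_json::json;

fn main() {
    // Plain-string content: new_text() stores the canonical OpenAI string
    // shape, and text() passes the string through unchanged.
    let plain = Message::new_text("user", "hello");
    assert_eq!(plain.text(), "hello");

    // The classic OpenAI wire shape still deserializes, since `content`
    // is now a serde_json::Value rather than a String.
    let wire: Message = serde_json::from_str(r#"{"role":"user","content":"Hi"}"#).unwrap();
    assert_eq!(wire.text(), "Hi");

    // Content-parts array: text() keeps only the `text` parts, joined with
    // '\n', and skips the image part (Phase 38/39 callers are text-only).
    let multimodal = Message {
        role: "user".into(),
        content: json!([
            {"type": "text", "text": "describe this"},
            {"type": "image_url", "image_url": {"url": "https://example.com/x.png"}},
            {"type": "text", "text": "in one line"}
        ]),
    };
    assert_eq!(multimodal.text(), "describe this\nin one line");
}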