v1: accept OpenAI multimodal content shape (array-of-parts)
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Modern OpenAI clients (pi-ai, openai SDK 6.x, langchain-js, the official
agents) send `messages[].content` as an array of content parts:
`[{type:"text", text:"..."}, {type:"image_url", ...}]`. Our gateway
typed `content` as a plain `String`, so those requests failed to
deserialize and were rejected with HTTP 422.
Fix: `Message.content` is now `serde_json::Value`, so requests
deserialize regardless of shape. `Message::text()` flattens
content-parts arrays (the `text` fields joined with newlines; non-text
parts are skipped) for places that need a plain string — Ollama prompt
assembly, char counts, the assistant's own response synthesis.
`Message::new_text()` constructs string-content messages without
writing the `serde_json::Value::String` wrapper at each call site.
Forwarders (openrouter) clone `content` through verbatim so providers
see exactly what the client sent.
Verified end-to-end: Pi CLI (`pi --print --provider openrouter`)
landed a clean 1902-token request through `/v1/chat/completions`,
routed to OpenRouter as `openai/gpt-oss-120b:free`, response in
1.62s, Langfuse trace `v1.chat:openrouter` recorded with provider
tag. This is the same path that any tool using the official OpenAI SDK takes.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3a0b37ed93
commit
540a9a27ee
@ -378,7 +378,7 @@ impl ExecutionLoop {
|
|||||||
attempts = attempt + 1;
|
attempts = attempt + 1;
|
||||||
let req = ChatRequest {
|
let req = ChatRequest {
|
||||||
model: model.to_string(),
|
model: model.to_string(),
|
||||||
messages: vec![Message { role: "user".into(), content: prompt.to_string() }],
|
messages: vec![Message::new_text("user", prompt.to_string())],
|
||||||
temperature: Some(temperature),
|
temperature: Some(temperature),
|
||||||
max_tokens: None,
|
max_tokens: None,
|
||||||
stream: Some(false),
|
stream: Some(false),
|
||||||
@ -389,8 +389,8 @@ impl ExecutionLoop {
|
|||||||
.map_err(|e| format!("ollama_cloud: {e}"))?;
|
.map_err(|e| format!("ollama_cloud: {e}"))?;
|
||||||
tokens_p = tokens_p.saturating_add(resp.usage.prompt_tokens);
|
tokens_p = tokens_p.saturating_add(resp.usage.prompt_tokens);
|
||||||
tokens_c = tokens_c.saturating_add(resp.usage.completion_tokens);
|
tokens_c = tokens_c.saturating_add(resp.usage.completion_tokens);
|
||||||
let t = resp.choices.into_iter().next()
|
let t: String = resp.choices.into_iter().next()
|
||||||
.map(|c| c.message.content).unwrap_or_default();
|
.map(|c| c.message.text()).unwrap_or_default();
|
||||||
if !t.trim().is_empty() {
|
if !t.trim().is_empty() {
|
||||||
text = t;
|
text = t;
|
||||||
break;
|
break;
|
||||||
@ -428,7 +428,7 @@ impl ExecutionLoop {
|
|||||||
lf.emit_chat(ChatTrace {
|
lf.emit_chat(ChatTrace {
|
||||||
provider: provider.to_string(),
|
provider: provider.to_string(),
|
||||||
model: model.to_string(),
|
model: model.to_string(),
|
||||||
input: vec![Message { role: "user".into(), content: prompt.to_string() }],
|
input: vec![Message::new_text("user", prompt.to_string())],
|
||||||
output: text.clone(),
|
output: text.clone(),
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
@ -605,10 +605,7 @@ impl ExecutionLoop {
|
|||||||
let start_time = chrono::Utc::now();
|
let start_time = chrono::Utc::now();
|
||||||
let chat_req = crate::v1::ChatRequest {
|
let chat_req = crate::v1::ChatRequest {
|
||||||
model: "gpt-oss:120b".to_string(),
|
model: "gpt-oss:120b".to_string(),
|
||||||
messages: vec![crate::v1::Message {
|
messages: vec![crate::v1::Message::new_text("user", prompt.clone())],
|
||||||
role: "user".into(),
|
|
||||||
content: prompt.clone(),
|
|
||||||
}],
|
|
||||||
temperature: Some(0.1),
|
temperature: Some(0.1),
|
||||||
max_tokens: None,
|
max_tokens: None,
|
||||||
stream: Some(false),
|
stream: Some(false),
|
||||||
@ -619,8 +616,8 @@ impl ExecutionLoop {
|
|||||||
.map_err(|e| format!("ollama_cloud: {e}"))?;
|
.map_err(|e| format!("ollama_cloud: {e}"))?;
|
||||||
let latency_ms = started.elapsed().as_millis() as u64;
|
let latency_ms = started.elapsed().as_millis() as u64;
|
||||||
let end_time = chrono::Utc::now();
|
let end_time = chrono::Utc::now();
|
||||||
let correction_text = resp.choices.into_iter().next()
|
let correction_text: String = resp.choices.into_iter().next()
|
||||||
.map(|c| c.message.content).unwrap_or_default();
|
.map(|c| c.message.text()).unwrap_or_default();
|
||||||
|
|
||||||
// Stamp per-task stats — cloud call counts against the same
|
// Stamp per-task stats — cloud call counts against the same
|
||||||
// usage counter so `/v1/usage` shows cloud token spend too.
|
// usage counter so `/v1/usage` shows cloud token spend too.
|
||||||
@ -638,7 +635,7 @@ impl ExecutionLoop {
|
|||||||
lf.emit_chat(ChatTrace {
|
lf.emit_chat(ChatTrace {
|
||||||
provider: "ollama_cloud".into(),
|
provider: "ollama_cloud".into(),
|
||||||
model: "gpt-oss:120b".into(),
|
model: "gpt-oss:120b".into(),
|
||||||
input: vec![crate::v1::Message { role: "user".into(), content: prompt.clone() }],
|
input: vec![crate::v1::Message::new_text("user", prompt.clone())],
|
||||||
output: correction_text.clone(),
|
output: correction_text.clone(),
|
||||||
prompt_tokens: resp.usage.prompt_tokens,
|
prompt_tokens: resp.usage.prompt_tokens,
|
||||||
completion_tokens: resp.usage.completion_tokens,
|
completion_tokens: resp.usage.completion_tokens,
|
||||||
|
|||||||
@ -46,12 +46,12 @@ pub async fn chat(
|
|||||||
let mut msgs: Vec<AnMessage> = Vec::new();
|
let mut msgs: Vec<AnMessage> = Vec::new();
|
||||||
for m in &req.messages {
|
for m in &req.messages {
|
||||||
if m.role == "system" {
|
if m.role == "system" {
|
||||||
system_parts.push(m.content.clone());
|
system_parts.push(m.text());
|
||||||
} else {
|
} else {
|
||||||
// Anthropic expects strictly "user" or "assistant"; anything
|
// Anthropic expects strictly "user" or "assistant"; anything
|
||||||
// else we normalize to "user".
|
// else we normalize to "user".
|
||||||
let role = if m.role == "assistant" { "assistant" } else { "user" };
|
let role = if m.role == "assistant" { "assistant" } else { "user" };
|
||||||
msgs.push(AnMessage { role: role.to_string(), content: m.content.clone() });
|
msgs.push(AnMessage { role: role.to_string(), content: m.text() });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let system = if system_parts.is_empty() {
|
let system = if system_parts.is_empty() {
|
||||||
@ -99,7 +99,7 @@ pub async fn chat(
|
|||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
|
||||||
let prompt_tokens = parsed.usage.as_ref().map(|u| u.input_tokens).unwrap_or_else(|| {
|
let prompt_tokens = parsed.usage.as_ref().map(|u| u.input_tokens).unwrap_or_else(|| {
|
||||||
let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
|
let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
|
||||||
((chars + 3) / 4) as u32
|
((chars + 3) / 4) as u32
|
||||||
});
|
});
|
||||||
let completion_tokens = parsed.usage.as_ref().map(|u| u.output_tokens).unwrap_or_else(|| {
|
let completion_tokens = parsed.usage.as_ref().map(|u| u.output_tokens).unwrap_or_else(|| {
|
||||||
@ -123,7 +123,7 @@ pub async fn chat(
|
|||||||
model,
|
model,
|
||||||
choices: vec![Choice {
|
choices: vec![Choice {
|
||||||
index: 0,
|
index: 0,
|
||||||
message: Message { role: "assistant".into(), content: text },
|
message: Message::new_text("assistant", text),
|
||||||
finish_reason: parsed.stop_reason.unwrap_or_else(|| "stop".into()),
|
finish_reason: parsed.stop_reason.unwrap_or_else(|| "stop".into()),
|
||||||
}],
|
}],
|
||||||
usage: UsageBlock {
|
usage: UsageBlock {
|
||||||
|
|||||||
@ -52,7 +52,7 @@ pub async fn chat(
|
|||||||
};
|
};
|
||||||
contents.push(GmContent {
|
contents.push(GmContent {
|
||||||
role: role.to_string(),
|
role: role.to_string(),
|
||||||
parts: vec![GmPart { text: m.content.clone() }],
|
parts: vec![GmPart { text: m.text() }],
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,7 +98,7 @@ pub async fn chat(
|
|||||||
let prompt_tokens = parsed.usage_metadata.as_ref()
|
let prompt_tokens = parsed.usage_metadata.as_ref()
|
||||||
.map(|u| u.prompt_token_count)
|
.map(|u| u.prompt_token_count)
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
|
let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
|
||||||
((chars + 3) / 4) as u32
|
((chars + 3) / 4) as u32
|
||||||
});
|
});
|
||||||
let completion_tokens = parsed.usage_metadata.as_ref()
|
let completion_tokens = parsed.usage_metadata.as_ref()
|
||||||
@ -122,7 +122,7 @@ pub async fn chat(
|
|||||||
model,
|
model,
|
||||||
choices: vec![Choice {
|
choices: vec![Choice {
|
||||||
index: 0,
|
index: 0,
|
||||||
message: Message { role: "assistant".into(), content: text },
|
message: Message::new_text("assistant", text),
|
||||||
finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
|
finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
|
||||||
}],
|
}],
|
||||||
usage: UsageBlock {
|
usage: UsageBlock {
|
||||||
|
|||||||
@ -97,10 +97,50 @@ pub fn router(state: V1State) -> Router {
|
|||||||
|
|
||||||
// -- Shared types (OpenAI-compatible) --
|
// -- Shared types (OpenAI-compatible) --
|
||||||
|
|
||||||
|
/// OpenAI-compatible message. `content` accepts either a plain string or
|
||||||
|
/// an array of content parts (the modern multimodal shape:
|
||||||
|
/// `[{type:"text", text:"..."}, {type:"image_url", ...}]`). We store as
|
||||||
|
/// `serde_json::Value` to preserve client shape on forward; downstream
|
||||||
|
/// providers can take it verbatim. `Message::text()` flattens for
|
||||||
|
/// places that need a plain string (Ollama prompt assembly, char
|
||||||
|
/// counts, the assistant's own response synthesis).
|
||||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
pub struct Message {
|
pub struct Message {
|
||||||
pub role: String,
|
pub role: String,
|
||||||
pub content: String,
|
pub content: serde_json::Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Message {
|
||||||
|
/// Construct a plain text message — the common shape for callers
|
||||||
|
/// that don't need multimodal content. Wraps the body in
|
||||||
|
/// `serde_json::Value::String` so downstream serializers see the
|
||||||
|
/// canonical OpenAI shape.
|
||||||
|
pub fn new_text(role: impl Into<String>, body: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
role: role.into(),
|
||||||
|
content: serde_json::Value::String(body.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Flatten content to a plain string. Strings pass through; content-
|
||||||
|
/// part arrays concatenate the `text` fields with newlines and skip
|
||||||
|
/// non-text parts (images etc.) — Phase 38/39 callers are text-only,
|
||||||
|
/// real multimodal forwarding is queued.
|
||||||
|
pub fn text(&self) -> String {
|
||||||
|
match &self.content {
|
||||||
|
serde_json::Value::String(s) => s.clone(),
|
||||||
|
serde_json::Value::Array(parts) => {
|
||||||
|
let mut out = String::new();
|
||||||
|
for p in parts {
|
||||||
|
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
||||||
|
if !out.is_empty() { out.push('\n'); }
|
||||||
|
out.push_str(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
other => other.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Clone)]
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
@ -380,7 +420,7 @@ async fn chat(
|
|||||||
// untouched.
|
// untouched.
|
||||||
if let Some(lf) = &state.langfuse {
|
if let Some(lf) = &state.langfuse {
|
||||||
let output = resp.choices.first()
|
let output = resp.choices.first()
|
||||||
.map(|c| c.message.content.clone())
|
.map(|c| c.message.text())
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
lf.emit_chat(langfuse_trace::ChatTrace {
|
lf.emit_chat(langfuse_trace::ChatTrace {
|
||||||
provider: used_provider.clone(),
|
provider: used_provider.clone(),
|
||||||
@ -452,7 +492,7 @@ mod tests {
|
|||||||
assert_eq!(r.model, "qwen3.5:latest");
|
assert_eq!(r.model, "qwen3.5:latest");
|
||||||
assert_eq!(r.messages.len(), 2);
|
assert_eq!(r.messages.len(), 2);
|
||||||
assert_eq!(r.messages[0].role, "system");
|
assert_eq!(r.messages[0].role, "system");
|
||||||
assert_eq!(r.messages[1].content, "Hi");
|
assert_eq!(r.messages[1].text(), "Hi");
|
||||||
assert_eq!(r.temperature, Some(0.2));
|
assert_eq!(r.temperature, Some(0.2));
|
||||||
assert_eq!(r.max_tokens, Some(100));
|
assert_eq!(r.max_tokens, Some(100));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -60,10 +60,7 @@ pub async fn chat(client: &AiClient, req: &ChatRequest) -> Result<ChatResponse,
|
|||||||
model: resp.model,
|
model: resp.model,
|
||||||
choices: vec![Choice {
|
choices: vec![Choice {
|
||||||
index: 0,
|
index: 0,
|
||||||
message: Message {
|
message: Message::new_text("assistant", resp.text),
|
||||||
role: "assistant".into(),
|
|
||||||
content: resp.text,
|
|
||||||
},
|
|
||||||
finish_reason: "stop".into(),
|
finish_reason: "stop".into(),
|
||||||
}],
|
}],
|
||||||
usage: UsageBlock {
|
usage: UsageBlock {
|
||||||
@ -89,13 +86,14 @@ fn flatten_messages(messages: &[Message]) -> (String, String) {
|
|||||||
let mut system = String::new();
|
let mut system = String::new();
|
||||||
let mut prompt = String::new();
|
let mut prompt = String::new();
|
||||||
for m in messages {
|
for m in messages {
|
||||||
|
let body = m.text();
|
||||||
if m.role == "system" {
|
if m.role == "system" {
|
||||||
if !system.is_empty() { system.push('\n'); }
|
if !system.is_empty() { system.push('\n'); }
|
||||||
system.push_str(&m.content);
|
system.push_str(&body);
|
||||||
} else {
|
} else {
|
||||||
prompt.push_str(&m.role);
|
prompt.push_str(&m.role);
|
||||||
prompt.push_str(": ");
|
prompt.push_str(": ");
|
||||||
prompt.push_str(&m.content);
|
prompt.push_str(&body);
|
||||||
prompt.push_str("\n\n");
|
prompt.push_str("\n\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -104,7 +102,7 @@ fn flatten_messages(messages: &[Message]) -> (String, String) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn estimate_prompt_tokens(messages: &[Message]) -> u32 {
|
fn estimate_prompt_tokens(messages: &[Message]) -> u32 {
|
||||||
let chars: usize = messages.iter().map(|m| m.content.chars().count()).sum();
|
let chars: usize = messages.iter().map(|m| m.text().chars().count()).sum();
|
||||||
((chars + 3) / 4) as u32
|
((chars + 3) / 4) as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -88,7 +88,7 @@ pub async fn chat(
|
|||||||
let text = parsed.response.unwrap_or_default();
|
let text = parsed.response.unwrap_or_default();
|
||||||
|
|
||||||
let prompt_tokens = parsed.prompt_eval_count.unwrap_or_else(|| {
|
let prompt_tokens = parsed.prompt_eval_count.unwrap_or_else(|| {
|
||||||
let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
|
let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
|
||||||
((chars + 3) / 4) as u32
|
((chars + 3) / 4) as u32
|
||||||
});
|
});
|
||||||
let completion_tokens = parsed.eval_count.unwrap_or_else(|| {
|
let completion_tokens = parsed.eval_count.unwrap_or_else(|| {
|
||||||
@ -112,7 +112,7 @@ pub async fn chat(
|
|||||||
model: parsed.model.unwrap_or_else(|| req.model.clone()),
|
model: parsed.model.unwrap_or_else(|| req.model.clone()),
|
||||||
choices: vec![Choice {
|
choices: vec![Choice {
|
||||||
index: 0,
|
index: 0,
|
||||||
message: Message { role: "assistant".into(), content: text },
|
message: Message::new_text("assistant", text),
|
||||||
finish_reason: "stop".into(),
|
finish_reason: "stop".into(),
|
||||||
}],
|
}],
|
||||||
usage: UsageBlock {
|
usage: UsageBlock {
|
||||||
|
|||||||
@ -59,6 +59,9 @@ pub async fn chat(
|
|||||||
|
|
||||||
let body = ORChatBody {
|
let body = ORChatBody {
|
||||||
model: model.clone(),
|
model: model.clone(),
|
||||||
|
// Pass content through verbatim — preserves OpenAI's multimodal
|
||||||
|
// content-parts shape (`[{type:"text",text:"..."}, ...]`) so the
|
||||||
|
// upstream provider sees exactly what the client sent.
|
||||||
messages: req.messages.iter().map(|m| ORMessage {
|
messages: req.messages.iter().map(|m| ORMessage {
|
||||||
role: m.role.clone(),
|
role: m.role.clone(),
|
||||||
content: m.content.clone(),
|
content: m.content.clone(),
|
||||||
@ -102,7 +105,7 @@ pub async fn chat(
|
|||||||
let text = choice.message.content;
|
let text = choice.message.content;
|
||||||
|
|
||||||
let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
|
let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
|
||||||
let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum();
|
let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
|
||||||
((chars + 3) / 4) as u32
|
((chars + 3) / 4) as u32
|
||||||
});
|
});
|
||||||
let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
|
let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
|
||||||
@ -126,7 +129,7 @@ pub async fn chat(
|
|||||||
model,
|
model,
|
||||||
choices: vec![Choice {
|
choices: vec![Choice {
|
||||||
index: 0,
|
index: 0,
|
||||||
message: Message { role: "assistant".into(), content: text },
|
message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
|
||||||
finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
|
finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
|
||||||
}],
|
}],
|
||||||
usage: UsageBlock {
|
usage: UsageBlock {
|
||||||
@ -149,7 +152,7 @@ struct ORChatBody {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct ORMessage { role: String, content: String }
|
struct ORMessage { role: String, content: serde_json::Value }
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct ORChatResponse {
|
struct ORChatResponse {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user