diff --git a/crates/gateway/src/v1/mod.rs b/crates/gateway/src/v1/mod.rs
index ca18487..cba16a2 100644
--- a/crates/gateway/src/v1/mod.rs
+++ b/crates/gateway/src/v1/mod.rs
@@ -68,6 +68,14 @@ pub struct ChatRequest {
     /// Phase 39+ wires real streaming.
     #[serde(default)]
     pub stream: Option<bool>,
+    /// Non-OpenAI extension. Passes through to the provider's thinking
+    /// toggle. Default: **false** — hot-path discipline for thinking
+    /// models (qwen3.5, qwen3, gpt-oss) that otherwise burn the token
+    /// budget on hidden reasoning before visible output starts,
+    /// producing empty responses. Set true explicitly when calling an
+    /// overseer / reasoning-heavy path.
+    #[serde(default)]
+    pub think: Option<bool>,
 }
 
 #[derive(Serialize)]
diff --git a/crates/gateway/src/v1/ollama.rs b/crates/gateway/src/v1/ollama.rs
index c768960..d7a5e4d 100644
--- a/crates/gateway/src/v1/ollama.rs
+++ b/crates/gateway/src/v1/ollama.rs
@@ -20,11 +20,14 @@ pub async fn chat(client: &AiClient, req: &ChatRequest) -> Result