diff --git a/tests/multi-agent/agent.ts b/tests/multi-agent/agent.ts
index 6ee7449..487f1d4 100644
--- a/tests/multi-agent/agent.ts
+++ b/tests/multi-agent/agent.ts
@@ -394,11 +394,15 @@ export async function generate(model: string, prompt: string, opts: {
   return text;
 }
 
-// Cloud generate — hits Ollama Cloud directly with the bearer key. Same
-// /api/generate shape as local Ollama; `thinking` field (for gpt-oss:Nb)
-// is discarded, only `response` is returned. Caller should budget
-// num_predict ≥ 400 so thinking-model reasoning has room before the
-// visible response starts.
+// Cloud generate — routes through the lakehouse gateway's /v1/chat
+// with provider="ollama_cloud". Phase 44 migration (2026-04-24): was
+// hitting OLLAMA_CLOUD_URL/api/generate directly with a bearer key,
+// bypassing the gateway's usage tracking + audit path. Now every call
+// flows through /v1/chat so /v1/usage accounts for it. Gateway holds
+// the OLLAMA_CLOUD_KEY; caller no longer needs it in env.
+//
+// Thinking-model budget note: num_predict ≥ 400 still matters, just
+// expressed via max_tokens on the /v1/chat request.
 export async function generateCloud(model: string, prompt: string, opts: {
   max_tokens?: number;
   temperature?: number;
@@ -406,41 +410,35 @@ export async function generateCloud(model: string, prompt: string, opts: {
   bypass_budget?: boolean;
   think?: boolean;
 } = {}): Promise<string> {
-  if (!OLLAMA_CLOUD_KEY) {
-    throw new Error("OLLAMA_CLOUD_KEY not set; cannot reach Ollama Cloud");
-  }
   assertContextBudget(model, prompt, {
     system: opts.system,
     max_tokens: opts.max_tokens,
     bypass: opts.bypass_budget,
   });
+  const messages: Array<{ role: string; content: string }> = [];
+  if (opts.system) messages.push({ role: "system", content: opts.system });
+  messages.push({ role: "user", content: prompt });
   const body: Record<string, unknown> = {
     model,
-    prompt,
-    stream: false,
-    options: {
-      temperature: opts.temperature ?? 0.3,
-      num_predict: Math.max(opts.max_tokens ?? 800, 400),
-    },
+    messages,
+    provider: "ollama_cloud",
+    temperature: opts.temperature ?? 0.3,
+    max_tokens: Math.max(opts.max_tokens ?? 800, 400),
   };
-  if (opts.system) body.system = opts.system;
   if (opts.think !== undefined) body.think = opts.think;
-  const resp = await fetch(`${OLLAMA_CLOUD_URL}/api/generate`, {
+  const resp = await fetch(`${GATEWAY}/v1/chat`, {
     method: "POST",
-    headers: {
-      "Authorization": `Bearer ${OLLAMA_CLOUD_KEY}`,
-      "Content-Type": "application/json",
-    },
+    headers: { "Content-Type": "application/json" },
     body: JSON.stringify(body),
   });
   if (!resp.ok) {
-    throw new Error(`Ollama Cloud ${resp.status}: ${await resp.text().catch(() => "?")}`);
+    throw new Error(`gateway /v1/chat ${resp.status}: ${await resp.text().catch(() => "?")}`);
   }
   const data: any = await resp.json();
-  const text = typeof data.response === "string" ? data.response : "";
+  const text = data?.choices?.[0]?.message?.content ?? "";
   // Same non-throw policy as local generate() — empty text is a valid
   // signal that thinking ate the budget. Let generateContinuable retry.
-  return text;
+  return typeof text === "string" ? text : "";
 }
 
 // --- Prompt construction ---