Session infrastructure: OpenRouter + tree-split reducer + observer→LLM Team + scrum_applier #11

Merged
profit merged 118 commits from scrum/auto-apply-19814 into main 2026-04-27 15:55:24 +00:00
Showing only changes of commit 021c1b557f - Show all commits

View File

@ -394,11 +394,15 @@ export async function generate(model: string, prompt: string, opts: {
return text; return text;
} }
// Cloud generate — hits Ollama Cloud directly with the bearer key. Same // Cloud generate — routes through the lakehouse gateway's /v1/chat
// /api/generate shape as local Ollama; `thinking` field (for gpt-oss:Nb) // with provider="ollama_cloud". Phase 44 migration (2026-04-24): was
// is discarded, only `response` is returned. Caller should budget // hitting OLLAMA_CLOUD_URL/api/generate directly with a bearer key,
// num_predict ≥ 400 so thinking-model reasoning has room before the // bypassing the gateway's usage tracking + audit path. Now every call
// visible response starts. // flows through /v1/chat so /v1/usage accounts for it. Gateway holds
// the OLLAMA_CLOUD_KEY; caller no longer needs it in env.
//
// Thinking-model budget note: the ≥ 400-token floor still matters; it is now
// expressed via max_tokens on the /v1/chat request (formerly num_predict).
export async function generateCloud(model: string, prompt: string, opts: { export async function generateCloud(model: string, prompt: string, opts: {
max_tokens?: number; max_tokens?: number;
temperature?: number; temperature?: number;
@ -406,41 +410,35 @@ export async function generateCloud(model: string, prompt: string, opts: {
bypass_budget?: boolean; bypass_budget?: boolean;
think?: boolean; think?: boolean;
} = {}): Promise<string> { } = {}): Promise<string> {
if (!OLLAMA_CLOUD_KEY) {
throw new Error("OLLAMA_CLOUD_KEY not set; cannot reach Ollama Cloud");
}
assertContextBudget(model, prompt, { assertContextBudget(model, prompt, {
system: opts.system, system: opts.system,
max_tokens: opts.max_tokens, max_tokens: opts.max_tokens,
bypass: opts.bypass_budget, bypass: opts.bypass_budget,
}); });
const messages: Array<{ role: string; content: string }> = [];
if (opts.system) messages.push({ role: "system", content: opts.system });
messages.push({ role: "user", content: prompt });
const body: Record<string, any> = { const body: Record<string, any> = {
model, model,
prompt, messages,
stream: false, provider: "ollama_cloud",
options: { temperature: opts.temperature ?? 0.3,
temperature: opts.temperature ?? 0.3, max_tokens: Math.max(opts.max_tokens ?? 800, 400),
num_predict: Math.max(opts.max_tokens ?? 800, 400),
},
}; };
if (opts.system) body.system = opts.system;
if (opts.think !== undefined) body.think = opts.think; if (opts.think !== undefined) body.think = opts.think;
const resp = await fetch(`${OLLAMA_CLOUD_URL}/api/generate`, { const resp = await fetch(`${GATEWAY}/v1/chat`, {
method: "POST", method: "POST",
headers: { headers: { "Content-Type": "application/json" },
"Authorization": `Bearer ${OLLAMA_CLOUD_KEY}`,
"Content-Type": "application/json",
},
body: JSON.stringify(body), body: JSON.stringify(body),
}); });
if (!resp.ok) { if (!resp.ok) {
throw new Error(`Ollama Cloud ${resp.status}: ${await resp.text().catch(() => "?")}`); throw new Error(`gateway /v1/chat ${resp.status}: ${await resp.text().catch(() => "?")}`);
} }
const data: any = await resp.json(); const data: any = await resp.json();
const text = typeof data.response === "string" ? data.response : ""; const text = data?.choices?.[0]?.message?.content ?? "";
// Same non-throw policy as local generate() — empty text is a valid // Same non-throw policy as local generate() — empty text is a valid
// signal that thinking ate the budget. Let generateContinuable retry. // signal that thinking ate the budget. Let generateContinuable retry.
return text; return typeof text === "string" ? text : "";
} }
// --- Prompt construction --- // --- Prompt construction ---