Some checks failed
lakehouse/auditor 2 blocking issues: State field rename likely incomplete — `opencode_key` may not exist on `self.state`
Ollama Pro plan went live today (39-model fleet on the same
OLLAMA_CLOUD_KEY) and OpenCode Zen was already wired in the gateway
but not consumed. Routing every gpt-oss call site to faster /
stronger replacements:
| Site | gpt-oss → replacement | Why |
|---|---|---|
| ollama_cloud default | gpt-oss:120b → deepseek-v3.2 | newest DeepSeek revision; live-probed `pong` |
| openrouter default | openai/gpt-oss-120b:free → x-ai/grok-4.1-fast | already the scrum LADDER's PRIMARY |
| modes.toml staffing_inference | openai/gpt-oss-120b:free → kimi-k2.6 | coding-specialized, on Ollama Pro |
| modes.toml doc_drift_check | gpt-oss:120b → gemini-3-flash-preview | speed leader for factual checks |
| scrum_master_pipeline tree-split MAP+REDUCE | gpt-oss:120b → gemini-3-flash-preview | latency-dominated path (5-20× per file) |
| bot/propose.ts CLOUD_MODEL | gpt-oss:120b → deepseek-v3.2 | same Ollama key, faster |
| mcp-server/observer.ts overseer label fallback | gpt-oss:120b → claude-opus-4-7 | matches new overseer model |
| crates/gateway/src/execution_loop overseer escalation | ollama_cloud/gpt-oss:120b → opencode/claude-opus-4-7 | frontier reasoning matters here — fires only after local self-correct fails twice; Zen pay-per-token cost is bounded |
Verification:
- `cargo check -p gateway --tests` — clean
- Live probes through localhost:3100/v1/chat:
- `opencode/claude-opus-4-7` → "pong"
- `gemini-3-flash-preview` (ollama_cloud) → "pong"
- `kimi-k2.6` (ollama_cloud) → "pong"
- `deepseek-v3.2` (ollama_cloud) → "Pong! 🏓"
Notes:
- kimi-k2:1t still upstream-broken (HTTP 500 on Ollama Pro probe today,
matches yesterday's memory). Replacement table never picks it.
- The Rust changes need a `systemctl restart lakehouse.service` to
take effect on the running gateway. TS callers reload on next run.
- aibridge/src/context.rs still has gpt-oss:{20b,120b} in its window-
size lookup table; harmless and kept for callers that pass it
explicitly as an override.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
152 lines
5.4 KiB
TypeScript
// Gap detection + cloud proposal.
//
// Gap detection: scan docs/PRD.md for lines tagged [bot-eligible].
// Each match becomes a Gap with surrounding context.
//
// Proposal: one-shot call to the T3 cloud model through the gateway's
// /v1/chat endpoint (see the Phase 44 migration note below; this
// originally hit the Python sidecar's /generate directly). Asks for a
// structured JSON response with file contents. Truncation-resistant via
// Phase 21's generate_continuable — for now we pass max_tokens high and
// rely on the model completing in one pass; swap to the Rust
// continuation wrapper if we see truncation.
|
import { readFile } from "node:fs/promises";
|
|
import { createHash } from "node:crypto";
|
|
import type { Gap, Proposal } from "./types.ts";
|
|
|
|
// Phase 44 migration (2026-04-27): bot/propose.ts now flows through
// the gateway's /v1/chat instead of hitting the sidecar's /generate
// directly. /v1/usage tracks the call, Langfuse traces it, observer
// sees it. Gateway owns the routing.
//
// 2026-04-28: gpt-oss:120b → deepseek-v3.2 via Ollama Pro. Newer
// DeepSeek revision, faster, still on the same OLLAMA_CLOUD_KEY.

// Gateway base URL; LH_GATEWAY_URL overrides the localhost default.
const GATEWAY_URL = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
// Absolute checkout root on the host running the bot.
const REPO_ROOT = "/home/profit/lakehouse";
// PRD scanned by findGaps() for [bot-eligible] tags.
const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`;
// Proposal model (routed with provider "ollama_cloud"); LH_BOT_MODEL overrides.
const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "deepseek-v3.2";
// Completion budget for the one-shot proposal call.
const MAX_TOKENS = 6000;
|
|
|
|
export async function findGaps(): Promise<Gap[]> {
|
|
const prd = await readFile(PRD_PATH, "utf8");
|
|
const lines = prd.split("\n");
|
|
const gaps: Gap[] = [];
|
|
for (let i = 0; i < lines.length; i++) {
|
|
if (!lines[i].includes("[bot-eligible]")) continue;
|
|
const contextLines = lines.slice(i, Math.min(i + 6, lines.length)).join("\n");
|
|
const id = createHash("sha256").update(lines[i]).digest("hex").slice(0, 12);
|
|
gaps.push({
|
|
id,
|
|
prd_line: lines[i].trim(),
|
|
context: contextLines,
|
|
source_file: "docs/PRD.md",
|
|
line_number: i + 1,
|
|
});
|
|
}
|
|
return gaps;
|
|
}
|
|
|
|
// Contract for the proposal model: strict JSON, small change, test
// included. Keep the "Response shape" section in sync with the Proposal
// type in ./types.ts and the validation in generateProposal().
const SYSTEM_PROMPT = `You are an assistant that proposes small, testable code changes to the Lakehouse repo.
The Lakehouse is a Rust-first data platform with 13 crates + Bun/TypeScript test harness.
You will be given one PRD gap tagged [bot-eligible] and must respond with a STRICT JSON object — no prose.

Rules:
- Response MUST be a single JSON object, no markdown fences, no commentary.
- Change MUST be small: <200 lines total, ≤5 files.
- Include at least one test file (new or modified) that proves the change.
- NEVER touch .git/, secrets, lakehouse.toml, docs/ADR-*, docs/DECISIONS.md, docs/PRD.md, /etc/, /root/, Cargo.lock.
- Paths MUST be repo-relative (no leading /).
- Whole-file contents only — no patches, no diffs.

Response shape:
{
"summary": "one line",
"rationale": "why this addresses the gap",
"files": [ { "path": "crates/foo/src/bar.rs", "content": "<full file>", "is_new": false } ],
"estimated_loc": 42
}`;
|
|
|
|
export async function generateProposal(gap: Gap, historySummary: string = ""): Promise<Proposal> {
|
|
const sections = [
|
|
`PRD gap (line ${gap.line_number}):`,
|
|
"```",
|
|
gap.context,
|
|
"```",
|
|
"",
|
|
];
|
|
if (historySummary) {
|
|
sections.push(historySummary, "");
|
|
}
|
|
sections.push("Propose a small change that addresses this gap. Respond with the JSON object only.");
|
|
const userPrompt = sections.join("\n");
|
|
|
|
const r = await fetch(`${GATEWAY_URL}/v1/chat`, {
|
|
method: "POST",
|
|
headers: { "content-type": "application/json" },
|
|
body: JSON.stringify({
|
|
model: CLOUD_MODEL,
|
|
provider: "ollama_cloud",
|
|
messages: [
|
|
{ role: "system", content: SYSTEM_PROMPT },
|
|
{ role: "user", content: userPrompt },
|
|
],
|
|
temperature: 0.2,
|
|
max_tokens: MAX_TOKENS,
|
|
think: false,
|
|
}),
|
|
signal: AbortSignal.timeout(180000), // cloud T3 can be slow — 3 min
|
|
});
|
|
if (!r.ok) {
|
|
throw new Error(`gateway /v1/chat ${r.status}: ${await r.text()}`);
|
|
}
|
|
const j = await r.json() as any;
|
|
const raw: string = j?.choices?.[0]?.message?.content ?? "";
|
|
const usage = j.usage ?? {};
|
|
const tokens = (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0);
|
|
|
|
const parsed = extractJson(raw);
|
|
if (!parsed || typeof parsed !== "object") {
|
|
throw new Error(`model returned no JSON object. Raw head: ${raw.slice(0, 300)}`);
|
|
}
|
|
if (!Array.isArray(parsed.files)) {
|
|
throw new Error(`proposal.files not an array: ${JSON.stringify(parsed).slice(0, 200)}`);
|
|
}
|
|
|
|
return {
|
|
summary: String(parsed.summary ?? "").trim(),
|
|
rationale: String(parsed.rationale ?? "").trim(),
|
|
files: parsed.files.map((f: any) => ({
|
|
path: String(f.path ?? ""),
|
|
content: String(f.content ?? ""),
|
|
is_new: Boolean(f.is_new),
|
|
})).filter((f: any) => f.path && f.content !== undefined),
|
|
estimated_loc: Number(parsed.estimated_loc ?? 0),
|
|
model_used: CLOUD_MODEL,
|
|
tokens_used: tokens,
|
|
};
|
|
}
|
|
|
|
// Find the first balanced JSON object in the string. Tolerates leading
|
|
// "```json" fences even though we asked the model not to emit them.
|
|
function extractJson(text: string): any | null {
|
|
const cleaned = text.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
|
|
let depth = 0;
|
|
let start = -1;
|
|
for (let i = 0; i < cleaned.length; i++) {
|
|
const c = cleaned[i];
|
|
if (c === "{") {
|
|
if (depth === 0) start = i;
|
|
depth++;
|
|
} else if (c === "}") {
|
|
depth--;
|
|
if (depth === 0 && start >= 0) {
|
|
try {
|
|
return JSON.parse(cleaned.slice(start, i + 1));
|
|
} catch {
|
|
start = -1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return null;
|
|
}
|