// Gap detection + cloud proposal.
//
// Gap detection: scan docs/PRD.md for lines tagged [bot-eligible].
// Each match becomes a Gap with surrounding context.
//
// Proposal: one-shot call to the T3 cloud model via the Python sidecar's
// /generate endpoint. Asks for a structured JSON response with file
// contents. Truncation-resistant via Phase 21's generate_continuable —
// for now we pass max_tokens high and rely on the model completing in
// one pass; swap to the Rust continuation wrapper if we see truncation.

import { readFile } from "node:fs/promises";
import { createHash } from "node:crypto";
import type { Gap, Proposal } from "./types.ts";

// Phase 44 migration (2026-04-27): bot/propose.ts now flows through
// the gateway's /v1/chat instead of hitting the sidecar's /generate
// directly. /v1/usage tracks the call, Langfuse traces it, observer
// sees it. Same upstream model (CLOUD_MODEL gpt-oss:120b on
// Ollama Cloud) — gateway just owns the routing.

const GATEWAY_URL = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
const REPO_ROOT = "/home/profit/lakehouse";
const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`;
const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "gpt-oss:120b";
const MAX_TOKENS = 6000;

export async function findGaps(): Promise<Gap[]> {
  const prd = await readFile(PRD_PATH, "utf8");
  const lines = prd.split("\n");
  const gaps: Gap[] = [];
  for (let i = 0; i < lines.length; i++) {
    if (!lines[i].includes("[bot-eligible]")) continue;
    const contextLines = lines.slice(i, Math.min(i + 6, lines.length)).join("\n");
    const id = createHash("sha256").update(lines[i]).digest("hex").slice(0, 12);
    gaps.push({
      id,
      prd_line: lines[i].trim(),
      context: contextLines,
      source_file: "docs/PRD.md",
      line_number: i + 1,
    });
  }
  return gaps;
}

const SYSTEM_PROMPT = `You are an assistant that proposes small, testable code changes to the Lakehouse repo. The Lakehouse is a Rust-first data platform with 13 crates + Bun/TypeScript test harness.

You will be given one PRD gap tagged [bot-eligible] and must respond with a STRICT JSON object — no prose.

Rules:
- Response MUST be a single JSON object, no markdown fences, no commentary.
- Change MUST be small: <200 lines total, ≤5 files.
- Include at least one test file (new or modified) that proves the change.
- NEVER touch .git/, secrets, lakehouse.toml, docs/ADR-*, docs/DECISIONS.md, docs/PRD.md, /etc/, /root/, Cargo.lock.
- Paths MUST be repo-relative (no leading /).
- Whole-file contents only — no patches, no diffs.

Response shape:
{
  "summary": "one line",
  "rationale": "why this addresses the gap",
  "files": [
    { "path": "crates/foo/src/bar.rs", "content": "<whole file contents>", "is_new": false }
  ],
  "estimated_loc": 42
}`;
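// Hypothetical guard (a sketch, not in the original module): the prompt above
// forbids certain paths, but nothing on the model side is guaranteed, so
// whatever writes proposal.files to disk could screen each path first. The
// lists below simply mirror the prompt's NEVER-touch rules; the "secrets"
// rule needs a project-specific check and is not covered here. The names
// isForbiddenPath / FORBIDDEN_* are illustrative only.
const FORBIDDEN_PREFIXES = [".git/", "docs/ADR-", "/etc/", "/root/"];
const FORBIDDEN_FILES = ["lakehouse.toml", "docs/DECISIONS.md", "docs/PRD.md", "Cargo.lock"];

export function isForbiddenPath(path: string): boolean {
  // A leading "/" already violates the repo-relative rule (and covers the
  // /etc/ and /root/ prefixes, kept below for fidelity to the prompt list).
  if (path.startsWith("/")) return true;
  if (FORBIDDEN_FILES.includes(path)) return true;
  return FORBIDDEN_PREFIXES.some((prefix) => path.startsWith(prefix));
}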
export async function generateProposal(gap: Gap, historySummary: string = ""): Promise<Proposal> {
  const sections = [
    `PRD gap (line ${gap.line_number}):`,
    "```",
    gap.context,
    "```",
    "",
  ];
  if (historySummary) {
    sections.push(historySummary, "");
  }
  sections.push("Propose a small change that addresses this gap. Respond with the JSON object only.");
  const userPrompt = sections.join("\n");

  const r = await fetch(`${GATEWAY_URL}/v1/chat`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      model: CLOUD_MODEL,
      provider: "ollama_cloud",
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: userPrompt },
      ],
      temperature: 0.2,
      max_tokens: MAX_TOKENS,
      think: false,
    }),
    signal: AbortSignal.timeout(180000), // cloud T3 can be slow — 3 min
  });
  if (!r.ok) {
    throw new Error(`gateway /v1/chat ${r.status}: ${await r.text()}`);
  }

  const j = await r.json() as any;
  const raw: string = j?.choices?.[0]?.message?.content ?? "";
  const usage = j.usage ?? {};
  const tokens = (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0);

  const parsed = extractJson(raw);
  if (!parsed || typeof parsed !== "object") {
    throw new Error(`model returned no JSON object. Raw head: ${raw.slice(0, 300)}`);
  }
  if (!Array.isArray(parsed.files)) {
    throw new Error(`proposal.files not an array: ${JSON.stringify(parsed).slice(0, 200)}`);
  }

  return {
    summary: String(parsed.summary ?? "").trim(),
    rationale: String(parsed.rationale ?? "").trim(),
    files: parsed.files.map((f: any) => ({
      path: String(f.path ?? ""),
      content: String(f.content ?? ""),
      is_new: Boolean(f.is_new),
    })).filter((f: any) => f.path && f.content !== undefined),
    estimated_loc: Number(parsed.estimated_loc ?? 0),
    model_used: CLOUD_MODEL,
    tokens_used: tokens,
  };
}

// Find the first balanced JSON object in the string. Tolerates leading
// "```json" fences even though we asked the model not to emit them.
// Braces inside JSON strings are ignored — proposal file contents are
// full of { and } that must not affect nesting depth.
function extractJson(text: string): any | null {
  const cleaned = text.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;
  for (let i = 0; i < cleaned.length; i++) {
    const c = cleaned[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (c === "\\") escaped = true;
      else if (c === '"') inString = false;
      continue;
    }
    if (c === '"') {
      // Only treat quotes as string delimiters inside an object; quotes in
      // leading prose (depth 0) are just text.
      if (depth > 0) inString = true;
    } else if (c === "{") {
      if (depth === 0) start = i;
      depth++;
    } else if (c === "}") {
      depth--;
      if (depth === 0 && start >= 0) {
        try {
          return JSON.parse(cleaned.slice(start, i + 1));
        } catch {
          start = -1;
        }
      }
    }
  }
  return null;
}
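// Usage sketch (an assumption, not part of the original module): wiring the
// two exports together as a one-shot CLI. import.meta.main is Bun-specific
// and keeps this block inert when the module is imported by the bot harness.
if (import.meta.main) {
  const gaps = await findGaps();
  console.log(`found ${gaps.length} [bot-eligible] gap(s) in docs/PRD.md`);
  if (gaps.length > 0) {
    const p = await generateProposal(gaps[0]);
    console.log(`proposal for ${gaps[0].id}: ${p.summary}`);
    console.log(`  ${p.files.length} file(s), ~${p.estimated_loc} LOC, ${p.tokens_used} tokens`);
  }
}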