lakehouse/bot/propose.ts
profit f44b6b3e6b Control-plane pivot: Phase 38-44 plan + bot scaffold
Direction shift 2026-04-22: docs/CONTROL_PLANE_PRD.md becomes the
long-horizon architecture target. Existing Lakehouse (docs/PRD.md,
Phases 0-37) is preserved as the reference implementation and first
consumer. New 6-layer architecture:

  L1 Universal API /v1/chat /v1/usage /v1/sessions /v1/tools /v1/context
  L2 Routing & Policy Engine (rules, fallback chains, cost gating)
  L3 Provider Adapter Layer (Ollama + OpenRouter + Gemini + Claude)
  L4 Knowledge + Memory + Playbooks (already built)
  L5 Execution Loop (scenarios + bot/cycle.ts instances)
  L6 Observability + token accounting

Phases 38-44 sequenced with detailed per-phase specs in the PRD.
Current scope: staffing domain (synthetic workers_500k, contracts,
emails, SMS, playbooks). DevOps (Terraform/Ansible) is long-horizon
target — architecture-compatible but not current.

Files added:
- docs/CONTROL_PLANE_PRD.md — 6-layer architecture, Phase 38-44
  sequencing with staffing-first Truth Layer + Validation pipeline
- bot/ — manual-only PR bot scaffold. First consumer test-bed for
  /v1/chat (Phase 38). Mem0-aligned ADD/UPDATE/NOOP apply semantics;
  KB feedback loop reads prior cycles on same gap and injects into
  cloud prompt so bot cycles compound like scenario.ts runs do.
- tests/multi-agent/run_stress.ts — the 6-task diverse stress test
  referenced in the previous commit but missing from its staging

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 02:43:31 -05:00

142 lines
4.9 KiB
TypeScript

// Gap detection + cloud proposal.
//
// Gap detection: scan docs/PRD.md for lines tagged [bot-eligible].
// Each match becomes a Gap with surrounding context.
//
// Proposal: one-shot call to the T3 cloud model via the Python sidecar's
// /generate endpoint. Asks for a structured JSON response with file
// contents. Truncation resistance is meant to come from Phase 21's
// generate_continuable, but it is not wired in here yet: for now we pass
// max_tokens high and rely on the model completing in one pass. Swap to
// the Rust continuation wrapper if we see truncation.
import { readFile } from "node:fs/promises";
import { createHash } from "node:crypto";
import type { Gap, Proposal } from "./types.ts";
/** Base URL of the sidecar that serves /generate; override with LH_SIDECAR_URL. */
const SIDECAR_URL = process.env["LH_SIDECAR_URL"] ?? "http://localhost:3200";
/** Absolute path of the repository checkout. */
const REPO_ROOT = "/home/profit/lakehouse";
/** Absolute path of the PRD file scanned for [bot-eligible] tags. */
const PRD_PATH = [REPO_ROOT, "docs", "PRD.md"].join("/");
/** Model name sent to the sidecar; override with LH_BOT_MODEL. */
const CLOUD_MODEL = process.env["LH_BOT_MODEL"] ?? "gpt-oss:120b";
/** max_tokens value sent with every proposal request. */
const MAX_TOKENS = 6000;
/**
 * Scans the PRD file for lines tagged `[bot-eligible]` and turns each into a Gap.
 *
 * Each Gap carries:
 * - `id`: the first 12 hex characters of the SHA-256 of the raw (untrimmed) line,
 * - `prd_line`: the tagged line with surrounding whitespace trimmed,
 * - `context`: the tagged line plus up to five following lines, joined with "\n",
 * - `source_file`: always "docs/PRD.md",
 * - `line_number`: the 1-based line number of the tagged line.
 *
 * @returns The gaps in file order; an empty array when no line is tagged.
 *   The returned promise rejects if the PRD file cannot be read.
 */
export async function findGaps(): Promise<Gap[]> {
  const prdLines = (await readFile(PRD_PATH, "utf8")).split("\n");
  const found: Gap[] = [];

  prdLines.forEach((line, index) => {
    if (!line.includes("[bot-eligible]")) {
      return;
    }
    const digest = createHash("sha256").update(line).digest("hex");
    found.push({
      id: digest.slice(0, 12),
      prd_line: line.trim(),
      // slice() clamps its end index, so no explicit bound check is needed
      // when fewer than six lines remain.
      context: prdLines.slice(index, index + 6).join("\n"),
      source_file: "docs/PRD.md",
      line_number: index + 1,
    });
  });

  return found;
}
// System prompt sent as the `system` field of the sidecar /generate request
// made by generateProposal. It instructs the model to answer with a single
// JSON object (summary, rationale, files, estimated_loc) and lists the
// size and path rules a proposal must follow. The text is sent verbatim, so
// any edit here changes what the model is asked to do.
const SYSTEM_PROMPT = `You are an assistant that proposes small, testable code changes to the Lakehouse repo.
The Lakehouse is a Rust-first data platform with 13 crates + Bun/TypeScript test harness.
You will be given one PRD gap tagged [bot-eligible] and must respond with a STRICT JSON object — no prose.
Rules:
- Response MUST be a single JSON object, no markdown fences, no commentary.
- Change MUST be small: <200 lines total, ≤5 files.
- Include at least one test file (new or modified) that proves the change.
- NEVER touch .git/, secrets, lakehouse.toml, docs/ADR-*, docs/DECISIONS.md, docs/PRD.md, /etc/, /root/, Cargo.lock.
- Paths MUST be repo-relative (no leading /).
- Whole-file contents only — no patches, no diffs.
Response shape:
{
"summary": "one line",
"rationale": "why this addresses the gap",
"files": [ { "path": "crates/foo/src/bar.rs", "content": "<full file>", "is_new": false } ],
"estimated_loc": 42
}`;
/**
 * Asks the cloud model (via the sidecar's /generate endpoint) for a change
 * proposal that addresses one PRD gap, and normalizes the reply.
 *
 * The model reply is untrusted input, so every field is coerced defensively:
 * non-object entries in `files` are dropped instead of crashing, entries with
 * an empty path are dropped, and non-numeric counts fall back to 0.
 *
 * @param gap - The gap to address; its `context` and `line_number` go into the prompt.
 * @param historySummary - Optional text about earlier cycles on the same gap;
 *   appended to the prompt when non-empty.
 * @returns The normalized proposal, tagged with the model name and the sum of
 *   prompt and completion tokens reported by the sidecar.
 * @throws Error when the sidecar responds with a non-2xx status, when no JSON
 *   object can be extracted from the reply, or when `files` is not an array.
 *   Network failures and the 3-minute timeout reject from `fetch` itself.
 */
export async function generateProposal(gap: Gap, historySummary: string = ""): Promise<Proposal> {
  const sections = [
    `PRD gap (line ${gap.line_number}):`,
    "```",
    gap.context,
    "```",
    "",
  ];
  if (historySummary) {
    sections.push(historySummary, "");
  }
  sections.push("Propose a small change that addresses this gap. Respond with the JSON object only.");
  const userPrompt = sections.join("\n");

  const r = await fetch(`${SIDECAR_URL}/generate`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      model: CLOUD_MODEL,
      system: SYSTEM_PROMPT,
      prompt: userPrompt,
      temperature: 0.2,
      max_tokens: MAX_TOKENS,
      think: false,
    }),
    signal: AbortSignal.timeout(180000), // cloud T3 can be slow — 3 min
  });
  if (!r.ok) {
    throw new Error(`sidecar ${r.status}: ${await r.text()}`);
  }

  const j = (await r.json()) as any;

  // The sidecar body may be null or carry a non-string text field; treat
  // anything that is not a string as "no output" so the failure surfaces as
  // the descriptive error below rather than a TypeError inside extractJson.
  const rawText: unknown = j?.text ?? j?.response;
  const raw = typeof rawText === "string" ? rawText : "";

  // Coerce a reported count to a finite number; missing or junk values count as 0.
  const toCount = (value: unknown): number => {
    const n = Number(value ?? 0);
    return Number.isFinite(n) ? n : 0;
  };
  const usage = j?.usage ?? {};
  const tokens = toCount(usage.prompt_tokens) + toCount(usage.completion_tokens);

  const parsed = extractJson(raw);
  if (!parsed || typeof parsed !== "object") {
    throw new Error(`model returned no JSON object. Raw head: ${raw.slice(0, 300)}`);
  }
  if (!Array.isArray(parsed.files)) {
    throw new Error(`proposal.files not an array: ${JSON.stringify(parsed).slice(0, 200)}`);
  }

  const entries: unknown[] = parsed.files;
  const files = entries
    // A null or primitive entry would otherwise throw on property access.
    .filter((f): f is Record<string, unknown> => typeof f === "object" && f !== null)
    .map((f) => ({
      path: String(f.path ?? ""),
      content: String(f.content ?? ""),
      is_new: Boolean(f.is_new),
    }))
    // Content is always a string after coercion, so only the path can disqualify an entry.
    .filter((f) => f.path !== "");

  return {
    summary: String(parsed.summary ?? "").trim(),
    rationale: String(parsed.rationale ?? "").trim(),
    files,
    estimated_loc: toCount(parsed.estimated_loc),
    model_used: CLOUD_MODEL,
    tokens_used: tokens,
  };
}
/**
 * Finds and parses the first balanced top-level JSON object in `text`.
 *
 * Tolerates a leading "```json" / "```" fence and a trailing "```" fence even
 * though the model is asked not to emit them, and tolerates prose around the
 * object.
 *
 * The scan is string-aware: braces inside JSON string values (very common
 * here, because file contents are source code) do not affect the nesting
 * depth, and backslash escapes such as `\"` are honoured. A stray `}` that
 * appears before any `{` is ignored rather than driving the depth negative.
 *
 * If a balanced candidate fails to parse, scanning continues after it and the
 * next top-level candidate is tried.
 *
 * @param text - Raw model output.
 * @returns The parsed object, or `null` when no candidate parses.
 */
function extractJson(text: string): any | null {
  const cleaned = text.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;

  for (let i = 0; i < cleaned.length; i++) {
    const c = cleaned[i];

    if (inString) {
      if (escaped) {
        // The character after a backslash is consumed verbatim.
        escaped = false;
      } else if (c === "\\") {
        escaped = true;
      } else if (c === '"') {
        inString = false;
      }
      continue;
    }

    if (c === '"') {
      // Quotes in surrounding prose are not JSON strings; only track string
      // state while inside a candidate object.
      if (depth > 0) inString = true;
    } else if (c === "{") {
      if (depth === 0) start = i;
      depth++;
    } else if (c === "}") {
      // Ignore an unmatched closer instead of letting depth go negative.
      if (depth === 0) continue;
      depth--;
      if (depth === 0) {
        try {
          return JSON.parse(cleaned.slice(start, i + 1));
        } catch {
          // Not valid JSON; keep looking for a later candidate.
          start = -1;
        }
      }
    }
  }
  return null;
}