lakehouse/tests/real-world/scrum_applier.ts

// scrum_applier.ts — the auto-apply pipeline.
//
// Turns the scrum master's signal into real commits. Reads
// data/_kb/scrum_reviews.jsonl, filters to rows where the scrum's
// own confidence is high enough to trust auto-apply (gradient_tier
// auto OR confidence_avg ≥ 90), asks a patch-emitting model to
// produce concrete old_string/new_string pairs, applies them via
// text replacement, runs `cargo check` after each, commits on green
// and reverts on red.
//
// Runs on its own branch (never on main). Every action is recorded
// in data/_kb/auto_apply.jsonl so the auditor and future iterations
// can see what landed and what reverted.
//
// Usage:
//   bun run tests/real-world/scrum_applier.ts                 # dry-run, print only
//   LH_APPLIER_COMMIT=1 bun run tests/real-world/scrum_applier.ts   # actually apply
//
// Env:
//   LH_APPLIER_BRANCH  — branch name (default: "scrum/auto-apply-${Date.now()}")
//   LH_APPLIER_MIN_CONF — minimum confidence_avg, default 90
//   LH_APPLIER_MAX_FILES — cap on files per run (default 5, keeps diffs reviewable)
//   LH_APPLIER_COMMIT — "1" to actually commit; otherwise dry-run
//   LH_APPLIER_MODEL  — patch-emitting model (default: kimi-k2:1t)

import { readFile, writeFile, appendFile } from "node:fs/promises";
import { existsSync } from "node:fs";
import { spawn } from "node:child_process";

const REPO = "/home/profit/lakehouse";
const GATEWAY = "http://localhost:3100";
const SCRUM_REVIEWS = `${REPO}/data/_kb/scrum_reviews.jsonl`;
const AUDIT_LOG = `${REPO}/data/_kb/auto_apply.jsonl`;

const MIN_CONF = Number(process.env.LH_APPLIER_MIN_CONF ?? 90);
const MAX_FILES = Number(process.env.LH_APPLIER_MAX_FILES ?? 5);
const COMMIT = process.env.LH_APPLIER_COMMIT === "1";
// Default patch-emitter model — qwen3-coder:480b is the coding specialist
// in the scrum ladder (rung 2). Swapped in from kimi-k2:1t after 2026-04-24
// data showed kimi-k2:1t produces architectural patches that cascade across
// the file (rename a field → 20 broken call sites). qwen3-coder is tuned
// for targeted code changes and tends to stay within the mechanical-patch
// constraint the prompt asks for. LLM Team's /api/run?mode=patch would be
// the ideal choice but that mode isn't registered in llm_team_ui.py yet.
const MODEL = process.env.LH_APPLIER_MODEL ?? "qwen3-coder:480b";
const BRANCH = process.env.LH_APPLIER_BRANCH ?? `scrum/auto-apply-${Date.now().toString(36)}`;

// Deny-list — anything whose path starts with one of these is skipped
// regardless of how confident the scrum is. Config / systemd / docs /
// auditor itself are off limits for auto-apply; they need a human.
const DENY_PREFIXES = [
  "config/",
  "ops/",
  "auditor/",
  "docs/",
  "data/",
  "/etc/",
  "mcp-server/",
  "ui/",
  "sidecar/",
  "scripts/",
];

function log(msg: string) { console.log(`[applier] ${msg}`); }

async function sh(cmd: string[], cwd = REPO): Promise<{ stdout: string; stderr: string; code: number }> {
  return new Promise((resolve) => {
    const p = spawn(cmd[0], cmd.slice(1), { cwd, stdio: ["ignore", "pipe", "pipe"] });
    let out = ""; let err = "";
    p.stdout.on("data", (d) => { out += d.toString(); });
    p.stderr.on("data", (d) => { err += d.toString(); });
    p.on("close", (code) => resolve({ stdout: out, stderr: err, code: code ?? 1 }));
  });
}

async function auditLog(row: Record<string, any>) {
  const line = JSON.stringify({ ...row, ts: new Date().toISOString() }) + "\n";
  await appendFile(AUDIT_LOG, line);
}

async function chat(opts: {
  provider: "ollama_cloud" | "openrouter" | "ollama";
  model: string;
  prompt: string;
  max_tokens?: number;
}): Promise<{ content: string; error?: string }> {
  try {
    const r = await fetch(`${GATEWAY}/v1/chat`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        provider: opts.provider,
        model: opts.model,
        messages: [{ role: "user", content: opts.prompt }],
        max_tokens: opts.max_tokens ?? 1500,
        temperature: 0.1,
      }),
      signal: AbortSignal.timeout(180000),
    });
    if (!r.ok) return { content: "", error: `${r.status}: ${(await r.text()).slice(0, 300)}` };
    const j: any = await r.json();
    return { content: j.choices?.[0]?.message?.content ?? "" };
  } catch (e) {
    return { content: "", error: String(e) };
  }
}

interface ScrumReview {
  file: string;
  reviewed_at: string;
  accepted_model: string;
  suggestions_preview: string;
  confidences_per_finding?: number[];
  confidence_avg?: number | null;
  confidence_min?: number | null;
  gradient_tier?: string;
  gradient_tier_avg?: string;
  verdict?: string;
  critical_failures_count?: number;
  schema_version?: number;
}

async function loadLatestReviews(): Promise<Map<string, ScrumReview>> {
  // Map of file → latest review for that file. Ordered by reviewed_at.
  if (!existsSync(SCRUM_REVIEWS)) return new Map();
  const text = await readFile(SCRUM_REVIEWS, "utf8");
  const rows: ScrumReview[] = text.split("\n").filter(Boolean).map(l => {
    try { return JSON.parse(l); } catch { return null; }
  }).filter((r): r is ScrumReview => r !== null);
  // Keep the LATEST review per file.
  const latest = new Map<string, ScrumReview>();
  for (const r of rows) {
    if (!r.file) continue;
    const prev = latest.get(r.file);
    if (!prev || (r.reviewed_at > prev.reviewed_at)) latest.set(r.file, r);
  }
  return latest;
}

function passesConfidenceGate(r: ScrumReview): boolean {
  const avg = r.confidence_avg ?? 0;
  const min = r.confidence_min ?? 0;
  // Must be auto or dry_run tier AND confidence_min ≥ MIN_CONF.
  // min is the conservative tier-lower-bound (one weak finding drags
  // the whole file to "simulation" or "block" tier).
  if (r.gradient_tier === "block" || r.gradient_tier === "simulation") return false;
  return avg >= MIN_CONF && min >= 70;
}

function passesDenyList(file: string): boolean {
  return !DENY_PREFIXES.some((p) => file.startsWith(p) || file === p.replace(/\/$/, ""));
}

interface Patch {
  file: string;
  old_string: string;
  new_string: string;
  rationale: string;
  confidence: number;
}

async function requestPatches(file: string, source: string, review: string): Promise<Patch[]> {
  const prompt = `You previously produced this review of ${file}:

─── REVIEW ───
${review}
─── END REVIEW ───

The review is high-confidence and the file is eligible for auto-apply. Produce CONCRETE PATCHES as JSON so they can be applied via string replacement.

HARD CONSTRAINTS (violations → patch rejected):
  1. Output ONE JSON object with a "patches" array. NO prose, no markdown fences.
  2. Each patch is {"old_string": "...", "new_string": "...", "rationale": "short", "confidence": 0-100}.
  3. "old_string" MUST appear EXACTLY ONCE in the file (verbatim, including whitespace + trailing newlines).
  4. Max diff size: 6 lines changed per patch (NOT 20). If the change needs >6 lines, emit {"patches": []} — too risky for auto-apply.
  5. Mechanical-only (the list — nothing else):
       (a) remove a #[allow(dead_code)] marker on a function now wired elsewhere
       (b) add a missing 'use' import statement
       (c) add a single field to a struct (no renames)
       (d) flip a single boolean/match-arm that doesn't cascade
       (e) add a fire-and-forget log or tracing call
  6. FORBIDDEN (automatic reject):
       • struct field RENAMES (break all call sites)
       • function signature changes
       • new modules, new traits, new dependencies
       • any change that requires editing another file to keep it compiling
  7. Each "new_string" MUST compile in isolation with the same surrounding code.
  8. Max 2 patches per file — quality over quantity.
  9. If you cannot produce at least one high-confidence mechanical patch under these constraints, output {"patches": []}. Don't guess.

─── SOURCE (${source.length} bytes) ───
${source.slice(0, 14000)}
─── END SOURCE ───

Emit ONLY the JSON object.`;

  const r = await chat({ provider: "ollama_cloud", model: MODEL, prompt, max_tokens: 2500 });
  if (r.error || !r.content) return [];

  // Strip markdown fences if model wrapped the JSON.
  let raw = r.content.trim();
  const fenceStart = raw.match(/^```(?:json)?\s*/);
  if (fenceStart) raw = raw.slice(fenceStart[0].length);
  if (raw.endsWith("```")) raw = raw.slice(0, -3).trim();
  // Find first { and last } to extract JSON block if there's prose.
  const first = raw.indexOf("{");
  const last = raw.lastIndexOf("}");
  if (first >= 0 && last > first) raw = raw.slice(first, last + 1);

  try {
    const obj = JSON.parse(raw);
    const patches: Patch[] = (obj.patches ?? []).filter((p: any) =>
      typeof p?.old_string === "string" &&
      typeof p?.new_string === "string" &&
      p.old_string !== p.new_string &&
      p.old_string.length > 0 &&
      typeof p?.confidence === "number"
    ).map((p: any) => ({
      file,
      old_string: p.old_string,
      new_string: p.new_string,
      rationale: String(p.rationale ?? ""),
      confidence: p.confidence,
    }));
    return patches;
  } catch (e) {
    log(`  ${file}: patch JSON parse failed — ${String(e).slice(0, 100)}`);
    return [];
  }
}

async function applyPatches(file: string, patches: Patch[]): Promise<{ applied: number; rejected: Array<{patch: Patch; reason: string}> }> {
  const full = `${REPO}/${file}`;
  let source = await readFile(full, "utf8");
  const rejected: Array<{patch: Patch; reason: string}> = [];
  let applied = 0;
  for (const p of patches) {
    // Confidence gate at the individual-patch level.
    if (p.confidence < MIN_CONF) { rejected.push({patch: p, reason: `confidence ${p.confidence} < ${MIN_CONF}`}); continue; }
    // Uniqueness gate.
    const occurrences = source.split(p.old_string).length - 1;
    if (occurrences === 0) { rejected.push({patch: p, reason: "old_string not found"}); continue; }
    if (occurrences > 1) { rejected.push({patch: p, reason: `old_string appears ${occurrences}× (not unique)`}); continue; }
    // Size gate — no patch touches > 6 lines (diff discipline; matches prompt).
    // Raised from 20 to 6 after 2026-04-24 data showed most 10-20 line patches
    // cascaded and broke the build. Mechanical changes genuinely fit in 6 lines.
    const oldLines = p.old_string.split("\n").length;
    const newLines = p.new_string.split("\n").length;
    if (Math.max(oldLines, newLines) > 6) { rejected.push({patch: p, reason: `patch too large (${Math.max(oldLines,newLines)} lines, max 6)`}); continue; }
    source = source.replace(p.old_string, p.new_string);
    applied++;
  }
  if (applied > 0) await writeFile(full, source);
  return { applied, rejected };
}

async function cargoCheck(): Promise<boolean> {
  const r = await sh(["cargo", "check", "--workspace"]);
  return r.code === 0;
}

async function gitCommit(file: string, patches: Patch[]): Promise<boolean> {
  if (!COMMIT) { log(`  (dry-run) would commit ${file}`); return true; }
  const addR = await sh(["git", "add", file]);
  if (addR.code !== 0) { log(`  git add failed: ${addR.stderr.slice(0, 200)}`); return false; }
  const msg = `auto-apply: ${patches.length} high-confidence fix${patches.length === 1 ? "" : "es"} in ${file}\n\n${patches.map(p => `- ${p.rationale} (conf ${p.confidence}%)`).join("\n")}\n\n🤖 scrum_applier.ts`;
  const commitR = await sh(["git", "commit", "-m", msg]);
  if (commitR.code !== 0) { log(`  git commit failed: ${commitR.stderr.slice(0, 200)}`); return false; }
  log(`  ✓ committed ${file}`);
  return true;
}

async function revertFile(file: string): Promise<void> {
  await sh(["git", "checkout", "--", file]);
}

async function main() {
  log(`starting · min_conf=${MIN_CONF} max_files=${MAX_FILES} model=${MODEL} commit=${COMMIT}`);

  if (COMMIT) {
    const headR = await sh(["git", "rev-parse", "--abbrev-ref", "HEAD"]);
    const currentBranch = headR.stdout.trim();
    if (currentBranch === "main") {
      log(`refusing to run on main — create a branch first or set LH_APPLIER_BRANCH`);
      const coR = await sh(["git", "checkout", "-b", BRANCH]);
      if (coR.code !== 0) { log(`could not create branch ${BRANCH}: ${coR.stderr.slice(0, 200)}`); process.exit(1); }
      log(`working branch: ${BRANCH}`);
    } else {
      log(`working branch: ${currentBranch}`);
    }
  }

  const reviews = await loadLatestReviews();
  log(`loaded ${reviews.size} latest reviews`);

  const eligible = [...reviews.values()].filter(r =>
    passesConfidenceGate(r) && passesDenyList(r.file)
  ).sort((a, b) => (b.confidence_avg ?? 0) - (a.confidence_avg ?? 0));

  log(`${eligible.length} pass confidence gate + deny-list`);
  log(`taking top ${Math.min(MAX_FILES, eligible.length)} by confidence`);

  let committedFiles = 0;
  let revertedFiles = 0;

  for (const r of eligible.slice(0, MAX_FILES)) {
    log(`${r.file} (conf_avg=${r.confidence_avg} tier=${r.gradient_tier})`);
    const full = `${REPO}/${r.file}`;
    if (!existsSync(full)) { log(`  skip — file not found on disk`); continue; }

    const source = await readFile(full, "utf8");
    const patches = await requestPatches(r.file, source, r.suggestions_preview ?? "");

    if (patches.length === 0) {
      log(`  no patches produced`);
      await auditLog({ action: "no_patches", file: r.file, reviewer_model: r.accepted_model });
      continue;
    }

    log(`  ${patches.length} candidate patches`);
    const { applied, rejected } = await applyPatches(r.file, patches);
    log(`  applied ${applied}, rejected ${rejected.length}`);
    for (const rj of rejected) log(`    ✗ ${rj.reason}`);

    if (applied === 0) {
      await auditLog({ action: "all_rejected", file: r.file, rejected: rejected.map(x => x.reason) });
      continue;
    }

    log(`  running cargo check...`);
    const green = await cargoCheck();
    if (!green) {
      log(`  ✗ build red — reverting ${r.file}`);
      await revertFile(r.file);
      revertedFiles++;
      await auditLog({ action: "build_red_reverted", file: r.file, patches_applied: applied });
      continue;
    }

    log(`  ✓ build green`);
    const ok = await gitCommit(r.file, patches.slice(0, applied));
    if (ok) {
      committedFiles++;
      await auditLog({
        action: COMMIT ? "committed" : "dry_run_committed",
        file: r.file,
        patches_applied: applied,
        patches_rejected: rejected.length,
        confidence_avg: r.confidence_avg,
        gradient_tier: r.gradient_tier,
        reviewer_model: r.accepted_model,
      });
    }
  }

  log(`DONE · committed=${committedFiles} reverted=${revertedFiles}`);
}

await main();