// distill.ts — single-entry CLI dispatcher for the distillation
// pipeline. Mirrors the spec's `./scripts/distill <command>` shape.
//
// USAGE
//   bun run scripts/distillation/distill.ts <command> [flags]
//
// COMMANDS
//   build-evidence     materialize EvidenceRecord rows from data/_kb/*.jsonl
//   score              run deterministic Success Scorer
//   export-rag         RAG export (--include-review opt-in)
//   export-sft         SFT export (--include-partial opt-in)
//   export-preference  preference export
//   export-all         RAG + SFT + preference (no opt-ins by default)
//   run-all            full pipeline with structured receipts (Phase 5)
//   receipts           read summary for a run (--run-id <run_id>)
//   acceptance         fixture-driven end-to-end gate (Phase 6)
//   health             evidence health audit
//                      NOTE(review): not wired up in this file — it
//                      currently prints the help text.
//
// All commands accept --dry-run. It is forwarded to build-evidence,
// score and the export-* commands; run-all does not receive it (a
// warning is printed), and receipts/acceptance do not look at it.
//
// EXIT CODES (as set in this file)
//   0  success / help
//   1  validation or pipeline failure, or an unexpected error
//   2  usage error (unknown command, missing --run-id, run not found)

import { materializeAll } from "./build_evidence_index";
import { scoreAll } from "./score_runs";
import { exportRag } from "./export_rag";
import { exportSft } from "./export_sft";
import { exportPreference } from "./export_preference";
import { runAllWithReceipts } from "./receipts";
import { TRANSFORMS } from "./transforms";
import { spawnSync } from "node:child_process";
import { readFileSync } from "node:fs";

// Repository root every command operates on; overridable via LH_DISTILL_ROOT.
const DEFAULT_ROOT = process.env.LH_DISTILL_ROOT ?? "/home/profit/lakehouse";

/**
 * Parses `process.argv`, dispatches to the requested pipeline command and
 * prints a one- or two-line summary of its result.
 *
 * The command name is read from `process.argv[2]`; boolean flags are
 * detected anywhere in argv. Failures are signalled through
 * `process.exit` (see the exit-code table in the file header).
 */
async function main(): Promise<void> {
  const cmd = process.argv[2];
  const dry_run = process.argv.includes("--dry-run");
  const include_partial = process.argv.includes("--include-partial");
  const include_review = process.argv.includes("--include-review");
  // One timestamp shared by every step started from this invocation.
  const recorded_at = new Date().toISOString();

  switch (cmd) {
    case "build-evidence": {
      const r = await materializeAll({ root: DEFAULT_ROOT, transforms: TRANSFORMS, recorded_at, dry_run });
      console.log(`[build-evidence] in=${r.totals.rows_read} out=${r.totals.rows_written} skip=${r.totals.rows_skipped} dedup=${r.totals.rows_deduped}`);
      if (!dry_run) console.log(`[build-evidence] receipt: ${r.receipt_path}`);
      // A failed validation must fail the process so callers can gate on it.
      if (!r.receipt.validation_pass) process.exit(1);
      break;
    }

    case "score": {
      const r = await scoreAll({ root: DEFAULT_ROOT, recorded_at, dry_run });
      const c = r.totals.by_category;
      console.log(`[score] in=${r.totals.rows_read} out=${r.totals.rows_written} acc=${c.accepted ?? 0} part=${c.partially_accepted ?? 0} rej=${c.rejected ?? 0} hum=${c.needs_human_review ?? 0}`);
      if (!dry_run) console.log(`[score] receipt: ${r.receipt_path}`);
      break;
    }

    case "export-rag": {
      const r = await exportRag({ root: DEFAULT_ROOT, recorded_at, include_review, dry_run });
      console.log(`[export-rag] in=${r.records_read} out=${r.records_exported} ${r.quarantine_summary}`);
      console.log(`[export-rag] output: ${r.output_path}${include_review ? " (review included)" : ""}`);
      break;
    }

    case "export-sft": {
      const r = await exportSft({ root: DEFAULT_ROOT, recorded_at, include_partial, dry_run });
      console.log(`[export-sft] in=${r.records_read} out=${r.records_exported} ${r.quarantine_summary}`);
      console.log(`[export-sft] output: ${r.output_path}${include_partial ? " (partial included)" : ""}`);
      break;
    }

    case "export-preference": {
      const r = await exportPreference({ root: DEFAULT_ROOT, recorded_at, dry_run });
      console.log(`[export-preference] in=${r.records_read} pairs=${r.pairs_exported} task_ids_paired=${r.task_ids_with_pairs} ${r.quarantine_summary}`);
      console.log(`[export-preference] output: ${r.output_path}`);
      break;
    }

    case "export-all": {
      // The three exports run sequentially, in this order.
      const rRag = await exportRag({ root: DEFAULT_ROOT, recorded_at, include_review, dry_run });
      const rSft = await exportSft({ root: DEFAULT_ROOT, recorded_at, include_partial, dry_run });
      const rPref = await exportPreference({ root: DEFAULT_ROOT, recorded_at, dry_run });
      console.log("");
      console.log("─── export-all summary ───");
      console.log(` RAG: in=${rRag.records_read} out=${rRag.records_exported} ${rRag.quarantine_summary}`);
      console.log(` SFT: in=${rSft.records_read} out=${rSft.records_exported} ${rSft.quarantine_summary}`);
      console.log(` Preference: in=${rPref.records_read} pairs=${rPref.pairs_exported} ${rPref.quarantine_summary}`);
      break;
    }

    case "run-all": {
      // Phase 5 entry — full pipeline with structured receipts.
      if (dry_run) {
        // dry_run is not part of the options passed below, so make it
        // explicit that the flag has no effect for this command.
        console.error("[run-all] warning: --dry-run is not forwarded to run-all; the full pipeline will run");
      }
      const r = await runAllWithReceipts({ root: DEFAULT_ROOT, include_partial, include_review });
      console.log(`[run-all] run_id=${r.run_id} overall_passed=${r.summary.overall_passed}`);
      console.log(`[run-all] datasets: rag=${r.summary.rag_records} sft=${r.summary.sft_records} pref=${r.summary.preference_pairs}`);
      console.log(`[run-all] drift severity=${r.drift.severity}`);
      console.log(`[run-all] reports/distillation/${r.run_id}/summary.md`);
      if (!r.summary.overall_passed) process.exit(1);
      break;
    }

    case "acceptance": {
      // Phase 6 — fixture-driven end-to-end gate. Spawns the dedicated
      // acceptance script so its non-zero exit propagates.
      const r = spawnSync("bun", ["run", "scripts/distillation/acceptance.ts"], {
        cwd: DEFAULT_ROOT,
        stdio: "inherit",
      });
      if (r.error) {
        // The child never started (e.g. `bun` is not on PATH).
        console.error(`[acceptance] failed to launch bun: ${r.error.message}`);
      } else if (r.status === null) {
        // A null status without an error means the child was killed by a signal.
        console.error(`[acceptance] terminated by signal ${r.signal ?? "unknown"}`);
      }
      process.exit(r.status ?? 1);
    }

    case "receipts": {
      // Read receipts for a previously-run pipeline.
      const idx = process.argv.indexOf("--run-id");
      const run_id = idx >= 0 ? process.argv[idx + 1] : undefined;
      // A following flag (e.g. `--run-id --dry-run`) means the value is missing.
      if (!run_id || run_id.startsWith("--")) {
        console.error("usage: distill.ts receipts --run-id <run_id>");
        process.exit(2);
      }
      const path = `${DEFAULT_ROOT}/reports/distillation/${run_id}/summary.md`;
      try {
        console.log(readFileSync(path, "utf8"));
      } catch (e: unknown) {
        const code = e instanceof Error ? (e as NodeJS.ErrnoException).code : undefined;
        // Only a missing file means "run not found"; anything else
        // (permissions, path is a directory, ...) is surfaced as-is.
        if (code !== "ENOENT") throw e;
        console.error(`run not found: ${path}`);
        process.exit(2);
      }
      break;
    }

    // NOTE(review): `health` has no implementation here and shares the
    // help output with `help` and the no-command case.
    case "health":
    case "help":
    case undefined: {
      console.log("Usage: bun run scripts/distillation/distill.ts <command> [flags]");
      console.log("");
      console.log("Commands:");
      console.log(" build-evidence materialize EvidenceRecord rows");
      console.log(" score run deterministic Success Scorer");
      console.log(" export-rag RAG export (--include-review opt-in)");
      console.log(" export-sft SFT export (--include-partial opt-in)");
      console.log(" export-preference preference export");
      console.log(" export-all RAG + SFT + preference");
      console.log(" run-all full pipeline with structured receipts (Phase 5)");
      console.log(" receipts read summary for a run (--run-id <run_id>)");
      console.log(" acceptance fixture-driven end-to-end gate (Phase 6)");
      console.log("");
      console.log("Flags: --dry-run, --include-partial, --include-review");
      break;
    }

    default:
      console.error(`unknown command: ${cmd}. Try 'help'.`);
      process.exit(2);
  }
}

// Any rejection that escapes main() is logged and mapped to exit code 1.
main().catch((e: unknown) => {
  console.error(e);
  process.exit(1);
});