// Combined schema tests for ScoredRun, Receipt, Playbook,
// ScratchpadSummary, ModelLedgerEntry, RagSample, SftSample,
// PreferenceSample. EvidenceRecord lives in its own file because it's
// the foundational schema and warrants the JSON-fixture round-trip
// pattern; the rest use inline fixture makers since they're simpler.
//
// Each schema: one known-good fixture that must validate, plus negative
// cases (each overriding a single aspect of that fixture) pinning the
// non-negotiable invariants from now.md.
//
// Run: bun test auditor/schemas/distillation/schemas.test.ts

import { test, expect } from "bun:test";
import { validateScoredRun, SCORED_RUN_SCHEMA_VERSION } from "./scored_run";
import { validateReceipt, RECEIPT_SCHEMA_VERSION } from "./receipt";
import { validatePlaybook, PLAYBOOK_SCHEMA_VERSION } from "./playbook";
import { validateScratchpadSummary, SCRATCHPAD_SCHEMA_VERSION } from "./scratchpad_summary";
import { validateModelLedgerEntry, MODEL_LEDGER_SCHEMA_VERSION } from "./model_ledger";
import { validateRagSample, RAG_SAMPLE_SCHEMA_VERSION } from "./rag_sample";
import { validateSftSample, SFT_SAMPLE_SCHEMA_VERSION } from "./sft_sample";
import { validatePreferenceSample, PREFERENCE_SAMPLE_SCHEMA_VERSION } from "./preference_sample";

// Shared literals reused by every fixture below.
const NOW = "2026-04-26T22:30:00.000Z";
// 64 hex characters — used wherever a fixture needs a sha256-style value.
const SHA = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
// 40 hex characters — used for Receipt.git_sha.
const GIT_SHA = "f753e11157eef753e11157eef753e11157eef753";

// Provenance object embedded in the fixtures that carry a `provenance` field.
const PROVENANCE = {
  source_file: "data/_kb/scored_runs.jsonl",
  line_offset: 0,
  sig_hash: SHA,
  recorded_at: NOW,
};

// ─── ScoredRun ───────────────────────────────────────────────────────

// Known-good ScoredRun; negative tests spread this and override one field.
const SCORED_RUN_OK = {
  schema_version: SCORED_RUN_SCHEMA_VERSION,
  evidence_run_id: "run-abc",
  evidence_task_id: "task-abc",
  category: "accepted",
  reasons: ["cargo_green=true", "anchor_grounding=0.95"],
  scored_at: NOW,
  scorer_version: "v1.0.0",
  sub_scores: { cargo_green: true, anchor_grounding: 0.95 },
  provenance: PROVENANCE,
};

test("ScoredRun: positive validates", () => {
  const r = validateScoredRun(SCORED_RUN_OK);
  // Surface the validator's errors so an unexpected failure is diagnosable.
  if (!r.valid) console.error(r.errors);
  expect(r.valid).toBe(true);
});

test("ScoredRun: empty reasons rejected (every score needs a reason)", () => {
  const r = validateScoredRun({ ...SCORED_RUN_OK, reasons: [] });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("reasons"))).toBe(true);
});

test("ScoredRun: invalid category rejected", () => {
  const r = validateScoredRun({ ...SCORED_RUN_OK, category: "maybe_ok" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("category"))).toBe(true);
});

test("ScoredRun: anchor_grounding > 1 rejected (must be in [0, 1])", () => {
  const r = validateScoredRun({
    ...SCORED_RUN_OK,
    sub_scores: { ...SCORED_RUN_OK.sub_scores, anchor_grounding: 1.5 },
  });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("anchor_grounding"))).toBe(true);
});

// ─── Receipt ─────────────────────────────────────────────────────────

// Known-good Receipt; negative tests spread this and override one field.
const RECEIPT_OK = {
  schema_version: RECEIPT_SCHEMA_VERSION,
  command: "bun run scripts/build_evidence_index.ts",
  git_sha: GIT_SHA,
  git_branch: "scrum/auto-apply-19814",
  git_dirty: false,
  started_at: NOW,
  ended_at: NOW,
  duration_ms: 1234,
  input_files: [{ path: "data/_kb/scrum_reviews.jsonl", sha256: SHA, bytes: 448000 }],
  output_files: [{ path: "data/evidence/2026/04/26/run.jsonl", sha256: SHA }],
  record_counts: { in: 100, out: 95, filtered: 5 },
  validation_pass: true,
  errors: [],
  warnings: [],
};

test("Receipt: positive validates", () => {
  const r = validateReceipt(RECEIPT_OK);
  if (!r.valid) console.error(r.errors);
  expect(r.valid).toBe(true);
});

test("Receipt: bad git_sha rejected (must be 40-char hex)", () => {
  const r = validateReceipt({ ...RECEIPT_OK, git_sha: "abc123" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("git_sha"))).toBe(true);
});

test("Receipt: validation_pass must be boolean (never inferred)", () => {
  const r = validateReceipt({ ...RECEIPT_OK, validation_pass: "yes" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("validation_pass"))).toBe(true);
});

test("Receipt: file refs without proper sha256 rejected", () => {
  const r = validateReceipt({ ...RECEIPT_OK, output_files: [{ path: "x", sha256: "short" }] });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("sha256"))).toBe(true);
});

// ─── Playbook ────────────────────────────────────────────────────────

// Known-good Playbook; negative tests spread this and override one field.
const PLAYBOOK_OK = {
  schema_version: PLAYBOOK_SCHEMA_VERSION,
  playbook_id: "pb-scrum-review-001",
  task_type: "scrum_review",
  problem_pattern: "Cargo workspace warning escalation after applier patch",
  useful_context: ["pathway memory bug fingerprints for the file area"],
  model_routing_path: ["x-ai/grok-4.1-fast"],
  commands_worked: ["cargo check --workspace"],
  commands_failed: [],
  validation_steps: ["warning count must not increase"],
  repo_files_touched: ["crates/queryd/src/service.rs"],
  recovery_strategy: "git checkout -- file when cargo red",
  known_failure_modes: ["unused import noise"],
  escalation_threshold: "use kimi-k2:1t when isolation mode rejects 2 attempts",
  acceptance_criteria: ["cargo green", "warning count stable", "rationale-diff aligned"],
  source_run_ids: ["run-xyz", "run-abc"],
  created_at: NOW,
  provenance: PROVENANCE,
};

test("Playbook: positive validates", () => {
  const r = validatePlaybook(PLAYBOOK_OK);
  if (!r.valid) console.error(r.errors);
  expect(r.valid).toBe(true);
});

test("Playbook: empty source_run_ids rejected (every playbook traces to source — spec)", () => {
  const r = validatePlaybook({ ...PLAYBOOK_OK, source_run_ids: [] });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("source_run_ids"))).toBe(true);
});

test("Playbook: empty acceptance_criteria rejected (every playbook needs success criteria — spec)", () => {
  const r = validatePlaybook({ ...PLAYBOOK_OK, acceptance_criteria: [] });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("acceptance_criteria"))).toBe(true);
});

// ─── ScratchpadSummary ───────────────────────────────────────────────

// Known-good ScratchpadSummary; negative tests spread this and override one field.
const SCRATCHPAD_OK = {
  schema_version: SCRATCHPAD_SCHEMA_VERSION,
  run_id: "run-abc",
  current_objective: "verify pr_audit mode end-to-end",
  completed_steps: ["restart gateway"],
  failed_steps: ["cloud chat returned 500"],
  pending_steps: ["swap default model"],
  important_paths: ["auditor/checks/inference.ts"],
  decisions: ["defer kimi-k2 swap until upstream returns"],
  unresolved_questions: ["does deepseek match kimi quality?"],
  validation_status: "partial",
  next_command: "bun run auditor/audit_one.ts 11",
  source_scratchpad_hash: SHA,
  summarized_at: NOW,
  provenance: PROVENANCE,
};

test("ScratchpadSummary: positive validates", () => {
  const r = validateScratchpadSummary(SCRATCHPAD_OK);
  if (!r.valid) console.error(r.errors);
  expect(r.valid).toBe(true);
});

test("ScratchpadSummary: invalid validation_status rejected", () => {
  const r = validateScratchpadSummary({ ...SCRATCHPAD_OK, validation_status: "tbd" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("validation_status"))).toBe(true);
});

test("ScratchpadSummary: short scratchpad_hash rejected", () => {
  const r = validateScratchpadSummary({ ...SCRATCHPAD_OK, source_scratchpad_hash: "short" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("source_scratchpad_hash"))).toBe(true);
});

// ─── ModelLedgerEntry ────────────────────────────────────────────────

// Known-good ModelLedgerEntry; negative tests spread this and override one field.
const LEDGER_OK = {
  schema_version: MODEL_LEDGER_SCHEMA_VERSION,
  model_name: "kimi-k2:1t",
  model_provider: "ollama_cloud",
  task_type: "pr_audit",
  success_rate: 0.85,
  failure_modes: ["upstream_500", "context_truncation"],
  best_partner_model: "x-ai/grok-4.1-fast",
  escalation_role: "primary",
  cost_usd_p50: 0.0002,
  latency_ms_p50: 50000,
  latency_ms_p95: 90000,
  context_window: 200000,
  sample_count: 47,
  last_updated: NOW,
};

test("ModelLedgerEntry: positive validates", () => {
  const r = validateModelLedgerEntry(LEDGER_OK);
  if (!r.valid) console.error(r.errors);
  expect(r.valid).toBe(true);
});

test("ModelLedgerEntry: success_rate > 1 rejected", () => {
  const r = validateModelLedgerEntry({ ...LEDGER_OK, success_rate: 1.5 });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("success_rate"))).toBe(true);
});

test("ModelLedgerEntry: zero sample_count rejected (no aggregate from zero)", () => {
  const r = validateModelLedgerEntry({ ...LEDGER_OK, sample_count: 0 });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("sample_count"))).toBe(true);
});

// ─── RagSample ───────────────────────────────────────────────────────

// Known-good RagSample; negative tests spread this and override one or two fields.
const RAG_OK = {
  schema_version: RAG_SAMPLE_SCHEMA_VERSION,
  id: "rag-pb-001",
  title: "Scrum applier rationale-diff alignment",
  content: "When the applier emits a patch with rationale claiming X but the diff shows Y, the rationale-token alignment gate catches it...",
  tags: ["scrum_review", "applier"],
  source_run_id: "run-xyz",
  success_score: "accepted",
  source_category: "accepted",
  embedding_text: "applier rationale-diff alignment guard scrum",
  created_at: NOW,
  provenance: PROVENANCE,
};

test("RagSample: positive validates", () => {
  const r = validateRagSample(RAG_OK);
  if (!r.valid) console.error(r.errors);
  expect(r.valid).toBe(true);
});

test("RagSample: success_score=rejected forbidden (RAG never takes rejected)", () => {
  // Both fields are set to "rejected" so the mismatch rule is not what trips.
  const r = validateRagSample({ ...RAG_OK, success_score: "rejected", source_category: "rejected" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("success_score"))).toBe(true);
});

test("RagSample: success_score and source_category must match", () => {
  const r = validateRagSample({ ...RAG_OK, success_score: "accepted", source_category: "partially_accepted" });
  expect(r.valid).toBe(false);
});

test("RagSample: whitespace-only content rejected", () => {
  const r = validateRagSample({ ...RAG_OK, content: " \n " });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("content"))).toBe(true);
});

// ─── SftSample (the strict one) ──────────────────────────────────────

// Known-good SftSample; other tests spread this and override one field.
const SFT_OK = {
  schema_version: SFT_SAMPLE_SCHEMA_VERSION,
  id: "sft-pr11-001",
  instruction: "Audit this PR diff against ship-claims.",
  context: "claims: 3 strong, 2 moderate",
  response: "{\"claim_verdicts\": [...]}",
  source_run_id: "run-pr11",
  quality_score: "accepted",
  created_at: NOW,
  provenance: PROVENANCE,
};

test("SftSample: positive validates", () => {
  const r = validateSftSample(SFT_OK);
  if (!r.valid) console.error(r.errors);
  expect(r.valid).toBe(true);
});

test("SftSample: quality_score=partially_accepted ACCEPTED (--include-partial path)", () => {
  // Phase 4 update: partial allowed at schema layer; CLI gate decides.
  const r = validateSftSample({ ...SFT_OK, quality_score: "partially_accepted" });
  expect(r.valid).toBe(true);
});

test("SftSample: quality_score=rejected REJECTED (spec non-negotiable, no leak)", () => {
  const r = validateSftSample({ ...SFT_OK, quality_score: "rejected" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("quality_score"))).toBe(true);
});

test("SftSample: quality_score=needs_human_review REJECTED (no leak)", () => {
  const r = validateSftSample({ ...SFT_OK, quality_score: "needs_human_review" });
  expect(r.valid).toBe(false);
});

test("SftSample: missing context rejected (must be string, even if empty)", () => {
  // Widen to an index-signature type so the key can be deleted outright
  // (an absent key, as opposed to a present-but-undefined one).
  const fixture: Record<string, unknown> = { ...SFT_OK };
  delete fixture.context;
  const r = validateSftSample(fixture);
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("context"))).toBe(true);
});

test("SftSample: empty-string context allowed", () => {
  const r = validateSftSample({ ...SFT_OK, context: "" });
  expect(r.valid).toBe(true);
});

test("SftSample: empty response rejected (no empty pairs)", () => {
  const r = validateSftSample({ ...SFT_OK, response: "" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("response"))).toBe(true);
});

test("SftSample: whitespace-only instruction rejected", () => {
  const r = validateSftSample({ ...SFT_OK, instruction: " \t\n " });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("instruction"))).toBe(true);
});

// ─── PreferenceSample ────────────────────────────────────────────────

// Known-good PreferenceSample; negative tests spread this and override one or two fields.
const PREF_OK = {
  schema_version: PREFERENCE_SAMPLE_SCHEMA_VERSION,
  id: "pref-task-x-001",
  prompt: "Verify claim: 'all 3 services running on matrix-test'",
  chosen: "{\"backed\": true, \"evidence\": \"systemctl status confirms 3 active\"}",
  rejected: "{\"backed\": true, \"evidence\": \"the README says so\"}",
  reason: "chosen cites runtime evidence, rejected cites doc claim only",
  chosen_run_id: "run-A",
  rejected_run_id: "run-B",
  created_at: NOW,
  provenance: PROVENANCE,
};

test("PreferenceSample: positive validates", () => {
  const r = validatePreferenceSample(PREF_OK);
  if (!r.valid) console.error(r.errors);
  expect(r.valid).toBe(true);
});

test("PreferenceSample: chosen == rejected rejected (no self-pairing)", () => {
  const r = validatePreferenceSample({ ...PREF_OK, chosen: "x", rejected: "x" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("chosen and rejected"))).toBe(true);
});

test("PreferenceSample: chosen_run_id == rejected_run_id rejected (no self-disagreement)", () => {
  const r = validatePreferenceSample({ ...PREF_OK, chosen_run_id: "run-A", rejected_run_id: "run-A" });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("chosen_run_id"))).toBe(true);
});

test("PreferenceSample: empty reason rejected (every preference needs WHY)", () => {
  const r = validatePreferenceSample({ ...PREF_OK, reason: " " });
  expect(r.valid).toBe(false);
  if (!r.valid) expect(r.errors.some(e => e.includes("reason"))).toBe(true);
});