// run_summary.ts — aggregates StageReceipt rows for one run_id.
// Spec field set: total records processed, total accepted/rejected/
// quarantined, dataset sizes, validation status, overall hash of run.

import {
  ValidationResult,
  requireString,
  requireNumber,
  requireIsoTimestamp,
  requireSha256,
} from "./types";
import type { StageName } from "./stage_receipt";

/** Schema version that `validateRunSummary` requires in `schema_version`. */
export const RUN_SUMMARY_SCHEMA_VERSION = 1;

/** Per-stage figures embedded in a {@link RunSummary}. */
export interface RunStageSummary {
  stage: StageName;
  records_in: number;
  records_out: number;
  accepted: number;
  rejected: number;
  quarantined: number;
  skipped: number;
  passed: boolean;
  duration_ms: number;
  output_hash: string;
}

/** Aggregated view of every stage belonging to a single run. */
export interface RunSummary {
  schema_version: number;
  run_id: string;
  started_at: string; // earliest stage timestamp
  ended_at: string; // latest stage timestamp + duration
  git_commit: string;
  stages: RunStageSummary[];

  // Aggregates across stages
  total_records_in: number;
  total_records_out: number;
  total_accepted: number;
  total_rejected: number;
  total_quarantined: number;
  total_skipped: number;

  // Dataset sizes — final outputs of each export stage
  rag_records: number;
  sft_records: number;
  preference_pairs: number;

  // Pipeline-wide pass = AND of every stage validation.passed
  overall_passed: boolean;

  // Run-wide hash: sha256 over each stage's output hash, sorted by stage name.
  // Detects ANY change in any stage output across runs.
  run_hash: string;
  total_duration_ms: number;
}

// Full-length lowercase hex commit id. Hoisted so it is compiled once.
const GIT_COMMIT_PATTERN = /^[0-9a-f]{40}$/;

// Top-level RunSummary fields that must hold a number.
const NUMERIC_SUMMARY_FIELDS = [
  "total_records_in",
  "total_records_out",
  "total_accepted",
  "total_rejected",
  "total_quarantined",
  "total_skipped",
  "rag_records",
  "sft_records",
  "preference_pairs",
  "total_duration_ms",
] as const;

// RunStageSummary fields that must hold a number.
const NUMERIC_STAGE_FIELDS = [
  "records_in",
  "records_out",
  "accepted",
  "rejected",
  "quarantined",
  "skipped",
  "duration_ms",
] as const;

// True only for real, finite numbers: NaN and ±Infinity are typeof "number"
// but are never a meaningful count or duration.
function isFiniteNumber(value: unknown): value is number {
  return typeof value === "number" && Number.isFinite(value);
}

// Shallow-checks one `stages` entry against RunStageSummary, appending one
// message per problem to `errors`. Returns false if anything was wrong.
function checkStageEntry(entry: unknown, index: number, errors: string[]): boolean {
  const path = `stages[${index}]`;
  if (typeof entry !== "object" || entry === null || Array.isArray(entry)) {
    errors.push(`${path}: expected object`);
    return false;
  }
  const s = entry as Record<string, unknown>;
  let ok = true;

  // StageName's member set lives in ./stage_receipt and is not visible here,
  // so only the primitive type is enforced.
  if (typeof s.stage !== "string") {
    errors.push(`${path}.stage: expected string`);
    ok = false;
  }
  if (typeof s.passed !== "boolean") {
    errors.push(`${path}.passed: must be boolean`);
    ok = false;
  }
  if (typeof s.output_hash !== "string") {
    errors.push(`${path}.output_hash: expected string`);
    ok = false;
  }
  for (const field of NUMERIC_STAGE_FIELDS) {
    if (!isFiniteNumber(s[field])) {
      errors.push(`${path}.${field}: expected number`);
      ok = false;
    }
  }
  return ok;
}

/**
 * Structurally validates an untrusted value as a {@link RunSummary}.
 *
 * Checks performed:
 * - `schema_version` equals {@link RUN_SUMMARY_SCHEMA_VERSION};
 * - `run_id`, `started_at`, `ended_at` and `run_hash` pass the corresponding
 *   `require*` helpers from `./types` (assumed to append to `errors` and
 *   return a boolean — confirm against that module);
 * - `git_commit` is a 40-character lowercase hex string;
 * - `overall_passed` is a boolean;
 * - every aggregate/size/duration field is a finite number;
 * - `stages` is an array whose entries each match `RunStageSummary`
 *   (primitive types only).
 *
 * All checks run even after the first failure so the caller receives the
 * complete list of problems. Cross-field consistency (e.g. totals equalling
 * the sum of the stages) is not verified.
 *
 * NOTE(review): if `ValidationResult` is generic in `./types`, the return
 * type here should probably be `ValidationResult<RunSummary>` — confirm.
 *
 * @param input - Arbitrary value, typically freshly parsed JSON.
 * @returns `{ valid: true, value }` when every check passes, otherwise
 *   `{ valid: false, errors }`.
 */
export function validateRunSummary(input: unknown): ValidationResult {
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const r = input as Record<string, unknown>;

  // `ok` is tracked separately from `errors.length` so a helper that returns
  // false without recording a message still fails validation.
  let ok = true;

  if (r.schema_version !== RUN_SUMMARY_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${RUN_SUMMARY_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
    ok = false;
  }

  // Helper call is on the left of `&&` so it always runs, even once `ok` is false.
  ok = requireString(r.run_id, "run_id", errors) && ok;
  ok = requireIsoTimestamp(r.started_at, "started_at", errors) && ok;
  ok = requireIsoTimestamp(r.ended_at, "ended_at", errors) && ok;

  if (typeof r.git_commit !== "string" || !GIT_COMMIT_PATTERN.test(r.git_commit)) {
    errors.push("git_commit: must be 40-char hex");
    ok = false;
  }
  if (typeof r.overall_passed !== "boolean") {
    errors.push("overall_passed: must be boolean");
    ok = false;
  }

  ok = requireSha256(r.run_hash, "run_hash", errors) && ok;

  for (const k of NUMERIC_SUMMARY_FIELDS) {
    if (!isFiniteNumber(r[k])) {
      errors.push(`${k}: expected number`);
      ok = false;
    }
  }

  const stages: unknown = r.stages;
  if (!Array.isArray(stages)) {
    errors.push("stages: expected array");
    ok = false;
  } else {
    stages.forEach((entry: unknown, index: number) => {
      ok = checkStageEntry(entry, index, errors) && ok;
    });
  }

  if (!ok) return { valid: false, errors };
  // Every field of RunSummary has been checked at runtime above; the compiler
  // cannot follow that, hence the assertion.
  return { valid: true, value: r as unknown as RunSummary };
}