// drift_report.ts — comparison of a current run summary vs the
// previous run summary on disk. Spec calls this "drift detection";
// concretely it answers: did the pipeline behave the same way as
// last time, and if not, was the change explained by an input change
// or did it appear out of nowhere (silent drift)?
//
// Severity:
//   ok    — within 20% on every metric, no hash surprises
//   warn  — record-count or category swing > 20%, OR new error class
//   alert — output_hash differs while input_hash is identical
//           (deterministic violation — same input → different output)

import {
  ValidationResult,
  requireString,
  requireIsoTimestamp,
} from "./types";
import type { StageName } from "./stage_receipt";

/** Schema version a drift report must carry; `validateDriftReport` rejects any other value. */
export const DRIFT_REPORT_SCHEMA_VERSION = 2;

/**
 * Swing threshold expressed as a fraction (0.20 = 20%), matching the 20%
 * figure in the severity table in the file header.
 */
export const DRIFT_THRESHOLD_PCT = 0.20;

/** Overall verdict of a drift report; see the severity table in the file header. */
export type DriftSeverity = "ok" | "warn" | "alert";

/** Per-stage comparison between the current run and the prior run. */
export interface StageDrift {
  stage: StageName;
  delta_records_in: number; // current - prior
  delta_records_out: number;
  delta_accepted: number;
  delta_quarantined: number;
  pct_change_out: number | null; // null when prior had 0 records
  // null when input_hash isn't materialized into the stage summary —
  // schema v1 lied and reported `true` here. v2 is honest: callers
  // that want determinism enforcement must read the full StageReceipt
  // off disk and compute input_hash equality there.
  input_hash_match: boolean | null;
  output_hash_match: boolean;
  // alert if input_hash matches but output_hash diverges
  deterministic_violation: boolean;
  notes: string[];
}

/** Top-level drift report for one run, compared against the prior run (if any). */
export interface DriftReport {
  schema_version: number;
  run_id: string;
  prior_run_id: string | null; // null when no prior run on disk
  generated_at: string;
  severity: DriftSeverity;
  stages: StageDrift[];
  // Top-level swings the human reader should see immediately.
  flags: string[];
}

// Numeric StageDrift fields that must always be present (never null).
const STAGE_DRIFT_NUMERIC_FIELDS = [
  "delta_records_in",
  "delta_records_out",
  "delta_accepted",
  "delta_quarantined",
] as const;

/** True for real, finite numbers only (rejects NaN and ±Infinity). */
function isFiniteNumber(value: unknown): boolean {
  return typeof value === "number" && Number.isFinite(value);
}

/**
 * Checks one `stages[]` entry against the StageDrift shape declared above.
 *
 * @param entry  The candidate stage entry.
 * @param path   Prefix for error messages, e.g. `stages[3]`.
 * @param errors Accumulator; one message is appended per failed field.
 * @returns `true` when the entry produced no errors.
 */
function checkStageDrift(entry: unknown, path: string, errors: string[]): boolean {
  if (typeof entry !== "object" || entry === null || Array.isArray(entry)) {
    errors.push(`${path}: expected object`);
    return false;
  }
  const s = entry as Record<string, unknown>;
  const errorsBefore = errors.length;

  // NOTE(review): assumes StageName is a string-based type — confirm
  // against ./stage_receipt.
  if (typeof s.stage !== "string") {
    errors.push(`${path}.stage: expected string`);
  }
  for (const field of STAGE_DRIFT_NUMERIC_FIELDS) {
    if (!isFiniteNumber(s[field])) {
      errors.push(`${path}.${field}: expected finite number`);
    }
  }
  if (s.pct_change_out !== null && !isFiniteNumber(s.pct_change_out)) {
    errors.push(`${path}.pct_change_out: expected finite number or null`);
  }
  if (s.input_hash_match !== null && typeof s.input_hash_match !== "boolean") {
    errors.push(`${path}.input_hash_match: expected boolean or null`);
  }
  if (typeof s.output_hash_match !== "boolean") {
    errors.push(`${path}.output_hash_match: expected boolean`);
  }
  if (typeof s.deterministic_violation !== "boolean") {
    errors.push(`${path}.deterministic_violation: expected boolean`);
  }
  if (!Array.isArray(s.notes) || !s.notes.every((n: unknown) => typeof n === "string")) {
    errors.push(`${path}.notes: expected array of strings`);
  }

  return errors.length === errorsBefore;
}

/**
 * Validates an untrusted value (e.g. parsed JSON) as a DriftReport.
 *
 * Every check runs even after an earlier one fails, so the returned error
 * list covers all problems found rather than just the first. Beyond the
 * top-level fields, each `stages[]` entry is checked against the StageDrift
 * shape and each `flags[]` entry must be a string, so a successful result
 * really does have the DriftReport shape.
 *
 * @param input Arbitrary value to validate.
 * @returns `{ valid: true, value }` when every check passes, otherwise
 *          `{ valid: false, errors }` with one message per failed check.
 */
export function validateDriftReport(input: unknown): ValidationResult {
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const r = input as Record<string, unknown>;
  let ok = true;

  if (r.schema_version !== DRIFT_REPORT_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${DRIFT_REPORT_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
    ok = false;
  }
  // Helper is called first so it always runs, even when `ok` is already false.
  ok = requireString(r.run_id, "run_id", errors) && ok;
  if (r.prior_run_id !== null && typeof r.prior_run_id !== "string") {
    errors.push("prior_run_id: must be string or null");
    ok = false;
  }
  ok = requireIsoTimestamp(r.generated_at, "generated_at", errors) && ok;
  if (!["ok", "warn", "alert"].includes(r.severity as string)) {
    errors.push(`severity: must be ok|warn|alert, got ${JSON.stringify(r.severity)}`);
    ok = false;
  }

  if (!Array.isArray(r.stages)) {
    errors.push("stages: expected array");
    ok = false;
  } else {
    for (let i = 0; i < r.stages.length; i++) {
      if (!checkStageDrift(r.stages[i], `stages[${i}]`, errors)) {
        ok = false;
      }
    }
  }

  if (!Array.isArray(r.flags)) {
    errors.push("flags: expected array");
    ok = false;
  } else {
    for (let i = 0; i < r.flags.length; i++) {
      if (typeof r.flags[i] !== "string") {
        errors.push(`flags[${i}]: expected string`);
        ok = false;
      }
    }
  }

  if (!ok) return { valid: false, errors };
  // Safe to cast: every field of DriftReport has been checked above.
  return { valid: true, value: r as unknown as DriftReport };
}