// lakehouse/auditor/schemas/distillation/receipt.ts

// Receipt — per-pipeline-stage record with everything needed to
// reproduce the run. Spec non-negotiable: substantive receipts, not
// "ran successfully". Every field below has a deterministic source so
// the receipt schema validator catches "I forgot to fill it in" the
// same way it catches type errors.
import {
  ValidationResult, requireString, requireNumber, requireIsoTimestamp,
} from "./types";

/**
 * Schema version understood by this module. `validateReceipt` rejects any
 * receipt whose `schema_version` is not strictly equal to this value.
 */
export const RECEIPT_SCHEMA_VERSION = 1;

/**
 * Identifies one file by repo-relative path and SHA-256 hex digest
 * (presumably of the file's contents — confirm with the producer).
 * Receipts list their inputs and outputs as arrays of these.
 */
export interface FileReference {
  path: string;     // relative to repo root
  sha256: string;   // hex; the validator requires exactly 64 lowercase hex chars
  bytes?: number;   // optional but recommended
}

/**
 * Per-pipeline-stage run record carrying what is needed to reproduce the
 * run: the command, the git state, timing, the files read and written,
 * record counts, and an explicit validation outcome.
 */
export interface Receipt {
  schema_version: number;      // must equal RECEIPT_SCHEMA_VERSION to validate
  command: string;             // shell-line or script identifier
  git_sha: string;             // 40-char hex (full SHA1); validator accepts lowercase only
  git_branch?: string;
  git_dirty?: boolean;         // true if working tree had uncommitted changes
  started_at: string;          // ISO 8601
  ended_at: string;            // ISO 8601
  duration_ms: number;
  input_files: FileReference[];
  output_files: FileReference[];
  record_counts: {
    in: number;                // always required
    out: number;               // always required
    [key: string]: number;     // per-stage extras (filtered, dropped, etc.)
  };
  validation_pass: boolean;    // explicit — never inferred
  errors: string[];
  warnings: string[];
}

/**
 * Validates a single {@link FileReference}-shaped value located at `field`.
 *
 * Problems are appended to `errors` (each prefixed with `field`) rather than
 * thrown, and checking continues after a failed property so one pass reports
 * every problem with the entry.
 *
 * @param v - Untrusted value to check.
 * @param field - Location of the value inside the receipt; used as message prefix.
 * @param errors - Accumulator that receives one message per problem found.
 * @returns `true` when `v` is a well-formed file reference, otherwise `false`.
 */
function validateFileRef(v: unknown, field: string, errors: string[]): boolean {
  if (typeof v !== "object" || v === null) {
    errors.push(`${field}: expected object`);
    return false;
  }
  const f = v as Record<string, unknown>;
  let ok = true;
  ok = requireString(f.path, `${field}.path`, errors) && ok;
  // Exactly 64 lowercase hex characters; uppercase digests are rejected.
  if (typeof f.sha256 !== "string" || !/^[0-9a-f]{64}$/.test(f.sha256)) {
    errors.push(`${field}.sha256: must be hex sha256`);
    ok = false;
  }
  if (f.bytes !== undefined) {
    if (typeof f.bytes !== "number") {
      errors.push(`${field}.bytes: expected number when present`);
      ok = false;
    } else if (!Number.isInteger(f.bytes) || f.bytes < 0) {
      // `typeof` alone admits NaN, Infinity, negatives and fractions, none of
      // which can be a byte count.
      errors.push(`${field}.bytes: must be a non-negative integer`);
      ok = false;
    }
  }
  return ok;
}

/** Validates each entry of a file-reference list, reporting under `field[i]`. */
function checkFileRefList(v: unknown, field: string, errors: string[]): boolean {
  if (!Array.isArray(v)) {
    errors.push(`${field}: expected array`);
    return false;
  }
  let ok = true;
  for (let i = 0; i < v.length; i++) {
    if (!validateFileRef(v[i], `${field}[${i}]`, errors)) ok = false;
  }
  return ok;
}

/** Requires `v` to be an array whose elements are all strings. */
function checkStringList(v: unknown, field: string, errors: string[]): boolean {
  if (!Array.isArray(v)) {
    errors.push(`${field}: expected array`);
    return false;
  }
  let ok = true;
  for (let i = 0; i < v.length; i++) {
    if (typeof v[i] !== "string") {
      errors.push(`${field}[${i}]: expected string`);
      ok = false;
    }
  }
  return ok;
}

/**
 * Validates an untrusted value against the {@link Receipt} schema.
 *
 * All fields are checked in a single pass and every problem is collected, so
 * a failing result lists everything that is wrong rather than only the first
 * issue. Optional fields (`git_branch`, `git_dirty`) and per-stage
 * `record_counts` extras are type-checked when present, and `errors` /
 * `warnings` must contain only strings, so a `valid: true` result really
 * matches the `Receipt` type it is cast to.
 *
 * @param input - Value to validate, typically parsed JSON.
 * @returns `{ valid: true, value }` carrying the same object typed as
 *   `Receipt`, or `{ valid: false, errors }` with one message per problem.
 */
export function validateReceipt(input: unknown): ValidationResult<Receipt> {
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const r = input as Record<string, unknown>;
  let ok = true;

  if (r.schema_version !== RECEIPT_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${RECEIPT_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
    ok = false;
  }
  ok = requireString(r.command, "command", errors) && ok;
  if (typeof r.git_sha !== "string" || !/^[0-9a-f]{40}$/.test(r.git_sha)) {
    errors.push("git_sha: must be 40-char hex");
    ok = false;
  }
  // Optional fields may be absent, but a present value must have the declared type.
  if (r.git_branch !== undefined && typeof r.git_branch !== "string") {
    errors.push("git_branch: expected string when present");
    ok = false;
  }
  if (r.git_dirty !== undefined && typeof r.git_dirty !== "boolean") {
    errors.push("git_dirty: expected boolean when present");
    ok = false;
  }
  ok = requireIsoTimestamp(r.started_at, "started_at", errors) && ok;
  ok = requireIsoTimestamp(r.ended_at, "ended_at", errors) && ok;
  ok = requireNumber(r.duration_ms, "duration_ms", errors) && ok;
  if (typeof r.validation_pass !== "boolean") {
    errors.push("validation_pass: must be boolean (explicit, never inferred)");
    ok = false;
  }
  ok = checkFileRefList(r.input_files, "input_files", errors) && ok;
  ok = checkFileRefList(r.output_files, "output_files", errors) && ok;
  if (typeof r.record_counts !== "object" || r.record_counts === null) {
    errors.push("record_counts: expected object");
    ok = false;
  } else {
    const rc = r.record_counts as Record<string, unknown>;
    if (typeof rc.in !== "number") { errors.push("record_counts.in: expected number"); ok = false; }
    if (typeof rc.out !== "number") { errors.push("record_counts.out: expected number"); ok = false; }
    // Per-stage extras are free-form in name but must still be numeric.
    for (const [key, count] of Object.entries(rc)) {
      if (key === "in" || key === "out") continue;
      if (typeof count !== "number") {
        errors.push(`record_counts.${key}: expected number`);
        ok = false;
      }
    }
  }
  ok = checkStringList(r.errors, "errors", errors) && ok;
  ok = checkStringList(r.warnings, "warnings", errors) && ok;

  if (!ok) return { valid: false, errors };
  return { valid: true, value: r as unknown as Receipt };
}