// PreferenceSample — entry in exports/preference/chosen_rejected.jsonl.
// Source: real disagreements (audit_discrepancies, scrum ladder retries).
// Validator pins: chosen != rejected, chosen_run_id != rejected_run_id (both
// present), reason is non-empty. No synthesized preferences.

import {
  ValidationResult,
  requireString,
  requireIsoTimestamp,
  requireProvenance,
} from "./types";

/** The only `schema_version` value that `validatePreferenceSample` accepts. */
export const PREFERENCE_SAMPLE_SCHEMA_VERSION = 1;

/**
 * One chosen/rejected preference pair.
 *
 * The field-level constraints noted below are the ones enforced by
 * `validatePreferenceSample`; the interface alone does not guarantee them.
 */
export interface PreferenceSample {
  /** Must equal `PREFERENCE_SAMPLE_SCHEMA_VERSION`. */
  schema_version: number;
  id: string;
  prompt: string;
  /** Preferred output; must differ from `rejected`. */
  chosen: string;
  /** Dispreferred output; must differ from `chosen`. */
  rejected: string;
  /** Why chosen > rejected — must be non-empty (whitespace-only is rejected). */
  reason: string;
  /** Run that produced `chosen`; must differ from `rejected_run_id`. */
  chosen_run_id: string;
  /** Run that produced `rejected`; must differ from `chosen_run_id`. */
  rejected_run_id: string;
  /** Checked with `requireIsoTimestamp`. */
  created_at: string;
  /** Checked with `requireProvenance`. */
  provenance: {
    source_file: string;
    line_offset?: number;
    sig_hash: string;
    recorded_at: string;
  };
}

/**
 * Validates an untrusted value as a `PreferenceSample`.
 *
 * All checks run even after the first failure, so `errors` lists every
 * problem found rather than only the first one. The only short-circuit is a
 * non-object (or `null`) input, which is reported as `"expected object"`.
 *
 * Per-field checks are delegated to the `require*` helpers from `./types`;
 * on top of those, this function rejects self-pairings (identical
 * `chosen`/`rejected` text or identical run ids) and whitespace-only reasons.
 *
 * @param input - Value to validate, e.g. one parsed JSONL record.
 * @returns `{ valid: true, value }` when every check passes, otherwise
 *   `{ valid: false, errors }`.
 */
// NOTE(review): the success branch returns a PreferenceSample-typed `value`;
// if `ValidationResult` in ./types is generic, this annotation presumably
// wants a type argument — confirm against its declaration.
export function validatePreferenceSample(input: unknown): ValidationResult {
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }

  // Narrowed to a non-null object above; view it as a string-keyed bag of
  // unknowns so each field must be checked before it is trusted.
  const r = input as Record<string, unknown>;
  let ok = true;

  if (r.schema_version !== PREFERENCE_SAMPLE_SCHEMA_VERSION) {
    errors.push(
      `schema_version: expected ${PREFERENCE_SAMPLE_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`,
    );
    ok = false;
  }

  // Helper call goes on the left of `&&` so it always runs (and can record
  // its error) even when an earlier check has already failed.
  ok = requireString(r.id, "id", errors) && ok;
  ok = requireString(r.prompt, "prompt", errors) && ok;
  ok = requireString(r.chosen, "chosen", errors) && ok;
  ok = requireString(r.rejected, "rejected", errors) && ok;
  ok = requireString(r.reason, "reason", errors) && ok;
  ok = requireString(r.chosen_run_id, "chosen_run_id", errors) && ok;
  ok = requireString(r.rejected_run_id, "rejected_run_id", errors) && ok;
  ok = requireIsoTimestamp(r.created_at, "created_at", errors) && ok;
  ok = requireProvenance(r.provenance, "provenance", errors) && ok;

  // Self-pairing guard. The typeof test keeps two missing/non-string fields
  // (e.g. both undefined) from being reported as "equal".
  if (typeof r.chosen === "string" && r.chosen === r.rejected) {
    errors.push("chosen and rejected must differ — preference data needs a real disagreement");
    ok = false;
  }
  if (typeof r.chosen_run_id === "string" && r.chosen_run_id === r.rejected_run_id) {
    errors.push("chosen_run_id and rejected_run_id must differ — same run can't disagree with itself");
    ok = false;
  }

  // A reason made only of whitespace is still a string, so reject it here.
  const reason = r.reason;
  if (typeof reason === "string" && reason.trim().length === 0) {
    errors.push("reason: must be non-whitespace (every preference needs WHY chosen > rejected)");
    ok = false;
  }

  if (!ok) return { valid: false, errors };
  // Every field was checked at runtime above; the compiler cannot follow
  // that, hence the double assertion.
  return { valid: true, value: r as unknown as PreferenceSample };
}