package materializer

import (
	"fmt"
	"regexp"
	"strings"
	"time"
	"unicode/utf8"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/distillation"
)

// ValidateEvidenceRecord ports validateEvidenceRecord from
// auditor/schemas/distillation/evidence_record.ts. Returns nil on
// success or a slice of human-readable error messages — the
// materializer logs the slice into distillation_skips.jsonl so an
// operator can see why a row was rejected without diff'ing logic.
//
// The validator is intentionally separate from
// distillation.ValidateScoredRun: scoring runs and evidence records
// have different shapes and the scorer's validator only covers the
// scored-run side.
//
// Every check runs even after an earlier one fails, so the returned
// slice lists all problems found in the record rather than only the
// first.
func ValidateEvidenceRecord(r distillation.EvidenceRecord) []string {
	var errs []string

	// Required identity and bookkeeping fields.
	if r.RunID == "" {
		errs = append(errs, "run_id: must be non-empty")
	}
	if r.TaskID == "" {
		errs = append(errs, "task_id: must be non-empty")
	}
	if !validISOTimestamp(r.Timestamp) {
		errs = append(errs, fmt.Sprintf("timestamp: not a valid ISO 8601 timestamp: %s", trim(r.Timestamp, 60)))
	}
	if r.SchemaVersion != distillation.EvidenceSchemaVersion {
		errs = append(errs, fmt.Sprintf("schema_version: expected %d, got %d", distillation.EvidenceSchemaVersion, r.SchemaVersion))
	}
	errs = append(errs, validateProvenanceFields(r.Provenance)...)

	// Optional fields: only validated when they carry a value.
	if r.ModelRole != "" && !isValidModelRole(r.ModelRole) {
		errs = append(errs, fmt.Sprintf("model_role: must be a known role, got %q", r.ModelRole))
	}
	if r.InputHash != "" && !isHexSha256(r.InputHash) {
		errs = append(errs, "input_hash: must be hex sha256 when present")
	}
	if r.OutputHash != "" && !isHexSha256(r.OutputHash) {
		errs = append(errs, "output_hash: must be hex sha256 when present")
	}
	if r.ObserverConfidence < 0 || r.ObserverConfidence > 100 {
		errs = append(errs, "observer_confidence: must be in [0, 100]")
	}

	// A human override is optional, but once present all of its fields
	// are mandatory.
	if r.HumanOverride != nil {
		if r.HumanOverride.Overrider == "" {
			errs = append(errs, "human_override.overrider: must be non-empty")
		}
		if r.HumanOverride.Reason == "" {
			errs = append(errs, "human_override.reason: must be non-empty")
		}
		if !validISOTimestamp(r.HumanOverride.OverriddenAt) {
			errs = append(errs, "human_override.overridden_at: must be ISO 8601")
		}
		switch r.HumanOverride.Decision {
		case "accept", "reject", "needs_review":
			// Known decision; nothing to report.
		default:
			errs = append(errs, "human_override.decision: must be accept|reject|needs_review")
		}
	}

	// Make the "valid record" result an explicit nil rather than relying
	// on no append having allocated.
	if len(errs) == 0 {
		return nil
	}
	return errs
}

// validateProvenanceFields checks the provenance block of an evidence
// record: source_file must be non-empty, sig_hash must be a hex sha256,
// and recorded_at must be an ISO 8601 timestamp. It returns one message
// per failed check, or nil when all three pass.
func validateProvenanceFields(p distillation.Provenance) []string {
	var errs []string
	if p.SourceFile == "" {
		errs = append(errs, "provenance.source_file: must be non-empty")
	}
	if !isHexSha256(p.SigHash) {
		errs = append(errs, fmt.Sprintf("provenance.sig_hash: not a valid hex sha256: %s", trim(p.SigHash, 80)))
	}
	if !validISOTimestamp(p.RecordedAt) {
		errs = append(errs, "provenance.recorded_at: must be ISO 8601")
	}
	return errs
}

var (
	// Permissive ISO 8601 (matches TS regex):
	//   YYYY-MM-DDTHH:MM:SS(.fraction)?(Z|±HH:MM|±HHMM)?
	isoTimestampRE = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?$`)

	// hexSha256RE matches exactly 64 lowercase hexadecimal characters.
	hexSha256RE = regexp.MustCompile(`^[0-9a-f]{64}$`)

	// isoTimestampLayouts lists one time layout per zone shape that
	// isoTimestampRE admits. Fractional seconds need no layout of their
	// own: time.Parse accepts a fractional-seconds field directly after
	// the seconds even when the layout does not declare one.
	isoTimestampLayouts = []string{
		time.RFC3339,               // Z or ±HH:MM
		"2006-01-02T15:04:05Z0700", // ±HHMM (offset without a colon)
		"2006-01-02T15:04:05",      // no zone designator
	}
)

// validISOTimestamp reports whether s is a timestamp in the permissive
// ISO 8601 shape described by isoTimestampRE and also denotes a real
// calendar date and time. The empty string is never valid.
func validISOTimestamp(s string) bool {
	if s == "" || !isoTimestampRE.MatchString(s) {
		return false
	}
	// Belt-and-suspenders: the regex only checks the shape, so confirm
	// the value actually parses (rejects e.g. month 13 or hour 25). The
	// layouts cover every zone form the regex allows, so this step does
	// not narrow the accepted formats.
	for _, layout := range isoTimestampLayouts {
		if _, err := time.Parse(layout, s); err == nil {
			return true
		}
	}
	return false
}

// isHexSha256 reports whether s is exactly 64 lowercase hex characters.
func isHexSha256(s string) bool {
	return hexSha256RE.MatchString(s)
}

// isValidModelRole reports whether role is one of the model roles
// enumerated below.
func isValidModelRole(role distillation.ModelRole) bool {
	switch role {
	case distillation.RoleExecutor,
		distillation.RoleReviewer,
		distillation.RoleExtractor,
		distillation.RoleVerifier,
		distillation.RoleCategorizer,
		distillation.RoleTiebreaker,
		distillation.RoleApplier,
		distillation.RoleEmbedder,
		distillation.RoleOther:
		return true
	}
	return false
}

// trim prepares an arbitrary value for inclusion in a single-line error
// message: it caps s at n bytes and replaces newlines with spaces.
//
// The cut never splits a multi-byte UTF-8 sequence, so the result may be
// up to utf8.UTFMax-1 bytes shorter than n. A negative n is treated as 0.
func trim(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) > n {
		// Back up to the start of the rune that straddles the cut. The
		// walk is bounded by the longest possible encoding so that input
		// which is not valid UTF-8 cannot shrink the result arbitrarily.
		end := n
		for end > 0 && end > n-utf8.UTFMax+1 && !utf8.RuneStart(s[end]) {
			end--
		}
		s = s[:end]
	}
	return strings.ReplaceAll(s, "\n", " ")
}