/// Auto-detect PII columns by name patterns.
/// Conservative: flags likely PII, doesn't miss obvious cases.
use crate::types::Sensitivity;

/// Check if a column name suggests PII content.
///
/// The name is trimmed and matched case-insensitively. Patterns are tested
/// against both the plain lowercased name and a normalised `snake_case` form
/// (separators and camelCase boundaries become `_`), so `First Name`,
/// `firstName` and `first_name` are all recognised.
///
/// Categories are checked in a fixed order and the first hit wins:
/// direct identifiers, names, contact info, address (all `Pii`), then
/// financial (`Financial`), health (`Phi`) and finally date of birth (`Pii`).
///
/// Returns `None` when no pattern matches.
pub fn detect_sensitivity(column_name: &str) -> Option<Sensitivity> {
    // Identifiers that are only trusted when they make up the whole name
    // ("sin" and "national_id" are too ambiguous as substrings).
    const DIRECT_IDENTIFIERS: [&str; 10] = [
        "ssn",
        "social_security",
        "social_security_number",
        "sin",
        "national_id",
        "passport",
        "passport_number",
        "drivers_license",
        "driver_license",
        "dl_number",
    ];

    let trimmed = column_name.trim();
    let lower = trimmed.to_lowercase();
    let tokens = name_tokens(trimmed);
    let normalized = tokens.join("_");

    // Substring match on either spelling of the name.
    let has = |needle: &str| lower.contains(needle) || normalized.contains(needle);
    // Whole-name match on either spelling of the name.
    let is_exactly = |exact: &str| lower == exact || normalized == exact;
    // Whole-word match; keeps short identifiers from firing inside other words.
    let has_word = |word: &str| tokens.iter().any(|t| t.as_str() == word);

    // Direct PII identifiers
    if DIRECT_IDENTIFIERS.iter().any(|&id| is_exactly(id))
        || has_word("ssn")
        || has("social_security")
        || has("passport")
        || has("drivers_license")
        || has("driver_license")
    {
        return Some(Sensitivity::Pii);
    }

    // Names
    if has("first_name")
        || has("last_name")
        || has("full_name")
        || has("middle_name")
        || is_exactly("name")
        || is_exactly("fname")
        || is_exactly("lname")
    {
        return Some(Sensitivity::Pii);
    }

    // Contact info
    // "cell" must start or end a word: a bare substring test would also flag
    // names such as "cancelled_at" or "excellent_rating".
    let mentions_cell = tokens
        .iter()
        .any(|t| t.starts_with("cell") || t.ends_with("cell"));
    if has("email")
        || has("e_mail")
        || has("phone") // also covers "telephone"
        || has("mobile")
        || has("fax")
        || mentions_cell
        || is_exactly("tel")
        || has_word("tel")
    {
        return Some(Sensitivity::Pii);
    }

    // Address
    if has("address")
        || has("street")
        || (has("zip") && !has("unzip"))
        || has("postal_code")
        || has("postcode")
    {
        return Some(Sensitivity::Pii);
    }

    // Financial
    if has("salary")
        || has("wage")
        || has("pay_rate")
        || has("bill_rate")
        || has("compensation")
        || has("revenue")
        || has("bank_account")
        || has("routing_number")
        || has("credit_card")
    {
        return Some(Sensitivity::Financial);
    }

    // Health
    if has("diagnosis")
        || has("medication")
        || has("medical")
        || has("health")
        || has("patient_id")
        || has("mrn")
    {
        return Some(Sensitivity::Phi);
    }

    // Date of birth
    if is_exactly("dob")
        || has_word("dob")
        || has("date_of_birth")
        || has("birthdate")
        || has("birth_date")
        || has("birthday")
    {
        return Some(Sensitivity::Pii);
    }

    None
}

/// Split a column name into lowercase words.
///
/// Words end at any non-alphanumeric character and at a lowercase-to-uppercase
/// transition, so `homeCell`, `home_cell` and `Home Cell` all yield
/// `["home", "cell"]`, while an all-caps run such as `SSN` stays one word.
fn name_tokens(column_name: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut prev_is_lower = false;

    for ch in column_name.chars() {
        if !ch.is_alphanumeric() {
            if !current.is_empty() {
                tokens.push(std::mem::take(&mut current));
            }
            prev_is_lower = false;
            continue;
        }
        // camelCase boundary: `current` is non-empty here because the
        // previous character was a lowercase letter that was just pushed.
        if ch.is_uppercase() && prev_is_lower {
            tokens.push(std::mem::take(&mut current));
        }
        current.extend(ch.to_lowercase());
        prev_is_lower = ch.is_lowercase();
    }
    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}

/// Classify all columns and return the highest
sensitivity found. pub fn detect_dataset_sensitivity(column_names: &[&str]) -> Option { let mut highest: Option = None; for name in column_names { if let Some(sens) = detect_sensitivity(name) { highest = Some(match (&highest, &sens) { (None, s) => s.clone(), (Some(Sensitivity::Public), s) => s.clone(), (Some(Sensitivity::Internal), s) if !matches!(s, Sensitivity::Public | Sensitivity::Internal) => s.clone(), (Some(Sensitivity::Financial), Sensitivity::Pii | Sensitivity::Phi) => sens.clone(), (Some(Sensitivity::Pii), Sensitivity::Phi) => Sensitivity::Phi, (existing, _) => existing.clone().unwrap(), }); } } highest } #[cfg(test)] mod tests { use super::*; #[test] fn detects_email_as_pii() { assert_eq!(detect_sensitivity("email"), Some(Sensitivity::Pii)); assert_eq!(detect_sensitivity("contact_email"), Some(Sensitivity::Pii)); } #[test] fn detects_salary_as_financial() { assert_eq!(detect_sensitivity("salary"), Some(Sensitivity::Financial)); assert_eq!(detect_sensitivity("bill_rate"), Some(Sensitivity::Financial)); } #[test] fn detects_ssn() { assert_eq!(detect_sensitivity("ssn"), Some(Sensitivity::Pii)); } #[test] fn non_sensitive_returns_none() { assert_eq!(detect_sensitivity("status"), None); assert_eq!(detect_sensitivity("created_at"), None); } #[test] fn dataset_sensitivity_picks_highest() { let cols = vec!["id", "name", "email", "status"]; assert_eq!(detect_dataset_sensitivity(&cols), Some(Sensitivity::Pii)); } }