package ingestd

import (
	"testing"
)

// TestInferSchema_CleanInts verifies that columns whose samples are all
// integer literals are inferred as non-nullable int64.
func TestInferSchema_CleanInts(t *testing.T) {
	headers := []string{"id", "count"}
	samples := [][]string{{"1", "100"}, {"2", "200"}, {"3", "300"}}

	got, err := InferSchema(headers, samples)
	if err != nil {
		t.Fatal(err)
	}
	// Without this guard an empty schema would skip the loop below and the
	// test would pass without asserting anything.
	if len(got) != len(headers) {
		t.Fatalf("got %d columns, want %d", len(got), len(headers))
	}
	for _, c := range got {
		if c.Type != TypeInt64 {
			t.Errorf("%s: got %s, want int64", c.Name, c.Type)
		}
		if c.Nullable {
			t.Errorf("%s should not be nullable", c.Name)
		}
	}
}

// TestInferSchema_FloatColumns verifies that a column mixing integer-looking
// cells with decimal cells is inferred as float64.
func TestInferSchema_FloatColumns(t *testing.T) {
	headers := []string{"price", "weight"}
	samples := [][]string{{"1.5", "2.0"}, {"100", "3.14"}, {"0.0", "0"}}

	got, err := InferSchema(headers, samples)
	if err != nil {
		t.Fatalf("InferSchema: %v", err)
	}
	if len(got) != len(headers) {
		t.Fatalf("got %d columns, want %d", len(got), len(headers))
	}

	// Each column contains at least one cell that is not int-parseable
	// ("1.5" in price, "3.14" in weight), so the expected type is float64
	// even though other cells ("100", "0") look like integers.
	if got[0].Type != TypeFloat64 {
		t.Errorf("price: got %s, want float64", got[0].Type)
	}
	if got[1].Type != TypeFloat64 {
		t.Errorf("weight: got %s, want float64", got[1].Type)
	}
}

// TestInferSchema_AmbiguousFallsToString verifies the string fallback for a
// column that mixes numeric cells with non-numeric and empty cells.
func TestInferSchema_AmbiguousFallsToString(t *testing.T) {
	// ADR-010: a column with "123", "N/A", and "" is a string, not int.
	headers := []string{"salary"}
	samples := [][]string{{"50000"}, {"N/A"}, {"60000"}, {""}}

	got, err := InferSchema(headers, samples)
	if err != nil {
		t.Fatalf("InferSchema: %v", err)
	}
	if len(got) != len(headers) {
		t.Fatalf("got %d columns, want %d", len(got), len(headers))
	}

	if got[0].Type != TypeString {
		t.Errorf("salary: got %s, want string (ADR-010 fallback)", got[0].Type)
	}
	if !got[0].Nullable {
		t.Errorf("salary: should be nullable (saw empty cell)")
	}
}

// TestInferSchema_BoolLiterals verifies that true/false literals in lower,
// title, and upper case are all inferred as bool.
func TestInferSchema_BoolLiterals(t *testing.T) {
	headers := []string{"active", "deleted"}
	samples := [][]string{{"true", "false"}, {"True", "False"}, {"TRUE", "FALSE"}}

	got, err := InferSchema(headers, samples)
	if err != nil {
		t.Fatalf("InferSchema: %v", err)
	}
	if len(got) != len(headers) {
		t.Fatalf("got %d columns, want %d", len(got), len(headers))
	}

	if got[0].Type != TypeBool {
		t.Errorf("active: got %s, want bool", got[0].Type)
	}
	if got[1].Type != TypeBool {
		t.Errorf("deleted: got %s, want bool", got[1].Type)
	}
}

// TestInferSchema_OneZeroIsInt_NotBool verifies that a column of small
// integers that includes 0 and 1 is inferred as int64 rather than bool.
func TestInferSchema_OneZeroIsInt_NotBool(t *testing.T) {
	// Keeps the type system honest — 1/0 columns in CRM data are
	// typically counts (children, certs), not flags.
	headers := []string{"children"}
	samples := [][]string{{"0"}, {"1"}, {"2"}, {"0"}}

	got, err := InferSchema(headers, samples)
	if err != nil {
		t.Fatalf("InferSchema: %v", err)
	}
	if len(got) != len(headers) {
		t.Fatalf("got %d columns, want %d", len(got), len(headers))
	}

	if got[0].Type != TypeInt64 {
		t.Errorf("children: got %s, want int64 (1/0 is int, not bool)", got[0].Type)
	}
}

// TestInferSchema_EmptyHeader verifies that InferSchema returns an error for
// nil headers and for a header list containing an empty name.
func TestInferSchema_EmptyHeader(t *testing.T) {
	if _, err := InferSchema(nil, nil); err == nil {
		t.Error("nil headers should error")
	}
	if _, err := InferSchema([]string{"valid", ""}, nil); err == nil {
		t.Error("empty header name should error")
	}
}

// TestFingerprint_Deterministic verifies that two schemas inferred from
// identical input produce the same fingerprint.
func TestFingerprint_Deterministic(t *testing.T) {
	s1, err := InferSchema([]string{"id", "name"}, [][]string{{"1", "alice"}})
	if err != nil {
		t.Fatalf("InferSchema (first call): %v", err)
	}
	s2, err := InferSchema([]string{"id", "name"}, [][]string{{"1", "alice"}})
	if err != nil {
		t.Fatalf("InferSchema (second call): %v", err)
	}

	// Compute each fingerprint once so the failure message reports exactly
	// the values that were compared.
	f1, f2 := s1.Fingerprint(), s2.Fingerprint()
	if f1 != f2 {
		t.Errorf("fingerprint not deterministic: %s vs %s", f1, f2)
	}
}

// TestFingerprint_FlipsOnTypeChange verifies that the fingerprint differs
// when the same column name is inferred with a different type.
func TestFingerprint_FlipsOnTypeChange(t *testing.T) {
	intSchema, err := InferSchema([]string{"id"}, [][]string{{"1"}, {"2"}})
	if err != nil {
		t.Fatalf("InferSchema (int samples): %v", err)
	}
	strSchema, err := InferSchema([]string{"id"}, [][]string{{"1"}, {"abc"}})
	if err != nil {
		t.Fatalf("InferSchema (string samples): %v", err)
	}

	if intSchema.Fingerprint() == strSchema.Fingerprint() {
		t.Error("fingerprint should flip when column type changes")
	}
}

// TestFingerprint_StableUnderNullable verifies that adding an empty cell to
// the samples does not change the fingerprint.
func TestFingerprint_StableUnderNullable(t *testing.T) {
	// Adding null cells doesn't flip the fingerprint — it's only
	// about (name, type), not nullability.
	a, err := InferSchema([]string{"id"}, [][]string{{"1"}, {"2"}})
	if err != nil {
		t.Fatalf("InferSchema (no empty cells): %v", err)
	}
	b, err := InferSchema([]string{"id"}, [][]string{{"1"}, {"2"}, {""}})
	if err != nil {
		t.Fatalf("InferSchema (with empty cell): %v", err)
	}

	if a.Fingerprint() != b.Fingerprint() {
		t.Error("fingerprint shouldn't flip when nullability changes")
	}
}

// TestFingerprint_RespectsColumnOrder verifies that the fingerprint depends
// on the order of the columns, not just on which columns are present.
func TestFingerprint_RespectsColumnOrder(t *testing.T) {
	// Same columns, swapped order → different fingerprint.
	a, err := InferSchema([]string{"id", "name"}, [][]string{{"1", "x"}})
	if err != nil {
		t.Fatalf("InferSchema (id, name): %v", err)
	}
	b, err := InferSchema([]string{"name", "id"}, [][]string{{"x", "1"}})
	if err != nil {
		t.Fatalf("InferSchema (name, id): %v", err)
	}

	if a.Fingerprint() == b.Fingerprint() {
		t.Error("fingerprint should be order-sensitive")
	}
}