This is an automated email from the ASF dual-hosted git repository.

laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new ac1ead73 feat(schema): reject complex type defaults #1011  (#1017)
ac1ead73 is described below

commit ac1ead73460291da098f06af1279d43a2ed4c7e5
Author: Tanmay Rauth <[email protected]>
AuthorDate: Wed May 6 09:11:37 2026 -0700

    feat(schema): reject complex type defaults #1011  (#1017)
    
    Validates that initial-default / write-default on struct, list, and map
    fields match the expected JSON shape (object for struct/map, array for
    list). Walks the full schema tree including nested fields within
    list/map elements. Mirrors the existing unknownTypeValidator pattern.
    
    
    Closes #1011
---
 table/metadata_builder_internal_test.go | 159 ++++++++++++++++++++++++++++++++
 table/metadata_schema_compatibility.go  | 106 +++++++++++++++++++++
 2 files changed, 265 insertions(+)

diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go
index 4f801127..8754144d 100644
--- a/table/metadata_builder_internal_test.go
+++ b/table/metadata_builder_internal_test.go
@@ -1837,3 +1837,162 @@ func TestUnknownTypeValidation(t *testing.T) {
                require.ErrorContains(t, err, "must be optional")
        })
 }
+
+func TestComplexTypeDefaultValidation(t *testing.T) {
+       t.Run("InvalidStructInitialDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "s", Type: 
&iceberg.StructType{
+                               FieldList: []iceberg.NestedField{
+                                       {ID: 2, Name: "x", Type: 
iceberg.Int32Type{}, Required: false},
+                               },
+                       }, Required: false, InitialDefault: "not a struct"},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.Error(t, err)
+               require.ErrorContains(t, err, "struct type field 's' (id: 1) 
must have null or JSON object initial-default")
+       })
+
+       t.Run("InvalidStructWriteDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "s", Type: 
&iceberg.StructType{
+                               FieldList: []iceberg.NestedField{
+                                       {ID: 2, Name: "x", Type: 
iceberg.Int32Type{}, Required: false},
+                               },
+                       }, Required: false, WriteDefault: float64(42)},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.Error(t, err)
+               require.ErrorContains(t, err, "struct type field 's' (id: 1) 
must have null or JSON object write-default")
+       })
+
+       t.Run("ValidStructNullDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "s", Type: 
&iceberg.StructType{
+                               FieldList: []iceberg.NestedField{
+                                       {ID: 2, Name: "x", Type: 
iceberg.Int32Type{}, Required: false},
+                               },
+                       }, Required: false},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.NoError(t, err)
+       })
+
+       t.Run("ValidStructObjectDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "s", Type: 
&iceberg.StructType{
+                               FieldList: []iceberg.NestedField{
+                                       {ID: 2, Name: "x", Type: 
iceberg.Int32Type{}, Required: false},
+                               },
+                       }, Required: false, InitialDefault: map[string]any{"x": 
float64(1)}},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.NoError(t, err)
+       })
+
+       t.Run("InvalidListInitialDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "l", Type: 
&iceberg.ListType{
+                               ElementID: 2, Element: iceberg.StringType{}, 
ElementRequired: false,
+                       }, Required: false, InitialDefault: "not a list"},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.Error(t, err)
+               require.ErrorContains(t, err, "list type field 'l' (id: 1) must 
have null or JSON array initial-default")
+       })
+
+       t.Run("InvalidListWriteDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "l", Type: 
&iceberg.ListType{
+                               ElementID: 2, Element: iceberg.StringType{}, 
ElementRequired: false,
+                       }, Required: false, WriteDefault: map[string]any{"a": 
"b"}},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.Error(t, err)
+               require.ErrorContains(t, err, "list type field 'l' (id: 1) must 
have null or JSON array write-default")
+       })
+
+       t.Run("ValidListNullDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "l", Type: 
&iceberg.ListType{
+                               ElementID: 2, Element: iceberg.StringType{}, 
ElementRequired: false,
+                       }, Required: false},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.NoError(t, err)
+       })
+
+       t.Run("ValidListArrayDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "l", Type: 
&iceberg.ListType{
+                               ElementID: 2, Element: iceberg.StringType{}, 
ElementRequired: false,
+                       }, Required: false, InitialDefault: []any{"a", "b"}},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.NoError(t, err)
+       })
+
+       t.Run("InvalidMapInitialDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "m", Type: 
&iceberg.MapType{
+                               KeyID: 2, KeyType: iceberg.StringType{}, 
ValueID: 3, ValueType: iceberg.Int32Type{}, ValueRequired: false,
+                       }, Required: false, InitialDefault: []any{"not", "a", 
"map"}},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.Error(t, err)
+               require.ErrorContains(t, err, "map type field 'm' (id: 1) must 
have null or JSON object initial-default")
+       })
+
+       t.Run("InvalidMapWriteDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "m", Type: 
&iceberg.MapType{
+                               KeyID: 2, KeyType: iceberg.StringType{}, 
ValueID: 3, ValueType: iceberg.Int32Type{}, ValueRequired: false,
+                       }, Required: false, WriteDefault: "not a map"},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.Error(t, err)
+               require.ErrorContains(t, err, "map type field 'm' (id: 1) must 
have null or JSON object write-default")
+       })
+
+       t.Run("ValidMapNullDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "m", Type: 
&iceberg.MapType{
+                               KeyID: 2, KeyType: iceberg.StringType{}, 
ValueID: 3, ValueType: iceberg.Int32Type{}, ValueRequired: false,
+                       }, Required: false},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.NoError(t, err)
+       })
+
+       t.Run("ValidMapObjectDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "m", Type: 
&iceberg.MapType{
+                               KeyID: 2, KeyType: iceberg.StringType{}, 
ValueID: 3, ValueType: iceberg.Int32Type{}, ValueRequired: false,
+                       }, Required: false, InitialDefault: 
map[string]any{"keys": []any{"a"}, "values": []any{float64(1)}}},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.NoError(t, err)
+       })
+
+       t.Run("PrimitiveDefaultPassesThrough", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "n", Type: 
iceberg.Int64Type{}, Required: false, InitialDefault: float64(42)},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.NoError(t, err)
+       })
+
+       t.Run("InvalidNestedStructDefault", func(t *testing.T) {
+               schema := iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "outer", Type: 
&iceberg.StructType{
+                               FieldList: []iceberg.NestedField{
+                                       {ID: 2, Name: "inner", Type: 
&iceberg.ListType{
+                                               ElementID: 3, Element: 
iceberg.StringType{}, ElementRequired: false,
+                                       }, Required: false, InitialDefault: 
"not a list"},
+                               },
+                       }, Required: false},
+               )
+               err := checkSchemaCompatibility(schema, 3)
+               require.Error(t, err)
+               require.ErrorContains(t, err, "list type field 'inner' (id: 2) 
must have null or JSON array initial-default")
+       })
+}
diff --git a/table/metadata_schema_compatibility.go b/table/metadata_schema_compatibility.go
index 91fdb0cf..d1baf48f 100644
--- a/table/metadata_schema_compatibility.go
+++ b/table/metadata_schema_compatibility.go
@@ -78,6 +78,10 @@ func checkSchemaCompatibility(sc *iceberg.Schema, formatVersion int) error {
                return fmt.Errorf("failed to validate unknown types: %w", err)
        }
 
+       if err := validateComplexTypeDefaults(sc); err != nil {
+               return fmt.Errorf("failed to validate complex type defaults: 
%w", err)
+       }
+
        if _, err := iceberg.IndexNameByID(sc); err != nil {
                return fmt.Errorf("invalid schema: %w", err)
        }
@@ -246,3 +250,105 @@ func (v *unknownTypeValidator) Map(mapType iceberg.MapType, keyResult, valueResu
 func (v *unknownTypeValidator) Primitive(_ iceberg.PrimitiveType) error {
        return nil
 }
+
+// validateComplexTypeDefaults walks sc with a complexTypeDefaultValidator and
+// reports the first default-shape violation it produces. An error returned by
+// the traversal itself takes precedence over the validator's own result.
+func validateComplexTypeDefaults(sc *iceberg.Schema) error {
+       violation, walkErr := iceberg.Visit(sc, &complexTypeDefaultValidator{})
+       if walkErr != nil {
+               return walkErr
+       }
+
+       return violation
+}
+
+// complexTypeDefaultValidator is a stateless schema visitor whose per-node
+// result is an error: nil when the subtree is acceptable, otherwise the first
+// complaint about a struct, list or map default of the wrong shape.
+type complexTypeDefaultValidator struct{}
+
+// Schema passes the result computed for the schema's root struct through
+// unchanged; the schema node itself carries nothing to validate.
+func (v *complexTypeDefaultValidator) Schema(_ *iceberg.Schema, structResult error) error {
+       return structResult
+}
+
+// Struct returns the first non-nil entry of fieldResults, in slice order, or
+// nil when every field of the struct validated cleanly.
+func (v *complexTypeDefaultValidator) Struct(_ iceberg.StructType, fieldResults []error) error {
+       for _, err := range fieldResults {
+               if err != nil {
+                       return err
+               }
+       }
+
+       return nil
+}
+
+// Field validates a single field. A failure already found inside the field's
+// type (fieldResult) wins; only when the subtree is clean are the field's own
+// defaults checked with validateComplexDefault.
+func (v *complexTypeDefaultValidator) Field(field iceberg.NestedField, fieldResult error) error {
+       if fieldResult != nil {
+               return fieldResult
+       }
+
+       return validateComplexDefault(field)
+}
+
+// List propagates a failure from the element type, otherwise runs
+// validateComplexDefault on the list's element field.
+//
+// NOTE(review): the field comes from list.ElementField(); confirm whether that
+// descriptor can ever carry InitialDefault/WriteDefault values — if not, this
+// check can never fail and exists only for symmetry with Field.
+func (v *complexTypeDefaultValidator) List(list iceberg.ListType, elemResult error) error {
+       if elemResult != nil {
+               return elemResult
+       }
+
+       return validateComplexDefault(list.ElementField())
+}
+
+// Map propagates a failure from the key type, then from the value type, and
+// otherwise runs validateComplexDefault on the map's key field followed by its
+// value field. The key is always reported before the value.
+//
+// NOTE(review): the fields come from mapType.KeyField()/ValueField(); confirm
+// whether those descriptors can ever carry default values.
+func (v *complexTypeDefaultValidator) Map(mapType iceberg.MapType, keyResult, valueResult error) error {
+       if keyResult != nil {
+               return keyResult
+       }
+
+       if valueResult != nil {
+               return valueResult
+       }
+
+       if err := validateComplexDefault(mapType.KeyField()); err != nil {
+               return err
+       }
+
+       return validateComplexDefault(mapType.ValueField())
+}
+
+// Primitive always succeeds: primitive types are not inspected by this
+// validator.
+func (v *complexTypeDefaultValidator) Primitive(_ iceberg.PrimitiveType) error {
+       return nil
+}
+
+// validateComplexDefault checks that the defaults attached to a struct, list
+// or map field have the Go shape of the corresponding decoded JSON value:
+// map[string]any (a JSON object) for struct and map fields, []any (a JSON
+// array) for list fields. A nil default is always accepted.
+//
+// Only the pointer forms *iceberg.StructType, *iceberg.ListType and
+// *iceberg.MapType are matched; a field of any other type returns nil without
+// its defaults being examined. InitialDefault is checked before WriteDefault,
+// so when both are wrong the initial-default error is the one returned.
+func validateComplexDefault(field iceberg.NestedField) error {
+       isObject := func(v any) bool {
+               _, ok := v.(map[string]any)
+
+               return ok
+       }
+       isArray := func(v any) bool {
+               _, ok := v.([]any)
+
+               return ok
+       }
+
+       // kind names the field's type and shape names the required JSON form in
+       // the error message; matches is the predicate a non-nil default must pass.
+       var (
+               kind, shape string
+               matches     func(any) bool
+       )
+
+       switch field.Type.(type) {
+       case *iceberg.StructType:
+               kind, shape, matches = "struct", "object", isObject
+       case *iceberg.ListType:
+               kind, shape, matches = "list", "array", isArray
+       case *iceberg.MapType:
+               kind, shape, matches = "map", "object", isObject
+       default:
+               return nil
+       }
+
+       defaults := []struct {
+               label string
+               value any
+       }{
+               {"initial-default", field.InitialDefault},
+               {"write-default", field.WriteDefault},
+       }
+
+       for _, d := range defaults {
+               if d.value != nil && !matches(d.value) {
+                       return fmt.Errorf("%s type field '%s' (id: %d) must have null or JSON %s %s, but got: %v",
+                               kind, field.Name, field.ID, shape, d.label, d.value)
+               }
+       }
+
+       return nil
+}

Reply via email to