This is an automated email from the ASF dual-hosted git repository.
laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new ac1ead73 feat(schema): reject complex type defaults #1011 (#1017)
ac1ead73 is described below
commit ac1ead73460291da098f06af1279d43a2ed4c7e5
Author: Tanmay Rauth <[email protected]>
AuthorDate: Wed May 6 09:11:37 2026 -0700
feat(schema): reject complex type defaults #1011 (#1017)
Validates that initial-default / write-default on struct, list, and map
fields match the expected JSON shape (object for struct/map, array for
list). Walks the full schema tree including nested fields within
list/map elements. Mirrors the existing unknownTypeValidator pattern.
Closes #1011
---
table/metadata_builder_internal_test.go | 159 ++++++++++++++++++++++++++++++++
table/metadata_schema_compatibility.go | 106 +++++++++++++++++++++
2 files changed, 265 insertions(+)
diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go
index 4f801127..8754144d 100644
--- a/table/metadata_builder_internal_test.go
+++ b/table/metadata_builder_internal_test.go
@@ -1837,3 +1837,162 @@ func TestUnknownTypeValidation(t *testing.T) {
require.ErrorContains(t, err, "must be optional")
})
}
+
+func TestComplexTypeDefaultValidation(t *testing.T) {
+ t.Run("InvalidStructInitialDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "s", Type: &iceberg.StructType{
+ FieldList: []iceberg.NestedField{
+ {ID: 2, Name: "x", Type: iceberg.Int32Type{}, Required: false},
+ },
+ }, Required: false, InitialDefault: "not a struct"},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "struct type field 's' (id: 1) must have null or JSON object initial-default")
+ })
+
+ t.Run("InvalidStructWriteDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "s", Type: &iceberg.StructType{
+ FieldList: []iceberg.NestedField{
+ {ID: 2, Name: "x", Type: iceberg.Int32Type{}, Required: false},
+ },
+ }, Required: false, WriteDefault: float64(42)},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "struct type field 's' (id: 1) must have null or JSON object write-default")
+ })
+
+ t.Run("ValidStructNullDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "s", Type: &iceberg.StructType{
+ FieldList: []iceberg.NestedField{
+ {ID: 2, Name: "x", Type: iceberg.Int32Type{}, Required: false},
+ },
+ }, Required: false},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run("ValidStructObjectDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "s", Type: &iceberg.StructType{
+ FieldList: []iceberg.NestedField{
+ {ID: 2, Name: "x", Type: iceberg.Int32Type{}, Required: false},
+ },
+ }, Required: false, InitialDefault: map[string]any{"x": float64(1)}},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run("InvalidListInitialDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "l", Type: &iceberg.ListType{
+ ElementID: 2, Element: iceberg.StringType{}, ElementRequired: false,
+ }, Required: false, InitialDefault: "not a list"},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "list type field 'l' (id: 1) must have null or JSON array initial-default")
+ })
+
+ t.Run("InvalidListWriteDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "l", Type: &iceberg.ListType{
+ ElementID: 2, Element: iceberg.StringType{}, ElementRequired: false,
+ }, Required: false, WriteDefault: map[string]any{"a": "b"}},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "list type field 'l' (id: 1) must have null or JSON array write-default")
+ })
+
+ t.Run("ValidListNullDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "l", Type: &iceberg.ListType{
+ ElementID: 2, Element: iceberg.StringType{}, ElementRequired: false,
+ }, Required: false},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run("ValidListArrayDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "l", Type: &iceberg.ListType{
+ ElementID: 2, Element: iceberg.StringType{}, ElementRequired: false,
+ }, Required: false, InitialDefault: []any{"a", "b"}},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run("InvalidMapInitialDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "m", Type: &iceberg.MapType{
+ KeyID: 2, KeyType: iceberg.StringType{}, ValueID: 3, ValueType: iceberg.Int32Type{}, ValueRequired: false,
+ }, Required: false, InitialDefault: []any{"not", "a", "map"}},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "map type field 'm' (id: 1) must have null or JSON object initial-default")
+ })
+
+ t.Run("InvalidMapWriteDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "m", Type: &iceberg.MapType{
+ KeyID: 2, KeyType: iceberg.StringType{}, ValueID: 3, ValueType: iceberg.Int32Type{}, ValueRequired: false,
+ }, Required: false, WriteDefault: "not a map"},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "map type field 'm' (id: 1) must have null or JSON object write-default")
+ })
+
+ t.Run("ValidMapNullDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "m", Type: &iceberg.MapType{
+ KeyID: 2, KeyType: iceberg.StringType{}, ValueID: 3, ValueType: iceberg.Int32Type{}, ValueRequired: false,
+ }, Required: false},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run("ValidMapObjectDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "m", Type: &iceberg.MapType{
+ KeyID: 2, KeyType: iceberg.StringType{}, ValueID: 3, ValueType: iceberg.Int32Type{}, ValueRequired: false,
+ }, Required: false, InitialDefault: map[string]any{"keys": []any{"a"}, "values": []any{float64(1)}}},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run("PrimitiveDefaultPassesThrough", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "n", Type: iceberg.Int64Type{}, Required: false, InitialDefault: float64(42)},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run("InvalidNestedStructDefault", func(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "outer", Type: &iceberg.StructType{
+ FieldList: []iceberg.NestedField{
+ {ID: 2, Name: "inner", Type: &iceberg.ListType{
+ ElementID: 3, Element: iceberg.StringType{}, ElementRequired: false,
+ }, Required: false, InitialDefault: "not a list"},
+ },
+ }, Required: false},
+ )
+ err := checkSchemaCompatibility(schema, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "list type field 'inner' (id: 2) must have null or JSON array initial-default")
+ })
+}
diff --git a/table/metadata_schema_compatibility.go b/table/metadata_schema_compatibility.go
index 91fdb0cf..d1baf48f 100644
--- a/table/metadata_schema_compatibility.go
+++ b/table/metadata_schema_compatibility.go
@@ -78,6 +78,10 @@ func checkSchemaCompatibility(sc *iceberg.Schema, formatVersion int) error {
return fmt.Errorf("failed to validate unknown types: %w", err)
}
+ if err := validateComplexTypeDefaults(sc); err != nil {
+ return fmt.Errorf("failed to validate complex type defaults: %w", err)
+ }
+
if _, err := iceberg.IndexNameByID(sc); err != nil {
return fmt.Errorf("invalid schema: %w", err)
}
@@ -246,3 +250,105 @@ func (v *unknownTypeValidator) Map(mapType iceberg.MapType, keyResult, valueResu
func (v *unknownTypeValidator) Primitive(_ iceberg.PrimitiveType) error {
return nil
}
+
+func validateComplexTypeDefaults(sc *iceberg.Schema) error {
+ validator := &complexTypeDefaultValidator{}
+ result, err := iceberg.Visit(sc, validator)
+ if err != nil {
+ return err
+ }
+
+ return result
+}
+
+type complexTypeDefaultValidator struct{}
+
+func (v *complexTypeDefaultValidator) Schema(_ *iceberg.Schema, structResult error) error {
+ return structResult
+}
+
+func (v *complexTypeDefaultValidator) Struct(_ iceberg.StructType, fieldResults []error) error {
+ for _, err := range fieldResults {
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (v *complexTypeDefaultValidator) Field(field iceberg.NestedField, fieldResult error) error {
+ if fieldResult != nil {
+ return fieldResult
+ }
+
+ return validateComplexDefault(field)
+}
+
+func (v *complexTypeDefaultValidator) List(list iceberg.ListType, elemResult error) error {
+ if elemResult != nil {
+ return elemResult
+ }
+
+ return validateComplexDefault(list.ElementField())
+}
+
+func (v *complexTypeDefaultValidator) Map(mapType iceberg.MapType, keyResult, valueResult error) error {
+ if keyResult != nil {
+ return keyResult
+ }
+
+ if valueResult != nil {
+ return valueResult
+ }
+
+ if err := validateComplexDefault(mapType.KeyField()); err != nil {
+ return err
+ }
+
+ return validateComplexDefault(mapType.ValueField())
+}
+
+func (v *complexTypeDefaultValidator) Primitive(_ iceberg.PrimitiveType) error {
+ return nil
+}
+
+func validateComplexDefault(field iceberg.NestedField) error {
+ switch field.Type.(type) {
+ case *iceberg.StructType:
+ if field.InitialDefault != nil {
+ if _, ok := field.InitialDefault.(map[string]any); !ok {
+ return fmt.Errorf("struct type field '%s' (id: %d) must have null or JSON object initial-default, but got: %v", field.Name, field.ID, field.InitialDefault)
+ }
+ }
+ if field.WriteDefault != nil {
+ if _, ok := field.WriteDefault.(map[string]any); !ok {
+ return fmt.Errorf("struct type field '%s' (id: %d) must have null or JSON object write-default, but got: %v", field.Name, field.ID, field.WriteDefault)
+ }
+ }
+ case *iceberg.ListType:
+ if field.InitialDefault != nil {
+ if _, ok := field.InitialDefault.([]any); !ok {
+ return fmt.Errorf("list type field '%s' (id: %d) must have null or JSON array initial-default, but got: %v", field.Name, field.ID, field.InitialDefault)
+ }
+ }
+ if field.WriteDefault != nil {
+ if _, ok := field.WriteDefault.([]any); !ok {
+ return fmt.Errorf("list type field '%s' (id: %d) must have null or JSON array write-default, but got: %v", field.Name, field.ID, field.WriteDefault)
+ }
+ }
+ case *iceberg.MapType:
+ if field.InitialDefault != nil {
+ if _, ok := field.InitialDefault.(map[string]any); !ok {
+ return fmt.Errorf("map type field '%s' (id: %d) must have null or JSON object initial-default, but got: %v", field.Name, field.ID, field.InitialDefault)
+ }
+ }
+ if field.WriteDefault != nil {
+ if _, ok := field.WriteDefault.(map[string]any); !ok {
+ return fmt.Errorf("map type field '%s' (id: %d) must have null or JSON object write-default, but got: %v", field.Name, field.ID, field.WriteDefault)
+ }
+ }
+ }
+
+ return nil
+}