This is an automated email from the ASF dual-hosted git repository.
laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 40b81252 feat(table): port remaining v3 metadata-upgrade validation rules (#1070)
40b81252 is described below
commit 40b81252526c658740541d87f78531c7a9f8c70b
Author: Tanmay Rauth <[email protected]>
AuthorDate: Tue May 12 13:38:39 2026 -0700
feat(table): port remaining v3 metadata-upgrade validation rules (#1070)
Add missing validation rules to SetFormatVersion that mirror Java's
TableMetadata upgrade checks:
- Initialize lastSequenceNumber to 0 when upgrading from v1 to v2+
- Initialize nextRowID to 0 when upgrading to v3
- Assign UUID if not already set when upgrading to v2+
- Validate lastColumnId is not less than the highest field ID in schemas
Validation runs before state mutation to prevent leaving the builder in
a half-upgraded state on failure.
Closes #1013
---
table/metadata.go | 39 +++++++++
table/metadata_builder_internal_test.go | 148 ++++++++++++++++++++++++++++++++
2 files changed, 187 insertions(+)
diff --git a/table/metadata.go b/table/metadata.go
index e6a06d4a..08f545ca 100644
--- a/table/metadata.go
+++ b/table/metadata.go
@@ -708,9 +708,48 @@ func (b *MetadataBuilder) SetFormatVersion(formatVersion int) error {
return nil
}
+ if err := b.validateLastColumnID(); err != nil {
+ return err
+ }
+
+ previousVersion := b.formatVersion
b.updates = append(b.updates, NewUpgradeFormatVersionUpdate(formatVersion))
b.formatVersion = formatVersion
+ if previousVersion < 2 && formatVersion >= 2 {
+ if b.uuid == (uuid.UUID{}) {
+ b.uuid = uuid.New()
+ }
+
+ if b.lastSequenceNumber == nil {
+ seq := int64(0)
+ b.lastSequenceNumber = &seq
+ }
+ }
+
+ if previousVersion < 3 && formatVersion >= 3 {
+ if b.nextRowID == nil {
+ nextRowID := int64(0)
+ b.nextRowID = &nextRowID
+ }
+ }
+
+ return nil
+}
+
+func (b *MetadataBuilder) validateLastColumnID() error {
+ highestFieldID := 0
+ for _, schema := range b.schemaList {
+ if id := schema.HighestFieldID(); id > highestFieldID {
+ highestFieldID = id
+ }
+ }
+
+ if highestFieldID > 0 && b.lastColumnId < highestFieldID {
+ return fmt.Errorf("%w: last-column-id %d is less than the highest field ID %d in schemas",
+ ErrInvalidMetadata, b.lastColumnId, highestFieldID)
+ }
+
return nil
}
diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go
index eb8f57ef..991bd50b 100644
--- a/table/metadata_builder_internal_test.go
+++ b/table/metadata_builder_internal_test.go
@@ -1998,3 +1998,151 @@ func TestComplexTypeDefaultValidation(t *testing.T) {
require.ErrorContains(t, err, "list type field 'inner' (id: 2) must have null or JSON array initial-default")
})
}
+
+func TestSetFormatVersionV1ToV2InitializesSequenceNumber(t *testing.T) {
+ builder := builderWithoutChanges(1)
+ require.NoError(t, builder.SetFormatVersion(2))
+
+ meta, err := builder.Build()
+ require.NoError(t, err)
+ require.Equal(t, 2, meta.Version())
+ require.Equal(t, int64(0), meta.LastSequenceNumber())
+}
+
+func TestSetFormatVersionV1ToV3InitializesSequenceNumberAndNextRowID(t *testing.T) {
+ builder := builderWithoutChanges(1)
+ require.NoError(t, builder.SetFormatVersion(3))
+
+ meta, err := builder.Build()
+ require.NoError(t, err)
+ require.Equal(t, 3, meta.Version())
+ require.Equal(t, int64(0), meta.LastSequenceNumber())
+ require.Equal(t, int64(0), meta.NextRowID())
+}
+
+func TestSetFormatVersionV2ToV3InitializesNextRowID(t *testing.T) {
+ builder := builderWithoutChanges(2)
+ require.NoError(t, builder.SetFormatVersion(3))
+
+ meta, err := builder.Build()
+ require.NoError(t, err)
+ require.Equal(t, 3, meta.Version())
+ require.Equal(t, int64(0), meta.NextRowID())
+}
+
+func TestSetFormatVersionV1ToV2AssignsUUID(t *testing.T) {
+ builder := builderWithoutChanges(1)
+ require.NoError(t, builder.SetFormatVersion(2))
+
+ meta, err := builder.Build()
+ require.NoError(t, err)
+ require.NotEqual(t, uuid.UUID{}, meta.TableUUID())
+}
+
+func TestSetFormatVersionPreservesExistingUUID(t *testing.T) {
+ builder := builderWithoutChanges(1)
+ existingUUID := uuid.New()
+ require.NoError(t, builder.SetUUID(existingUUID))
+ require.NoError(t, builder.SetFormatVersion(2))
+
+ meta, err := builder.Build()
+ require.NoError(t, err)
+ require.Equal(t, existingUUID, meta.TableUUID())
+}
+
+func TestSetFormatVersionDowngradeNotAllowed(t *testing.T) {
+ builder := builderWithoutChanges(2)
+ err := builder.SetFormatVersion(1)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "downgrading format version from 2 to 1 is not allowed")
+}
+
+func TestSetFormatVersionRejectsUnsupportedVersion(t *testing.T) {
+ builder := builderWithoutChanges(2)
+ err := builder.SetFormatVersion(99)
+ require.Error(t, err)
+ require.ErrorIs(t, err, iceberg.ErrInvalidFormatVersion)
+}
+
+func TestSetFormatVersionNoOpWhenSameVersion(t *testing.T) {
+ builder := builderWithoutChanges(2)
+ require.NoError(t, builder.SetFormatVersion(2))
+ require.False(t, builder.HasChanges())
+}
+
+func TestSetFormatVersionValidatesLastColumnID(t *testing.T) {
+ tableSchema := iceberg.NewSchema(0,
+ iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true},
+ iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true},
+ iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true},
+ )
+ partSpec := iceberg.NewPartitionSpecID(0)
+
+ builder, err := NewMetadataBuilder(1)
+ require.NoError(t, err)
+ require.NoError(t, builder.SetLoc("s3://bucket/test"))
+ require.NoError(t, builder.AddSchema(tableSchema))
+ require.NoError(t, builder.SetCurrentSchemaID(-1))
+ require.NoError(t, builder.AddSortOrder(&UnsortedSortOrder))
+ require.NoError(t, builder.SetDefaultSortOrderID(-1))
+ require.NoError(t, builder.AddPartitionSpec(&partSpec, true))
+ require.NoError(t, builder.SetDefaultSpecID(-1))
+
+ // Corrupt lastColumnId to be less than the highest field ID in the schema
+ builder.lastColumnId = 1
+
+ err = builder.SetFormatVersion(2)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "last-column-id 1 is less than the highest field ID 3")
+
+ // Builder format version must remain unchanged after validation failure
+ require.Equal(t, 1, builder.formatVersion)
+}
+
+func TestSetFormatVersionV2ToV3PreservesSequenceNumber(t *testing.T) {
+ builder := builderWithoutChanges(2)
+
+ meta, err := builder.Build()
+ require.NoError(t, err)
+
+ builder2, err := MetadataBuilderFromBase(meta, "")
+ require.NoError(t, err)
+ require.NoError(t, builder2.SetFormatVersion(3))
+
+ meta3, err := builder2.Build()
+ require.NoError(t, err)
+ require.Equal(t, int64(0), meta3.LastSequenceNumber())
+}
+
+func TestSetFormatVersionV1ToV3FromDeserializedMetadata(t *testing.T) {
+ meta, err := ParseMetadataString(ExampleTableMetadataV1)
+ require.NoError(t, err)
+ require.Equal(t, 1, meta.Version())
+
+ builder, err := MetadataBuilderFromBase(meta, "")
+ require.NoError(t, err)
+ require.NoError(t, builder.SetFormatVersion(3))
+
+ meta3, err := builder.Build()
+ require.NoError(t, err)
+ require.Equal(t, 3, meta3.Version())
+ require.Equal(t, int64(0), meta3.LastSequenceNumber())
+ require.Equal(t, int64(0), meta3.NextRowID())
+ require.NotEqual(t, uuid.UUID{}, meta3.TableUUID())
+}
+
+func TestSetFormatVersionV2ToV3FromDeserializedMetadata(t *testing.T) {
+ meta, err := ParseMetadataString(ExampleTableMetadataV2)
+ require.NoError(t, err)
+ require.Equal(t, 2, meta.Version())
+
+ builder, err := MetadataBuilderFromBase(meta, "")
+ require.NoError(t, err)
+ require.NoError(t, builder.SetFormatVersion(3))
+
+ meta3, err := builder.Build()
+ require.NoError(t, err)
+ require.Equal(t, 3, meta3.Version())
+ require.Equal(t, int64(34), meta3.LastSequenceNumber())
+ require.Equal(t, int64(0), meta3.NextRowID())
+}