This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 95308b1 fix(builder): main ref removal (#533)
95308b1 is described below
commit 95308b18a2b2f8174fdfe85ba754178a87e43923
Author: Tobias Pütz <[email protected]>
AuthorDate: Mon Aug 18 18:40:38 2025 +0200
fix(builder): main ref removal (#533)
Removing the main ref is a valid operation in Iceberg; for example, a
replace transaction does so (see `commitReplaceTransaction`):
<img width="489" height="416" alt="image"
src="https://github.com/user-attachments/assets/bbd9d9e0-1297-4c47-a483-46dfa3ef0827"
/>
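Both iceberg-java and iceberg-rust set current-snapshot to -1 and clear
the snapshot log when the main ref is removed. This change makes iceberg-go
clear the current snapshot ID and the snapshot log too, instead of
returning an error.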
---
table/metadata.go | 7 +-
table/metadata_builder_internal_test.go | 19 +++++
table/metadata_internal_test.go | 15 ++++
table/testdata/TableMetadataV2Valid.json | 122 +++++++++++++++++++++++++++++++
4 files changed, 162 insertions(+), 1 deletion(-)
diff --git a/table/metadata.go b/table/metadata.go
index 5db9531..e47c8db 100644
--- a/table/metadata.go
+++ b/table/metadata.go
@@ -623,7 +623,8 @@ func (b *MetadataBuilder) RemoveSnapshotRef(name string) (*MetadataBuilder, erro
}
if name == MainBranch {
- return nil, errors.New("cannot remove main branch's snapshot ref")
+ b.currentSnapshotID = nil
+ b.snapshotLog = b.snapshotLog[:0]
}
delete(b.refs, name)
@@ -1037,6 +1038,10 @@ func (c *commonMetadata) preValidate() {
c.CurrentSnapshotID = nil
}
+ if c.SnapshotRefs == nil {
+ c.SnapshotRefs = map[string]SnapshotRef{}
+ }
+
if c.CurrentSnapshotID != nil {
if _, ok := c.SnapshotRefs[MainBranch]; !ok {
c.SnapshotRefs[MainBranch] = SnapshotRef{
diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go
index 41b7991..229eab0 100644
--- a/table/metadata_builder_internal_test.go
+++ b/table/metadata_builder_internal_test.go
@@ -194,3 +194,22 @@ func TestCannotAddDuplicateSnapshotID(t *testing.T) {
_, err = builder.AddSnapshot(&snapshot)
require.ErrorContains(t, err, "can't add snapshot with id 2, already exists")
}
+
+func TestRemoveMainSnapshotRef(t *testing.T) {
+ meta, err := getTestTableMetadata("TableMetadataV2Valid.json")
+ require.NoError(t, err)
+ require.NotNil(t, meta)
+ require.NotNil(t, meta.CurrentSnapshot())
+ builder, err := MetadataBuilderFromBase(meta)
+ require.NoError(t, err)
+ require.NotNil(t, builder.currentSnapshotID)
+ if _, ok := builder.refs[MainBranch]; !ok {
+ t.Fatal("expected main branch to exist")
+ }
+ _, err = builder.RemoveSnapshotRef(MainBranch)
+ require.NoError(t, err)
+ require.Nil(t, builder.currentSnapshotID)
+ meta, err = builder.Build()
+ require.NoError(t, err)
+ require.NotNil(t, meta)
+}
diff --git a/table/metadata_internal_test.go b/table/metadata_internal_test.go
index 6ca47a7..e183fcf 100644
--- a/table/metadata_internal_test.go
+++ b/table/metadata_internal_test.go
@@ -19,6 +19,8 @@ package table
import (
"encoding/json"
+ "os"
+ "path"
"slices"
"testing"
@@ -949,3 +951,16 @@ func TestMetadataV2Validation(t *testing.T) {
// Test case 3: Verify LastColumnId maintains 0 when explicitly set
require.NoError(t, meta3.UnmarshalJSON([]byte(zeroColumnID)))
}
+
+func getTestTableMetadata(fileName string) (Metadata, error) {
+ fCont, err := os.ReadFile(path.Join("testdata", fileName))
+ if err != nil {
+ return nil, err
+ }
+ meta, err := ParseMetadataBytes(fCont)
+ if err != nil {
+ return nil, err
+ }
+
+ return meta, nil
+}
diff --git a/table/testdata/TableMetadataV2Valid.json b/table/testdata/TableMetadataV2Valid.json
new file mode 100644
index 0000000..0dc89de
--- /dev/null
+++ b/table/testdata/TableMetadataV2Valid.json
@@ -0,0 +1,122 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 1,
+ "schemas": [
+ {
+ "type": "struct",
+ "schema-id": 0,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ },
+ {
+ "type": "struct",
+ "schema-id": 1,
+ "identifier-field-ids": [
+ 1,
+ 2
+ ],
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ }
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [
+ {
+ "spec-id": 0,
+ "fields": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ]
+ }
+ ],
+ "last-partition-id": 1000,
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {
+ "transform": "identity",
+ "source-id": 2,
+ "direction": "asc",
+ "null-order": "nulls-first"
+ },
+ {
+ "transform": "bucket[4]",
+ "source-id": 3,
+ "direction": "desc",
+ "null-order": "nulls-last"
+ }
+ ]
+ }
+ ],
+ "properties": {},
+ "current-snapshot-id": 3055729675574597004,
+ "snapshots": [
+ {
+ "snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1515100955770,
+ "sequence-number": 0,
+ "summary": {
+ "operation": "append"
+ },
+ "manifest-list": "s3://a/b/1.avro"
+ },
+ {
+ "snapshot-id": 3055729675574597004,
+ "parent-snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1555100955770,
+ "sequence-number": 1,
+ "summary": {
+ "operation": "append"
+ },
+ "manifest-list": "s3://a/b/2.avro",
+ "schema-id": 1
+ }
+ ],
+ "snapshot-log": [
+ {
+ "snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1515100955770
+ },
+ {
+ "snapshot-id": 3055729675574597004,
+ "timestamp-ms": 1555100955770
+ }
+ ],
+ "metadata-log": []
+}
\ No newline at end of file