This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new b3b16429 feat(table): add SetPartitionStatisticsUpdate and RemovePartitionStatisticsUpdate (#1018)
b3b16429 is described below
commit b3b16429bc09817f22fc8bbd9f2461c98e9fec28
Author: Tanmay Rauth <[email protected]>
AuthorDate: Wed May 6 08:58:59 2026 -0700
feat(table): add SetPartitionStatisticsUpdate and RemovePartitionStatisticsUpdate (#1018)
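Adds SetPartitionStatisticsUpdate and RemovePartitionStatisticsUpdate in
table/updates.go, together with the corresponding MetadataBuilder methods
(SetPartitionStatistics / RemovePartitionStatistics) in table/metadata.go
and the UnmarshalJSON wiring for both actions. Mirrors the existing
SetStatisticsUpdate / RemoveStatisticsUpdate from #902.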
Closes #1009
---
table/metadata.go | 34 ++++++++++++++++++
table/updates.go | 74 +++++++++++++++++++++++++++++---------
table/updates_test.go | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 191 insertions(+), 16 deletions(-)
diff --git a/table/metadata.go b/table/metadata.go
index 48d667b5..61aca395 100644
--- a/table/metadata.go
+++ b/table/metadata.go
@@ -1208,6 +1208,40 @@ func (b *MetadataBuilder) RemoveStatistics(snapshotID
int64) error {
return nil
}
+// SetPartitionStatistics adds or replaces a partition statistics file for the given snapshot.
+// If a partition statistics file with the same snapshot ID already exists it is replaced,
+// otherwise the file is appended.
+func (b *MetadataBuilder) SetPartitionStatistics(stats
PartitionStatisticsFile) error {
+ replaced := false
+ for i, s := range b.partitionStatsList {
+ if s.SnapshotID == stats.SnapshotID {
+ b.partitionStatsList[i] = stats
+ replaced = true
+
+ break
+ }
+ }
+
+ if !replaced {
+ b.partitionStatsList = append(b.partitionStatsList, stats)
+ }
+
+ b.updates = append(b.updates, NewSetPartitionStatisticsUpdate(stats))
+
+ return nil
+}
+
+// RemovePartitionStatistics removes the partition statistics file associated with the given
+// snapshot ID. It is not an error if no such file exists.
+func (b *MetadataBuilder) RemovePartitionStatistics(snapshotID int64) error {
+ b.partitionStatsList = slices.DeleteFunc(b.partitionStatsList, func(s
PartitionStatisticsFile) bool {
+ return s.SnapshotID == snapshotID
+ })
+ b.updates = append(b.updates,
NewRemovePartitionStatisticsUpdate(snapshotID))
+
+ return nil
+}
+
// AddEncryptionKey adds or replaces an encryption key indexed by its key-id.
// Encryption keys are only supported for format version 3 and above.
func (b *MetadataBuilder) AddEncryptionKey(key EncryptionKey) error {
diff --git a/table/updates.go b/table/updates.go
index cfe723a5..5dbdfb90 100644
--- a/table/updates.go
+++ b/table/updates.go
@@ -39,22 +39,24 @@ const (
UpdateAssignUUID = "assign-uuid"
- UpdateAddEncryptionKey = "add-encryption-key"
- UpdateRemoveEncryptionKey = "remove-encryption-key"
- UpdateRemoveProperties = "remove-properties"
- UpdateRemoveSchemas = "remove-schemas"
- UpdateRemoveSnapshots = "remove-snapshots"
- UpdateRemoveSnapshotRef = "remove-snapshot-ref"
- UpdateRemoveSpec = "remove-partition-specs"
- UpdateRemoveStatistics = "remove-statistics"
-
- UpdateSetCurrentSchema = "set-current-schema"
- UpdateSetDefaultSortOrder = "set-default-sort-order"
- UpdateSetDefaultSpec = "set-default-spec"
- UpdateSetLocation = "set-location"
- UpdateSetProperties = "set-properties"
- UpdateSetSnapshotRef = "set-snapshot-ref"
- UpdateSetStatistics = "set-statistics"
+ UpdateAddEncryptionKey = "add-encryption-key"
+ UpdateRemoveEncryptionKey = "remove-encryption-key"
+ UpdateRemovePartitionStatistics = "remove-partition-statistics"
+ UpdateRemoveProperties = "remove-properties"
+ UpdateRemoveSchemas = "remove-schemas"
+ UpdateRemoveSnapshots = "remove-snapshots"
+ UpdateRemoveSnapshotRef = "remove-snapshot-ref"
+ UpdateRemoveSpec = "remove-partition-specs"
+ UpdateRemoveStatistics = "remove-statistics"
+
+ UpdateSetCurrentSchema = "set-current-schema"
+ UpdateSetDefaultSortOrder = "set-default-sort-order"
+ UpdateSetDefaultSpec = "set-default-spec"
+ UpdateSetLocation = "set-location"
+ UpdateSetPartitionStatistics = "set-partition-statistics"
+ UpdateSetProperties = "set-properties"
+ UpdateSetSnapshotRef = "set-snapshot-ref"
+ UpdateSetStatistics = "set-statistics"
UpdateUpgradeFormatVersion = "upgrade-format-version"
)
@@ -123,6 +125,10 @@ func (u *Updates) UnmarshalJSON(data []byte) error {
upd = &setStatisticsUpdate{}
case UpdateRemoveStatistics:
upd = &removeStatisticsUpdate{}
+ case UpdateSetPartitionStatistics:
+ upd = &setPartitionStatisticsUpdate{}
+ case UpdateRemovePartitionStatistics:
+ upd = &removePartitionStatisticsUpdate{}
case UpdateAddEncryptionKey:
upd = &addEncryptionKeyUpdate{}
case UpdateRemoveEncryptionKey:
@@ -663,6 +669,42 @@ func (u *removeStatisticsUpdate) Apply(builder
*MetadataBuilder) error {
return builder.RemoveStatistics(u.SnapshotID)
}
+type setPartitionStatisticsUpdate struct {
+ baseUpdate
+ PartitionStatistics PartitionStatisticsFile
`json:"partition-statistics"`
+}
+
+// NewSetPartitionStatisticsUpdate creates a new Update that adds or replaces the partition
+// statistics file for the given snapshot ID in the table metadata.
+func NewSetPartitionStatisticsUpdate(stats PartitionStatisticsFile)
*setPartitionStatisticsUpdate {
+ return &setPartitionStatisticsUpdate{
+ baseUpdate: baseUpdate{ActionName:
UpdateSetPartitionStatistics},
+ PartitionStatistics: stats,
+ }
+}
+
+func (u *setPartitionStatisticsUpdate) Apply(builder *MetadataBuilder) error {
+ return builder.SetPartitionStatistics(u.PartitionStatistics)
+}
+
+type removePartitionStatisticsUpdate struct {
+ baseUpdate
+ SnapshotID int64 `json:"snapshot-id"`
+}
+
+// NewRemovePartitionStatisticsUpdate creates a new Update that removes the partition statistics
+// file for the given snapshot ID from the table metadata.
+func NewRemovePartitionStatisticsUpdate(snapshotID int64)
*removePartitionStatisticsUpdate {
+ return &removePartitionStatisticsUpdate{
+ baseUpdate: baseUpdate{ActionName:
UpdateRemovePartitionStatistics},
+ SnapshotID: snapshotID,
+ }
+}
+
+func (u *removePartitionStatisticsUpdate) Apply(builder *MetadataBuilder)
error {
+ return builder.RemovePartitionStatistics(u.SnapshotID)
+}
+
type addEncryptionKeyUpdate struct {
baseUpdate
EncryptionKey EncryptionKey `json:"encryption-key"`
diff --git a/table/updates_test.go b/table/updates_test.go
index 480caf2f..73b40e2f 100644
--- a/table/updates_test.go
+++ b/table/updates_test.go
@@ -535,6 +535,105 @@ func TestRemoveStatisticsUpdate_Apply_NoOp(t *testing.T) {
require.NoError(t, NewRemoveStatisticsUpdate(999).Apply(b))
}
+func TestSetPartitionStatisticsUpdate_Unmarshal(t *testing.T) {
+ data := []byte(`[{
+ "action": "set-partition-statistics",
+ "partition-statistics": {
+ "snapshot-id": 42,
+ "statistics-path":
"s3://bucket/partition-stats.parquet",
+ "file-size-in-bytes": 100
+ }
+ }]`)
+
+ var updates Updates
+ require.NoError(t, json.Unmarshal(data, &updates))
+ require.Len(t, updates, 1)
+
+ u, ok := updates[0].(*setPartitionStatisticsUpdate)
+ require.True(t, ok)
+ assert.Equal(t, int64(42), u.PartitionStatistics.SnapshotID)
+ assert.Equal(t, "s3://bucket/partition-stats.parquet",
u.PartitionStatistics.StatisticsPath)
+}
+
+func TestSetPartitionStatisticsUpdate_Apply(t *testing.T) {
+ b := buildFromBase(t)
+ stats := PartitionStatisticsFile{
+ SnapshotID: 1,
+ StatisticsPath: "s3://bucket/partition-stats.parquet",
+ FileSizeInBytes: 200,
+ }
+
+ upd := NewSetPartitionStatisticsUpdate(stats)
+ require.NoError(t, upd.Apply(b))
+
+ meta, err := b.Build()
+ require.NoError(t, err)
+
+ var found *PartitionStatisticsFile
+ for s := range meta.PartitionStatistics() {
+ sc := s
+ found = &sc
+ }
+ require.NotNil(t, found)
+ assert.Equal(t, stats.StatisticsPath, found.StatisticsPath)
+}
+
+func TestSetPartitionStatisticsUpdate_Apply_Replaces(t *testing.T) {
+ b := buildFromBase(t)
+ first := PartitionStatisticsFile{SnapshotID: 5, StatisticsPath:
"s3://first.parquet", FileSizeInBytes: 10}
+ second := PartitionStatisticsFile{SnapshotID: 5, StatisticsPath:
"s3://second.parquet", FileSizeInBytes: 20}
+
+ require.NoError(t, NewSetPartitionStatisticsUpdate(first).Apply(b))
+ require.NoError(t, NewSetPartitionStatisticsUpdate(second).Apply(b))
+
+ meta, err := b.Build()
+ require.NoError(t, err)
+
+ count := 0
+ var got PartitionStatisticsFile
+ for s := range meta.PartitionStatistics() {
+ count++
+ got = s
+ }
+ assert.Equal(t, 1, count)
+ assert.Equal(t, "s3://second.parquet", got.StatisticsPath)
+}
+
+func TestRemovePartitionStatisticsUpdate_Unmarshal(t *testing.T) {
+ data :=
[]byte(`[{"action":"remove-partition-statistics","snapshot-id":7}]`)
+
+ var updates Updates
+ require.NoError(t, json.Unmarshal(data, &updates))
+ require.Len(t, updates, 1)
+
+ u, ok := updates[0].(*removePartitionStatisticsUpdate)
+ require.True(t, ok)
+ assert.Equal(t, int64(7), u.SnapshotID)
+}
+
+func TestRemovePartitionStatisticsUpdate_Apply(t *testing.T) {
+ b := buildFromBase(t)
+ stats := PartitionStatisticsFile{SnapshotID: 3, StatisticsPath:
"s3://bucket/partition-stats.parquet", FileSizeInBytes: 50}
+ require.NoError(t, NewSetPartitionStatisticsUpdate(stats).Apply(b))
+
+ require.NoError(t, NewRemovePartitionStatisticsUpdate(3).Apply(b))
+
+ meta, err := b.Build()
+ require.NoError(t, err)
+
+ count := 0
+ for range meta.PartitionStatistics() {
+ count++
+ }
+ assert.Equal(t, 0, count)
+}
+
+func TestRemovePartitionStatisticsUpdate_Apply_NoOp(t *testing.T) {
+ // Removing a partition statistics file that does not exist should not error.
+ b := buildFromBase(t)
+ require.NoError(t, NewRemovePartitionStatisticsUpdate(999).Apply(b))
+}
+
func TestAddEncryptionKeyUpdate_Unmarshal(t *testing.T) {
data := []byte(`[{
"action": "add-encryption-key",