This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 554fd113 fix: handle missing parent snapshots in ExpireSnapshots (#671)
554fd113 is described below

commit 554fd113b33d65dcb3b6909477205024fa729a44
Author: Krutika Dhananjay <[email protected]>
AuthorDate: Sat Jan 17 05:48:15 2026 +0530

    fix: handle missing parent snapshots in ExpireSnapshots (#671)
    
    After snapshot expiration, remaining snapshots may have
    parent-snapshot-id references pointing to snapshots that no longer
    exist. ExpireSnapshots was failing when walking the parent chain and
    encountering these dangling references.
    
    Change the behavior to treat a missing parent snapshot as the end of the
    chain rather than returning an error. This is expected and valid
    behavior in Iceberg tables that have undergone previous snapshot
    expiration.
    
    ---------
    
    Co-authored-by: Krutika Dhananjay <[email protected]>
---
 table/table_test.go  | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 table/transaction.go |  4 ++-
 2 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/table/table_test.go b/table/table_test.go
index 72997fef..b97f0a6f 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -970,6 +970,93 @@ func (t *TableWritingTestSuite) TestExpireSnapshots() {
        t.Require().Equal(2, len(slices.Collect(tbl.Metadata().SnapshotLogs())))
 }
 
+func (t *TableWritingTestSuite) TestExpireSnapshotsWithMissingParent() {
+       // This test validates the fix for handling missing parent snapshots.
+       // After expiring snapshots, remaining snapshots may have 
parent-snapshot-id
+       // references pointing to snapshots that no longer exist. 
ExpireSnapshots should
+       // treat missing parents as the end of the chain rather than returning 
an error.
+
+       fs := iceio.LocalFS{}
+
+       files := make([]string, 0)
+       for i := range 5 {
+               filePath := 
fmt.Sprintf("%s/expire_with_missing_parent_v%d/data-%d.parquet", t.location, 
t.formatVersion, i)
+               t.writeParquet(fs, filePath, t.arrTablePromotedTypes)
+               files = append(files, filePath)
+       }
+
+       ident := table.Identifier{"default", "expire_with_missing_parent_v" + 
strconv.Itoa(t.formatVersion)}
+       meta, err := table.NewMetadata(t.tableSchemaPromotedTypes, 
iceberg.UnpartitionedSpec,
+               table.UnsortedSortOrder, t.location, 
iceberg.Properties{table.PropertyFormatVersion: strconv.Itoa(t.formatVersion)})
+       t.Require().NoError(err)
+
+       ctx := context.Background()
+
+       tbl := table.New(
+               ident,
+               meta,
+               t.getMetadataLoc(),
+               func(ctx context.Context) (iceio.IO, error) {
+                       return fs, nil
+               },
+               &mockedCatalog{meta},
+       )
+
+       // Create 5 snapshots, each one with a parent pointing to the previous
+       for i := range 5 {
+               tx := tbl.NewTransaction()
+               t.Require().NoError(tx.AddFiles(ctx, files[i:i+1], nil, false))
+               tbl, err = tx.Commit(ctx)
+               t.Require().NoError(err)
+       }
+
+       t.Require().Equal(5, len(tbl.Metadata().Snapshots()))
+
+       // Get the snapshot IDs before expiration
+       snapshotsBeforeExpire := tbl.Metadata().Snapshots()
+       snapshot3ID := snapshotsBeforeExpire[2].SnapshotID
+       snapshot4ID := snapshotsBeforeExpire[3].SnapshotID
+
+       // Expire the first 3 snapshots, keeping only the last 2
+       tx := tbl.NewTransaction()
+       t.Require().NoError(tx.ExpireSnapshots(table.WithOlderThan(0), 
table.WithRetainLast(2)))
+       tbl, err = tx.Commit(ctx)
+       t.Require().NoError(err)
+       t.Require().Equal(2, len(tbl.Metadata().Snapshots()))
+
+       // Verify that the 4th snapshot's parent (snapshot 3) is no longer in 
the metadata
+       remainingSnapshots := tbl.Metadata().Snapshots()
+       var snapshot4 *table.Snapshot
+       for i := range remainingSnapshots {
+               if remainingSnapshots[i].SnapshotID == snapshot4ID {
+                       snapshot4 = &remainingSnapshots[i]
+
+                       break
+               }
+       }
+       t.Require().NotNil(snapshot4, "snapshot 4 should still exist")
+       t.Require().NotNil(snapshot4.ParentSnapshotID, "snapshot 4 should have 
a parent ID")
+       t.Require().Equal(snapshot3ID, *snapshot4.ParentSnapshotID, "snapshot 
4's parent should be snapshot 3")
+
+       // Verify snapshot 3 is no longer in the metadata
+       t.Nil(tbl.Metadata().SnapshotByID(snapshot3ID), "snapshot 3 should have 
been removed")
+
+       // At this point, the 4th snapshot has a parent-snapshot-id pointing to
+       // the 3rd snapshot which no longer exists. Try to expire again - this
+       // should not fail even though the parent is missing. Use 
WithRetainLast(2)
+       // to force walking the full parent chain.
+       tx = tbl.NewTransaction()
+       // This should succeed without error despite the missing parent.
+       // WithRetainLast(2) will cause it to walk back through snapshot 4's 
parent chain,
+       // encountering the missing snapshot 3.
+       err = tx.ExpireSnapshots(table.WithOlderThan(0), 
table.WithRetainLast(2))
+       t.Require().NoError(err, "ExpireSnapshots should handle missing parent 
gracefully")
+
+       tbl, err = tx.Commit(ctx)
+       t.Require().NoError(err)
+       t.Require().Equal(2, len(tbl.Metadata().Snapshots()), "should still 
have 2 snapshots")
+}
+
 // validatingCatalog validates requirements before applying updates,
 // simulating real catalog behavior for concurrent modification tests.
 type validatingCatalog struct {
@@ -1042,6 +1129,7 @@ func (t *TableWritingTestSuite) 
TestExpireSnapshotsRejectsOnRefRollback() {
                tbl, err = tx.Commit(ctx)
                t.Require().NoError(err)
        }
+
        t.Require().Equal(5, len(tbl.Metadata().Snapshots()))
 
        // Get snapshot IDs for later use
diff --git a/table/transaction.go b/table/transaction.go
index 68961bf3..f9deaa62 100644
--- a/table/transaction.go
+++ b/table/transaction.go
@@ -259,7 +259,9 @@ func (t *Transaction) ExpireSnapshots(opts 
...ExpireSnapshotsOpt) error {
                for {
                        snap, err := t.meta.SnapshotByID(snapId)
                        if err != nil {
-                               return err
+                               // Parent snapshot may have been removed by a 
previous expiration.
+                               // Treat missing parent as end of chain - this 
is expected behavior.
+                               break
                        }
 
                        snapAge := time.Now().UnixMilli() - snap.TimestampMs

Reply via email to