This is an automated email from the ASF dual-hosted git repository.

liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new d76bb5ed1 fix: allow v2 to v3 table upgrades with existing snapshots 
(#2010)
d76bb5ed1 is described below

commit d76bb5ed124e65e42bdc488e5028cb9d20366aa5
Author: Aditya Subrahmanyan <[email protected]>
AuthorDate: Fri Jan 16 16:08:16 2026 -0800

    fix: allow v2 to v3 table upgrades with existing snapshots (#2010)
---
 crates/iceberg/src/spec/snapshot.rs               |  33 ++--
 crates/iceberg/src/spec/table_metadata.rs         | 191 ++++++++++++++++++++++
 crates/iceberg/src/spec/table_metadata_builder.rs |   2 +
 3 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/crates/iceberg/src/spec/snapshot.rs 
b/crates/iceberg/src/spec/snapshot.rs
index 270279988..802cd6546 100644
--- a/crates/iceberg/src/spec/snapshot.rs
+++ b/crates/iceberg/src/spec/snapshot.rs
@@ -272,7 +272,7 @@ pub(super) mod _serde {
 
     #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
     #[serde(rename_all = "kebab-case")]
-    /// Defines the structure of a v2 snapshot for 
serialization/deserialization
+    /// Defines the structure of a v3 snapshot for 
serialization/deserialization
     pub(crate) struct SnapshotV3 {
         pub snapshot_id: i64,
         #[serde(skip_serializing_if = "Option::is_none")]
@@ -283,8 +283,10 @@ pub(super) mod _serde {
         pub summary: Summary,
         #[serde(skip_serializing_if = "Option::is_none")]
         pub schema_id: Option<SchemaId>,
-        pub first_row_id: u64,
-        pub added_rows: u64,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub first_row_id: Option<u64>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub added_rows: Option<u64>,
         #[serde(skip_serializing_if = "Option::is_none")]
         pub key_id: Option<String>,
     }
@@ -333,10 +335,13 @@ pub(super) mod _serde {
                 summary: s.summary,
                 schema_id: s.schema_id,
                 encryption_key_id: s.key_id,
-                row_range: Some(SnapshotRowRange {
-                    first_row_id: s.first_row_id,
-                    added_rows: s.added_rows,
-                }),
+                row_range: match (s.first_row_id, s.added_rows) {
+                    (Some(first_row_id), Some(added_rows)) => 
Some(SnapshotRowRange {
+                        first_row_id,
+                        added_rows,
+                    }),
+                    _ => None,
+                },
             }
         }
     }
@@ -345,12 +350,10 @@ pub(super) mod _serde {
         type Error = Error;
 
         fn try_from(s: Snapshot) -> Result<Self, Self::Error> {
-            let row_range = s.row_range.ok_or_else(|| {
-                Error::new(
-                    crate::ErrorKind::DataInvalid,
-                    "v3 Snapshots must have first-row-id and rows-added fields 
set.".to_string(),
-                )
-            })?;
+            let (first_row_id, added_rows) = match s.row_range {
+                Some(row_range) => (Some(row_range.first_row_id), 
Some(row_range.added_rows)),
+                None => (None, None),
+            };
 
             Ok(SnapshotV3 {
                 snapshot_id: s.snapshot_id,
@@ -360,8 +363,8 @@ pub(super) mod _serde {
                 manifest_list: s.manifest_list,
                 summary: s.summary,
                 schema_id: s.schema_id,
-                first_row_id: row_range.first_row_id,
-                added_rows: row_range.added_rows,
+                first_row_id,
+                added_rows,
                 key_id: s.encryption_key_id,
             })
         }
diff --git a/crates/iceberg/src/spec/table_metadata.rs 
b/crates/iceberg/src/spec/table_metadata.rs
index 585cb3e2b..d3836b2f4 100644
--- a/crates/iceberg/src/spec/table_metadata.rs
+++ b/crates/iceberg/src/spec/table_metadata.rs
@@ -3978,4 +3978,195 @@ mod tests {
         assert_eq!(err.kind(), ErrorKind::DataInvalid);
         assert!(err.message().contains("Invalid table properties"));
     }
+
+    #[test]
+    fn 
test_v2_to_v3_upgrade_preserves_existing_snapshots_without_row_lineage() {
+        // Create a v2 table metadata
+        let schema = Schema::builder()
+            .with_fields(vec![
+                NestedField::required(1, "id", 
Type::Primitive(PrimitiveType::Long)).into(),
+            ])
+            .build()
+            .unwrap();
+
+        let v2_metadata = TableMetadataBuilder::new(
+            schema,
+            PartitionSpec::unpartition_spec().into_unbound(),
+            SortOrder::unsorted_order(),
+            "s3://bucket/test/location".to_string(),
+            FormatVersion::V2,
+            HashMap::new(),
+        )
+        .unwrap()
+        .build()
+        .unwrap()
+        .metadata;
+
+        // Add a v2 snapshot
+        let snapshot = Snapshot::builder()
+            .with_snapshot_id(1)
+            .with_timestamp_ms(v2_metadata.last_updated_ms + 1)
+            .with_sequence_number(1)
+            .with_schema_id(0)
+            .with_manifest_list("s3://bucket/test/metadata/snap-1.avro")
+            .with_summary(Summary {
+                operation: Operation::Append,
+                additional_properties: HashMap::from([(
+                    "added-data-files".to_string(),
+                    "1".to_string(),
+                )]),
+            })
+            .build();
+
+        let v2_with_snapshot = v2_metadata
+            
.into_builder(Some("s3://bucket/test/metadata/v00001.json".to_string()))
+            .add_snapshot(snapshot)
+            .unwrap()
+            .set_ref("main", SnapshotReference {
+                snapshot_id: 1,
+                retention: SnapshotRetention::Branch {
+                    min_snapshots_to_keep: None,
+                    max_snapshot_age_ms: None,
+                    max_ref_age_ms: None,
+                },
+            })
+            .unwrap()
+            .build()
+            .unwrap()
+            .metadata;
+
+        // Verify v2 serialization works fine
+        let v2_json = serde_json::to_string(&v2_with_snapshot);
+        assert!(v2_json.is_ok(), "v2 serialization should work");
+
+        // Upgrade to v3
+        let v3_metadata = v2_with_snapshot
+            
.into_builder(Some("s3://bucket/test/metadata/v00002.json".to_string()))
+            .upgrade_format_version(FormatVersion::V3)
+            .unwrap()
+            .build()
+            .unwrap()
+            .metadata;
+
+        assert_eq!(v3_metadata.format_version, FormatVersion::V3);
+        assert_eq!(v3_metadata.next_row_id, INITIAL_ROW_ID);
+        assert_eq!(v3_metadata.snapshots.len(), 1);
+
+        // Verify the snapshot has no row_range
+        let snapshot = v3_metadata.snapshots.values().next().unwrap();
+        assert!(
+            snapshot.row_range().is_none(),
+            "Snapshot should have no row_range after upgrade"
+        );
+
+        // Try to serialize v3 metadata - this should now work
+        let v3_json = serde_json::to_string(&v3_metadata);
+        assert!(
+            v3_json.is_ok(),
+            "v3 serialization should work for upgraded tables"
+        );
+
+        // Verify we can deserialize it back
+        let deserialized: TableMetadata = 
serde_json::from_str(&v3_json.unwrap()).unwrap();
+        assert_eq!(deserialized.format_version, FormatVersion::V3);
+        assert_eq!(deserialized.snapshots.len(), 1);
+
+        // Verify the deserialized snapshot still has no row_range
+        let deserialized_snapshot = 
deserialized.snapshots.values().next().unwrap();
+        assert!(
+            deserialized_snapshot.row_range().is_none(),
+            "Deserialized snapshot should have no row_range"
+        );
+    }
+
+    #[test]
+    fn test_v3_snapshot_with_row_lineage_serialization() {
+        // Create a v3 table metadata
+        let schema = Schema::builder()
+            .with_fields(vec![
+                NestedField::required(1, "id", 
Type::Primitive(PrimitiveType::Long)).into(),
+            ])
+            .build()
+            .unwrap();
+
+        let v3_metadata = TableMetadataBuilder::new(
+            schema,
+            PartitionSpec::unpartition_spec().into_unbound(),
+            SortOrder::unsorted_order(),
+            "s3://bucket/test/location".to_string(),
+            FormatVersion::V3,
+            HashMap::new(),
+        )
+        .unwrap()
+        .build()
+        .unwrap()
+        .metadata;
+
+        // Add a v3 snapshot with row lineage
+        let snapshot = Snapshot::builder()
+            .with_snapshot_id(1)
+            .with_timestamp_ms(v3_metadata.last_updated_ms + 1)
+            .with_sequence_number(1)
+            .with_schema_id(0)
+            .with_manifest_list("s3://bucket/test/metadata/snap-1.avro")
+            .with_summary(Summary {
+                operation: Operation::Append,
+                additional_properties: HashMap::from([(
+                    "added-data-files".to_string(),
+                    "1".to_string(),
+                )]),
+            })
+            .with_row_range(100, 50) // first_row_id=100, added_rows=50
+            .build();
+
+        let v3_with_snapshot = v3_metadata
+            
.into_builder(Some("s3://bucket/test/metadata/v00001.json".to_string()))
+            .add_snapshot(snapshot)
+            .unwrap()
+            .set_ref("main", SnapshotReference {
+                snapshot_id: 1,
+                retention: SnapshotRetention::Branch {
+                    min_snapshots_to_keep: None,
+                    max_snapshot_age_ms: None,
+                    max_ref_age_ms: None,
+                },
+            })
+            .unwrap()
+            .build()
+            .unwrap()
+            .metadata;
+
+        // Verify the snapshot has row_range
+        let snapshot = v3_with_snapshot.snapshots.values().next().unwrap();
+        assert!(
+            snapshot.row_range().is_some(),
+            "Snapshot should have row_range"
+        );
+        let (first_row_id, added_rows) = snapshot.row_range().unwrap();
+        assert_eq!(first_row_id, 100);
+        assert_eq!(added_rows, 50);
+
+        // Serialize v3 metadata - this should work
+        let v3_json = serde_json::to_string(&v3_with_snapshot);
+        assert!(
+            v3_json.is_ok(),
+            "v3 serialization should work for snapshots with row lineage"
+        );
+
+        // Verify we can deserialize it back
+        let deserialized: TableMetadata = 
serde_json::from_str(&v3_json.unwrap()).unwrap();
+        assert_eq!(deserialized.format_version, FormatVersion::V3);
+        assert_eq!(deserialized.snapshots.len(), 1);
+
+        // Verify the deserialized snapshot has the correct row_range
+        let deserialized_snapshot = 
deserialized.snapshots.values().next().unwrap();
+        assert!(
+            deserialized_snapshot.row_range().is_some(),
+            "Deserialized snapshot should have row_range"
+        );
+        let (deserialized_first_row_id, deserialized_added_rows) =
+            deserialized_snapshot.row_range().unwrap();
+        assert_eq!(deserialized_first_row_id, 100);
+        assert_eq!(deserialized_added_rows, 50);
+    }
 }
diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs 
b/crates/iceberg/src/spec/table_metadata_builder.rs
index 3db327d48..45d1ccefc 100644
--- a/crates/iceberg/src/spec/table_metadata_builder.rs
+++ b/crates/iceberg/src/spec/table_metadata_builder.rs
@@ -233,6 +233,8 @@ impl TableMetadataBuilder {
                 }
                 FormatVersion::V3 => {
                     self.metadata.format_version = format_version;
+                    // Set next-row-id to 0 when upgrading to v3 as per 
Iceberg spec
+                    self.metadata.next_row_id = INITIAL_ROW_ID;
                     self.changes
                         .push(TableUpdate::UpgradeFormatVersion { 
format_version });
                 }

Reply via email to