liurenjie1024 commented on code in PR #29:
URL: https://github.com/apache/iceberg-rust/pull/29#discussion_r1297955543


##########
crates/iceberg/src/spec/snapshot.rs:
##########
@@ -0,0 +1,343 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/*!
+ * Snapshots
+*/
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+
+use super::table_metadata::SnapshotLog;
+
+/// The kind of change a snapshot applied to the table.
+///
+/// The operation field is used by some operations, like snapshot expiration,
+/// to skip processing certain snapshots.
+///
+/// Variant names are (de)serialized in lowercase (`rename_all = "lowercase"`).
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum Operation {
+    /// Only data files were added and no files were removed.
+    Append,
+    /// Data and delete files were added and removed without changing table
+    /// data; i.e., compaction, changing the data file format, or relocating
+    /// data files.
+    Replace,
+    /// Data and delete files were added and removed in a logical overwrite
+    /// operation.
+    Overwrite,
+    /// Data files were removed and their contents logically deleted and/or
+    /// delete files were added to delete rows.
+    Delete,
+}
+
+/// Summarises the changes in the snapshot.
+///
+/// `operation` is the only typed entry; all remaining key/value pairs are
+/// collected into `other`, which serde flattens into the same map as
+/// `operation` when (de)serializing.
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub struct Summary {
+    /// The type of operation in the snapshot.
+    pub operation: Operation,
+    /// Every other summary entry, kept as raw string pairs.
+    #[serde(flatten)]
+    pub other: HashMap<String, String>,
+}
+
+impl Default for Operation {
+    /// The default operation is [`Operation::Append`].
+    fn default() -> Self {
+        Operation::Append
+    }
+}
+
+/// A snapshot represents the state of a table at some time and is used to
+/// access the complete set of data files in the table.
+///
+/// Fields are private; values are read through the accessor methods and
+/// assembled through the generated `SnapshotBuilder`, whose setters carry the
+/// `with` prefix.
+#[derive(Debug, PartialEq, Eq, Clone, Builder)]
+#[builder(setter(prefix = "with"))]
+pub struct Snapshot {
+    /// A unique long ID
+    snapshot_id: i64,
+    /// The snapshot ID of the snapshot’s parent.
+    /// Omitted for any snapshot with no parent
+    #[builder(default = "None")]
+    parent_snapshot_id: Option<i64>,
+    /// A monotonically increasing long that tracks the order of
+    /// changes to a table.
+    sequence_number: i64,
+    /// A timestamp when the snapshot was created, used for garbage
+    /// collection and table inspection
+    timestamp_ms: i64,
+    /// The location of a manifest list for this snapshot that
+    /// tracks manifest files with additional metadata.
+    manifest_list: ManifestList,
+    /// A string map that summarizes the snapshot changes, including operation.
+    summary: Summary,
+    /// ID of the table’s current schema when the snapshot was created.
+    ///
+    /// Defaults to `None` so that building a snapshot does not fail when this
+    /// optional value is never set; the setter takes a plain `i64`.
+    #[builder(setter(strip_option), default = "None")]
+    schema_id: Option<i64>,
+}
+
+/// Type to distinguish between a path to a manifest list file and a vector of
+/// manifest file locations.
+///
+/// The enum is `untagged`: it (de)serializes as the bare inner value, so a
+/// single string maps to [`ManifestList::ManifestListFile`] and a sequence of
+/// strings maps to [`ManifestList::ManifestFiles`].
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
+#[serde(untagged)]
+pub enum ManifestList {
+    /// Location of the manifest list file
+    ManifestListFile(String),
+    /// Locations of the individual manifest files
+    ManifestFiles(Vec<String>),
+}
+
+impl Snapshot {
+    /// Returns the unique ID of this snapshot.
+    #[inline]
+    pub fn snapshot_id(&self) -> i64 {
+        self.snapshot_id
+    }
+    /// Returns the sequence number of this snapshot. Is 0 for Iceberg V1 tables.
+    #[inline]
+    pub fn sequence_number(&self) -> i64 {
+        self.sequence_number
+    }
+    /// Borrows the manifest list information stored for this snapshot.
+    #[inline]
+    pub fn manifest_list(&self) -> &ManifestList {
+        &self.manifest_list
+    }
+    /// Borrows the summary describing the changes made by this snapshot.
+    #[inline]
+    pub fn summary(&self) -> &Summary {
+        &self.summary
+    }
+    /// Returns the creation timestamp exactly as stored in `timestamp_ms`.
+    #[inline]
+    pub fn timestamp(&self) -> i64 {
+        self.timestamp_ms
+    }
+    /// Creates an empty builder for assembling a [`Snapshot`].
+    pub fn builder() -> SnapshotBuilder {
+        Default::default()
+    }
+
+    /// Produces the [`SnapshotLog`] entry for this snapshot by copying its
+    /// timestamp and ID.
+    pub(crate) fn log(&self) -> SnapshotLog {
+        let (timestamp_ms, snapshot_id) = (self.timestamp_ms, self.snapshot_id);
+        SnapshotLog {
+            timestamp_ms,
+            snapshot_id,
+        }
+    }
+}
+
+/// Serde representation of a snapshot in the v2 layout.
+///
+/// A snapshot represents the state of a table at some time and is used to
+/// access the complete set of data files in the table. Field names are
+/// (de)serialized in kebab-case, and optional fields that are `None` are left
+/// out when serializing.
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "kebab-case")]
+pub(crate) struct SnapshotV2 {
+    /// A unique long ID
+    pub snapshot_id: i64,
+    /// The snapshot ID of the snapshot’s parent.
+    /// Omitted for any snapshot with no parent
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parent_snapshot_id: Option<i64>,
+    /// A monotonically increasing long that tracks the order of
+    /// changes to a table.
+    pub sequence_number: i64,
+    /// A timestamp when the snapshot was created, used for garbage
+    /// collection and table inspection
+    pub timestamp_ms: i64,
+    /// The location of a manifest list for this snapshot that
+    /// tracks manifest files with additional metadata.
+    pub manifest_list: String,
+    /// A string map that summarizes the snapshot changes, including operation.
+    pub summary: Summary,
+    /// ID of the table’s current schema when the snapshot was created.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub schema_id: Option<i64>,
+}
+
+/// Serde representation of a snapshot in the v1 layout.
+///
+/// A snapshot represents the state of a table at some time and is used to
+/// access the complete set of data files in the table. Unlike [`SnapshotV2`],
+/// this layout has no sequence number, and the manifest list, the manifests
+/// and the summary are all optional. Field names are (de)serialized in
+/// kebab-case, and fields that are `None` are left out when serializing.
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "kebab-case")]
+pub(crate) struct SnapshotV1 {
+    /// A unique long ID
+    pub snapshot_id: i64,
+    /// The snapshot ID of the snapshot’s parent.
+    /// Omitted for any snapshot with no parent
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parent_snapshot_id: Option<i64>,
+    /// A timestamp when the snapshot was created, used for garbage
+    /// collection and table inspection
+    pub timestamp_ms: i64,
+    /// The location of a manifest list for this snapshot that
+    /// tracks manifest files with additional metadata.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub manifest_list: Option<String>,
+    /// A list of manifest file locations. Must be omitted if manifest-list is
+    /// present
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub manifests: Option<Vec<String>>,
+    /// A string map that summarizes the snapshot changes, including operation.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub summary: Option<Summary>,
+    /// ID of the table’s current schema when the snapshot was created.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub schema_id: Option<i64>,
+}
+
+impl From<SnapshotV2> for Snapshot {
+    /// Converts the v2 serde form into a [`Snapshot`], wrapping the manifest
+    /// list location in [`ManifestList::ManifestListFile`] and moving every
+    /// other field across unchanged.
+    fn from(v2: SnapshotV2) -> Self {
+        let SnapshotV2 {
+            snapshot_id,
+            parent_snapshot_id,
+            sequence_number,
+            timestamp_ms,
+            manifest_list,
+            summary,
+            schema_id,
+        } = v2;
+
+        Self {
+            snapshot_id,
+            parent_snapshot_id,
+            sequence_number,
+            timestamp_ms,
+            manifest_list: ManifestList::ManifestListFile(manifest_list),
+            summary,
+            schema_id,
+        }
+    }
+}
+
+impl From<Snapshot> for SnapshotV2 {
+    fn from(v2: Snapshot) -> Self {
+        SnapshotV2 {
+            snapshot_id: v2.snapshot_id,
+            parent_snapshot_id: v2.parent_snapshot_id,
+            sequence_number: v2.sequence_number,
+            timestamp_ms: v2.timestamp_ms,
+            manifest_list: match v2.manifest_list {

Review Comment:
   The spec says `manifest-list` is required in v2?
   <img width="1044" alt="image" src="https://github.com/apache/iceberg-rust/assets/2771941/8a35a3de-3201-49f1-bc26-765e9fe81160">
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to