emkornfield commented on code in PR #1876:
URL: https://github.com/apache/iceberg-rust/pull/1876#discussion_r2621827887


##########
crates/iceberg/src/catalog/metadata_location.rs:
##########
@@ -15,38 +15,91 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::collections::HashMap;
 use std::fmt::Display;
 use std::str::FromStr;
 
 use uuid::Uuid;
 
+use crate::spec::TableProperties;
 use crate::{Error, ErrorKind, Result};
 
 /// Helper for parsing a location of the format: 
`<location>/metadata/<version>-<uuid>.metadata.json`
+/// or with compression: 
`<location>/metadata/<version>-<uuid>.gz.metadata.json`
 #[derive(Clone, Debug, PartialEq)]
 pub struct MetadataLocation {
     table_location: String,
     version: i32,
     id: Uuid,
+    compression_suffix: Option<String>,
 }
 
 impl MetadataLocation {
+    /// Determines the compression suffix from table properties.
+    fn compression_suffix_from_properties(properties: &HashMap<String, 
String>) -> Option<String> {
+        properties
+            .get(TableProperties::PROPERTY_METADATA_COMPRESSION_CODEC)
+            .and_then(|codec| match codec.to_lowercase().as_str() {
+                "gzip" => Some(".gz".to_string()),
+                "none" | "" => None,
+                _ => None,
+            })
+    }
+
     /// Creates a completely new metadata location starting at version 0.
     /// Only used for creating a new table. For updates, see 
`with_next_version`.
+    #[deprecated(
+        since = "0.8.0",
+        note = "Use new_with_properties instead to properly handle compression 
settings"
+    )]
     pub fn new_with_table_location(table_location: impl ToString) -> Self {
         Self {
             table_location: table_location.to_string(),
             version: 0,
             id: Uuid::new_v4(),
+            compression_suffix: None,
+        }
+    }
+
+    /// Creates a completely new metadata location starting at version 0,
+    /// with compression settings from the table's properties.
+    /// Only used for creating a new table. For updates, see 
`with_next_version`.
+    pub fn new_with_properties(
+        table_location: impl ToString,
+        properties: &HashMap<String, String>,
+    ) -> Self {
+        Self {
+            table_location: table_location.to_string(),
+            version: 0,
+            id: Uuid::new_v4(),
+            compression_suffix: 
Self::compression_suffix_from_properties(properties),
         }
     }
 
     /// Creates a new metadata location for an updated metadata file.
+    /// Preserves the compression settings from the current location.
+    #[deprecated(
+        since = "0.8.0",
+        note = "Use with_next_version_and_properties instead to properly 
handle compression settings changes"
+    )]
     pub fn with_next_version(&self) -> Self {
         Self {
             table_location: self.table_location.clone(),
             version: self.version + 1,
             id: Uuid::new_v4(),
+            compression_suffix: self.compression_suffix.clone(),
+        }
+    }
+
+    /// Creates a new metadata location for an updated metadata file.
+    /// Takes table properties to determine compression settings, which may 
have changed
+    /// from the previous version.
+    pub fn with_next_version_and_properties(&self, properties: 
&HashMap<String, String>) -> Self {

Review Comment:
   > If properties has been changes, we should build a new instance of this 
MetadataLocation struct.
   
   My main concern here, is I don't think this is something that clients of 
MetadataLocation would actually understand and check for a new version of the 
metadata (i.e. the path of least resistance is to call `with_next_version` on 
whatever metadata they used for creation.  As a solution, I've created a static 
factory `next_version` which combines the two steps for the only call-site that 
exists for `with_next_version`.  If you think it is better to not deprecated 
`with_next_version` I can also revert this change.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to