This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 026356be8f Include footer key metadata when writing encrypted Parquet 
with a plaintext footer (#7600)
026356be8f is described below

commit 026356be8f2291f6985b45217ba681a4b3d3f204
Author: Rok Mihevc <[email protected]>
AuthorDate: Fri Jun 6 17:24:06 2025 +0200

    Include footer key metadata when writing encrypted Parquet with a plaintext 
footer (#7600)
    
    # Which issue does this PR close?
    
    Closes #7599.
    
    # Rationale for this change
    
    Files written with a plaintext footer currently do not include
    `footer_signing_key_metadata`; see the proposed test for a reproduction.
    Files written with an encrypted (non-plaintext) footer should not include
    `encryption_algorithm`; see the proposed test for a reproduction.
    
    # What changes are included in this PR?
    
    `footer_signing_key_metadata` is now included in files written with a
    plaintext footer, and `encryption_algorithm` is no longer included in the
    footer when the footer is encrypted (non-plaintext).
    
    # Are there any user-facing changes?
    
    This doesn't change the user-facing API.
---
 parquet/src/file/metadata/writer.rs    | 30 ++++++++----
 parquet/tests/encryption/encryption.rs | 84 ++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 9 deletions(-)

diff --git a/parquet/src/file/metadata/writer.rs 
b/parquet/src/file/metadata/writer.rs
index a01ad5d881..0320d1e474 100644
--- a/parquet/src/file/metadata/writer.rs
+++ b/parquet/src/file/metadata/writer.rs
@@ -140,11 +140,12 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
         // in any Statistics or ColumnIndex object in the whole file.
         // But for simplicity we always set this field.
         let column_orders = Some(column_orders);
-
         let (row_groups, unencrypted_row_groups) = self
             .object_writer
             .apply_row_group_encryption(self.row_groups)?;
 
+        let (encryption_algorithm, footer_signing_key_metadata) =
+            self.object_writer.get_plaintext_footer_crypto_metadata();
         let mut file_metadata = FileMetaData {
             num_rows,
             row_groups,
@@ -153,8 +154,8 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
             schema: types::to_thrift(self.schema.as_ref())?,
             created_by: self.created_by.clone(),
             column_orders,
-            encryption_algorithm: 
self.object_writer.get_footer_encryption_algorithm(),
-            footer_signing_key_metadata: None,
+            encryption_algorithm,
+            footer_signing_key_metadata,
         };
 
         // Write file metadata
@@ -479,8 +480,10 @@ impl MetadataObjectWriter {
         get_file_magic()
     }
 
-    fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
-        None
+    fn get_plaintext_footer_crypto_metadata(
+        &self,
+    ) -> (Option<EncryptionAlgorithm>, Option<Vec<u8>>) {
+        (None, None)
     }
 }
 
@@ -635,11 +638,20 @@ impl MetadataObjectWriter {
         }
     }
 
-    fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
-        if let Some(file_encryptor) = &self.file_encryptor {
-            return 
Some(Self::encryption_algorithm_from_encryptor(file_encryptor));
+    fn get_plaintext_footer_crypto_metadata(
+        &self,
+    ) -> (Option<EncryptionAlgorithm>, Option<Vec<u8>>) {
+        // Only plaintext footers may contain encryption algorithm and footer 
key metadata.
+        if let Some(file_encryptor) = self.file_encryptor.as_ref() {
+            let encryption_properties = file_encryptor.properties();
+            if !encryption_properties.encrypt_footer() {
+                return (
+                    
Some(Self::encryption_algorithm_from_encryptor(file_encryptor)),
+                    encryption_properties.footer_key_metadata().cloned(),
+                );
+            }
         }
-        None
+        (None, None)
     }
 
     fn encryption_algorithm_from_encryptor(file_encryptor: &FileEncryptor) -> 
EncryptionAlgorithm {
diff --git a/parquet/tests/encryption/encryption.rs 
b/parquet/tests/encryption/encryption.rs
index 134e3383b3..a46794a85f 100644
--- a/parquet/tests/encryption/encryption.rs
+++ b/parquet/tests/encryption/encryption.rs
@@ -256,6 +256,90 @@ fn 
test_non_uniform_encryption_plaintext_footer_with_key_retriever() {
     verify_encryption_test_file_read(file, decryption_properties);
 }
 
+#[test]
+fn test_uniform_encryption_plaintext_footer_with_key_retriever() {
+    let test_data = arrow::util::test_util::parquet_test_data();
+
+    // Read example data with key retriever
+    let path = 
format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted");
+    let file = File::open(path).unwrap();
+
+    let key_retriever = Arc::new(
+        TestKeyRetriever::new()
+            .with_key("kf".to_owned(), b"0123456789012345".to_vec())
+            .with_key("kc1".to_owned(), b"1234567890123450".to_vec())
+            .with_key("kc2".to_owned(), b"1234567890123451".to_vec()),
+    );
+
+    let decryption_properties = 
FileDecryptionProperties::with_key_retriever(key_retriever.clone())
+        .build()
+        .unwrap();
+
+    let options = ArrowReaderOptions::default()
+        .with_file_decryption_properties(decryption_properties.clone());
+    let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
+
+    // Write data into temporary file with plaintext footer and footer key 
metadata
+    let temp_file = tempfile::tempfile().unwrap();
+    let encryption_properties = 
FileEncryptionProperties::builder(b"0123456789012345".to_vec())
+        .with_footer_key_metadata("kf".into())
+        .with_column_key_and_metadata("double_field", 
b"1234567890123450".to_vec(), b"kc1".into())
+        .with_column_key_and_metadata("float_field", 
b"1234567890123451".to_vec(), b"kc2".into())
+        .with_plaintext_footer(true)
+        .build()
+        .unwrap();
+
+    let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, 
options).unwrap();
+    let batch_reader = builder.build().unwrap();
+    let batches = batch_reader
+        .collect::<parquet::errors::Result<Vec<RecordBatch>, _>>()
+        .unwrap();
+
+    let props = WriterProperties::builder()
+        .with_file_encryption_properties(encryption_properties)
+        .build();
+
+    let mut writer = ArrowWriter::try_new(
+        temp_file.try_clone().unwrap(),
+        metadata.schema().clone(),
+        Some(props),
+    )
+    .unwrap();
+    for batch in batches {
+        writer.write(&batch).unwrap();
+    }
+
+    writer.close().unwrap();
+
+    // Read temporary file with plaintext metadata using key retriever
+    let decryption_properties = 
FileDecryptionProperties::with_key_retriever(key_retriever)
+        .build()
+        .unwrap();
+
+    let options = ArrowReaderOptions::default()
+        .with_file_decryption_properties(decryption_properties.clone());
+    let _ = ArrowReaderMetadata::load(&temp_file, options.clone()).unwrap();
+
+    // Read temporary file with plaintext metadata using key retriever with 
invalid key
+    let key_retriever = Arc::new(
+        TestKeyRetriever::new()
+            .with_key("kf".to_owned(), b"0133756789012345".to_vec())
+            .with_key("kc1".to_owned(), b"1234567890123450".to_vec())
+            .with_key("kc2".to_owned(), b"1234567890123451".to_vec()),
+    );
+    let decryption_properties = 
FileDecryptionProperties::with_key_retriever(key_retriever)
+        .build()
+        .unwrap();
+    let options = ArrowReaderOptions::default()
+        .with_file_decryption_properties(decryption_properties.clone());
+    let result = ArrowReaderMetadata::load(&temp_file, options.clone());
+    assert!(result.is_err());
+    assert!(result
+        .unwrap_err()
+        .to_string()
+        .starts_with("Parquet error: Footer signature verification failed. 
Computed: ["));
+}
+
 #[test]
 fn test_non_uniform_encryption_with_key_retriever() {
     let test_data = arrow::util::test_util::parquet_test_data();

Reply via email to