This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 026356be8f Include footer key metadata when writing encrypted Parquet
with a plaintext footer (#7600)
026356be8f is described below
commit 026356be8f2291f6985b45217ba681a4b3d3f204
Author: Rok Mihevc <[email protected]>
AuthorDate: Fri Jun 6 17:24:06 2025 +0200
Include footer key metadata when writing encrypted Parquet with a plaintext
footer (#7600)
# Which issue does this PR close?
Closes #7599.
# Rationale for this change
Files written with a plaintext footer do not include
`footer_signing_key_metadata`; see the proposed test for a reproduction.
Files written with an encrypted (non-plaintext) footer shouldn't include
`encryption_algorithm`; see the proposed test for a reproduction.
# What changes are included in this PR?
`footer_signing_key_metadata` is now included in files written with a
plaintext footer, and `encryption_algorithm` is no longer included in the
footer when the footer is non-plaintext (encrypted).
# Are there any user-facing changes?
This doesn't change the user-facing API.
---
parquet/src/file/metadata/writer.rs | 30 ++++++++----
parquet/tests/encryption/encryption.rs | 84 ++++++++++++++++++++++++++++++++++
2 files changed, 105 insertions(+), 9 deletions(-)
diff --git a/parquet/src/file/metadata/writer.rs
b/parquet/src/file/metadata/writer.rs
index a01ad5d881..0320d1e474 100644
--- a/parquet/src/file/metadata/writer.rs
+++ b/parquet/src/file/metadata/writer.rs
@@ -140,11 +140,12 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
// in any Statistics or ColumnIndex object in the whole file.
// But for simplicity we always set this field.
let column_orders = Some(column_orders);
-
let (row_groups, unencrypted_row_groups) = self
.object_writer
.apply_row_group_encryption(self.row_groups)?;
+ let (encryption_algorithm, footer_signing_key_metadata) =
+ self.object_writer.get_plaintext_footer_crypto_metadata();
let mut file_metadata = FileMetaData {
num_rows,
row_groups,
@@ -153,8 +154,8 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
schema: types::to_thrift(self.schema.as_ref())?,
created_by: self.created_by.clone(),
column_orders,
- encryption_algorithm:
self.object_writer.get_footer_encryption_algorithm(),
- footer_signing_key_metadata: None,
+ encryption_algorithm,
+ footer_signing_key_metadata,
};
// Write file metadata
@@ -479,8 +480,10 @@ impl MetadataObjectWriter {
get_file_magic()
}
- fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
- None
+ fn get_plaintext_footer_crypto_metadata(
+ &self,
+ ) -> (Option<EncryptionAlgorithm>, Option<Vec<u8>>) {
+ (None, None)
}
}
@@ -635,11 +638,20 @@ impl MetadataObjectWriter {
}
}
- fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
- if let Some(file_encryptor) = &self.file_encryptor {
- return
Some(Self::encryption_algorithm_from_encryptor(file_encryptor));
+ fn get_plaintext_footer_crypto_metadata(
+ &self,
+ ) -> (Option<EncryptionAlgorithm>, Option<Vec<u8>>) {
+ // Only plaintext footers may contain encryption algorithm and footer
key metadata.
+ if let Some(file_encryptor) = self.file_encryptor.as_ref() {
+ let encryption_properties = file_encryptor.properties();
+ if !encryption_properties.encrypt_footer() {
+ return (
+
Some(Self::encryption_algorithm_from_encryptor(file_encryptor)),
+ encryption_properties.footer_key_metadata().cloned(),
+ );
+ }
}
- None
+ (None, None)
}
fn encryption_algorithm_from_encryptor(file_encryptor: &FileEncryptor) ->
EncryptionAlgorithm {
diff --git a/parquet/tests/encryption/encryption.rs
b/parquet/tests/encryption/encryption.rs
index 134e3383b3..a46794a85f 100644
--- a/parquet/tests/encryption/encryption.rs
+++ b/parquet/tests/encryption/encryption.rs
@@ -256,6 +256,90 @@ fn
test_non_uniform_encryption_plaintext_footer_with_key_retriever() {
verify_encryption_test_file_read(file, decryption_properties);
}
+#[test]
+fn test_uniform_encryption_plaintext_footer_with_key_retriever() {
+ let test_data = arrow::util::test_util::parquet_test_data();
+
+ // Read example data with key retriever
+ let path =
format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted");
+ let file = File::open(path).unwrap();
+
+ let key_retriever = Arc::new(
+ TestKeyRetriever::new()
+ .with_key("kf".to_owned(), b"0123456789012345".to_vec())
+ .with_key("kc1".to_owned(), b"1234567890123450".to_vec())
+ .with_key("kc2".to_owned(), b"1234567890123451".to_vec()),
+ );
+
+ let decryption_properties =
FileDecryptionProperties::with_key_retriever(key_retriever.clone())
+ .build()
+ .unwrap();
+
+ let options = ArrowReaderOptions::default()
+ .with_file_decryption_properties(decryption_properties.clone());
+ let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
+
+ // Write data into temporary file with plaintext footer and footer key
metadata
+ let temp_file = tempfile::tempfile().unwrap();
+ let encryption_properties =
FileEncryptionProperties::builder(b"0123456789012345".to_vec())
+ .with_footer_key_metadata("kf".into())
+ .with_column_key_and_metadata("double_field",
b"1234567890123450".to_vec(), b"kc1".into())
+ .with_column_key_and_metadata("float_field",
b"1234567890123451".to_vec(), b"kc2".into())
+ .with_plaintext_footer(true)
+ .build()
+ .unwrap();
+
+ let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file,
options).unwrap();
+ let batch_reader = builder.build().unwrap();
+ let batches = batch_reader
+ .collect::<parquet::errors::Result<Vec<RecordBatch>, _>>()
+ .unwrap();
+
+ let props = WriterProperties::builder()
+ .with_file_encryption_properties(encryption_properties)
+ .build();
+
+ let mut writer = ArrowWriter::try_new(
+ temp_file.try_clone().unwrap(),
+ metadata.schema().clone(),
+ Some(props),
+ )
+ .unwrap();
+ for batch in batches {
+ writer.write(&batch).unwrap();
+ }
+
+ writer.close().unwrap();
+
+ // Read temporary file with plaintext metadata using key retriever
+ let decryption_properties =
FileDecryptionProperties::with_key_retriever(key_retriever)
+ .build()
+ .unwrap();
+
+ let options = ArrowReaderOptions::default()
+ .with_file_decryption_properties(decryption_properties.clone());
+ let _ = ArrowReaderMetadata::load(&temp_file, options.clone()).unwrap();
+
+ // Read temporary file with plaintext metadata using key retriever with
invalid key
+ let key_retriever = Arc::new(
+ TestKeyRetriever::new()
+ .with_key("kf".to_owned(), b"0133756789012345".to_vec())
+ .with_key("kc1".to_owned(), b"1234567890123450".to_vec())
+ .with_key("kc2".to_owned(), b"1234567890123451".to_vec()),
+ );
+ let decryption_properties =
FileDecryptionProperties::with_key_retriever(key_retriever)
+ .build()
+ .unwrap();
+ let options = ArrowReaderOptions::default()
+ .with_file_decryption_properties(decryption_properties.clone());
+ let result = ArrowReaderMetadata::load(&temp_file, options.clone());
+ assert!(result.is_err());
+ assert!(result
+ .unwrap_err()
+ .to_string()
+ .starts_with("Parquet error: Footer signature verification failed.
Computed: ["));
+}
+
#[test]
fn test_non_uniform_encryption_with_key_retriever() {
let test_data = arrow::util::test_util::parquet_test_data();