adamreeve commented on code in PR #7111: URL: https://github.com/apache/arrow-rs/pull/7111#discussion_r2017690250
########## parquet/src/file/metadata/writer.rs: ########## @@ -133,17 +190,53 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { // Write file metadata let start_pos = self.buf.bytes_written(); - { - let mut protocol = TCompactOutputProtocol::new(&mut self.buf); - file_metadata.write_to_out_protocol(&mut protocol)?; + match self.file_encryptor.as_ref() { + #[cfg(feature = "encryption")] + Some(file_encryptor) if file_encryptor.properties().encrypt_footer() => { + // First write FileCryptoMetadata + let crypto_metadata = Self::file_crypto_metadata(file_encryptor)?; + let mut protocol = TCompactOutputProtocol::new(&mut self.buf); + crypto_metadata.write_to_out_protocol(&mut protocol)?; + + // Then write encrypted footer + let aad = create_footer_aad(file_encryptor.file_aad())?; + let mut encryptor = file_encryptor.get_footer_encryptor()?; + encrypt_object(&file_metadata, &mut encryptor, &mut self.buf, &aad)?; + } + _ => { + let mut protocol = TCompactOutputProtocol::new(&mut self.buf); + file_metadata.write_to_out_protocol(&mut protocol)?; + } } + let end_pos = self.buf.bytes_written(); // Write footer let metadata_len = (end_pos - start_pos) as u32; self.buf.write_all(&metadata_len.to_le_bytes())?; - self.buf.write_all(&PARQUET_MAGIC)?; + #[cfg(feature = "encryption")] + let magic = get_file_magic( + self.file_encryptor + .as_ref() + .map(|encryptor| encryptor.properties()), + ); + #[cfg(not(feature = "encryption"))] + let magic = get_file_magic(); + self.buf.write_all(magic)?; + + // TODO: Discuss what to do here with community + // The argument for returning unencrypted rowgroups is that any users working with + // the returned FIleMetaData have no easy way to decrypt it and hence expect an + // unencrypted structure be returned. So, the argument here would be backward compatibility. + // Return unencrypted row_group for use in program + // E.g. when collecting statistics. + // Related to this see: https://github.com/apache/arrow-rs/issues/7254 Review Comment: After discussing this some more with @corwinjoy, we'd prefer to keep this as is rather than making a big breaking change like returning `ParquetMetaData` instead, and don't think this workaround is too objectionable. If others are happy with this I'll just update this comment and remove the TODO. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org