rok commented on code in PR #6637: URL: https://github.com/apache/arrow-rs/pull/6637#discussion_r1985963634
########## parquet/src/file/metadata/reader.rs: ########## @@ -578,56 +644,163 @@ impl ParquetMetaDataReader { if length > suffix_len - FOOTER_SIZE { let metadata_start = file_size - length - FOOTER_SIZE; let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?; - Ok((Self::decode_metadata(&meta)?, None)) + Ok(( + Self::decode_metadata( + &meta, + // footer.is_encrypted_footer(), + // #[cfg(feature = "encryption")] + // file_decryption_properties, + )?, + None, + )) } else { let metadata_start = file_size - length - FOOTER_SIZE - footer_start; let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE]; Ok(( - Self::decode_metadata(slice)?, + Self::decode_metadata( + slice, + // footer.is_encrypted_footer(), + // #[cfg(feature = "encryption")] + // file_decryption_properties, + )?, Some((footer_start, suffix.slice(..metadata_start))), )) } } - /// Decodes the Parquet footer returning the metadata length in bytes + /// Decodes the end of the Parquet footer /// - /// A parquet footer is 8 bytes long and has the following layout: + /// There are 8 bytes at the end of the Parquet footer with the following layout: /// * 4 bytes for the metadata length - /// * 4 bytes for the magic bytes 'PAR1' + /// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer) /// /// ```text - /// +-----+--------+ - /// | len | 'PAR1' | - /// +-----+--------+ + /// +-----+------------------+ + /// | len | 'PAR1' or 'PARE' | + /// +-----+------------------+ /// ``` - pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> { - // check this is indeed a parquet file - if slice[4..] != PARQUET_MAGIC { + pub fn decode_footer_tail(slice: &[u8; FOOTER_SIZE]) -> Result<FooterTail> { + let magic = &slice[4..]; + let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER { + true + } else if magic == PARQUET_MAGIC { + false + } else { return Err(general_err!("Invalid Parquet file. 
Corrupt footer")); - } - + }; // get the metadata length from the footer let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap()); - // u32 won't be larger than usize in most cases - Ok(metadata_len as usize) + Ok(FooterTail { + // u32 won't be larger than usize in most cases + metadata_length: metadata_len as usize, + encrypted_footer, + }) + } + + /// Decodes the Parquet footer, returning the metadata length in bytes + #[deprecated(note = "use decode_footer_tail instead")] + pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> { + Self::decode_footer_tail(slice).map(|f| f.metadata_length) + } + + pub fn decrypt_metadata( Review Comment: A review of this part would be quite welcome: `decrypt_metadata` is equivalent to `decode_metadata`, which is `pub`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org