alamb commented on code in PR #6637:
URL: https://github.com/apache/arrow-rs/pull/6637#discussion_r1983969392


##########
parquet/src/encryption/decryption.rs:
##########
@@ -0,0 +1,254 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor};
+use crate::encryption::modules::{create_module_aad, ModuleType};
+use crate::errors::Result;
+use std::collections::HashMap;
+use std::io::Read;
+use std::sync::Arc;
+
+pub fn read_and_decrypt<T: Read>(
+    decryptor: &Arc<dyn BlockDecryptor>,
+    input: &mut T,
+    aad: &[u8],
+) -> Result<Vec<u8>> {
+    let mut len_bytes = [0; 4];
+    input.read_exact(&mut len_bytes)?;
+    let ciphertext_len = u32::from_le_bytes(len_bytes) as usize;
+    let mut ciphertext = vec![0; 4 + ciphertext_len];
+    input.read_exact(&mut ciphertext[4..])?;
+
+    decryptor.decrypt(&ciphertext, aad.as_ref())
+}
+
+#[derive(Debug, Clone)]
+pub struct CryptoContext {

Review Comment:
   Perhaps we can add some doc comments here, since `CryptoContext` is a public struct.



##########
parquet/src/file/metadata/reader.rs:
##########
@@ -578,56 +644,163 @@ impl ParquetMetaDataReader {
         if length > suffix_len - FOOTER_SIZE {
             let metadata_start = file_size - length - FOOTER_SIZE;
             let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?;
-            Ok((Self::decode_metadata(&meta)?, None))
+            Ok((
+                Self::decode_metadata(
+                    &meta,
+                    // footer.is_encrypted_footer(),
+                    // #[cfg(feature = "encryption")]
+                    // file_decryption_properties,
+                )?,
+                None,
+            ))
         } else {
             let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
             let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
             Ok((
-                Self::decode_metadata(slice)?,
+                Self::decode_metadata(
+                    slice,
+                    // footer.is_encrypted_footer(),
+                    // #[cfg(feature = "encryption")]
+                    // file_decryption_properties,
+                )?,
                 Some((footer_start, suffix.slice(..metadata_start))),
             ))
         }
     }
 
-    /// Decodes the Parquet footer returning the metadata length in bytes
+    /// Decodes the end of the Parquet footer
     ///
-    /// A parquet footer is 8 bytes long and has the following layout:
+    /// There are 8 bytes at the end of the Parquet footer with the following layout:
     /// * 4 bytes for the metadata length
-    /// * 4 bytes for the magic bytes 'PAR1'
+    /// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer)
     ///
     /// ```text
-    /// +-----+--------+
-    /// | len | 'PAR1' |
-    /// +-----+--------+
+    /// +-----+------------------+
+    /// | len | 'PAR1' or 'PARE' |
+    /// +-----+------------------+
     /// ```
-    pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
-        // check this is indeed a parquet file
-        if slice[4..] != PARQUET_MAGIC {
+    pub fn decode_footer_tail(slice: &[u8; FOOTER_SIZE]) -> Result<FooterTail> {
+        let magic = &slice[4..];
+        let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER {
+            true
+        } else if magic == PARQUET_MAGIC {
+            false
+        } else {
             return Err(general_err!("Invalid Parquet file. Corrupt footer"));
-        }
-
+        };
         // get the metadata length from the footer
         let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap());
-        // u32 won't be larger than usize in most cases
-        Ok(metadata_len as usize)
+        Ok(FooterTail {
+            // u32 won't be larger than usize in most cases
+            metadata_length: metadata_len as usize,
+            encrypted_footer,
+        })
+    }
+
+    /// Decodes the Parquet footer, returning the metadata length in bytes
+    #[deprecated(note = "use decode_footer_tail instead")]
+    pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
+        Self::decode_footer_tail(slice).map(|f| f.metadata_length)
+    }
+
+    pub fn decrypt_metadata(

Review Comment:
   If this is a pub function, we should also add documentation to it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to