mbrobbel commented on code in PR #7374:
URL: https://github.com/apache/arrow-rs/pull/7374#discussion_r2026327406


##########
parquet/src/encryption/decrypt.rs:
##########
@@ -28,6 +28,78 @@ use std::io::Read;
 use std::sync::Arc;
 
 /// Trait for retrieving an encryption key using the key's metadata
+///
+/// # Example
+///
+/// This shows how you might use a `KeyRetriever` to decrypt a Parquet file
+/// if you have a set of known encryption keys with identifiers, but at read 
time
+/// you may not know which columns were encrypted and which keys were used.
+///
+/// In practice, the key metadata might instead store an encrypted key that 
must
+/// be decrypted with a Key Management Server.
+///
+/// ```
+/// # use std::collections::HashMap;
+/// # use std::sync::{Arc, Mutex};
+/// # use parquet::encryption::decrypt::{FileDecryptionProperties, 
KeyRetriever};
+/// # use parquet::encryption::encrypt::FileEncryptionProperties;
+/// # use parquet::errors::ParquetError;
+/// // Define known encryption keys
+/// let mut keys = HashMap::new();
+/// keys.insert("kf".to_owned(), b"0123456789012345".to_vec());
+/// keys.insert("kc1".to_owned(), b"1234567890123450".to_vec());
+/// keys.insert("kc2".to_owned(), b"1234567890123451".to_vec());
+///
+/// // Create encryption properties for writing a file,
+/// // and specify the key identifiers as the key metadata.
+/// let encryption_properties = 
FileEncryptionProperties::builder(keys.get("kf").unwrap().clone())
+///     .with_footer_key_metadata("kf".as_bytes().into())
+///     .with_column_key_and_metadata("x", keys.get("kc1").unwrap().clone(), 
"kc1".as_bytes().into())
+///     .with_column_key_and_metadata("y", keys.get("kc2").unwrap().clone(), 
"kc2".as_bytes().into())
+///     .build()?;
+///
+/// // Write an encrypted file with the properties
+/// // ...
+///
+/// // Define a KeyRetriever that can get encryption keys using their 
identifiers
+/// struct CustomKeyRetriever {
+///     keys: Mutex<HashMap<String, Vec<u8>>>,
+/// }
+///
+/// impl KeyRetriever for CustomKeyRetriever {
+///     fn retrieve_key(&self, key_metadata: &[u8]) -> 
parquet::errors::Result<Vec<u8>> {
+///         // Metadata is bytes, so convert it to a string identifier
+///         let key_metadata = std::str::from_utf8(key_metadata).map_err(|e| {
+///             ParquetError::General(format!("Could not convert key metadata 
to string: {}", e))

Review Comment:
   ```suggestion
   ///             ParquetError::General(format!("Could not convert key metadata to string: {e}"))
   ```



##########
parquet/src/encryption/decrypt.rs:
##########
@@ -28,6 +28,78 @@ use std::io::Read;
 use std::sync::Arc;
 
 /// Trait for retrieving an encryption key using the key's metadata
+///
+/// # Example
+///
+/// This shows how you might use a `KeyRetriever` to decrypt a Parquet file
+/// if you have a set of known encryption keys with identifiers, but at read 
time
+/// you may not know which columns were encrypted and which keys were used.
+///
+/// In practice, the key metadata might instead store an encrypted key that 
must
+/// be decrypted with a Key Management Server.
+///
+/// ```
+/// # use std::collections::HashMap;
+/// # use std::sync::{Arc, Mutex};
+/// # use parquet::encryption::decrypt::{FileDecryptionProperties, 
KeyRetriever};
+/// # use parquet::encryption::encrypt::FileEncryptionProperties;
+/// # use parquet::errors::ParquetError;
+/// // Define known encryption keys
+/// let mut keys = HashMap::new();
+/// keys.insert("kf".to_owned(), b"0123456789012345".to_vec());
+/// keys.insert("kc1".to_owned(), b"1234567890123450".to_vec());
+/// keys.insert("kc2".to_owned(), b"1234567890123451".to_vec());
+///
+/// // Create encryption properties for writing a file,
+/// // and specify the key identifiers as the key metadata.
+/// let encryption_properties = 
FileEncryptionProperties::builder(keys.get("kf").unwrap().clone())
+///     .with_footer_key_metadata("kf".as_bytes().into())

Review Comment:
   You can convert directly with `.into()`, using the standard library's `impl From<&str> for Vec<u8>`
   (https://doc.rust-lang.org/stable/std/primitive.str.html#impl-From%3C%26str%3E-for-Vec%3Cu8%3E):
   ```suggestion
   ///     .with_footer_key_metadata("kf".into())
   ```



##########
parquet/src/encryption/encrypt.rs:
##########
@@ -53,6 +53,41 @@ impl EncryptionKey {
 
 #[derive(Debug, Clone, PartialEq)]
 /// Defines how data in a Parquet file should be encrypted
+///
+/// The `FileEncryptionProperties` should be included in the 
[`WriterProperties`](crate::file::properties::WriterProperties)
+/// used to write a file by using 
[`WriterPropertiesBuilder::with_file_encryption_properties`](crate::file::properties::WriterPropertiesBuilder::with_file_encryption_properties).
+///
+/// # Examples
+///
+/// Create `FileEncryptionProperties` for a file encrypted with uniform 
encryption,
+/// where all metadata and data are encrypted with the footer key:
+/// ```
+/// # use parquet::encryption::encrypt::FileEncryptionProperties;
+/// let file_encryption_properties = 
FileEncryptionProperties::builder(b"0123456789012345".into())
+///     .build()?;
+/// # Ok::<(), parquet::errors::ParquetError>(())
+/// ```
+///
+/// Create properties for a file where columns are encrypted with different 
keys.
+/// Any columns without a key specified will be unencrypted:
+/// ```
+/// # use parquet::encryption::encrypt::FileEncryptionProperties;
+/// let file_encryption_properties = 
FileEncryptionProperties::builder(b"0123456789012345".into())
+///     .with_column_key("x", b"1234567890123450".into())
+///     .with_column_key("y", b"1234567890123451".into())
+///     .build()?;
+/// # Ok::<(), parquet::errors::ParquetError>(())
+/// ```
+///
+/// Specify additional authenticated data, used to protect against data 
replacement.
+/// This should represent the file identity:
+/// ```
+/// # use parquet::encryption::encrypt::FileEncryptionProperties;
+/// let file_encryption_properties = 
FileEncryptionProperties::builder(b"0123456789012345".into())
+///     .with_aad_prefix("example_file".as_bytes().to_vec())

Review Comment:
   ```suggestion
   ///     .with_aad_prefix("example_file".into())
   ```



##########
parquet/src/encryption/decrypt.rs:
##########
@@ -195,7 +267,43 @@ impl PartialEq for DecryptionKeys {
     }
 }
 
-/// FileDecryptionProperties hold keys and AAD data required to decrypt a 
Parquet file.
+/// `FileDecryptionProperties` hold keys and AAD data required to decrypt a 
Parquet file.
+///
+/// When reading Arrow data, the `FileDecryptionProperties` should be included 
in the
+/// [`ArrowReaderOptions`](crate::arrow::arrow_reader::ArrowReaderOptions)  
using
+/// 
[`with_file_decryption_properties`](crate::arrow::arrow_reader::ArrowReaderOptions::with_file_decryption_properties).
+///
+/// # Examples
+///
+/// Create `FileDecryptionProperties` for a file encrypted with uniform 
encryption,
+/// where all metadata and data are encrypted with the footer key:
+/// ```
+/// # use parquet::encryption::decrypt::FileDecryptionProperties;
+/// let file_encryption_properties = 
FileDecryptionProperties::builder(b"0123456789012345".into())
+///     .build()?;
+/// # Ok::<(), parquet::errors::ParquetError>(())
+/// ```
+///
+/// Create properties for a file where columns are encrypted with different 
keys:
+/// ```
+/// # use parquet::encryption::decrypt::FileDecryptionProperties;
+/// let file_encryption_properties = 
FileDecryptionProperties::builder(b"0123456789012345".into())
+///     .with_column_key("x", b"1234567890123450".into())
+///     .with_column_key("y", b"1234567890123451".into())
+///     .build()?;
+/// # Ok::<(), parquet::errors::ParquetError>(())
+/// ```
+///
+/// Specify additional authenticated data, used to protect against data 
replacement.
+/// This must match the AAD prefix provided when the file was written, 
otherwise
+/// data decryption will fail.
+/// ```
+/// # use parquet::encryption::decrypt::FileDecryptionProperties;
+/// let file_encryption_properties = 
FileDecryptionProperties::builder(b"0123456789012345".into())
+///     .with_aad_prefix("example_file".as_bytes().to_vec())

Review Comment:
   ```suggestion
   ///     .with_aad_prefix("example_file".into())
   ```



##########
parquet/src/encryption/decrypt.rs:
##########
@@ -28,6 +28,78 @@ use std::io::Read;
 use std::sync::Arc;
 
 /// Trait for retrieving an encryption key using the key's metadata
+///
+/// # Example
+///
+/// This shows how you might use a `KeyRetriever` to decrypt a Parquet file
+/// if you have a set of known encryption keys with identifiers, but at read 
time
+/// you may not know which columns were encrypted and which keys were used.
+///
+/// In practice, the key metadata might instead store an encrypted key that 
must
+/// be decrypted with a Key Management Server.
+///
+/// ```
+/// # use std::collections::HashMap;
+/// # use std::sync::{Arc, Mutex};
+/// # use parquet::encryption::decrypt::{FileDecryptionProperties, 
KeyRetriever};
+/// # use parquet::encryption::encrypt::FileEncryptionProperties;
+/// # use parquet::errors::ParquetError;
+/// // Define known encryption keys
+/// let mut keys = HashMap::new();
+/// keys.insert("kf".to_owned(), b"0123456789012345".to_vec());
+/// keys.insert("kc1".to_owned(), b"1234567890123450".to_vec());
+/// keys.insert("kc2".to_owned(), b"1234567890123451".to_vec());
+///
+/// // Create encryption properties for writing a file,
+/// // and specify the key identifiers as the key metadata.
+/// let encryption_properties = 
FileEncryptionProperties::builder(keys.get("kf").unwrap().clone())
+///     .with_footer_key_metadata("kf".as_bytes().into())
+///     .with_column_key_and_metadata("x", keys.get("kc1").unwrap().clone(), 
"kc1".as_bytes().into())
+///     .with_column_key_and_metadata("y", keys.get("kc2").unwrap().clone(), 
"kc2".as_bytes().into())
+///     .build()?;
+///
+/// // Write an encrypted file with the properties
+/// // ...
+///
+/// // Define a KeyRetriever that can get encryption keys using their 
identifiers
+/// struct CustomKeyRetriever {
+///     keys: Mutex<HashMap<String, Vec<u8>>>,
+/// }
+///
+/// impl KeyRetriever for CustomKeyRetriever {
+///     fn retrieve_key(&self, key_metadata: &[u8]) -> 
parquet::errors::Result<Vec<u8>> {
+///         // Metadata is bytes, so convert it to a string identifier
+///         let key_metadata = std::str::from_utf8(key_metadata).map_err(|e| {
+///             ParquetError::General(format!("Could not convert key metadata 
to string: {}", e))
+///         })?;
+///         // Lookup the key
+///         let keys = self.keys.lock().unwrap();
+///         match keys.get(key_metadata) {
+///             Some(key) => Ok(key.clone()),
+///             None => Err(ParquetError::General(format!(
+///                 "Could not retrieve key for metadata {:?}",
+///                 key_metadata

Review Comment:
   ```suggestion
   ///                 "Could not retrieve key for metadata {key_metadata:?}"
   ```



##########
parquet/src/encryption/mod.rs:
##########
@@ -15,8 +15,96 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Encryption implementation specific to Parquet, as described
-//! in the 
[spec](https://github.com/apache/parquet-format/blob/master/Encryption.md).
+//! This module implements Parquet Modular Encryption, as described in the
+//! 
[specification](https://github.com/apache/parquet-format/blob/master/Encryption.md).
+//!
+//! # Example of writing and reading an encrypted Parquet file
+//!
+//! ```
+//! use arrow::array::{ArrayRef, Float32Array, Int32Array, RecordBatch};
+//! use parquet::arrow::arrow_reader::{ArrowReaderOptions, 
ParquetRecordBatchReaderBuilder};
+//! use parquet::arrow::ArrowWriter;
+//! use parquet::encryption::decrypt::FileDecryptionProperties;
+//! use parquet::encryption::encrypt::FileEncryptionProperties;
+//! use parquet::errors::Result;
+//! use parquet::file::properties::WriterProperties;
+//! use std::fs::File;
+//! use std::sync::Arc;
+//! use tempfile::TempDir;
+//!
+//! // Define 16 byte AES encryption keys to use.
+//! static FOOTER_KEY: &[u8; 16] = b"0123456789012345";
+//! static COLUMN_KEY_1: &[u8; 16] = b"1234567890123450";
+//! static COLUMN_KEY_2: &[u8; 16] = b"1234567890123451";
+//!
+//! let temp_dir = TempDir::new()?;
+//! let file_path = temp_dir.path().join("encrypted_example.parquet");
+//!
+//! // Create file encryption properties, which define how the file is 
encrypted.
+//! // We will specify a key to encrypt the footer metadata,
+//! // then separate keys for different columns.
+//! // This allows fine-grained control of access to different columns within 
a Parquet file.
+//! // Note that any columns without an encryption key specified will be left 
un-encrypted.
+//! // If only a footer key is specified, then all columns are encrypted with 
the footer key.
+//! let encryption_properties = 
FileEncryptionProperties::builder(FOOTER_KEY.into())
+//!     .with_column_key("x", COLUMN_KEY_1.into())
+//!     .with_column_key("y", COLUMN_KEY_2.into())
+//!     // We also set an AAD prefix, which is optional.
+//!     // This contributes to the "additional authenticated data" that is 
used to verify file
+//!     // integrity and prevents data being swapped with data encrypted with 
the same key.
+//!     .with_aad_prefix(b"example_aad".into())
+//!     // Specify that the AAD prefix is stored in the file, so readers don't 
need
+//!     // to provide it to read the data, but can optionally provide it if 
they want to
+//!     // verify file integrity.
+//!     .with_aad_prefix_storage(true)
+//!     .build()?;
+//!
+//! let writer_properties = WriterProperties::builder()
+//!     .with_file_encryption_properties(encryption_properties)
+//!     .build();
+//!
+//! // Write the encrypted Parquet file
+//! {
+//!     let file = File::create(&file_path)?;
+//!
+//!     let ids = Int32Array::from(vec![0, 1, 2, 3, 4, 5]);
+//!     let x_vals = Float32Array::from(vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.5]);
+//!     let y_vals = Float32Array::from(vec![1.0, 1.1, 1.2, 1.3, 1.4, 1.5]);
+//!     let batch = RecordBatch::try_from_iter(vec![
+//!       ("id", Arc::new(ids) as ArrayRef),
+//!       ("x", Arc::new(x_vals) as ArrayRef),
+//!       ("y", Arc::new(y_vals) as ArrayRef),
+//!     ])?;
+//!
+//!     let mut writer = ArrowWriter::try_new(file, batch.schema(), 
Some(writer_properties))?;
+//!
+//!     writer.write(&batch)?;
+//!     writer.close()?;
+//! }
+//!
+//! // In order to read the encrypted Parquet file, we need to know the 
encryption
+//! // keys used to encrypt it.
+//! // We don't need to provide the AAD prefix as it was stored in the file 
metadata,
+//! // but we could specify it here if we wanted to verify the file hasn't 
been tampered with:
+//! let decryption_properties = 
FileDecryptionProperties::builder(FOOTER_KEY.into())
+//!     .with_column_key("x", COLUMN_KEY_1.into())
+//!     .with_column_key("y", COLUMN_KEY_2.into())
+//!     .build()?;
+//!
+//! let reader_options =
+//!     
ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
+//!
+//! // Read the file using the configured decryption properties
+//! let file = File::open(&file_path)?;
+//!
+//! let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, 
reader_options)?;
+//! let record_reader = builder.build()?;
+//! for batch in record_reader {
+//!     let batch = batch?;
+//!     println!("Read batch: {:?}", batch);

Review Comment:
   ```suggestion
   //!     println!("Read batch: {batch:?}");
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to