adamreeve commented on code in PR #7283: URL: https://github.com/apache/arrow-rs/pull/7283#discussion_r1994347289
########## parquet/src/arrow/mod.rs: ########## @@ -93,6 +93,96 @@ //! //! println!("Read {} records.", record_batch.num_rows()); //! ``` +//! +//! # Example of reading uniformly encrypted parquet file into arrow record batch +//! +#![cfg_attr(feature = "encryption", doc = "```rust")] +#![cfg_attr(not(feature = "encryption"), doc = "```ignore")] +//! # use arrow_array::{Int32Array, ArrayRef}; +//! # use arrow_array::{types, RecordBatch}; +//! # use parquet::arrow::arrow_reader::{ +//! # ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +//! # }; +//! # use arrow_array::cast::AsArray; +//! # use parquet::file::metadata::ParquetMetaData; +//! # use tempfile::tempfile; +//! # use std::fs::File; +//! # use parquet::encryption::decrypt::FileDecryptionProperties; +//! # let test_data = arrow::util::test_util::parquet_test_data(); +//! # let path = format!("{test_data}/uniform_encryption.parquet.encrypted"); +//! # +//! let file = File::open(path).unwrap(); +//! +//! let key_code: &[u8] = "0123456789012345".as_bytes(); +//! let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec()) +//! .build() +//! .unwrap(); +//! +//! let options = +//! ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties); +//! let reader_metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); +//! let metadata = reader_metadata.metadata(); +//! let file_metadata = metadata.file_metadata(); +//! +//! println!("Read {} rows.", file_metadata.num_rows()); +//! +//! let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); +//! println!("Converted arrow schema is: {}", builder.schema()); +//! +//! let mut reader = builder.build().unwrap(); +//! +//! let record_batch = reader.next().unwrap().unwrap(); +//! +//! println!("Read {} records.", record_batch.num_rows()); +//! ``` +//! +//! 
# Example of reading non-uniformly encrypted parquet file into arrow record batch Review Comment: I don't know that we need two examples, it might be enough to show the non-uniform encryption example and just have a comment that if no column keys are specified, it's assumed that uniform encryption is used and all columns are encrypted with the footer key? ########## parquet/src/arrow/mod.rs: ########## @@ -93,6 +93,96 @@ //! //! println!("Read {} records.", record_batch.num_rows()); //! ``` +//! +//! # Example of reading uniformly encrypted parquet file into arrow record batch +//! +#![cfg_attr(feature = "encryption", doc = "```rust")] +#![cfg_attr(not(feature = "encryption"), doc = "```ignore")] +//! # use arrow_array::{Int32Array, ArrayRef}; +//! # use arrow_array::{types, RecordBatch}; +//! # use parquet::arrow::arrow_reader::{ +//! # ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +//! # }; +//! # use arrow_array::cast::AsArray; +//! # use parquet::file::metadata::ParquetMetaData; +//! # use tempfile::tempfile; +//! # use std::fs::File; +//! # use parquet::encryption::decrypt::FileDecryptionProperties; +//! # let test_data = arrow::util::test_util::parquet_test_data(); +//! # let path = format!("{test_data}/uniform_encryption.parquet.encrypted"); +//! # +//! let file = File::open(path).unwrap(); +//! +//! let key_code: &[u8] = "0123456789012345".as_bytes(); +//! let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec()) +//! .build() +//! .unwrap(); +//! +//! let options = +//! ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties); +//! let reader_metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); +//! let metadata = reader_metadata.metadata(); +//! let file_metadata = metadata.file_metadata(); +//! +//! println!("Read {} rows.", file_metadata.num_rows()); +//! +//! let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); +//! 
println!("Converted arrow schema is: {}", builder.schema()); +//! +//! let mut reader = builder.build().unwrap(); +//! +//! let record_batch = reader.next().unwrap().unwrap(); +//! +//! println!("Read {} records.", record_batch.num_rows()); +//! ``` +//! +//! # Example of reading non-uniformly encrypted parquet file into arrow record batch +//! +#![cfg_attr(feature = "encryption", doc = "```rust")] +#![cfg_attr(not(feature = "encryption"), doc = "```ignore")] +//! # use arrow_array::{Int32Array, ArrayRef}; +//! # use arrow_array::{types, RecordBatch}; +//! # use parquet::arrow::arrow_reader::{ +//! # ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +//! # }; +//! # use arrow_array::cast::AsArray; +//! # use parquet::file::metadata::ParquetMetaData; +//! # use tempfile::tempfile; +//! # use std::fs::File; +//! # use parquet::encryption::decrypt::FileDecryptionProperties; +//! # let test_data = arrow::util::test_util::parquet_test_data(); +//! # let path = format!("{test_data}/encrypt_columns_and_footer.parquet.encrypted"); +//! # +//! let file = File::open(path).unwrap(); +//! +//! // There is always a footer key even with a plaintext footer, +//! // but this is used for signing the footer. Review Comment: I don't think this comment is that relevant, this isn't a plaintext footer file and plaintext isn't the default. Maybe we could just describe what's happening in a bit more detail and provide some context, e.g.: "Define the AES encryption keys required for decrypting the footer metadata and column-specific data. If only a footer key is used then it is assumed that the file uses uniform encryption and all columns are encrypted with the footer key. If any column keys are specified, other columns without a key provided are assumed to be unencrypted" ########## parquet/src/arrow/mod.rs: ########## @@ -93,6 +93,96 @@ //! //! println!("Read {} records.", record_batch.num_rows()); //! ``` +//! +//! 
# Example of reading uniformly encrypted parquet file into arrow record batch +//! +#![cfg_attr(feature = "encryption", doc = "```rust")] +#![cfg_attr(not(feature = "encryption"), doc = "```ignore")] +//! # use arrow_array::{Int32Array, ArrayRef}; +//! # use arrow_array::{types, RecordBatch}; Review Comment: I'm not that familiar with Rust documentation and doc tests, does cfg_attr mean that the example will only be included in the docs if the encryption feature is enabled, or is that only controlling whether the example is tested? Do we want to always show the example but document that it requires the encryption feature (and that the feature is experimental)? ########## parquet/src/arrow/mod.rs: ########## @@ -93,6 +93,96 @@ //! //! println!("Read {} records.", record_batch.num_rows()); //! ``` +//! +//! # Example of reading uniformly encrypted parquet file into arrow record batch +//! +#![cfg_attr(feature = "encryption", doc = "```rust")] +#![cfg_attr(not(feature = "encryption"), doc = "```ignore")] +//! # use arrow_array::{Int32Array, ArrayRef}; +//! # use arrow_array::{types, RecordBatch}; +//! # use parquet::arrow::arrow_reader::{ +//! # ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +//! # }; +//! # use arrow_array::cast::AsArray; +//! # use parquet::file::metadata::ParquetMetaData; +//! # use tempfile::tempfile; +//! # use std::fs::File; +//! # use parquet::encryption::decrypt::FileDecryptionProperties; +//! # let test_data = arrow::util::test_util::parquet_test_data(); +//! # let path = format!("{test_data}/uniform_encryption.parquet.encrypted"); +//! # +//! let file = File::open(path).unwrap(); +//! +//! let key_code: &[u8] = "0123456789012345".as_bytes(); +//! let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec()) +//! .build() +//! .unwrap(); +//! +//! let options = +//! ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties); +//! 
let reader_metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); +//! let metadata = reader_metadata.metadata(); +//! let file_metadata = metadata.file_metadata(); +//! +//! println!("Read {} rows.", file_metadata.num_rows()); +//! +//! let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); +//! println!("Converted arrow schema is: {}", builder.schema()); +//! +//! let mut reader = builder.build().unwrap(); +//! +//! let record_batch = reader.next().unwrap().unwrap(); +//! +//! println!("Read {} records.", record_batch.num_rows()); +//! ``` +//! +//! # Example of reading non-uniformly encrypted parquet file into arrow record batch +//! +#![cfg_attr(feature = "encryption", doc = "```rust")] +#![cfg_attr(not(feature = "encryption"), doc = "```ignore")] +//! # use arrow_array::{Int32Array, ArrayRef}; +//! # use arrow_array::{types, RecordBatch}; +//! # use parquet::arrow::arrow_reader::{ +//! # ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +//! # }; +//! # use arrow_array::cast::AsArray; +//! # use parquet::file::metadata::ParquetMetaData; +//! # use tempfile::tempfile; +//! # use std::fs::File; +//! # use parquet::encryption::decrypt::FileDecryptionProperties; +//! # let test_data = arrow::util::test_util::parquet_test_data(); +//! # let path = format!("{test_data}/encrypt_columns_and_footer.parquet.encrypted"); +//! # +//! let file = File::open(path).unwrap(); +//! +//! // There is always a footer key even with a plaintext footer, +//! // but this is used for signing the footer. +//! let footer_key = "0123456789012345".as_bytes(); // 128bit/16 Review Comment: ```suggestion //! let footer_key = "0123456789012345".as_bytes(); // Keys are 128 bits (16 bytes) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org