adamreeve commented on code in PR #7283: URL: https://github.com/apache/arrow-rs/pull/7283#discussion_r1994601568
########## parquet/src/arrow/mod.rs: ########## @@ -93,6 +93,57 @@ //! //! println!("Read {} records.", record_batch.num_rows()); //! ``` +//! +//! # Example of reading non-uniformly encrypted parquet file into arrow record batch +//! Review Comment: Can we add a note here that this requires the experimental `encryption` feature to be enabled? ########## parquet/src/arrow/mod.rs: ########## @@ -93,6 +93,57 @@ //! //! println!("Read {} records.", record_batch.num_rows()); //! ``` +//! +//! # Example of reading non-uniformly encrypted parquet file into arrow record batch +//! +#![cfg_attr(feature = "encryption", doc = "```rust")] +#![cfg_attr(not(feature = "encryption"), doc = "```ignore")] +//! # use arrow_array::{Int32Array, ArrayRef}; +//! # use arrow_array::{types, RecordBatch}; +//! # use parquet::arrow::arrow_reader::{ +//! # ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +//! # }; +//! # use arrow_array::cast::AsArray; +//! # use parquet::file::metadata::ParquetMetaData; +//! # use tempfile::tempfile; +//! # use std::fs::File; +//! # use parquet::encryption::decrypt::FileDecryptionProperties; +//! # let test_data = arrow::util::test_util::parquet_test_data(); +//! # let path = format!("{test_data}/encrypt_columns_and_footer.parquet.encrypted"); +//! # +//! let file = File::open(path).unwrap(); +//! +//! // Define the AES encryption keys required for decrypting the footer metadata +//! // and column-specific data. If only a footer key is used then it is assumed that the +//! // file uses uniform encryption and all columns are encrypted with the footer key. +//! // If any column keys are specified, other columns without a key provided are assumed +//! // to be unencrypted +//! let footer_key = "0123456789012345".as_bytes(); // Keys are 128 bits (16 bytes) +//! let column_1_key = "1234567890123450".as_bytes(); +//! let column_2_key = "1234567890123451".as_bytes(); +//! +//! 
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec()) +//! .with_column_key("double_field", column_1_key.to_vec()) +//! .with_column_key("float_field", column_2_key.to_vec()) +//! .build() +//! .unwrap(); +//! +//! let options = +//! ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties); +//! let reader_metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap(); +//! let metadata = reader_metadata.metadata(); +//! let file_metadata = metadata.file_metadata(); +//! +//! println!("Read {} rows.", file_metadata.num_rows()); +//! +//! let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); +//! println!("Converted arrow schema is: {}", builder.schema()); +//! +//! let mut reader = builder.build().unwrap(); +//! +//! let record_batch = reader.next().unwrap().unwrap(); +//! +//! println!("Read {} records.", record_batch.num_rows()); Review Comment: I think this can be simplified and tidied up a little, we don't need to repeat some of the details that are in examples above: ```suggestion //! let file_metadata = reader_metadata.metadata().file_metadata(); //! println!("File has {} rows.", file_metadata.num_rows()); //! //! let mut reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap().build().unwrap(); //! //! let record_batch = reader.next().unwrap().unwrap(); //! println!("Read {} records.", record_batch.num_rows()); //! ``` ``` (I also added a closing ```, although the docs seemed to build fine without it?) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org