adamreeve commented on code in PR #9203:
URL: https://github.com/apache/arrow-rs/pull/9203#discussion_r2893682286


##########
parquet/tests/encryption/encryption_async.rs:
##########
@@ -183,113 +323,285 @@ async fn 
test_plaintext_footer_read_without_decryption() {
 
 #[tokio::test]
 async fn test_non_uniform_encryption() {
-    let test_data = arrow::util::test_util::parquet_test_data();
-    let path = 
format!("{test_data}/encrypt_columns_and_footer.parquet.encrypted");
-    let mut file = File::open(&path).await.unwrap();
+    async fn non_uniform_encryption(footer_key: &[u8], column_keys: &[(&str, 
&[u8])]) {
+        let path = encryption_util::encrypted_data_path(
+            footer_key,
+            "encrypt_columns_and_footer.parquet.encrypted",
+        );
+        let mut file = File::open(&path).await.unwrap();
 
-    let footer_key = "0123456789012345".as_bytes().to_vec(); // 128bit/16
-    let column_1_key = "1234567890123450".as_bytes().to_vec();
-    let column_2_key = "1234567890123451".as_bytes().to_vec();
+        let mut builder = 
FileDecryptionProperties::builder(footer_key.to_vec());
+        for (column_name, key) in column_keys {
+            builder = builder.with_column_key(column_name, key.to_vec());
+        }
+        let decryption_properties = builder.build().unwrap();
 
-    let decryption_properties = 
FileDecryptionProperties::builder(footer_key.to_vec())
-        .with_column_key("double_field", column_1_key)
-        .with_column_key("float_field", column_2_key)
-        .build()
-        .unwrap();
+        verify_encryption_test_file_read_async(&mut file, 
decryption_properties)
+            .await
+            .unwrap();
+    }
 
-    verify_encryption_test_file_read_async(&mut file, decryption_properties)
-        .await
-        .unwrap();
+    // AES-128
+    non_uniform_encryption(
+        b"0123456789012345",
+        &[
+            ("double_field", b"1234567890123450".as_slice()),
+            ("float_field", b"1234567890123451".as_slice()),
+        ],
+    )
+    .await;
+
+    // AES-256
+    non_uniform_encryption(
+        b"01234567890123456789012345678901", // 256bit/32
+        &[
+            (
+                "double_field",
+                b"12345678901234567890123456789012".as_slice(),
+            ),
+            (
+                "float_field",
+                b"12345678901234567890123456789013".as_slice(),
+            ),
+            (
+                "boolean_field",
+                b"12345678901234567890123456789014".as_slice(),
+            ),
+            (
+                "int32_field",
+                b"12345678901234567890123456789015".as_slice(),
+            ),
+            ("ba_field", b"12345678901234567890123456789016".as_slice()),
+            ("flba_field", b"12345678901234567890123456789017".as_slice()),
+            (
+                "int64_field",
+                b"12345678901234567890123456789018".as_slice(),
+            ),
+            (
+                "int96_field",
+                b"12345678901234567890123456789019".as_slice(),
+            ),
+        ],
+    )
+    .await;
 }
 
 #[tokio::test]
 async fn test_uniform_encryption() {
-    let test_data = arrow::util::test_util::parquet_test_data();
-    let path = format!("{test_data}/uniform_encryption.parquet.encrypted");
-    let mut file = File::open(&path).await.unwrap();
+    async fn uniform_encryption(footer_key: &[u8], column_keys: &[(&str, 
&[u8])]) {
+        let path = encryption_util::encrypted_data_path(
+            footer_key,
+            "uniform_encryption.parquet.encrypted",
+        );
+        let mut file = File::open(&path).await.unwrap();
 
-    let key_code: &[u8] = "0123456789012345".as_bytes();
-    let decryption_properties = 
FileDecryptionProperties::builder(key_code.to_vec())
-        .build()
-        .unwrap();
+        let mut builder = 
FileDecryptionProperties::builder(footer_key.to_vec());
+        for (column_name, key) in column_keys {
+            builder = builder.with_column_key(column_name, key.to_vec());
+        }
+        let decryption_properties = builder.build().unwrap();
 
-    verify_encryption_test_file_read_async(&mut file, decryption_properties)
-        .await
-        .unwrap();
+        verify_encryption_test_file_read_async(&mut file, 
decryption_properties)
+            .await
+            .unwrap();
+    }
+
+    // AES-128: there is always a footer key even with a plaintext footer,
+    // but this is used for signing the footer.
+    uniform_encryption(
+        b"0123456789012345", // 128bit/16
+        &[],
+    )
+    .await;
+
+    // AES-256
+    uniform_encryption(
+        b"01234567890123456789012345678901", // 256bit/32
+        &[],
+    )
+    .await;
 }
 
 #[tokio::test]
 async fn test_aes_ctr_encryption() {
-    let test_data = arrow::util::test_util::parquet_test_data();
-    let path = 
format!("{test_data}/encrypt_columns_and_footer_ctr.parquet.encrypted");
-    let mut file = File::open(&path).await.unwrap();
+    async fn aes_ctr_encryption(footer_key: &[u8], column_keys: &[(&str, 
&[u8])]) {
+        let path = encryption_util::encrypted_data_path(
+            footer_key,
+            "encrypt_columns_and_footer_ctr.parquet.encrypted",
+        );
+        let mut file = File::open(&path).await.unwrap();
 
-    let footer_key = "0123456789012345".as_bytes().to_vec();
-    let column_1_key = "1234567890123450".as_bytes().to_vec();
-    //let column_2_key = "1234567890123451".as_bytes().to_vec();
+        let mut builder = 
FileDecryptionProperties::builder(footer_key.to_vec());
+        for (column_name, key) in column_keys {
+            builder = builder.with_column_key(column_name, key.to_vec());
+        }
+        let decryption_properties = builder.build().unwrap();
 
-    let decryption_properties = FileDecryptionProperties::builder(footer_key)
-        .with_column_key("double_field", column_1_key.clone())
-        .with_column_key("float_field", column_1_key)
-        .build()
-        .unwrap();
+        let options =
+            
ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
+        let metadata = ArrowReaderMetadata::load_async(&mut file, 
options).await;
 
-    let options = 
ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
-    let metadata = ArrowReaderMetadata::load_async(&mut file, options).await;
+        match metadata {
+            Err(ParquetError::NYI(s)) => {
+                assert!(s.contains("AES_GCM_CTR_V1"));
+            }
+            _ => {
+                panic!("Expected ParquetError::NYI");
+            }
+        };
+    }
 
-    match metadata {
-        Err(ParquetError::NYI(s)) => {
-            assert!(s.contains("AES_GCM_CTR_V1"));
-        }
-        _ => {
-            panic!("Expected ParquetError::NYI");
-        }
-    };
+    // AES-128
+    aes_ctr_encryption(
+        b"0123456789012345", // 128bit/16
+        &[
+            ("double_field", b"1234567890123450".as_slice()),
+            ("float_field", b"1234567890123451".as_slice()),
+        ],
+    )
+    .await;
+
+    // AES-256
+    aes_ctr_encryption(
+        b"01234567890123456789012345678901", // 256bit/32
+        &[
+            (
+                "double_field",
+                b"12345678901234567890123456789012".as_slice(),
+            ),
+            (
+                "float_field",
+                b"12345678901234567890123456789013".as_slice(),
+            ),
+        ],
+    )
+    .await;
 }
 
 #[tokio::test]
 async fn test_decrypting_without_decryption_properties_fails() {
     let test_data = arrow::util::test_util::parquet_test_data();
-    let path = format!("{test_data}/uniform_encryption.parquet.encrypted");
-    let mut file = File::open(&path).await.unwrap();
+    let paths = [
+        format!("{test_data}/uniform_encryption.parquet.encrypted"),
+        format!("{test_data}/aes256/uniform_encryption.parquet.encrypted"),
+    ];
 
-    let options = ArrowReaderOptions::new();
-    let result = ArrowReaderMetadata::load_async(&mut file, options).await;
-    assert!(result.is_err());
-    assert_eq!(
-        result.unwrap_err().to_string(),
-        "Parquet error: Parquet file has an encrypted footer but decryption 
properties were not provided"
-    );
+    for path in &paths {
+        let mut file = File::open(&path).await.unwrap();
+
+        let options = ArrowReaderOptions::new();
+        let result = ArrowReaderMetadata::load_async(&mut file, options).await;
+        assert!(result.is_err());
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "Parquet error: Parquet file has an encrypted footer but 
decryption properties were not provided"
+        );
+    }
 }
 
 #[tokio::test]
 async fn test_write_non_uniform_encryption() {
-    let testdata = arrow::util::test_util::parquet_test_data();
-    let path = 
format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
+    async fn write_non_uniform_encryption(
+        footer_key: &[u8],
+        column_names: Vec<&str>,
+        column_keys: Vec<Vec<u8>>,
+        encryption_column_keys: &[(&str, &[u8])],
+    ) {
+        let path = encryption_util::encrypted_data_path(
+            footer_key,
+            "encrypt_columns_and_footer.parquet.encrypted",
+        );
+
+        let decryption_properties = 
FileDecryptionProperties::builder(footer_key.to_vec())
+            .with_column_keys(column_names.to_vec(), column_keys.clone())
+            .unwrap()
+            .build()
+            .unwrap();
 
-    let footer_key = b"0123456789012345".to_vec(); // 128bit/16
-    let column_names = vec!["double_field", "float_field"];
-    let column_keys = vec![b"1234567890123450".to_vec(), 
b"1234567890123451".to_vec()];
+        let mut builder = 
FileEncryptionProperties::builder(footer_key.to_vec());
+        for (column_name, key) in encryption_column_keys {
+            builder = builder.with_column_key(column_name, key.to_vec());
+        }
+        let file_encryption_properties = builder.build().unwrap();
 
-    let decryption_properties = 
FileDecryptionProperties::builder(footer_key.clone())
-        .with_column_keys(column_names.clone(), column_keys.clone())
-        .unwrap()
-        .build()
+        read_and_roundtrip_to_encrypted_file_async(
+            &path,
+            decryption_properties,
+            file_encryption_properties,
+        )
+        .await
         .unwrap();
+    }
 
-    let file_encryption_properties = 
FileEncryptionProperties::builder(footer_key)
-        .with_column_keys(column_names, column_keys)
-        .unwrap()
-        .build()
-        .unwrap();
+    write_non_uniform_encryption(
+        b"0123456789012345", // 128bit/16,
+        vec!["double_field", "float_field"],
+        vec![b"1234567890123450".to_vec(), b"1234567890123451".to_vec()],
+        &[
+            ("double_field", b"1234567890123450".as_slice()),
+            ("float_field", b"1234567890123451".as_slice()),
+        ],
+    )
+    .await;
 
-    read_and_roundtrip_to_encrypted_file_async(
-        &path,
-        decryption_properties,
-        file_encryption_properties,
+    // AES-256
+    // The asymmetric column names is because we check column paths in the 
validate_encrypted_column_names function of [encryption::encrypt]

Review Comment:
   Hi @hsiang-c. This doesn't look like expected behaviour to me, you shouldn't 
have to specify the same encryption key for both `int64_field` and 
`int64_field.list.int64_field`. I would expect that the full path should be 
used everywhere.
   
   I'm also surprised that the path is `int64_field.list.int64_field`, the path 
should be `int64_field.list.element` according to the [spec for the list 
logical 
type](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists).
 Is this another bug related to encryption, or does arrow-rs differ from the 
spec here?
   
   We need to support the use case where different struct fields can be 
encrypted with different keys, so I don't think your suggested fix to change to 
using `c.name()` instead of `c.path()` is correct, although I'm not sure where 
you're suggesting to change that.
   
   But we may want to allow simpler encryption configuration using the top 
level field names as a new feature. C++ Arrow has recently added that for 
example: https://github.com/apache/arrow/pull/45462
   
   I agree that this isn't related to the current PR so can be fixed separately.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to