This is an automated email from the ASF dual-hosted git repository.

gershinsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 2c233f2fd factory complete column encr (#1180)
2c233f2fd is described below

commit 2c233f2fd4a6c4bc4d7d5a9f4707c6bdce84273b
Author: ggershinsky <[email protected]>
AuthorDate: Tue Nov 7 11:18:30 2023 +0200

    factory complete column encr (#1180)
---
 parquet-hadoop/README.md                              | 11 ++++++++++-
 .../keytools/PropertiesDrivenCryptoFactory.java       | 19 ++++++++++++++++++-
 .../crypto/TestPropertiesDrivenEncryption.java        | 15 +++++++++++++++
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/parquet-hadoop/README.md b/parquet-hadoop/README.md
index 6f2373b9e..d235c618c 100644
--- a/parquet-hadoop/README.md
+++ b/parquet-hadoop/README.md
@@ -408,7 +408,10 @@ ParquetInputFormat to materialize records. It should be a 
the descendant class o
 ## Class: PropertiesDrivenCryptoFactory
 
 **Property:** `parquet.encryption.column.keys`  
-**Description:** List of columns to encrypt, with master key IDs (see HIVE-21848).Format: `<masterKeyID>:<colName>,<colName>;<masterKeyID>:<colName>...`. Note: nested column names must be specified as full dot-separated paths for each leaf column.  
+**Description:** List of columns to encrypt, with master key IDs (see HIVE-21848). 
+Format: `<masterKeyID>:<colName>,<colName>;<masterKeyID>:<colName>...`. 
+Unlisted columns are not encrypted.
+Note: nested column names must be specified as full dot-separated paths for each leaf column.  
 **Default value:** None.
 
 ---
@@ -419,6 +422,12 @@ ParquetInputFormat to materialize records. It should be a 
the descendant class o
 
 ---
 
+**Property:** `parquet.encryption.complete.columns`  
+**Description:** Complete column encryption - if set to `true`, unlisted columns are encrypted (using the footer master key).  
+**Default value:** `false`
+
+---
+
 **Property:** `parquet.encryption.uniform.key`  
 **Description:** Master key ID for uniform encryption of all columns and footer. If set, `column.keys` and `footer.key` parameters should not be used.  
 **Default value:** None.
diff --git 
a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
 
b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
index 817ab4d96..274b8eae6 100644
--- 
a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
+++ 
b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
@@ -49,13 +49,19 @@ public class PropertiesDrivenCryptoFactory implements 
EncryptionPropertiesFactor
 
   /**
    * List of columns to encrypt, with master key IDs (see HIVE-21848).
-   * Format: "masterKeyID:colName,colName;masterKeyID:colName..."
+   * Format: "masterKeyID:colName,colName;masterKeyID:colName...".
+   * Unlisted columns are not encrypted.
    */
   public static final String COLUMN_KEYS_PROPERTY_NAME = 
"parquet.encryption.column.keys";
   /**
    * Master key ID for footer encryption/signing.
    */
   public static final String FOOTER_KEY_PROPERTY_NAME = 
"parquet.encryption.footer.key";
+  /**
+   * Encrypt unlisted columns using footer key.
+   * By default, false - unlisted columns are not encrypted.
+   */
+  public static final String COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME = 
"parquet.encryption.complete.columns";
   /**
    * Master key ID for uniform encryption (same key for all columns and 
footer).
    */
@@ -72,6 +78,7 @@ public class PropertiesDrivenCryptoFactory implements 
EncryptionPropertiesFactor
 
   public static final String ENCRYPTION_ALGORITHM_DEFAULT = 
ParquetCipher.AES_GCM_V1.toString();
   public static final boolean PLAINTEXT_FOOTER_DEFAULT = false;
+  public static final boolean COMPLETE_COLUMN_ENCRYPTION_DEFAULT = false;
 
   private static final SecureRandom RANDOM = new SecureRandom();
 
@@ -82,6 +89,9 @@ public class PropertiesDrivenCryptoFactory implements 
EncryptionPropertiesFactor
     String footerKeyId = fileHadoopConfig.getTrimmed(FOOTER_KEY_PROPERTY_NAME);
     String columnKeysStr = 
fileHadoopConfig.getTrimmed(COLUMN_KEYS_PROPERTY_NAME);
     String uniformKeyId = 
fileHadoopConfig.getTrimmed(UNIFORM_KEY_PROPERTY_NAME);
+    boolean completeColumnEncryption = 
fileHadoopConfig.getBoolean(COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME,
+      COMPLETE_COLUMN_ENCRYPTION_DEFAULT);
+
 
     boolean emptyFooterKeyId = stringIsEmpty(footerKeyId);
     boolean emptyColumnKeyIds = stringIsEmpty(columnKeysStr);
@@ -111,6 +121,9 @@ public class PropertiesDrivenCryptoFactory implements 
EncryptionPropertiesFactor
         throw new ParquetCryptoRuntimeException("Uniform encryption. Cant have 
column keys configured in " +
           COLUMN_KEYS_PROPERTY_NAME);
       }
+      if (completeColumnEncryption) {
+        throw new ParquetCryptoRuntimeException("Complete column encryption 
cant be applied in uniform encryption mode");
+      }
 
       // Now assign footer key id to uniform key id
       footerKeyId = uniformKeyId;
@@ -164,6 +177,10 @@ public class PropertiesDrivenCryptoFactory implements 
EncryptionPropertiesFactor
       Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns =
         getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper);
       propertiesBuilder = 
propertiesBuilder.withEncryptedColumns(encryptedColumns);
+
+      if (completeColumnEncryption) {
+        propertiesBuilder = propertiesBuilder.withCompleteColumnEncryption();
+      }
     }
 
     if (plaintextFooter) {
diff --git 
a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
 
b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
index 0c5a7cc5d..2384b48c4 100644
--- 
a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
+++ 
b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
@@ -99,6 +99,9 @@ import static 
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
  *                                  - plaintext footer mode.
  *  - ENCRYPT_COLUMNS_AND_FOOTER_CTR:   Encrypt two columns and the footer, 
with different
  *                                  keys. Use the alternative (AES_GCM_CTR_V1) 
algorithm.
+ *  - COMPLETE_COLUMN_ENCRYPTION:   Encrypt two columns and the footer, with 
different
+ *                                  keys. Encrypt other columns with the 
footer key.
+ *  - UNIFORM_ENCRYPTION:           Encrypt all columns and footer with the 
same master key.
  *  - NO_ENCRYPTION:   Do not encrypt anything
  *
  *
@@ -270,6 +273,18 @@ public class TestPropertiesDrivenEncryption {
         return conf;
       }
     },
+    COMPLETE_COLUMN_ENCRYPTION {
+      /**
+       * Encrypt two columns and the footer, with different master keys.
+       * Encrypt other columns with the footer master key.
+       */
+      public Configuration 
getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
+        Configuration conf = getCryptoProperties(test);
+        setColumnAndFooterKeys(conf);
+        
conf.setBoolean(PropertiesDrivenCryptoFactory.COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME,
 true);
+        return conf;
+      }
+    },
     NO_ENCRYPTION {
       /**
        * Do not encrypt anything

Reply via email to