This is an automated email from the ASF dual-hosted git repository.
gershinsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 2c233f2fd factory complete column encr (#1180)
2c233f2fd is described below
commit 2c233f2fd4a6c4bc4d7d5a9f4707c6bdce84273b
Author: ggershinsky <[email protected]>
AuthorDate: Tue Nov 7 11:18:30 2023 +0200
factory complete column encr (#1180)
---
parquet-hadoop/README.md | 11 ++++++++++-
.../keytools/PropertiesDrivenCryptoFactory.java | 19 ++++++++++++++++++-
.../crypto/TestPropertiesDrivenEncryption.java | 15 +++++++++++++++
3 files changed, 43 insertions(+), 2 deletions(-)
diff --git a/parquet-hadoop/README.md b/parquet-hadoop/README.md
index 6f2373b9e..d235c618c 100644
--- a/parquet-hadoop/README.md
+++ b/parquet-hadoop/README.md
@@ -408,7 +408,10 @@ ParquetInputFormat to materialize records. It should be a
the descendant class o
## Class: PropertiesDrivenCryptoFactory
**Property:** `parquet.encryption.column.keys`
-**Description:** List of columns to encrypt, with master key IDs (see
HIVE-21848).Format:
`<masterKeyID>:<colName>,<colName>;<masterKeyID>:<colName>...`. Note: nested
column names must be specified as full dot-separated paths for each leaf
column.
+**Description:** List of columns to encrypt, with master key IDs (see
HIVE-21848).
+Format: `<masterKeyID>:<colName>,<colName>;<masterKeyID>:<colName>...`.
+Unlisted columns are not encrypted.
+Note: nested column names must be specified as full dot-separated paths for
each leaf column.
**Default value:** None.
---
@@ -419,6 +422,12 @@ ParquetInputFormat to materialize records. It should be a
the descendant class o
---
+**Property:** `parquet.encryption.complete.columns`
+**Description:** Complete column encryption - if set to `true`, columns not listed in `parquet.encryption.column.keys`
are encrypted (using the footer master key). Cannot be combined with `parquet.encryption.uniform.key`.
+**Default value:** `false`
+
+---
+
**Property:** `parquet.encryption.uniform.key`
**Description:** Master key ID for uniform encryption of all columns and
footer. If set, `column.keys` and `footer.key` parameters should not be used.
**Default value:** None.
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
index 817ab4d96..274b8eae6 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
@@ -49,13 +49,19 @@ public class PropertiesDrivenCryptoFactory implements
EncryptionPropertiesFactor
/**
* List of columns to encrypt, with master key IDs (see HIVE-21848).
- * Format: "masterKeyID:colName,colName;masterKeyID:colName..."
+ * Format: "masterKeyID:colName,colName;masterKeyID:colName...".
+ * Unlisted columns are not encrypted.
*/
public static final String COLUMN_KEYS_PROPERTY_NAME =
"parquet.encryption.column.keys";
/**
* Master key ID for footer encryption/signing.
*/
public static final String FOOTER_KEY_PROPERTY_NAME =
"parquet.encryption.footer.key";
+ /**
+ * If true, columns not listed in the column keys property are encrypted using the footer key.
+ * By default, false - unlisted columns are not encrypted.
+ */
+ public static final String COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME =
"parquet.encryption.complete.columns";
/**
* Master key ID for uniform encryption (same key for all columns and
footer).
*/
@@ -72,6 +78,7 @@ public class PropertiesDrivenCryptoFactory implements
EncryptionPropertiesFactor
public static final String ENCRYPTION_ALGORITHM_DEFAULT =
ParquetCipher.AES_GCM_V1.toString();
public static final boolean PLAINTEXT_FOOTER_DEFAULT = false;
+ public static final boolean COMPLETE_COLUMN_ENCRYPTION_DEFAULT = false;
private static final SecureRandom RANDOM = new SecureRandom();
@@ -82,6 +89,9 @@ public class PropertiesDrivenCryptoFactory implements
EncryptionPropertiesFactor
String footerKeyId = fileHadoopConfig.getTrimmed(FOOTER_KEY_PROPERTY_NAME);
String columnKeysStr =
fileHadoopConfig.getTrimmed(COLUMN_KEYS_PROPERTY_NAME);
String uniformKeyId =
fileHadoopConfig.getTrimmed(UNIFORM_KEY_PROPERTY_NAME);
+ boolean completeColumnEncryption =
fileHadoopConfig.getBoolean(COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME,
+ COMPLETE_COLUMN_ENCRYPTION_DEFAULT);
+
boolean emptyFooterKeyId = stringIsEmpty(footerKeyId);
boolean emptyColumnKeyIds = stringIsEmpty(columnKeysStr);
@@ -111,6 +121,9 @@ public class PropertiesDrivenCryptoFactory implements
EncryptionPropertiesFactor
throw new ParquetCryptoRuntimeException("Uniform encryption. Cant have
column keys configured in " +
COLUMN_KEYS_PROPERTY_NAME);
}
+ if (completeColumnEncryption) {
+ throw new ParquetCryptoRuntimeException("Complete column encryption
cant be applied in uniform encryption mode");
+ }
// Now assign footer key id to uniform key id
footerKeyId = uniformKeyId;
@@ -164,6 +177,10 @@ public class PropertiesDrivenCryptoFactory implements
EncryptionPropertiesFactor
Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns =
getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper);
propertiesBuilder =
propertiesBuilder.withEncryptedColumns(encryptedColumns);
+
+ if (completeColumnEncryption) {
+ propertiesBuilder = propertiesBuilder.withCompleteColumnEncryption();
+ }
}
if (plaintextFooter) {
diff --git
a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
index 0c5a7cc5d..2384b48c4 100644
---
a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
+++
b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
@@ -99,6 +99,9 @@ import static
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
* - plaintext footer mode.
* - ENCRYPT_COLUMNS_AND_FOOTER_CTR: Encrypt two columns and the footer,
with different
* keys. Use the alternative (AES_GCM_CTR_V1)
algorithm.
+ * - COMPLETE_COLUMN_ENCRYPTION: Encrypt two columns and the footer, with
different
+ * keys. Encrypt other columns with the
footer key.
+ * - UNIFORM_ENCRYPTION: Encrypt all columns and footer with the
same master key.
* - NO_ENCRYPTION: Do not encrypt anything
*
*
@@ -270,6 +273,18 @@ public class TestPropertiesDrivenEncryption {
return conf;
}
},
+ COMPLETE_COLUMN_ENCRYPTION {
+ /**
+ * Encrypt two columns and the footer, with different master keys.
+ * Encrypt other columns with the footer master key.
+ */
+ public Configuration
getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
+ Configuration conf = getCryptoProperties(test);
+ setColumnAndFooterKeys(conf);
+
conf.setBoolean(PropertiesDrivenCryptoFactory.COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME,
true);
+ return conf;
+ }
+ },
NO_ENCRYPTION {
/**
* Do not encrypt anything