This is an automated email from the ASF dual-hosted git repository.
zivanfi pushed a commit to branch encryption
in repository https://gitbox.apache.org/repos/asf/parquet-format.git
The following commit(s) were added to refs/heads/encryption by this push:
new 411d99e PARQUET-1419: enable old readers to access unencrypted columns in files with plaint… (#109)
411d99e is described below
commit 411d99e580bb52a61c0b3e4f1a791cbf21bc0f58
Author: ggershinsky <[email protected]>
AuthorDate: Fri Oct 26 12:11:45 2018 +0300
PARQUET-1419: enable old readers to access unencrypted columns in files with plaint… (#109)
---
src/main/thrift/parquet.thrift | 83 ++++++++++++++++++++++++------------------
1 file changed, 47 insertions(+), 36 deletions(-)
diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift
index c05e871..9d67a54 100644
--- a/src/main/thrift/parquet.thrift
+++ b/src/main/thrift/parquet.thrift
@@ -561,7 +561,7 @@ struct PageHeader {
/** Uncompressed page size in bytes (not including this header) **/
2: required i32 uncompressed_page_size
- /** Compressed page size in bytes (not including this header) **/
+ /** Compressed (and potentially encrypted) page size in bytes, not including this header **/
3: required i32 compressed_page_size
/** 32bit crc for the data below. This allows for disabling checksumming in HDFS
@@ -638,7 +638,8 @@ struct ColumnMetaData {
/** total byte size of all uncompressed pages in this column chunk (including the headers) **/
6: required i64 total_uncompressed_size
- /** total byte size of all compressed pages in this column chunk (including the headers) **/
+ /** total byte size of all compressed, and potentially encrypted, pages
+ * in this column chunk (including the headers) **/
7: required i64 total_compressed_size
/** Optional key/value metadata **/
@@ -730,7 +731,8 @@ struct RowGroup {
* in this row group **/
5: optional i64 file_offset
- /** Total byte size of all compressed column data in this row group **/
+ /** Total byte size of all compressed (and potentially encrypted) column data
+ * in this row group **/
6: optional i64 total_compressed_size
}
@@ -860,6 +862,31 @@ struct ColumnIndex {
5: optional list<i64> null_counts
}
+struct AesGcmV1 {
+ /** Retrieval metadata of AAD used for encryption of pages and structures **/
+ 1: optional binary aad_metadata
+
+ /** If file IVs are comprised of a fixed part, and variable parts
+ * (e.g. counter), keep the fixed part here **/
+ 2: optional binary iv_prefix
+}
+
+struct AesGcmCtrV1 {
+ /** Retrieval metadata of AAD used for encryption of structures **/
+ 1: optional binary aad_metadata
+
+ /** If file IVs are comprised of a fixed part, and variable parts
+ * (e.g. counter), keep the fixed part here **/
+ 2: optional binary gcm_iv_prefix
+
+ 3: optional binary ctr_iv_prefix
+}
+
+union EncryptionAlgorithm {
+ 1: AesGcmV1 AES_GCM_V1
+ 2: AesGcmCtrV1 AES_GCM_CTR_V1
+}
+
/**
* Description for file metadata
*/
@@ -902,46 +929,30 @@ struct FileMetaData {
* The obsolete min and max fields are always sorted by signed comparison
* regardless of column_orders.
*/
- 7: optional list<ColumnOrder> column_orders;
-}
-
-struct AesGcmV1 {
- /** Retrieval metadata of AAD used for encryption of pages and structures **/
- 1: optional binary aad_metadata
-
- /** If file IVs are comprised of a fixed part, and variable parts
- * (e.g. counter), keep the fixed part here **/
- 2: optional binary iv_prefix
-
-}
-
-struct AesGcmCtrV1 {
- /** Retrieval metadata of AAD used for encryption of structures **/
- 1: optional binary aad_metadata
-
- /** If file IVs are comprised of a fixed part, and variable parts
- * (e.g. counter), keep the fixed part here **/
- 2: optional binary gcm_iv_prefix
-
- 3: optional binary ctr_iv_prefix
-}
-
-union EncryptionAlgorithm {
- 1: AesGcmV1 AES_GCM_V1
- 2: AesGcmCtrV1 AES_GCM_CTR_V1
+ 7: optional list<ColumnOrder> column_orders
+
+ /**
+ * Encryption algorithm. Note that this field is only used for files
+ * with plaintext footer. Files with encrypted footer store the algorithm id
+ * in FileCryptoMetaData structure.
+ */
+ 8: optional EncryptionAlgorithm encryption_algorithm
}
+/** Crypto metadata for files with encrypted footer **/
struct FileCryptoMetaData {
+ /**
+ * Encryption algorithm. Note that this field is only used for files
+ * with encrypted footer. Files with plaintext footer store the algorithm id
+ * inside footer (FileMetaData structure).
+ */
1: required EncryptionAlgorithm encryption_algorithm
-
- /** Parquet footer can be encrypted, or left as plaintext **/
- 2: required bool encrypted_footer
/** Retrieval metadata of key used for encryption of footer,
* and (possibly) columns **/
- 3: optional binary footer_key_metadata
+ 2: optional binary footer_key_metadata
- /** Offset of Parquet footer (encrypted, or plaintext) **/
- 4: required i64 footer_offset
+ /** Offset of encrypted Parquet footer **/
+ 3: required i64 footer_offset
}