thamht4190 commented on a change in pull request #8023: URL: https://github.com/apache/arrow/pull/8023#discussion_r497981549
########## File path: cpp/src/parquet/encryption/file_key_wrapper.h ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <memory> +#include <string> +#include <unordered_map> + +#include "arrow/util/concurrent_map.h" + +#include "parquet/encryption/file_key_material_store.h" +#include "parquet/encryption/key_encryption_key.h" +#include "parquet/encryption/key_toolkit.h" +#include "parquet/encryption/kms_client.h" +#include "parquet/platform.h" + +namespace parquet { +namespace encryption { + +// This class will generate "key metadata" from "data encryption key" and "master key", +// following these steps: +// 1. Wrap "data encryption key". There are 2 modes: +// 1.1. single wrapping: encrypt "data encryption key" directly with "master encryption +// key" 1.2. double wrapping: 2 steps: 1.2.1. "key encryption key" is randomized (see +// structure of KeyEncryptionKey class) 1.2.2. "data encryption key" is encrypted with the +// above "key encryption key" +// 2. Create "key material" (see structure in KeyMaterial class) +// 3. Create "key metadata" with "key material" inside or a reference to outside "key +// material" (see structure in KeyMetadata class). 
+// Currently we don't support the case "key material" stores outside "key metadata" +// yet. +class PARQUET_EXPORT FileKeyWrapper { + public: + static constexpr int kKeyEncryptionKeyLength = 16; + static constexpr int kKeyEncryptionKeyIdLength = 16; + + /// key_toolkit and kms_connection_config is to get KmsClient from the cache or create + /// KmsClient if it's not in the cache yet. cache_entry_lifetime_seconds is life time of + /// KmsClient in the cache. key_material_store is to store "key material" outside + /// parquet file, NULL if "key material" is stored inside parquet file. + FileKeyWrapper(KeyToolkit* key_toolkit, + const KmsConnectionConfig& kms_connection_config, + std::shared_ptr<FileKeyMaterialStore> key_material_store, + uint64_t cache_entry_lifetime_seconds, bool double_wrapping); + + /// Creates key_metadata field for a given data key, via wrapping the key with the + /// master key + std::string GetEncryptionKeyMetadata(const std::string& data_key, + const std::string& master_key_id, + bool is_footer_key); + + private: + KeyEncryptionKey CreateKeyEncryptionKey(const std::string& master_key_id); + + /// A map of Master Encryption Key ID -> KeyEncryptionKey, for the current token + std::shared_ptr<arrow::util::ConcurrentMap<KeyEncryptionKey>> kek_per_master_key_id_; Review comment: That variable references an item in the cache. When the item expires, another thread can remove it from the cache while this thread may still be accessing it. So I use `shared_ptr` here to make sure the object stays alive even after the item has been removed from the cache. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org