thamht4190 commented on a change in pull request #8023: URL: https://github.com/apache/arrow/pull/8023#discussion_r497981750
########## File path: cpp/src/parquet/encryption/file_key_unwrapper.h ########## @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/util/concurrent_map.h" + +#include "parquet/encryption/encryption.h" +#include "parquet/encryption/key_material.h" +#include "parquet/encryption/key_toolkit.h" +#include "parquet/encryption/key_toolkit_internal.h" +#include "parquet/encryption/kms_client.h" +#include "parquet/platform.h" + +namespace parquet { +namespace encryption { + +// This class will retrieve the key from "key metadata", following these steps: +// 1. Parse "key metadata" (see structure in KeyMetadata class). +// 2. Retrieve "key material" which can be stored inside or outside "key metadata" +// Currently we don't support the case "key material" stores outside "key metadata" +// yet. +// 3. Unwrap the "data encryption key" from "key material". There are 2 modes: +// 3.1. single wrapping: decrypt the wrapped "data encryption key" directly with "master +// encryption key" 3.2. double wrapping: 2 steps: 3.2.1. "key encryption key" is decrypted +// with "master encryption key" 3.2.2. 
"data encryption key" is decrypted with the above +// "key encryption key" +class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { + public: + /// key_toolkit and kms_connection_config is to get KmsClient from cache or create + /// KmsClient if it's not in the cache yet. cache_entry_lifetime_seconds is life time of + /// KmsClient in the cache. + FileKeyUnwrapper(KeyToolkit* key_toolkit, + const KmsConnectionConfig& kms_connection_config, + uint64_t cache_lifetime_seconds); + + std::string GetKey(const std::string& key_metadata) const override; + + private: + internal::KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material) const; + std::shared_ptr<KmsClient> GetKmsClientFromConfigOrKeyMaterial( + const KeyMaterial& key_material) const; + + /// A map of Key Encryption Key (KEK) ID -> KEK bytes, for the current token + mutable std::shared_ptr<arrow::util::ConcurrentMap<std::string>> kek_per_kek_id_; Review comment: This variable references an item in the cache. When the item expires, another thread can remove it from the cache while this thread is still accessing it. So I use shared_ptr here to make sure the object it points to stays alive after the item is removed from the cache. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org