thamht4190 commented on a change in pull request #8023: URL: https://github.com/apache/arrow/pull/8023#discussion_r479634599
########## File path: cpp/src/parquet/key_material.cc ########## @@ -0,0 +1,161 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <rapidjson/document.h> +#include <rapidjson/stringbuffer.h> +#include <rapidjson/writer.h> + +#include "parquet/exception.h" +#include "parquet/key_material.h" +#include "parquet/key_metadata.h" + +namespace parquet { +namespace encryption { + +constexpr char KeyMaterial::KEY_MATERIAL_TYPE_FIELD[]; +constexpr char KeyMaterial::KEY_MATERIAL_TYPE1[]; + +constexpr char KeyMaterial::FOOTER_KEY_ID_IN_FILE[]; +constexpr char KeyMaterial::COLUMN_KEY_ID_IN_FILE_PREFIX[]; + +constexpr char KeyMaterial::IS_FOOTER_KEY_FIELD[]; +constexpr char KeyMaterial::DOUBLE_WRAPPING_FIELD[]; +constexpr char KeyMaterial::KMS_INSTANCE_ID_FIELD[]; +constexpr char KeyMaterial::KMS_INSTANCE_URL_FIELD[]; +constexpr char KeyMaterial::MASTER_KEY_ID_FIELD[]; +constexpr char KeyMaterial::WRAPPED_DEK_FIELD[]; +constexpr char KeyMaterial::KEK_ID_FIELD[]; +constexpr char KeyMaterial::WRAPPED_KEK_FIELD[]; + +KeyMaterial::KeyMaterial(bool is_footer_key, const std::string& kms_instance_id, + const std::string& kms_instance_url, + const std::string& master_key_id, bool is_double_wrapped, + const std::string& kek_id, 
+ const std::string& encoded_wrapped_kek, + const std::string& encoded_wrapped_dek) + : is_footer_key_(is_footer_key), + kms_instance_id_(kms_instance_id), + kms_instance_url_(kms_instance_url), + master_key_id_(master_key_id), + is_double_wrapped_(is_double_wrapped), + kek_id_(kek_id), + encoded_wrapped_kek_(encoded_wrapped_kek), + encoded_wrapped_dek_(encoded_wrapped_dek) {} + +KeyMaterial KeyMaterial::Parse(const std::string& key_material_string) { + rapidjson::Document document; + document.Parse(key_material_string.c_str()); + + if (document.HasParseError() || !document.IsObject()) { + throw ParquetException("Failed to parse key metadata " + key_material_string); + } + + // External key material - extract "key material type", and make sure it is supported + std::string key_material_type = document[KEY_MATERIAL_TYPE_FIELD].GetString(); + if (KEY_MATERIAL_TYPE1 != key_material_type) { + throw ParquetException("Wrong key material type: " + key_material_type + " vs " + + KEY_MATERIAL_TYPE1); + } + // Parse other fields (common to internal and external key material) + return Parse(document); +} + +KeyMaterial KeyMaterial::Parse(const rapidjson::Document& key_material_json) { + // 2. Check if "key material" belongs to file footer key + bool is_footer_key = key_material_json[IS_FOOTER_KEY_FIELD].GetBool(); + std::string kms_instance_id; + std::string kms_instance_url; + if (is_footer_key) { + // 3. For footer key, extract KMS Instance ID + kms_instance_id = key_material_json[KMS_INSTANCE_ID_FIELD].GetString(); + // 4. For footer key, extract KMS Instance URL + kms_instance_url = key_material_json[KMS_INSTANCE_URL_FIELD].GetString(); + } + // 5. Extract master key ID + std::string master_key_id = key_material_json[MASTER_KEY_ID_FIELD].GetString(); + // 6. Extract wrapped DEK + std::string encoded_wrapped_dek = key_material_json[WRAPPED_DEK_FIELD].GetString(); + std::string kek_id; + std::string encoded_wrapped_kek; + // 7. 
Check if "key material" was generated in double wrapping mode + bool is_double_wrapped = key_material_json[DOUBLE_WRAPPING_FIELD].GetBool(); + if (is_double_wrapped) { + // 8. In double wrapping mode, extract KEK ID + kek_id = key_material_json[KEK_ID_FIELD].GetString(); + // 9. In double wrapping mode, extract wrapped KEK + encoded_wrapped_kek = key_material_json[WRAPPED_KEK_FIELD].GetString(); + } + + return KeyMaterial(is_footer_key, kms_instance_id, kms_instance_url, master_key_id, + is_double_wrapped, kek_id, encoded_wrapped_kek, encoded_wrapped_dek); +} + +std::string KeyMaterial::CreateSerialized( + bool is_footer_key, const std::string& kms_instance_id, Review comment: I'm not sure it's a good idea to create a new struct, since it would have the same structure as `KeyMaterial`. Instead, we could make the setter methods public. However, I'm not sure either approach fits the idea that all of this information must be passed: with a new struct or setter methods, some information can easily be missed. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org