bkietz commented on a change in pull request #8023: URL: https://github.com/apache/arrow/pull/8023#discussion_r497630717
########## File path: cpp/src/arrow/util/string.h ########## @@ -41,6 +41,10 @@ ARROW_EXPORT Status ParseHexValue(const char* data, uint8_t* out); namespace internal { +/// \brief Split a string with a delimiter +ARROW_EXPORT +std::vector<std::string> SplitString(util::string_view v, char delim); Review comment: Why doesn't this return `vector<string_view>`? ########## File path: cpp/src/parquet/encryption/key_encryption_key.h ########## @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> +#include <vector> + +#include "arrow/util/base64.h" + +namespace parquet { +namespace encryption { + +// In the double wrapping mode, each "data encryption key" (DEK) is encrypted with a “key +// encryption key” (KEK), that in turn is encrypted with a "master encryption key" (MEK). +// In a writer process, a random KEK is generated for each MEK ID, and cached in a <MEK-ID +// : KEK> map. This allows to perform an interaction with a KMS server only once for each +// MEK, in order to wrap its KEK. "Data encryption key" (DEK) wrapping is performed +// locally, and does not involve an interaction with a KMS server. 
+class KeyEncryptionKey { + public: + KeyEncryptionKey(const std::string& kek_bytes, const std::string& kek_id, + const std::string& encoded_wrapped_kek) + : kek_bytes_(kek_bytes), + kek_id_(kek_id), + encoded_wrapped_kek_(encoded_wrapped_kek) { + encoded_kek_id_ = + arrow::util::base64_encode(reinterpret_cast<const uint8_t*>(kek_id_.data()), + static_cast<uint32_t>(kek_id_.size())); + } Review comment: Please use `move`: ```suggestion KeyEncryptionKey(std::string kek_bytes, std::string kek_id, std::string encoded_wrapped_kek) : kek_bytes_(std::move(kek_bytes)), kek_id_(std::move(kek_id)), encoded_kek_id_(arrow::util::base64_encode(reinterpret_cast<const uint8_t*>(kek_id_.data()), static_cast<uint32_t>(kek_id_.size()))), encoded_wrapped_kek_(std::move(encoded_wrapped_kek)) {} ``` ########## File path: cpp/src/parquet/encryption/kms_client.cc ########## @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "parquet/encryption/kms_client.h" + +namespace parquet { +namespace encryption { + +constexpr const char KmsClient::kKmsInstanceIdDefault[]; +constexpr const char KmsClient::kKmsInstanceUrlDefault[]; +constexpr const char KmsClient::kKeyAccessTokenDefault[]; Review comment: No, they're static class members so even though they are constexpr their declaration https://github.com/apache/arrow/pull/8023/files#diff-063d5acd8fa092535ebe261f9f63b6b5R68 is not also a definition; these definitions of the constants indicate that the string `"DEFAULT"` is stored in `kms_client.o` ########## File path: cpp/src/arrow/util/concurrent_map.h ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <functional> +#include <unordered_map> +#include <utility> + +#include "arrow/util/mutex.h" + +namespace arrow { +namespace util { + +template <typename V> +class ConcurrentMap { Review comment: In all, I think this doesn't add sufficient value over an inlined `container, mutex` pair. 
I think this class should be removed, maybe extracting a helper for single lookup insertion: ```c++ template <typename K, typename V, typename Hash, typename Eq, typename Gen> auto GetOrInsert(std::unordered_map<K, V, Hash, Eq>* map, const K& key, Gen&& gen, V placeholder = V{}) -> decltype(map->begin()) { auto it_success = map->emplace(key, placeholder); if (!it_success.second) { // insertion of placeholder was blocked by an existing entry, return that return it_success.first; } // overwrite placeholder with computed value it_success.first->second = gen(); return it_success.first; } ``` ########## File path: cpp/src/arrow/json/object_parser.h ########## @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep + +#include <rapidjson/document.h> + +#include "arrow/result.h" +#include "arrow/util/string_view.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace json { + +namespace rj = arrow::rapidjson; + +class ARROW_EXPORT ObjectParser { + public: + bool Parse(arrow::util::string_view json); + + Result<std::string> GetString(const char* key) const; + Result<bool> GetBool(const char* key) const; + + private: + rj::Document _document; Review comment: Since ObjectParser and ObjectWriter are fairly thin wrappers around rj::Document, they can be removed or at least made `internal`. For example, the only public mention of ObjectParser is in KeyMaterial::Parse, where it could be replaced by a string. ########## File path: cpp/src/arrow/util/concurrent_map.h ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <functional> +#include <unordered_map> +#include <utility> + +#include "arrow/util/mutex.h" + +namespace arrow { +namespace util { + +template <typename V> +class ConcurrentMap { + public: + void Insert(const std::string& key, const V& value) { + auto lock = mutex_.Lock(); + map_.insert({key, value}); + } + + void Assign(const std::string& key, const V& value) { + auto lock = mutex_.Lock(); + map_[key] = value; + } + + V GetOrAssignIfNotExist(const std::string& key, std::function<V()> compute_value_func) { + auto lock = mutex_.Lock(); + auto it = map_.find(key); + if (it == map_.end()) { + map_.insert({key, compute_value_func()}); + } + return map_.at(key); Review comment: This can be accomplished with a single lookup if you have a lightweight default/placeholder for `V`: ```c++ auto it_success = map_.emplace(key, V{}); V* value_or_placeholder = &it_success.first->second; if (!it_success.second) { // insert was blocked by an existing entry, return that return *value_or_placeholder; } // overwrite placeholder with computed value *value_or_placeholder = compute_value_func(); return *value_or_placeholder; ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org