bkietz commented on a change in pull request #8023:
URL: https://github.com/apache/arrow/pull/8023#discussion_r497630717



##########
File path: cpp/src/arrow/util/string.h
##########
@@ -41,6 +41,10 @@ ARROW_EXPORT Status ParseHexValue(const char* data, uint8_t* 
out);
 
 namespace internal {
 
+/// \brief Split a string with a delimiter
+ARROW_EXPORT
+std::vector<std::string> SplitString(util::string_view v, char delim);

Review comment:
       Why doesn't this return `vector<string_view>`?

##########
File path: cpp/src/parquet/encryption/key_encryption_key.h
##########
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "arrow/util/base64.h"
+
+namespace parquet {
+namespace encryption {
+
+// In the double wrapping mode, each "data encryption key" (DEK) is encrypted 
with a “key
+// encryption key” (KEK), that in turn is encrypted with a "master encryption 
key" (MEK).
+// In a writer process, a random KEK is generated for each MEK ID, and cached 
in a <MEK-ID
+// : KEK> map. This allows to perform an interaction with a KMS server only 
once for each
+// MEK, in order to wrap its KEK. "Data encryption key" (DEK) wrapping is 
performed
+// locally, and does not involve an interaction with a KMS server.
+class KeyEncryptionKey {
+ public:
+  KeyEncryptionKey(const std::string& kek_bytes, const std::string& kek_id,
+                   const std::string& encoded_wrapped_kek)
+      : kek_bytes_(kek_bytes),
+        kek_id_(kek_id),
+        encoded_wrapped_kek_(encoded_wrapped_kek) {
+    encoded_kek_id_ =
+        arrow::util::base64_encode(reinterpret_cast<const 
uint8_t*>(kek_id_.data()),
+                                   static_cast<uint32_t>(kek_id_.size()));
+  }

Review comment:
       Please use `move`:
   ```suggestion
     KeyEncryptionKey(std::string kek_bytes, std::string kek_id,
                      std::string encoded_wrapped_kek)
         : kek_bytes_(std::move(kek_bytes)),
           kek_id_(std::move(kek_id)),
           encoded_kek_id_(arrow::util::base64_encode(reinterpret_cast<const 
uint8_t*>(kek_id_.data()),
                                                      
static_cast<uint32_t>(kek_id_.size()))),
           encoded_wrapped_kek_(std::move(encoded_wrapped_kek)) {}
   ```

##########
File path: cpp/src/parquet/encryption/kms_client.cc
##########
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encryption/kms_client.h"
+
+namespace parquet {
+namespace encryption {
+
+constexpr const char KmsClient::kKmsInstanceIdDefault[];
+constexpr const char KmsClient::kKmsInstanceUrlDefault[];
+constexpr const char KmsClient::kKeyAccessTokenDefault[];

Review comment:
       No, they're static class members, so even though they are constexpr, their
declaration
https://github.com/apache/arrow/pull/8023/files#diff-063d5acd8fa092535ebe261f9f63b6b5R68
is not also a definition; these definitions of the constants indicate that the
string `"DEFAULT"` is stored in `kms_client.o`.

##########
File path: cpp/src/arrow/util/concurrent_map.h
##########
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+#include <unordered_map>
+#include <utility>
+
+#include "arrow/util/mutex.h"
+
+namespace arrow {
+namespace util {
+
+template <typename V>
+class ConcurrentMap {

Review comment:
       In all, I think this doesn't add sufficient value over an inlined 
`container, mutex` pair. I think this class should be removed, maybe extracting 
a helper for single lookup insertion:
   ```c++
   template <typename K, typename V, typename Hash, typename Eq, typename Gen>
   auto GetOrInsert(std::unordered_map<K, V, Hash, Eq>* map, const K& key, Gen&& gen,
                    V placeholder = V{})
     -> decltype(map->begin()) {
     auto it_success = map->emplace(key, placeholder);
     if (!it_success.second) {
       // insertion of placeholder was blocked by an existing entry, return that
       return it_success.first;
     }
     // overwrite placeholder with computed value
     it_success.first->second = gen();
     return it_success.first;
   }
   ```

##########
File path: cpp/src/arrow/json/object_parser.h
##########
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/json/rapidjson_defs.h"  // IWYU pragma: keep
+
+#include <rapidjson/document.h>
+
+#include "arrow/result.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace json {
+
+namespace rj = arrow::rapidjson;
+
+class ARROW_EXPORT ObjectParser {
+ public:
+  bool Parse(arrow::util::string_view json);
+
+  Result<std::string> GetString(const char* key) const;
+  Result<bool> GetBool(const char* key) const;
+
+ private:
+  rj::Document _document;

Review comment:
       Since ObjectParser and ObjectWriter are fairly thin wrappers around
rj::Document, they can be removed or at least made `internal`. For example, the
only public mention of ObjectParser is in KeyMaterial::Parse, where it could be
replaced by a string.

##########
File path: cpp/src/arrow/util/concurrent_map.h
##########
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+#include <unordered_map>
+#include <utility>
+
+#include "arrow/util/mutex.h"
+
+namespace arrow {
+namespace util {
+
+template <typename V>
+class ConcurrentMap {
+ public:
+  void Insert(const std::string& key, const V& value) {
+    auto lock = mutex_.Lock();
+    map_.insert({key, value});
+  }
+
+  void Assign(const std::string& key, const V& value) {
+    auto lock = mutex_.Lock();
+    map_[key] = value;
+  }
+
+  V GetOrAssignIfNotExist(const std::string& key, std::function<V()> 
compute_value_func) {
+    auto lock = mutex_.Lock();
+    auto it = map_.find(key);
+    if (it == map_.end()) {
+      map_.insert({key, compute_value_func()});
+    }
+    return map_.at(key);

Review comment:
       Can be accomplished with a single lookup if you have a lightweight
default/placeholder for `V`:
   ```c++
   auto it_success = map_.emplace(key, V{});
   V* value_or_placeholder = &it_success.first->second;
   if (!it_success.second) {
     // insert was blocked by an existing entry, return that
     return *value_or_placeholder;
   }
   // overwrite placeholder with computed value
   *value_or_placeholder = compute_value_func();
   return *value_or_placeholder;
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to