bkietz commented on code in PR #46017:
URL: https://github.com/apache/arrow/pull/46017#discussion_r2029158514


##########
cpp/src/parquet/encryption/file_key_wrapper.cc:
##########
@@ -113,14 +113,15 @@ KeyEncryptionKey FileKeyWrapper::CreateKeyEncryptionKey(
     const std::string& master_key_id) {
   std::string kek_bytes(kKeyEncryptionKeyLength, '\0');
   RandBytes(reinterpret_cast<uint8_t*>(kek_bytes.data()), kKeyEncryptionKeyLength);
+  SecureString secure_kek_bytes(std::move(kek_bytes));

Review Comment:
   ```suggestion
     SecureString secure_kek_bytes(std::string(kKeyEncryptionKeyLength, '\0'));
     RandBytes(secure_kek_bytes.as_span().data(), kKeyEncryptionKeyLength);
   ```



##########
cpp/src/parquet/encryption/read_configurations_test.cc:
##########
@@ -103,9 +103,9 @@ class TestDecryptionConfiguration
   // This vector will hold various decryption configurations.
   std::vector<std::shared_ptr<parquet::FileDecryptionProperties>>
       vector_of_decryption_configurations_;
-  std::string kFooterEncryptionKey_ = std::string(kFooterEncryptionKey);
-  std::string kColumnEncryptionKey1_ = std::string(kColumnEncryptionKey1);
-  std::string kColumnEncryptionKey2_ = std::string(kColumnEncryptionKey2);
+  SecureString kFooterEncryptionKey_ = kFooterEncryptionKey;

Review Comment:
   `const` seems warranted here
   ```suggestion
     const SecureString kFooterEncryptionKey_ = kFooterEncryptionKey;
   ```



##########
cpp/src/parquet/encryption/encryption.h:
##########
@@ -46,28 +47,28 @@ using ColumnPathToEncryptionPropertiesMap =
 
 class PARQUET_EXPORT DecryptionKeyRetriever {
  public:
-  virtual std::string GetKey(const std::string& key_metadata) = 0;
+  virtual encryption::SecureString GetKey(const std::string& key_metadata) = 0;
   virtual ~DecryptionKeyRetriever() {}
 };
 
 /// Simple integer key retriever
 class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
  public:
-  void PutKey(uint32_t key_id, const std::string& key);
-  std::string GetKey(const std::string& key_metadata) override;
+  void PutKey(uint32_t key_id, const encryption::SecureString& key);
+  encryption::SecureString GetKey(const std::string& key_metadata) override;

Review Comment:
   Nit: it's not relevant to this PR, but this signature is surprising enough
   that I think it'd be worthwhile to inline this function and include a comment:
   
   ```suggestion
     encryption::SecureString GetKey(const std::string& key_metadata) override {
       // key_metadata is string but for IntegerKeyIdRetriever it encodes
       // a native-endian 32 bit unsigned integer key_id
       uint32_t key_id;
       assert(key_metadata.size() == sizeof(key_id));
       memcpy(&key_id, key_metadata.data(), sizeof(key_id));
       return GetKey(key_id);
     }
   
     encryption::SecureString GetKey(uint32_t key_id) {
       return key_map_.at(key_id);
     }
   ```



##########
cpp/src/parquet/encryption/secure_string_test.cc:
##########
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <vector>
+
+#include "parquet/encryption/secure_string.h"
+
+namespace parquet::encryption::test {
+
+void assert_securely_cleared(const std::string& string) {
+  // the entire buffer of the string is filled with zeros
+  std::vector<char> zeros(string.capacity());
+  ::arrow::util::span actual(string.data(), string.capacity());
+  ::arrow::util::span expected(zeros.data(), zeros.size());
+  ASSERT_EQ(actual, expected);
+
+  // the string is empty
+  ASSERT_TRUE(string.empty());
+}
+
+TEST(TestSecureString, SecureClearString) {
+  // short string
+  {
+    std::string tiny("abc");
+    SecureString::secure_clear(tiny);
+    assert_securely_cleared(tiny);
+  }
+
+  // long string
+  {
+    std::string large(1024, 'x');
+    large.resize(1024, 'y');
+    SecureString::secure_clear(large);
+    assert_securely_cleared(large);
+  }
+
+  // empty string
+  {
+    // this creates an empty string with some non-zero characters in the string buffer
+    // we test that all those characters are securely cleared
+    std::string empty("abcdef");
+    empty.resize(0);
+    SecureString::secure_clear(empty);
+    assert_securely_cleared(empty);
+  }
+}
+
+TEST(TestSecureString, Construct) {
+  // move constructing from a string securely clears that string
+  std::string string("hello world");
+  SecureString secret_from_string(std::move(string));
+  assert_securely_cleared(string);
+  ASSERT_FALSE(secret_from_string.empty());
+
+  // move constructing from a secure string securely clears that secure string
+  // Note: there is no way to test the secure clearing of the moved secure string
+  SecureString secret_from_move_secret(std::move(secret_from_string));
+  ASSERT_TRUE(secret_from_string.empty());
+  ASSERT_FALSE(secret_from_move_secret.empty());
+
+  // copy constructing from a secure string does not modify that secure string
+  SecureString secret_from_secret(secret_from_move_secret);
+  ASSERT_FALSE(secret_from_move_secret.empty());
+  ASSERT_FALSE(secret_from_secret.empty());
+  ASSERT_EQ(secret_from_secret, secret_from_move_secret);
+}
+
+TEST(TestSecureString, Assign) {
+  // move assigning from a string securely clears that string
+  std::string string("hello world");
+  SecureString secret_from_string;
+  secret_from_string = std::move(string);
+  assert_securely_cleared(string);
+  ASSERT_FALSE(secret_from_string.empty());
+
+  // move assigning from a secure string securely clears that secure string
+  // Note: there is no way to test the secure clearing of the moved secure string
+  SecureString secret_from_move_secret;
+  secret_from_move_secret = std::move(secret_from_string);
+  ASSERT_TRUE(secret_from_string.empty());
+  ASSERT_FALSE(secret_from_move_secret.empty());
+
+  // assigning from a secure string does not modify that secure string
+  SecureString secret_from_secret;
+  secret_from_secret = secret_from_move_secret;
+  ASSERT_FALSE(secret_from_move_secret.empty());
+  ASSERT_FALSE(secret_from_secret.empty());
+  ASSERT_EQ(secret_from_secret, secret_from_move_secret);
+}
+

Review Comment:
   The standardese "true" way would be to template SecureString on an allocator 
so that you could intercept destruction of the bytes it stores. That's entirely 
too much here; we could just add a debug assertion inside ~SecureString (maybe 
only enabled by an env var or `#if defined(ARROW_ASSERT_SECURE_STRING_ZEROED)`)



##########
cpp/src/parquet/encryption/secure_string.h:
##########
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "arrow/util/span.h"
+#include "parquet/platform.h"
+
+namespace parquet::encryption {
+/**
+ * A secure string that ensures the wrapped string is cleared from memory on
+ * deconstruction. This class can only be created from std::string that are securely
+ * erased after creation.
+ *
+ * Note: This class does not provide a constructor / assignment operator that copies a
+ * std::string because that would allow code to create a SecureString while accidentally
+ * not noticing the need to securely erasing the argument after invoking the constructor /
+ * calling the assignment operator.
+ */
+class PARQUET_EXPORT SecureString {
+ public:
+  SecureString() noexcept = default;
+  SecureString(SecureString&&) noexcept;
+  SecureString(const SecureString&) noexcept = default;

Review Comment:
   ```suggestion
     SecureString(const SecureString&) = default;
   ```
   
   This should not be `noexcept`; copying the string requires a new allocation 
which may throw



##########
cpp/src/parquet/encryption/secure_string.h:
##########
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "arrow/util/span.h"
+#include "parquet/platform.h"
+
+namespace parquet::encryption {
+/**
+ * A secure string that ensures the wrapped string is cleared from memory on
+ * deconstruction. This class can only be created from std::string that are securely
+ * erased after creation.
+ *
+ * Note: This class does not provide a constructor / assignment operator that copies a
+ * std::string because that would allow code to create a SecureString while accidentally
+ * not noticing the need to securely erasing the argument after invoking the constructor /
+ * calling the assignment operator.
+ */
+class PARQUET_EXPORT SecureString {
+ public:
+  SecureString() noexcept = default;
+  SecureString(SecureString&&) noexcept;
+  SecureString(const SecureString&) noexcept = default;
+  explicit SecureString(std::string&&) noexcept;

Review Comment:
   ```suggestion
     explicit SecureString(std::string&&) noexcept;
     explicit SecureString(size_t size) : SecureString{std::string(size, '\0')} {}
   ```



##########
cpp/src/parquet/encryption/secure_string.h:
##########
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "arrow/util/span.h"
+#include "parquet/platform.h"
+
+namespace parquet::encryption {
+/**
+ * A secure string that ensures the wrapped string is cleared from memory on
+ * deconstruction. This class can only be created from std::string that are securely
+ * erased after creation.
+ *
+ * Note: This class does not provide a constructor / assignment operator that copies a
+ * std::string because that would allow code to create a SecureString while accidentally
+ * not noticing the need to securely erasing the argument after invoking the constructor /
+ * calling the assignment operator.
+ */
+class PARQUET_EXPORT SecureString {
+ public:
+  SecureString() noexcept = default;
+  SecureString(SecureString&&) noexcept;
+  SecureString(const SecureString&) noexcept = default;
+  explicit SecureString(std::string&&) noexcept;
+
+  SecureString& operator=(SecureString&&) noexcept;
+  SecureString& operator=(const SecureString&) noexcept;

Review Comment:
   ```suggestion
     SecureString& operator=(const SecureString&);
   ```
   (same for copy assignment)



##########
cpp/src/parquet/encryption/secure_string.cc:
##########
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encryption/secure_string.h"
+
+#include <openssl/crypto.h>
+#include <openssl/opensslv.h>
+#include <utility>
+#if defined(_WIN32)
+#  include <windows.h>
+#endif
+
+#include "arrow/util/span.h"
+#include "parquet/encryption/encryption.h"
+
+namespace parquet::encryption {
+SecureString::SecureString(SecureString&& secret) noexcept
+    : secret_(std::move(secret.secret_)) {}
+SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) {
+  SecureClear(&secret);
+}
+
+SecureString& SecureString::operator=(SecureString&& secret) noexcept {
+  if (this == &secret) {
+    // self-assignment
+    return *this;
+  }
+  Dispose();
+  secret_ = std::move(secret.secret_);
+  return *this;
+}
+SecureString& SecureString::operator=(const SecureString& secret) noexcept {
+  if (this == &secret) {
+    // self-assignment
+    return *this;
+  }
+  Dispose();
+  secret_ = secret.secret_;
+  return *this;
+}
+SecureString& SecureString::operator=(std::string&& secret) noexcept {
+  Dispose();
+  secret_ = std::move(secret);
+  SecureClear(&secret);
+  return *this;
+}
+
+bool SecureString::operator==(const SecureString& other) const {
+  return secret_ == other.secret_;
+}
+
+bool SecureString::operator!=(const SecureString& other) const {
+  return secret_ != other.secret_;
+}
+
+bool SecureString::empty() const { return secret_.empty(); }
+std::size_t SecureString::size() const { return secret_.size(); }
+std::size_t SecureString::length() const { return secret_.length(); }
+
+::arrow::util::span<uint8_t> SecureString::as_span() {
+  return {reinterpret_cast<uint8_t*>(secret_.data()), secret_.size()};
+}
+::arrow::util::span<const uint8_t> SecureString::as_span() const {
+  return {reinterpret_cast<const uint8_t*>(secret_.data()), secret_.size()};
+}
+std::string_view SecureString::as_view() const {
+  return {secret_.data(), secret_.size()};
+}
+
+void SecureString::Dispose() { SecureClear(&secret_); }
+void SecureString::SecureClear(std::string* secret) {
+  secret->clear();
+  SecureClear(reinterpret_cast<uint8_t*>(secret->data()), secret->capacity());
+}
+inline void SecureString::SecureClear(uint8_t* data, size_t size) {
+  // Heavily borrowed from libb2's `secure_zero_memory` at
+  // https://github.com/BLAKE2/libb2/blob/master/src/blake2-impl.h
+#if defined(_WIN32)
+  SecureZeroMemory(data, size);
+#elif defined(__STDC_LIB_EXT1__)
+  // memset_s is meant to not be optimized away
+  memset_s(data, size, 0, size);
+#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000
+  OPENSSL_cleanse(data, size);
+#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))
+  // glibc 2.25+ has explicit_bzero
+  explicit_bzero(data, size);
+#else
+  // Try to ensure that a true library call to memset() will be generated
+  // by the compiler.
+  static const volatile auto memset_v = &memset;
+  memset_v(data, 0, size);
+  __asm__ __volatile__("" ::"r"(data) : "memory");

Review Comment:
   It seems questionable to me that the ultimate fallback isn't just a
   simple loop:
   
   ```suggestion
     for (size_t i = 0; i < size; ++i) {
       data[i] = 0;
     }
   ```
   
   I don't know how cross platform this `__asm__` block is.
   
   
   Also, since this code is borrowed from another project, it needs to be
   mentioned in LICENSE.txt; for example, see
   https://github.com/apache/arrow/blob/7df396eec4c620ddc4a4db780ea0d5ffa4cf685b/LICENSE.txt#L1541-L1543



##########
cpp/src/parquet/encryption/internal_file_decryptor.cc:
##########
@@ -46,7 +46,7 @@ int32_t Decryptor::CiphertextLength(int32_t plaintext_len) const {
 
 int32_t Decryptor::Decrypt(::arrow::util::span<const uint8_t> ciphertext,
                            ::arrow::util::span<uint8_t> plaintext) {
-  return aes_decryptor_->Decrypt(ciphertext, str2span(key_), str2span(aad_), plaintext);
+  return aes_decryptor_->Decrypt(ciphertext, key_.as_span(), str2span(aad_), plaintext);

Review Comment:
   Since `str2span` is no longer used, we could delete it



##########
cpp/src/parquet/encryption/encryption.h:
##########
@@ -327,19 +322,19 @@ class PARQUET_EXPORT FileDecryptionProperties {
   }
 
  private:
-  std::string footer_key_;
+  encryption::SecureString footer_key_;
   std::string aad_prefix_;
   std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
 
-  const std::string empty_string_ = "";
+  const encryption::SecureString empty_string_ = encryption::SecureString("");

Review Comment:
   This looks like a holdover from the first draft of encryption; IIRC 
column_key() returned references to strings at that time (so `return "";` would 
fail to compile)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to