pitrou commented on code in PR #46626: URL: https://github.com/apache/arrow/pull/46626#discussion_r2111997664
########## cpp/src/arrow/util/secure_string_test.cc: ########## @@ -0,0 +1,339 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> +#include <algorithm> +#include <vector> + +#include "arrow/util/secure_string.h" + +namespace arrow::util::test { + +std::string_view StringArea(const std::string& string) { + return {string.data(), string.capacity()}; +} + +void AssertSecurelyCleared(const std::string_view area) { + // the entire area is filled with zeros + std::string zeros(area.size(), '\0'); + ASSERT_EQ(area, std::string_view(zeros)); +} + +void AssertSecurelyCleared(const std::string& string) { + AssertSecurelyCleared(StringArea(string)); +} + +/** + * Checks the area has been securely cleared after some position. + */ +void AssertSecurelyCleared(const std::string_view area, const size_t pos) { + // the area after pos is filled with zeros + if (pos < area.size()) { + std::string zeros(area.size() - pos, '\0'); + ASSERT_EQ(area.substr(pos), std::string_view(zeros)); + } +} + +/** + * Checks the area has been securely cleared from the secret value. + * Assumes the area has been released, so it might have been reclaimed and changed after + * cleaning. We cannot check for all-zeros, best we can check here is no secret character + * has leaked. If by any chance the modification produced a former key character at the + * right position, this will be false negative / flaky. Therefore, we check for three + * consecutive secret characters before we fail. + */ +void AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { + auto leaks = 0; + for (size_t i = 0; i < secret_value.size(); i++) { + if (area[i] == secret_value[i]) { + leaks++; + } else { + if (leaks >= 3) { + break; + } + leaks = 0; + } + } + if (leaks >= 3) { + FAIL() << leaks << " characters of secret leaked into " << area; + } +} + +TEST(TestSecureString, SecureClearString) { + // short string + { + std::string tiny("abc"); + auto old_area = StringArea(tiny); + SecureString::SecureClear(&tiny); + AssertSecurelyCleared(tiny); + AssertSecurelyCleared(old_area); + } + + // long string + { + std::string large(1024, 'x'); + large.resize(512, 'y'); + auto old_area = StringArea(large); + SecureString::SecureClear(&large); + AssertSecurelyCleared(large); + AssertSecurelyCleared(old_area); + } + + // empty string + { + // this creates an empty string with some non-zero characters in the string buffer + // we test that all those characters are securely cleared + std::string empty("abcdef"); + empty.resize(0); + auto old_area = StringArea(empty); + SecureString::SecureClear(&empty); + AssertSecurelyCleared(empty); + AssertSecurelyCleared(old_area); + } +} + +TEST(TestSecureString, Construct) { + // move-constructing from a string securely clears that string + std::string string("hello world"); + auto old_string = StringArea(string); + SecureString secret_from_string(std::move(string)); + AssertSecurelyCleared(string); + AssertSecurelyCleared(old_string); + ASSERT_FALSE(secret_from_string.empty()); + + // move-constructing from a secure string securely clears that secure string + auto old_secret_from_string_view = secret_from_string.as_view(); + auto old_secret_from_string_value = std::string(secret_from_string.as_view()); + SecureString secret_from_move_secret(std::move(secret_from_string)); + ASSERT_TRUE(secret_from_string.empty()); + AssertSecurelyCleared(old_secret_from_string_view); + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.as_view(), + std::string_view(old_secret_from_string_value)); + + // copy-constructing from a secure string does not modify that secure string + SecureString secret_from_secret(secret_from_move_secret); + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.as_view(), + std::string_view(old_secret_from_string_value)); + ASSERT_FALSE(secret_from_secret.empty()); + ASSERT_EQ(secret_from_secret, secret_from_move_secret); +} + +TEST(TestSecureString, Assign) { + // we initialize with the first string and iteratively assign the subsequent values + // the first two values are local (15 chars and less), the remainder are non-local + // strings (larger than 15 chars) memory management of short and long strings behaves Review Comment: The short-string-optimization threshold is implementation-dependent. It's 15 chars in some C++ standard libraries, but [in libc++ it's 22 chars](https://joellaity.com/2020/01/31/string.html). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org