Repository: kudu Updated Branches: refs/heads/master 0efc1e26a -> 0609277c4
KUDU-1901. Fix crash in concurrent OpenSSL usage This fixes a crash in which a TLSContext's SSL_CTX was being used by a thread to accept a connection at the same time as its underlying certificate was being changed. A new unit test reproduced the error pretty reliably after looping for 10-20 seconds. With the new locking, it no longer produces. I also built OpenSSL with TSAN locally and verified that, after the change, the only races reported within OpenSSL were those where the code mentions the possibility of benign races. Change-Id: I578350ba6a492e6e3ef635e9294f25fc0dc9d125 Reviewed-on: http://gerrit.cloudera.org:8080/6187 Tested-by: Kudu Jenkins Reviewed-by: Dan Burkert <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f726f6ab Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f726f6ab Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f726f6ab Branch: refs/heads/master Commit: f726f6ab4a94948cef8233eb45220bcf8eb4771f Parents: 0efc1e2 Author: Todd Lipcon <[email protected]> Authored: Mon Feb 27 21:46:21 2017 -0800 Committer: Todd Lipcon <[email protected]> Committed: Wed Mar 1 18:57:03 2017 +0000 ---------------------------------------------------------------------- src/kudu/security/tls_context.cc | 37 ++++++++++++-------- src/kudu/security/tls_context.h | 24 +++++++------ src/kudu/security/tls_handshake-test.cc | 51 ++++++++++++++++++++++++++-- 3 files changed, 85 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/f726f6ab/src/kudu/security/tls_context.cc ---------------------------------------------------------------------- diff --git a/src/kudu/security/tls_context.cc b/src/kudu/security/tls_context.cc index be298a4..28c8e0e 100644 --- a/src/kudu/security/tls_context.cc +++ b/src/kudu/security/tls_context.cc @@ -17,6 +17,7 @@ #include "kudu/security/tls_context.h" +#include <mutex> #include <string> #include <vector> @@ -42,6 +43,7 @@ using strings::Substitute; using std::string; +using std::unique_lock; DEFINE_int32(ipki_server_key_size, 2048, "the number of bits for server cert's private key. The server cert " @@ -87,7 +89,8 @@ template<> struct SslTypeTraits<X509_STORE_CTX> { }; TlsContext::TlsContext() - : trusted_cert_count_(0), + : lock_(RWMutex::Priority::PREFER_READING), + trusted_cert_count_(0), has_cert_(false) { security::InitializeOpenSSL(); } @@ -166,7 +169,7 @@ Status TlsContext::Init() { return Status::OK(); } -Status TlsContext::VerifyCertChain(const Cert& cert) { +Status TlsContext::VerifyCertChainUnlocked(const Cert& cert) { X509_STORE* store = SSL_CTX_get_cert_store(ctx_.get()); auto store_ctx = ssl_make_unique<X509_STORE_CTX>(X509_STORE_CTX_new()); @@ -197,15 +200,16 @@ Status TlsContext::VerifyCertChain(const Cert& cert) { } Status TlsContext::UseCertificateAndKey(const Cert& cert, const PrivateKey& key) { + // Verify that the cert and key match. + RETURN_NOT_OK(cert.CheckKeyMatch(key)); + + std::unique_lock<RWMutex> lock(lock_); + // Verify that the appropriate CA certs have been loaded into the context // before we adopt a cert. Otherwise, client connections without the CA cert // available would fail. - RETURN_NOT_OK(VerifyCertChain(cert)); + RETURN_NOT_OK(VerifyCertChainUnlocked(cert)); - // Verify that the cert and key match. - RETURN_NOT_OK(cert.CheckKeyMatch(key)); - - MutexLock lock(lock_); CHECK(!has_cert_); OPENSSL_RET_NOT_OK(SSL_CTX_use_PrivateKey(ctx_.get(), key.GetRawData()), @@ -219,6 +223,7 @@ Status TlsContext::UseCertificateAndKey(const Cert& cert, const PrivateKey& key) Status TlsContext::AddTrustedCertificate(const Cert& cert) { VLOG(2) << "Trusting certificate " << cert.SubjectName(); + unique_lock<RWMutex> lock(lock_); ERR_clear_error(); auto* cert_store = SSL_CTX_get_cert_store(ctx_.get()); int rc = X509_STORE_add_cert(cert_store, cert.GetRawData()); @@ -233,12 +238,13 @@ Status TlsContext::AddTrustedCertificate(const Cert& cert) { } OPENSSL_RET_NOT_OK(rc, "failed to add trusted certificate"); } - MutexLock lock(lock_); trusted_cert_count_ += 1; return Status::OK(); } Status TlsContext::DumpTrustedCerts(vector<string>* cert_ders) const { + shared_lock<RWMutex> lock(lock_); + vector<string> ret; auto* cert_store = SSL_CTX_get_cert_store(ctx_.get()); @@ -316,7 +322,7 @@ Status TlsContext::GenerateSelfSignedCertAndKey() { ignore_result(X509_check_ca(cert.GetRawData())); // Step 4: Adopt the new key and cert. - MutexLock lock(lock_); + unique_lock<RWMutex> lock(lock_); CHECK(!has_cert_); OPENSSL_RET_NOT_OK(SSL_CTX_use_PrivateKey(ctx_.get(), key.GetRawData()), "failed to use private key"); @@ -328,7 +334,7 @@ Status TlsContext::GenerateSelfSignedCertAndKey() { } boost::optional<CertSignRequest> TlsContext::GetCsrIfNecessary() const { - MutexLock lock(lock_); + shared_lock<RWMutex> lock(lock_); if (csr_) { return csr_->Clone(); } @@ -336,12 +342,12 @@ boost::optional<CertSignRequest> TlsContext::GetCsrIfNecessary() const { } Status TlsContext::AdoptSignedCert(const Cert& cert) { + unique_lock<RWMutex> lock(lock_); + // Verify that the appropriate CA certs have been loaded into the context // before we adopt a cert. Otherwise, client connections without the CA cert // available would fail. - RETURN_NOT_OK(VerifyCertChain(cert)); - - MutexLock lock(lock_); + RETURN_NOT_OK(VerifyCertChainUnlocked(cert)); if (!csr_) { // A signed cert has already been adopted. @@ -392,7 +398,10 @@ Status TlsContext::InitiateHandshake(TlsHandshakeType handshake_type, TlsHandshake* handshake) const { CHECK(ctx_); CHECK(!handshake->ssl_); - handshake->adopt_ssl(ssl_make_unique(SSL_new(ctx_.get()))); + { + shared_lock<RWMutex> lock(lock_); + handshake->adopt_ssl(ssl_make_unique(SSL_new(ctx_.get()))); + } if (!handshake->ssl_) { return Status::RuntimeError("failed to create SSL handle", GetOpenSSLErrors()); } http://git-wip-us.apache.org/repos/asf/kudu/blob/f726f6ab/src/kudu/security/tls_context.h ---------------------------------------------------------------------- diff --git a/src/kudu/security/tls_context.h b/src/kudu/security/tls_context.h index 6806b0b..09f35a6 100644 --- a/src/kudu/security/tls_context.h +++ b/src/kudu/security/tls_context.h @@ -26,7 +26,8 @@ #include "kudu/security/cert.h" #include "kudu/security/tls_handshake.h" #include "kudu/util/atomic.h" -#include "kudu/util/mutex.h" +#include "kudu/util/locks.h" +#include "kudu/util/rw_mutex.h" #include "kudu/util/status.h" namespace kudu { @@ -75,7 +76,7 @@ class TlsContext { // Returns true if this TlsContext has been configured with a cert and key for // use with TLS-encrypted connections. bool has_cert() const { - MutexLock lock(lock_); + shared_lock<RWMutex> lock(lock_); return has_cert_; } @@ -83,13 +84,13 @@ class TlsContext { // cert and key for use with TLS-encrypted connections. If this method returns // true, then 'has_trusted_cert' will also return true. bool has_signed_cert() const { - MutexLock lock(lock_); + shared_lock<RWMutex> lock(lock_); return has_cert_ && !csr_; } // Returns true if this TlsContext has at least one certificate in its trust store. bool has_trusted_cert() const { - MutexLock lock(lock_); + shared_lock<RWMutex> lock(lock_); return trusted_cert_count_ > 0; } @@ -154,20 +155,21 @@ class TlsContext { // Return the number of certs that have been marked as trusted. // Used by tests. int trusted_cert_count_for_tests() const { - MutexLock lock(lock_); + shared_lock<RWMutex> lock(lock_); return trusted_cert_count_; } private: - Status VerifyCertChain(const Cert& cert) WARN_UNUSED_RESULT; + Status VerifyCertChainUnlocked(const Cert& cert) WARN_UNUSED_RESULT; - // Owned SSL context. + // Protects all members. + // + // Taken in write mode when any changes are modifying the underlying SSL_CTX + // using a mutating method (eg SSL_CTX_use_*) or when changing the value of + // any of our own member variables. + mutable RWMutex lock_; c_unique_ptr<SSL_CTX> ctx_; - - // Protexts trusted_cert_count_, has_cert_ and csr_, as well as ctx_ when it - // needs to be updated transactionally with has_cert_ and csr_. - mutable Mutex lock_; int32_t trusted_cert_count_; bool has_cert_; boost::optional<CertSignRequest> csr_; http://git-wip-us.apache.org/repos/asf/kudu/blob/f726f6ab/src/kudu/security/tls_handshake-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/security/tls_handshake-test.cc b/src/kudu/security/tls_handshake-test.cc index 681e270..60b1b91 100644 --- a/src/kudu/security/tls_handshake-test.cc +++ b/src/kudu/security/tls_handshake-test.cc @@ -17,9 +17,12 @@ #include "kudu/security/tls_handshake.h" +#include <atomic> #include <functional> #include <iostream> #include <string> +#include <thread> +#include <vector> #include <boost/optional.hpp> #include <gflags/gflags.h> @@ -29,9 +32,11 @@ #include "kudu/security/crypto.h" #include "kudu/security/security-test-util.h" #include "kudu/security/tls_context.h" +#include "kudu/util/scoped_cleanup.h" #include "kudu/util/test_util.h" using std::string; +using std::vector; DECLARE_int32(ipki_server_key_size); @@ -67,8 +72,7 @@ std::ostream& operator<<(std::ostream& o, Case c) { return o; } -class TestTlsHandshake : public KuduTest, - public ::testing::WithParamInterface<Case> { +class TestTlsHandshakeBase : public KuduTest { public: void SetUp() override { KuduTest::SetUp(); @@ -125,6 +129,49 @@ class TestTlsHandshake : public KuduTest, string key_path_; }; +class TestTlsHandshake : public TestTlsHandshakeBase, + public ::testing::WithParamInterface<Case> {}; + +class TestTlsHandshakeConcurrent : public TestTlsHandshakeBase, + public ::testing::WithParamInterface<int> {}; + +// Test concurrently running handshakes while changing the certificates on the TLS +// context. We parameterize across different numbers of threads, because surprisingly, +// fewer threads seems to trigger issues more easily in some cases. +INSTANTIATE_TEST_CASE_P(NumThreads, TestTlsHandshakeConcurrent, ::testing::Values(1, 2, 4, 8)); +TEST_P(TestTlsHandshakeConcurrent, TestConcurrentAdoptCert) { + const int kNumThreads = GetParam(); + + ASSERT_OK(server_tls_.GenerateSelfSignedCertAndKey()); + std::atomic<bool> done(false); + vector<std::thread> handshake_threads; + for (int i = 0; i < kNumThreads; i++) { + handshake_threads.emplace_back([&]() { + while (!done) { + RunHandshake(TlsVerificationMode::VERIFY_NONE, TlsVerificationMode::VERIFY_NONE); + } + }); + } + auto c = MakeScopedCleanup([&](){ + done = true; + for (std::thread& t : handshake_threads) { + t.join(); + } + }); + + SleepFor(MonoDelta::FromMilliseconds(10)); + { + PrivateKey ca_key; + Cert ca_cert; + ASSERT_OK(GenerateSelfSignedCAForTests(&ca_key, &ca_cert)); + Cert cert; + ASSERT_OK(CertSigner(&ca_cert, &ca_key).Sign(*server_tls_.GetCsrIfNecessary(), &cert)); + ASSERT_OK(server_tls_.AddTrustedCertificate(ca_cert)); + ASSERT_OK(server_tls_.AdoptSignedCert(cert)); + } + SleepFor(MonoDelta::FromMilliseconds(10)); +} + TEST_F(TestTlsHandshake, TestHandshakeSequence) { PrivateKey ca_key; Cert ca_cert;
