IMPALA-4118: extract encryption utils from BufferedBlockMgr As groundwork for IMPALA-4118, extract encryption and integrity verification routines into a separate module that can be used by the new BufferPool.
Simplify the logic in BufferedBlockMgr by not distinguishing between the integrity check and encryption options, which are controlled by the same command line flag anyway. This patch changes how the OpenSSL RNG is seeded. I consulted with the original author of the code (Michael Yoder), who suggested that the new approach would be appropriate: "I'd pick whatever implementation is easiest for you. You could add entropy once per query, or once every X keys (100 keys seems reasonable), or once every "convenient place". 4kb is probably overkill, you could use 512b for example." Testing: Added some unit tests for the utilities. We already have coverage of the BufferedBlockMgr with encryption in buffered-block-mgr-test. Also reduce the number of iterations in the buffered-block-mgr-test to save some testing time. Change-Id: Ibebe431f69c3c569cbff68171beaa32ef2ab03b6 Reviewed-on: http://gerrit.cloudera.org:8080/4389 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/757c68b2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/757c68b2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/757c68b2 Branch: refs/heads/master Commit: 757c68b29e21e64fc4586cdf24ee6f9369be460f Parents: 7050450 Author: Tim Armstrong <[email protected]> Authored: Mon Sep 12 16:01:08 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Fri Sep 30 10:24:13 2016 +0000 ---------------------------------------------------------------------- be/src/benchmarks/bswap-benchmark.cc | 1 - be/src/bufferpool/buffer-pool-test.cc | 2 - be/src/bufferpool/buffer-pool.cc | 2 - be/src/bufferpool/reservation-tracker-test.cc | 2 - be/src/bufferpool/reservation-tracker.cc | 2 - be/src/codegen/codegen-symbol-emitter.cc | 4 - be/src/common/init.cc | 18 ++- be/src/common/names.h | 4 + be/src/exec/catalog-op-executor.cc | 1 - be/src/exec/hdfs-parquet-scanner.cc | 1 - be/src/exec/kudu-scan-node-test.cc | 3 +- be/src/exec/kudu-scan-node.cc | 1 - be/src/exec/kudu-scanner.cc | 1 - be/src/exec/parquet-column-readers.cc | 2 - be/src/exec/parquet-metadata-utils.cc | 7 +- be/src/exprs/bit-byte-functions.cc | 3 +- be/src/exprs/operators-ir.cc | 9 +- be/src/exprs/timezone_db.cc | 1 - be/src/runtime/buffered-block-mgr-test.cc | 5 +- be/src/runtime/buffered-block-mgr.cc | 165 +++----------------- be/src/runtime/buffered-block-mgr.h | 67 +++------ be/src/runtime/data-stream-sender.cc | 1 - be/src/runtime/hdfs-fs-cache-test.cc | 2 - be/src/scheduling/request-pool-service.cc | 1 - be/src/util/CMakeLists.txt | 42 +++--- be/src/util/cpu-info.cc | 1 - be/src/util/hdr-histogram.cc | 3 +- be/src/util/openssl-util-test.cc | 166 +++++++++++++++++++++ be/src/util/openssl-util.cc | 141 +++++++++++++++++ be/src/util/openssl-util.h | 99 ++++++++++++ be/src/util/redactor-config-parser-test.cc | 5 +- be/src/util/redactor.cc | 9 +- 32 files changed, 505 insertions(+), 266 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/benchmarks/bswap-benchmark.cc ---------------------------------------------------------------------- diff --git a/be/src/benchmarks/bswap-benchmark.cc b/be/src/benchmarks/bswap-benchmark.cc index 54b5a2e..6add717 100644 --- a/be/src/benchmarks/bswap-benchmark.cc +++ b/be/src/benchmarks/bswap-benchmark.cc @@ -32,7 +32,6 @@ #include "common/names.h" using std::numeric_limits; -using strings::Substitute; using namespace impala; // This benchmark is to compare the performance for all available byteswap approaches: http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/bufferpool/buffer-pool-test.cc ---------------------------------------------------------------------- diff --git a/be/src/bufferpool/buffer-pool-test.cc b/be/src/bufferpool/buffer-pool-test.cc index d45f017..16cf12c 100644 --- a/be/src/bufferpool/buffer-pool-test.cc +++ b/be/src/bufferpool/buffer-pool-test.cc @@ -33,8 +33,6 @@ #include "common/names.h" -using strings::Substitute; - namespace impala { class BufferPoolTest : public ::testing::Test { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/bufferpool/buffer-pool.cc ---------------------------------------------------------------------- diff --git a/be/src/bufferpool/buffer-pool.cc b/be/src/bufferpool/buffer-pool.cc index c89b8f6..eaa4262 100644 --- a/be/src/bufferpool/buffer-pool.cc +++ b/be/src/bufferpool/buffer-pool.cc @@ -27,8 +27,6 @@ #include "util/bit-util.h" #include "util/uid-util.h" -using strings::Substitute; - namespace impala { /// The internal representation of a page, which can be pinned or unpinned. If the http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/bufferpool/reservation-tracker-test.cc ---------------------------------------------------------------------- diff --git a/be/src/bufferpool/reservation-tracker-test.cc b/be/src/bufferpool/reservation-tracker-test.cc index 8dd1e41..93bf7b8 100644 --- a/be/src/bufferpool/reservation-tracker-test.cc +++ b/be/src/bufferpool/reservation-tracker-test.cc @@ -28,8 +28,6 @@ #include "common/names.h" -using strings::Substitute; - namespace impala { class ReservationTrackerTest : public ::testing::Test { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/bufferpool/reservation-tracker.cc ---------------------------------------------------------------------- diff --git a/be/src/bufferpool/reservation-tracker.cc b/be/src/bufferpool/reservation-tracker.cc index 1c5a14e..c5ed086 100644 --- a/be/src/bufferpool/reservation-tracker.cc +++ b/be/src/bufferpool/reservation-tracker.cc @@ -27,8 +27,6 @@ #include "common/names.h" -using strings::Substitute; - namespace impala { ReservationTracker::ReservationTracker() : initialized_(false), mem_tracker_(NULL) {} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/codegen/codegen-symbol-emitter.cc ---------------------------------------------------------------------- diff --git a/be/src/codegen/codegen-symbol-emitter.cc b/be/src/codegen/codegen-symbol-emitter.cc index f3ae84e..b2c64e2 100644 --- a/be/src/codegen/codegen-symbol-emitter.cc +++ b/be/src/codegen/codegen-symbol-emitter.cc @@ -42,12 +42,8 @@ using namespace llvm; using namespace llvm::object; -using std::fstream; using std::hex; -using std::ofstream; using std::rename; -using std::unique_ptr; -using strings::Substitute; namespace impala { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/common/init.cc ---------------------------------------------------------------------- diff --git a/be/src/common/init.cc b/be/src/common/init.cc index 3b49f44..b5c7c71 100644 --- a/be/src/common/init.cc +++ b/be/src/common/init.cc @@ -26,6 +26,14 @@ #include "exprs/expr.h" #include "exprs/timezone_db.h" #include "gutil/atomicops.h" +#include "rpc/authentication.h" +#include "rpc/thrift-util.h" +#include "runtime/decimal-value.h" +#include "runtime/exec-env.h" +#include "runtime/hdfs-fs-cache.h" +#include "runtime/lib-cache.h" +#include "runtime/mem-tracker.h" +#include "runtime/timestamp-parse-util.h" #include "util/cpu-info.h" #include "util/debug-util.h" #include "util/decimal-util.h" @@ -34,20 +42,13 @@ #include "util/mem-info.h" #include "util/minidump.h" #include "util/network-util.h" +#include "util/openssl-util.h" #include "util/os-info.h" #include "util/pretty-printer.h" #include "util/redactor.h" #include "util/test-info.h" #include "util/thread.h" #include "util/time.h" -#include "runtime/decimal-value.h" -#include "runtime/exec-env.h" -#include "runtime/hdfs-fs-cache.h" -#include "runtime/lib-cache.h" -#include "runtime/mem-tracker.h" -#include "runtime/timestamp-parse-util.h" -#include "rpc/authentication.h" -#include "rpc/thrift-util.h" #include "common/names.h" @@ -191,6 +192,7 @@ void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm, AtomicOps_x86CPUFeaturesInit(); impala::InitThreading(); impala::TimestampParser::Init(); + impala::SeedOpenSSLRNG(); ABORT_IF_ERROR(impala::TimezoneDatabase::Initialize()); ABORT_IF_ERROR(impala::InitAuth(argv[0])); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/common/names.h ---------------------------------------------------------------------- diff --git a/be/src/common/names.h b/be/src/common/names.h index 1ac4792..dac250f 100644 --- a/be/src/common/names.h +++ b/be/src/common/names.h @@ -178,3 +178,7 @@ using boost::function; using boost::bind; using boost::mem_fn; #endif + +#ifdef STRINGS_SUBSTITUTE_H_ +using strings::Substitute; +#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exec/catalog-op-executor.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/catalog-op-executor.cc b/be/src/exec/catalog-op-executor.cc index 8806216..4d39e95 100644 --- a/be/src/exec/catalog-op-executor.cc +++ b/be/src/exec/catalog-op-executor.cc @@ -38,7 +38,6 @@ using namespace impala; using namespace apache::hive::service::cli::thrift; using namespace apache::thrift; -using strings::Substitute; DECLARE_int32(catalog_service_port); DECLARE_string(catalog_service_host); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exec/hdfs-parquet-scanner.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-parquet-scanner.cc b/be/src/exec/hdfs-parquet-scanner.cc index a4f1823..7d9adb4 100644 --- a/be/src/exec/hdfs-parquet-scanner.cc +++ b/be/src/exec/hdfs-parquet-scanner.cc @@ -47,7 +47,6 @@ #include "common/names.h" using llvm::Function; -using strings::Substitute; using namespace impala; DEFINE_double(parquet_min_filter_reject_ratio, 0.1, "(Advanced) If the percentage of " http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exec/kudu-scan-node-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/kudu-scan-node-test.cc b/be/src/exec/kudu-scan-node-test.cc index a0eabef..4eb7d71 100644 --- a/be/src/exec/kudu-scan-node-test.cc +++ b/be/src/exec/kudu-scan-node-test.cc @@ -37,12 +37,13 @@ #include "util/cpu-info.h" #include "util/test-info.h" +#include "common/names.h" + DEFINE_bool(skip_delete_table, false, "Skips deleting the tables at the end of the tests."); DEFINE_string(use_existing_table, "", "The name of the existing table to use."); DECLARE_bool(pick_only_leaders_for_tests); using apache::thrift::ThriftDebugString; -using strings::Substitute; namespace impala { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exec/kudu-scan-node.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/kudu-scan-node.cc b/be/src/exec/kudu-scan-node.cc index 827631a..faa093e 100644 --- a/be/src/exec/kudu-scan-node.cc +++ b/be/src/exec/kudu-scan-node.cc @@ -57,7 +57,6 @@ using kudu::client::KuduSchema; using kudu::client::KuduTable; using kudu::client::KuduValue; using kudu::Slice; -using strings::Substitute; namespace impala { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exec/kudu-scanner.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/kudu-scanner.cc b/be/src/exec/kudu-scanner.cc index 57ba356..11bef14 100644 --- a/be/src/exec/kudu-scanner.cc +++ b/be/src/exec/kudu-scanner.cc @@ -42,7 +42,6 @@ using kudu::client::KuduClient; using kudu::client::KuduScanBatch; using kudu::client::KuduSchema; using kudu::client::KuduTable; -using strings::Substitute; DEFINE_bool(pick_only_leaders_for_tests, false, "Whether to pick only leader replicas, for tests purposes only."); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exec/parquet-column-readers.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/parquet-column-readers.cc b/be/src/exec/parquet-column-readers.cc index 48acb27..2a83e9c 100644 --- a/be/src/exec/parquet-column-readers.cc +++ b/be/src/exec/parquet-column-readers.cc @@ -41,8 +41,6 @@ #include "common/names.h" -using strings::Substitute; - // Provide a workaround for IMPALA-1658. DEFINE_bool(convert_legacy_hive_parquet_utc_timestamps, false, "When true, TIMESTAMPs read from files written by Parquet-MR (used by Hive) will " http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exec/parquet-metadata-utils.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/parquet-metadata-utils.cc b/be/src/exec/parquet-metadata-utils.cc index 1b694ed..da3844a 100644 --- a/be/src/exec/parquet-metadata-utils.cc +++ b/be/src/exec/parquet-metadata-utils.cc @@ -30,11 +30,8 @@ #include "runtime/runtime-state.h" #include "util/debug-util.h" -using std::endl; -using std::string; -using std::stringstream; -using std::vector; -using strings::Substitute; +#include "common/names.h" + using boost::algorithm::is_any_of; using boost::algorithm::split; using boost::algorithm::token_compress_on; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exprs/bit-byte-functions.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/bit-byte-functions.cc b/be/src/exprs/bit-byte-functions.cc index 8e2f208..c2208ad 100644 --- a/be/src/exprs/bit-byte-functions.cc +++ b/be/src/exprs/bit-byte-functions.cc @@ -23,12 +23,13 @@ #include "util/bit-util.h" +#include "common/names.h" + using namespace impala_udf; using boost::make_unsigned; using impala::BitUtil; -using strings::Substitute; namespace impala { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exprs/operators-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/operators-ir.cc b/be/src/exprs/operators-ir.cc index f89c2cc..6ef41a8 100644 --- a/be/src/exprs/operators-ir.cc +++ b/be/src/exprs/operators-ir.cc @@ -16,14 +16,15 @@ // under the License. #include "exprs/operators.h" + +#include <boost/cstdint.hpp> + #include "exprs/anyval-util.h" +#include "gutil/strings/substitute.h" #include "runtime/string-value.inline.h" #include "runtime/timestamp-value.h" -#include <boost/cstdint.hpp> -#include <gutil/strings/substitute.h> - -using strings::Substitute; +#include "common/names.h" #define BINARY_OP_FN(NAME, TYPE, OP) \ TYPE Operators::NAME##_##TYPE##_##TYPE(\ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/exprs/timezone_db.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/timezone_db.cc b/be/src/exprs/timezone_db.cc index 7fed7c2..ae58f86 100644 --- a/be/src/exprs/timezone_db.cc +++ b/be/src/exprs/timezone_db.cc @@ -28,7 +28,6 @@ using boost::local_time::tz_database; using boost::local_time::time_zone_ptr; using boost::local_time::posix_time_zone; -using strings::Substitute; namespace impala { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/runtime/buffered-block-mgr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/buffered-block-mgr-test.cc b/be/src/runtime/buffered-block-mgr-test.cc index ae822bb..92c0e89 100644 --- a/be/src/runtime/buffered-block-mgr-test.cc +++ b/be/src/runtime/buffered-block-mgr-test.cc @@ -49,7 +49,6 @@ using boost::filesystem::directory_iterator; using boost::filesystem::remove; -using strings::Substitute; // Note: This is the default scratch dir created by impala. // FLAGS_scratch_dirs + TmpFileMgr::TMP_SUB_DIR_NAME. @@ -389,8 +388,8 @@ class BufferedBlockMgrTest : public ::testing::Test { void TestRandomInternalImpl(RuntimeState* state, BufferedBlockMgr* block_mgr, int num_buffers, int tid) { ASSERT_TRUE(block_mgr != NULL); - const int num_iterations = 100000; - const int iters_before_close = num_iterations - 5000; + const int num_iterations = 10000; + const int iters_before_close = num_iterations - 1000; bool close_called = false; unordered_map<BufferedBlockMgr::Block*, int> pinned_block_map; vector<pair<BufferedBlockMgr::Block*, int32_t>> pinned_blocks; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/runtime/buffered-block-mgr.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/buffered-block-mgr.cc b/be/src/runtime/buffered-block-mgr.cc index bf7b595..f45a93b 100644 --- a/be/src/runtime/buffered-block-mgr.cc +++ b/be/src/runtime/buffered-block-mgr.cc @@ -27,11 +27,6 @@ #include "util/runtime-profile-counters.h" #include "util/uid-util.h" -#include <openssl/rand.h> -#include <openssl/evp.h> -#include <openssl/sha.h> -#include <openssl/err.h> - #include <gutil/strings/substitute.h> DEFINE_bool(disk_spill_encryption, false, "Set this to encrypt and perform an integrity " @@ -226,8 +221,6 @@ BufferedBlockMgr::BufferedBlockMgr(RuntimeState* state, TmpFileMgr* tmp_file_mgr io_mgr_(state->io_mgr()), is_cancelled_(false), writes_issued_(0), - encryption_(FLAGS_disk_spill_encryption), - check_integrity_(FLAGS_disk_spill_encryption), debug_write_delay_ms_(0) { } @@ -695,15 +688,9 @@ Status BufferedBlockMgr::PinBlock(Block* block, bool* pinned, Block* release_blo DCHECK_EQ(offset, block->write_range_->len()); } - // Verify integrity first, because the hash was generated from encrypted data. - if (check_integrity_) { - status = VerifyHash(block); - if (!status.ok()) goto error; - } - - // Decryption is done in-place, since the buffer can't be accessed by anyone else. - if (encryption_) { - status = Decrypt(block); + if (FLAGS_disk_spill_encryption) { + // Decryption is done in-place, since the buffer can't be accessed by anyone else. + status = CheckHashAndDecrypt(block); if (!status.ok()) goto error; } @@ -785,16 +772,14 @@ Status BufferedBlockMgr::WriteUnpinnedBlock(Block* block) { } uint8_t* outbuf = NULL; - if (encryption_) { + if (FLAGS_disk_spill_encryption) { // The block->buffer() could be accessed during the write path, so we have to // make a copy of it while writing. - RETURN_IF_ERROR(Encrypt(block, &outbuf)); + RETURN_IF_ERROR(EncryptAndHash(block, &outbuf)); } else { outbuf = block->buffer(); } - if (check_integrity_) SetHash(block); - block->write_range_->SetData(outbuf, block->valid_data_len_); // Issue write through DiskIoMgr. @@ -872,7 +857,7 @@ void BufferedBlockMgr::WriteComplete(Block* block, const Status& write_status) { // Explicitly release our temporarily allocated buffer here so that it doesn't // hang around needlessly. - if (encryption_) EncryptDone(block); + if (FLAGS_disk_spill_encryption) EncryptedWriteComplete(block); // ReturnUnusedBlock() will clear the block, so save required state in local vars. // state is not valid if the block was deleted because the state may be torn down @@ -1297,13 +1282,6 @@ void BufferedBlockMgr::Init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile, if (initialized_) return; io_mgr->RegisterContext(&io_request_context_); - if (encryption_) { - // Seed the random number generator - // TODO: Try non-blocking read from /dev/random and add that, too. - // TODO: We don't need to re-seed every BufferedBlockMgr::Init. Consider doing this - // every X iterations. - RAND_load_file("/dev/urandom", 4096); - } profile_.reset(new RuntimeProfile(&obj_pool_, "BlockMgr")); parent_profile->AddChild(profile_.get()); @@ -1321,7 +1299,6 @@ void BufferedBlockMgr::Init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile, disk_read_timer_ = ADD_TIMER(profile_.get(), "TotalReadBlockTime"); buffer_wait_timer_ = ADD_TIMER(profile_.get(), "TotalBufferWaitTime"); encryption_timer_ = ADD_TIMER(profile_.get(), "TotalEncryptionTime"); - integrity_check_timer_ = ADD_TIMER(profile_.get(), "TotalIntegrityCheckTime"); scratch_space_bytes_used_counter_ = ADD_COUNTER(profile_.get(), "ScratchFileUsedBytes", TUnit::BYTES); @@ -1355,139 +1332,45 @@ Status BufferedBlockMgr::InitTmpFiles() { return Status::OK(); } -// Callback used by OpenSSLErr() - write the error given to us through buf to the -// stringstream that's passed in through ctx. -static int OpenSSLErrCallback(const char *buf, size_t len, void* ctx) { - stringstream* errstream = static_cast<stringstream*>(ctx); - *errstream << buf; - return 1; -} - -// Called upon OpenSSL errors; returns a non-OK status with an error message. -static Status OpenSSLErr(const string& msg) { - stringstream errstream; - errstream << msg << ": "; - ERR_print_errors_cb (OpenSSLErrCallback, &errstream); - return Status(Substitute("Openssl Error: $0", errstream.str())); -} - -Status BufferedBlockMgr::Encrypt(Block* block, uint8_t** outbuf) { - DCHECK(encryption_); +Status BufferedBlockMgr::EncryptAndHash(Block* block, uint8_t** outbuf) { + DCHECK(FLAGS_disk_spill_encryption); DCHECK(block->buffer()); DCHECK(!block->is_pinned_); DCHECK(!block->in_write_); DCHECK(outbuf); SCOPED_TIMER(encryption_timer_); - - // Since we're using AES-CFB mode, we must take care not to reuse a key/iv pair. - // Regenerate a new key and iv for every block of data we write, including between - // writes of the same Block. - RAND_bytes(block->key_, sizeof(block->key_)); - RAND_bytes(block->iv_, sizeof(block->iv_)); + // Encrypt to a temporary buffer since so that the original data is still in the buffer + // if the block is re-pinned while the write is still in-flight. block->encrypted_write_buffer_.reset(new uint8_t[block->valid_data_len_]); + // Since we're using AES-CFB mode, we must take care not to reuse a key/IV pair. + // Regenerate a new key and IV for every block of data we write, including between + // writes of the same Block. + block->key_.InitializeRandom(); + RETURN_IF_ERROR(block->key_.Encrypt( + block->buffer(), block->valid_data_len_, block->encrypted_write_buffer_.get())); - EVP_CIPHER_CTX ctx; - int len = static_cast<int>(block->valid_data_len_); - - // Create and initialize the context for encryption - EVP_CIPHER_CTX_init(&ctx); - EVP_CIPHER_CTX_set_padding(&ctx, 0); - - // Start encryption. We use a 256-bit AES key, and the cipher block mode - // is CFB because this gives us a stream cipher, which supports arbitrary - // length ciphertexts - it doesn't have to be a multiple of 16 bytes. - if (EVP_EncryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, block->key_, block->iv_) != 1) { - return OpenSSLErr("EVP_EncryptInit_ex failure"); - } - - // Encrypt block->buffer() into the new encrypted_write_buffer_ - if (EVP_EncryptUpdate(&ctx, block->encrypted_write_buffer_.get(), &len, - block->buffer(), len) != 1) { - return OpenSSLErr("EVP_EncryptUpdate failure"); - } - - // This is safe because we're using CFB mode without padding. - DCHECK_EQ(len, block->valid_data_len_); - - // Finalize encryption. - if (1 != EVP_EncryptFinal_ex(&ctx, block->encrypted_write_buffer_.get() + len, &len)) { - return OpenSSLErr("EVP_EncryptFinal failure"); - } - - // Again safe due to CFB with no padding - DCHECK_EQ(len, 0); + block->hash_.Compute(block->encrypted_write_buffer_.get(), block->valid_data_len_); *outbuf = block->encrypted_write_buffer_.get(); return Status::OK(); } -void BufferedBlockMgr::EncryptDone(Block* block) { - DCHECK(encryption_); +void BufferedBlockMgr::EncryptedWriteComplete(Block* block) { + DCHECK(FLAGS_disk_spill_encryption); DCHECK(block->encrypted_write_buffer_.get()); block->encrypted_write_buffer_.reset(); } -Status BufferedBlockMgr::Decrypt(Block* block) { - DCHECK(encryption_); +Status BufferedBlockMgr::CheckHashAndDecrypt(Block* block) { + DCHECK(FLAGS_disk_spill_encryption); DCHECK(block->buffer()); SCOPED_TIMER(encryption_timer_); - EVP_CIPHER_CTX ctx; - int len = static_cast<int>(block->valid_data_len_); - - // Create and initialize the context for encryption - EVP_CIPHER_CTX_init(&ctx); - EVP_CIPHER_CTX_set_padding(&ctx, 0); - - // Start decryption; same parameters as encryption for obvious reasons - if (EVP_DecryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, block->key_, block->iv_) != 1) { - return OpenSSLErr("EVP_DecryptInit_ex failure"); - } - - // Decrypt block->buffer() in-place. Safe because no one is accessing it. - if (EVP_DecryptUpdate(&ctx, block->buffer(), &len, block->buffer(), len) != 1) { - return OpenSSLErr("EVP_DecryptUpdate failure"); - } - - // This is safe because we're using CFB mode without padding. - DCHECK_EQ(len, block->valid_data_len_); - - // Finalize decryption. - if (1 != EVP_DecryptFinal_ex(&ctx, block->buffer() + len, &len)) { - return OpenSSLErr("EVP_DecryptFinal failure"); - } - - // Again safe due to CFB with no padding - DCHECK_EQ(len, 0); - - return Status::OK(); -} - -void BufferedBlockMgr::SetHash(Block* block) { - DCHECK(check_integrity_); - SCOPED_TIMER(integrity_check_timer_); - uint8_t* data = NULL; - if (encryption_) { - DCHECK(block->encrypted_write_buffer_.get()); - data = block->encrypted_write_buffer_.get(); - } else { - DCHECK(block->buffer()); - data = block->buffer(); - } - // Explicitly ignore the return value from SHA256(); it can't fail. - (void) SHA256(data, block->valid_data_len_, block->hash_); -} - -Status BufferedBlockMgr::VerifyHash(Block* block) { - DCHECK(check_integrity_); - DCHECK(block->buffer()); - SCOPED_TIMER(integrity_check_timer_); - uint8_t test_hash[SHA256_DIGEST_LENGTH]; - (void) SHA256(block->buffer(), block->valid_data_len_, test_hash); - if (memcmp(test_hash, block->hash_, SHA256_DIGEST_LENGTH) != 0) { + if (!block->hash_.Verify(block->buffer(), block->valid_data_len_)) { return Status("Block verification failure"); } - return Status::OK(); + // Decrypt block->buffer() in-place. Safe because no one is accessing it. + return block->key_.Decrypt(block->buffer(), block->valid_data_len_, block->buffer()); } } // namespace impala http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/runtime/buffered-block-mgr.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/buffered-block-mgr.h b/be/src/runtime/buffered-block-mgr.h index b715ec5..858a2a9 100644 --- a/be/src/runtime/buffered-block-mgr.h +++ b/be/src/runtime/buffered-block-mgr.h @@ -20,9 +20,7 @@ #include "runtime/disk-io-mgr.h" #include "runtime/tmp-file-mgr.h" - -#include <openssl/aes.h> -#include <openssl/sha.h> +#include "util/openssl-util.h" namespace impala { @@ -253,26 +251,19 @@ class BufferedBlockMgr { /// Number of rows in this block. int num_rows_; - /// If encryption_ is on, in the write path we allocate a new buffer to hold - /// encrypted data while it's being written to disk. The read path, having no - /// references to the data, can be decrypted in place. + /// If --disk_spill_encryption is on, in the write path we allocate a new buffer to + /// hold encrypted data while it's being written to disk. In the read path, we can + /// instead decrypt data in place since no one else because the read buffer isn't + /// accessible to any other threads until Pin() returns. boost::scoped_array<uint8_t> encrypted_write_buffer_; - /// If encryption_ is on, a AES 256-bit key. Regenerated on each write. - uint8_t key_[32]; - - /// If encryption_ is on, the IV to use. IV stands for Initialization Vector, - /// and is used as an input to the cipher as the "block to supply before the - /// first block of plaintext". This is required because all ciphers (except the - /// weak ECB) are built such that each block depends on the output from the - /// previous block. Since the first block doesn't have a previous block, we - /// supply this IV. Think of it as starting off the chain of encryption. - /// This IV is also regenerated on each write. - uint8_t iv_[AES_BLOCK_SIZE]; + /// If --disk_spill_encryption is on, a AES 256-bit key and initialization vector. + /// Regenerated on each write. + EncryptionKey key_; - /// If integrity_ is on, our SHA256 hash of the data being written. Filled in on - /// writes; verified on reads. This is calculated _after_ encryption. - uint8_t hash_[SHA256_DIGEST_LENGTH]; + /// If --disk_spill_encryption is on, our hash of the data being written. Filled in + /// on writes; verified on reads. This is calculated _after_ encryption. + IntegrityHash hash_; /// Block state variables. The block's buffer can be freed only if is_pinned_ and /// in_write_ are both false. @@ -638,12 +629,9 @@ class BufferedBlockMgr { /// Number of writes outstanding (issued but not completed). RuntimeProfile::Counter* outstanding_writes_counter_; - /// Time spent in disk spill encryption and decryption. + /// Time spent in disk spill encryption, decryption, and integrity checking. RuntimeProfile::Counter* encryption_timer_; - /// Time spent in disk spill integrity generation and checking. - RuntimeProfile::Counter* integrity_check_timer_; - /// Amount of scratch space allocated in bytes. RuntimeProfile::Counter* scratch_space_bytes_used_counter_; @@ -661,31 +649,16 @@ class BufferedBlockMgr { BlockMgrsMap; static BlockMgrsMap query_to_block_mgrs_; - /// Takes the data in buffer(), allocates encrypted_write_buffer_, and returns - /// a pointer to the encrypted data in outbuf. - Status Encrypt(Block* block, uint8_t** outbuf); - - /// Deallocates temporary buffer alloced in Encrypt(). - void EncryptDone(Block* block); - - /// Decrypts the contents of buffer() in place. - Status Decrypt(Block* block); - - /// Takes a cryptographic hash of the data and sets hash_ with it. - void SetHash(Block* block); - - /// Verifies that the contents of buffer() match those that were set by SetHash() - Status VerifyHash(Block* block); + /// Takes the data in buffer(), allocates 'encrypted_write_buffer_', encrypts the data + /// into 'encrypted_write_buffer_' and computes 'hash_'. Returns a pointer to the + /// encrypted data in 'outbuf'. + Status EncryptAndHash(Block* block, uint8_t** outbuf); - /// Set to true if --disk_spill_encryption is true. When true, blocks will be encrypted - /// before being written to disk. - const bool encryption_; + /// Deallocates the block's encrypted write buffer alloced in EncryptAndHash(). + void EncryptedWriteComplete(Block* block); - /// Set to true if --disk_spill_encryption is true. We can split this into a different - /// flag in the future, but there is little performance overhead in the integrity check - /// and hence no real reason to keep this separate from encryption. When true, blocks - /// will have an integrity check (SHA-256) performed after being read from disk. - const bool check_integrity_; + /// Verifies the integrity hash and decrypts the contents of buffer() in place. + Status CheckHashAndDecrypt(Block* block); /// Debug option to delay write completion. int debug_write_delay_ms_; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/runtime/data-stream-sender.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/data-stream-sender.cc b/be/src/runtime/data-stream-sender.cc index 33dc7a0..2a8ba6a 100644 --- a/be/src/runtime/data-stream-sender.cc +++ b/be/src/runtime/data-stream-sender.cc @@ -48,7 +48,6 @@ using boost::condition_variable; using namespace apache::thrift; using namespace apache::thrift::protocol; using namespace apache::thrift::transport; -using strings::Substitute; namespace impala { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/runtime/hdfs-fs-cache-test.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/hdfs-fs-cache-test.cc b/be/src/runtime/hdfs-fs-cache-test.cc index bd110f0..9c69dc3 100644 --- a/be/src/runtime/hdfs-fs-cache-test.cc +++ b/be/src/runtime/hdfs-fs-cache-test.cc @@ -22,8 +22,6 @@ #include "testutil/gtest-util.h" #include "common/names.h" -using strings::Substitute; - namespace impala { void ValidateNameNode(const string& path, const string& expected_namenode, const string& expected_error) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/scheduling/request-pool-service.cc ---------------------------------------------------------------------- diff --git a/be/src/scheduling/request-pool-service.cc b/be/src/scheduling/request-pool-service.cc index 6c9ccdb..9bddde9 100644 --- a/be/src/scheduling/request-pool-service.cc +++ b/be/src/scheduling/request-pool-service.cc @@ -34,7 +34,6 @@ #include "common/names.h" using namespace impala; -using strings::Substitute; DEFINE_bool(require_username, false, "Requires that a user be provided in order to " "schedule requests. If enabled and a user is not provided, requests will be " http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/util/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 62fff80..0dfd12e 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -59,6 +59,7 @@ add_library(Util metrics.cc minidump.cc network-util.cc + openssl-util.cc os-info.cc os-util.cc parse-util.cc @@ -102,35 +103,36 @@ target_link_libraries(parquet-reader ${IMPALA_LINK_LIBS}) target_link_libraries(loggingsupport ${IMPALA_LINK_LIBS_DYNAMIC_TARGETS}) -ADD_BE_TEST(runtime-profile-test) ADD_BE_TEST(benchmark-test) -ADD_BE_TEST(decompress-test) -ADD_BE_TEST(metrics-test) -ADD_BE_TEST(debug-util-test) -ADD_BE_TEST(coding-util-test) +ADD_BE_TEST(bitmap-test) ADD_BE_TEST(bit-util-test) -ADD_BE_TEST(rle-test) ADD_BE_TEST(blocking-queue-test) +ADD_BE_TEST(bloom-filter-test) +ADD_BE_TEST(coding-util-test) +ADD_BE_TEST(debug-util-test) +ADD_BE_TEST(decompress-test) ADD_BE_TEST(dict-test) -ADD_BE_TEST(thread-pool-test) +ADD_BE_TEST(error-util-test) +ADD_BE_TEST(filesystem-util-test) +ADD_BE_TEST(fixed-size-hash-table-test) +ADD_BE_TEST(hdfs-util-test) ADD_BE_TEST(internal-queue-test) -ADD_BE_TEST(string-parser-test) +ADD_BE_TEST(logging-support-test) +ADD_BE_TEST(lru-cache-test) +ADD_BE_TEST(metrics-test) +ADD_BE_TEST(openssl-util-test) ADD_BE_TEST(parse-util-test) -ADD_BE_TEST(promise-test) -ADD_BE_TEST(symbols-util-test) #ADD_BE_TEST(perf-counters-test) -ADD_BE_TEST(webserver-test) ADD_BE_TEST(pretty-printer-test) +ADD_BE_TEST(proc-info-test) +ADD_BE_TEST(promise-test) ADD_BE_TEST(redactor-config-parser-test) ADD_BE_TEST(redactor-test) ADD_BE_TEST(redactor-unconfigured-test) -ADD_BE_TEST(error-util-test) -ADD_BE_TEST(proc-info-test) -ADD_BE_TEST(lru-cache-test) -ADD_BE_TEST(filesystem-util-test) -ADD_BE_TEST(bitmap-test) -ADD_BE_TEST(fixed-size-hash-table-test) -ADD_BE_TEST(bloom-filter-test) -ADD_BE_TEST(logging-support-test) -ADD_BE_TEST(hdfs-util-test) +ADD_BE_TEST(rle-test) +ADD_BE_TEST(runtime-profile-test) +ADD_BE_TEST(string-parser-test) +ADD_BE_TEST(symbols-util-test) +ADD_BE_TEST(thread-pool-test) ADD_BE_TEST(uid-util-test) +ADD_BE_TEST(webserver-test) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/util/cpu-info.cc ---------------------------------------------------------------------- diff --git a/be/src/util/cpu-info.cc b/be/src/util/cpu-info.cc index ec667c4..532c9dd 100644 --- a/be/src/util/cpu-info.cc +++ b/be/src/util/cpu-info.cc @@ -38,7 +38,6 @@ using boost::algorithm::contains; using boost::algorithm::trim; using std::max; -using strings::Substitute; DECLARE_bool(abort_on_config_error); DEFINE_int32(num_cores, 0, "(Advanced) If > 0, it sets the number of cores available to" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/util/hdr-histogram.cc ---------------------------------------------------------------------- diff --git a/be/src/util/hdr-histogram.cc b/be/src/util/hdr-histogram.cc index 539cefc..25b8835 100644 --- a/be/src/util/hdr-histogram.cc +++ b/be/src/util/hdr-histogram.cc @@ -26,12 +26,13 @@ #include "common/status.h" +#include "common/names.h" + using base::subtle::Atomic64; using base::subtle::NoBarrier_AtomicIncrement; using base::subtle::NoBarrier_Store; using base::subtle::NoBarrier_Load; using base::subtle::NoBarrier_CompareAndSwap; -using strings::Substitute; using namespace impala; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/util/openssl-util-test.cc ---------------------------------------------------------------------- diff --git a/be/src/util/openssl-util-test.cc b/be/src/util/openssl-util-test.cc new file mode 100644 index 0000000..b0238bf --- /dev/null +++ b/be/src/util/openssl-util-test.cc @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> +#include <openssl/rand.h> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_int.hpp> + +#include "common/init.h" +#include "testutil/gtest-util.h" +#include "util/openssl-util.h" + +using boost::uniform_int; +using boost::mt19937_64; + +namespace impala { + +class OpenSSLUtilTest : public ::testing::Test { + protected: + virtual void SetUp() { SeedOpenSSLRNG(); } + virtual void TearDown() {} + + /// Fill buffer 'data' with 'len' bytes of random binary data from 'rng_'. + /// 'len' must be a multiple of 8 bytes'. + void GenerateRandomData(uint8_t* data, int64_t len) { + DCHECK_EQ(len % 8, 0); + for (int64_t i = 0; i < len; i += sizeof(uint64_t)) { + *(reinterpret_cast<uint64_t*>(&data[i])) = + uniform_int<uint64_t>(0, numeric_limits<uint64_t>::max())(rng_); + } + } + + mt19937_64 rng_; +}; + +/// Test basic encryption functionality. +TEST_F(OpenSSLUtilTest, Encryption) { + const int buffer_size = 1024 * 1024; + vector<uint8_t> original(buffer_size); + vector<uint8_t> prev_encrypted(buffer_size); + vector<uint8_t> encrypted(buffer_size); + vector<uint8_t> decrypted(buffer_size); + GenerateRandomData(original.data(), buffer_size); + + // Iterate multiple times to ensure that key regeneration works correctly. + EncryptionKey key; + for (int i = 0; i < 2; ++i) { + key.InitializeRandom(); // Generate a new key for each iteration. + + // Check that OpenSSL is happy with the amount of entropy we're feeding it. + DCHECK_EQ(1, RAND_status()); + + ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data())); + if (i > 0) { + // Check that we're not somehow reusing the same key. + ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size)); + } + memcpy(prev_encrypted.data(), encrypted.data(), buffer_size); + + // We should get the original data by decrypting it. + ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data())); + ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size)); + } +} + +/// Test that encryption and decryption work in-place. +TEST_F(OpenSSLUtilTest, EncryptInPlace) { + const int buffer_size = 1024 * 1024; + vector<uint8_t> original(buffer_size); + vector<uint8_t> scratch(buffer_size); // Scratch buffer for in-place encryption. + + GenerateRandomData(original.data(), buffer_size); + memcpy(scratch.data(), original.data(), buffer_size); + + EncryptionKey key; + key.InitializeRandom(); + ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data())); + // Check that encryption did something + ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size)); + ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data())); + // Check that we get the original data back. + ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size)); +} + +/// Test that encryption works with buffer lengths that don't fit in a 32-bit integer. +TEST_F(OpenSSLUtilTest, EncryptInPlaceHugeBuffer) { + const int64_t buffer_size = 3 * 1024L * 1024L * 1024L; + vector<uint8_t> original(buffer_size); + vector<uint8_t> scratch(buffer_size); // Scratch buffer for in-place encryption. + GenerateRandomData(original.data(), buffer_size); + memcpy(scratch.data(), original.data(), buffer_size); + + EncryptionKey key; + key.InitializeRandom(); + ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data())); + // Check that encryption did something + ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size)); + ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data())); + // Check that we get the original data back. + ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size)); +} + +/// Test basic integrity hash functionality. +TEST_F(OpenSSLUtilTest, IntegrityHash) { + const int buffer_size = 1024 * 1024; + vector<uint8_t> buf1(buffer_size); + vector<uint8_t> buf1_copy(buffer_size); + vector<uint8_t> buf2(buffer_size); + GenerateRandomData(buf1.data(), buffer_size); + memcpy(buf1_copy.data(), buf1.data(), buffer_size); + memcpy(buf2.data(), buf1.data(), buffer_size); + ++buf2[buffer_size - 1]; // Alter a byte in buf2 to ensure it's different. + + IntegrityHash buf1_hash; + buf1_hash.Compute(buf1.data(), buffer_size); + EXPECT_TRUE(buf1_hash.Verify(buf1.data(), buffer_size)); + EXPECT_TRUE(buf1_hash.Verify(buf1_copy.data(), buffer_size)); + + EXPECT_FALSE(buf1_hash.Verify(buf2.data(), buffer_size)); + // We should generally get different results for different buffer sizes (unless we're + // cosmically lucky and get a hash collision). + EXPECT_FALSE(buf1_hash.Verify(buf1.data(), buffer_size / 2)); +} + +/// Test that integrity hash works with buffer lengths that don't fit in a 32-bit integer. +TEST_F(OpenSSLUtilTest, IntegrityHashHugeBuffer) { + const int64_t buffer_size = 3L * 1024L * 1024L * 1024L; + vector<uint8_t> buf(buffer_size); + GenerateRandomData(buf.data(), buffer_size); + + IntegrityHash hash; + hash.Compute(buf.data(), buffer_size); + EXPECT_TRUE(hash.Verify(buf.data(), buffer_size)); + // We should generally get different results for different buffer sizes (unless we're + // cosmically lucky and get a hash collision). + EXPECT_FALSE(hash.Verify(buf.data(), buffer_size - 10)); +} + +/// Test that we are seeding the OpenSSL random number generator well enough to +/// generate lots of encryption keys. +TEST_F(OpenSSLUtilTest, RandSeeding) { + for (int i = 0; i < 100000; ++i) { + // Check that OpenSSL is happy with the amount of entropy we're feeding it. + DCHECK_EQ(1, RAND_status()); + + EncryptionKey key; + key.InitializeRandom(); + } +} +} + +IMPALA_TEST_MAIN(); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/util/openssl-util.cc ---------------------------------------------------------------------- diff --git a/be/src/util/openssl-util.cc b/be/src/util/openssl-util.cc new file mode 100644 index 0000000..e3b2299 --- /dev/null +++ b/be/src/util/openssl-util.cc @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "util/openssl-util.h" + +#include <limits.h> +#include <sstream> + +#include <openssl/err.h> +#include <openssl/evp.h> +#include <openssl/rand.h> +#include <openssl/sha.h> + +#include "common/atomic.h" +#include "gutil/strings/substitute.h" + +#include "common/names.h" + +namespace impala { + +// Counter to track the number of encryption keys generated. Incremented before each key +// is generated. +static AtomicInt64 keys_generated(0); + +// Reseed the OpenSSL with new entropy after generating this number of keys. +static const int RNG_RESEED_INTERVAL = 128; + +// Number of bytes of entropy to add at RNG_RESEED_INTERVAL. +static const int RNG_RESEED_BYTES = 512; + +// Callback used by OpenSSLErr() - write the error given to us through buf to the +// stringstream that's passed in through ctx. +static int OpenSSLErrCallback(const char* buf, size_t len, void* ctx) { + stringstream* errstream = static_cast<stringstream*>(ctx); + *errstream << buf; + return 1; +} + +// Called upon OpenSSL errors; returns a non-OK status with an error message. +static Status OpenSSLErr(const string& function) { + stringstream errstream; + ERR_print_errors_cb(OpenSSLErrCallback, &errstream); + return Status(Substitute("OpenSSL error in $0: $1", function, errstream.str())); +} + +void SeedOpenSSLRNG() { + RAND_load_file("/dev/urandom", RNG_RESEED_BYTES); +} + +void IntegrityHash::Compute(const uint8_t* data, int64_t len) { + // Explicitly ignore the return value from SHA256(); it can't fail. + (void)SHA256(data, len, hash_); +} + +bool IntegrityHash::Verify(const uint8_t* data, int64_t len) const { + IntegrityHash test_hash; + test_hash.Compute(data, len); + return memcmp(hash_, test_hash.hash_, sizeof(hash_)) == 0; +} + +void EncryptionKey::InitializeRandom() { + uint64_t next_key_num = keys_generated.Add(1); + if (next_key_num % RNG_RESEED_INTERVAL == 0) { + SeedOpenSSLRNG(); + } + RAND_bytes(key_, sizeof(key_)); + RAND_bytes(iv_, sizeof(iv_)); + initialized_ = true; +} + +Status EncryptionKey::Encrypt(const uint8_t* data, int64_t len, uint8_t* out) const { + return EncryptInternal(true, data, len, out); +} + +Status EncryptionKey::Decrypt(const uint8_t* data, int64_t len, uint8_t* out) const { + return EncryptInternal(false, data, len, out); +} + +Status EncryptionKey::EncryptInternal( + bool encrypt, const uint8_t* data, int64_t len, uint8_t* out) const { + DCHECK(initialized_); + DCHECK_GE(len, 0); + // Create and initialize the context for encryption + EVP_CIPHER_CTX ctx; + EVP_CIPHER_CTX_init(&ctx); + EVP_CIPHER_CTX_set_padding(&ctx, 0); + + int success; + + // Start encryption/decryption. We use a 256-bit AES key, and the cipher block mode + // is CFB because this gives us a stream cipher, which supports arbitrary + // length ciphertexts - it doesn't have to be a multiple of 16 bytes. + success = encrypt ? EVP_EncryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_) : + EVP_DecryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_); + if (success != 1) { + return OpenSSLErr(encrypt ? "EVP_EncryptInit_ex" : "EVP_DecryptInit_ex"); + } + + // The OpenSSL encryption APIs use ints for buffer lengths for some reason. To support + // larger buffers we need to chunk larger buffers into smaller parts. + int64_t offset = 0; + while (offset < len) { + int in_len = static_cast<int>(min<int64_t>(len - offset, numeric_limits<int>::max())); + int out_len; + success = encrypt ? + EVP_EncryptUpdate(&ctx, out + offset, &out_len, data + offset, in_len) : + EVP_DecryptUpdate(&ctx, out + offset, &out_len, data + offset, in_len); + if (success != 1) { + return OpenSSLErr(encrypt ? "EVP_EncryptUpdate" : "EVP_DecryptUpdate"); + } + // This is safe because we're using CFB mode without padding. + DCHECK_EQ(in_len, out_len); + offset += in_len; + } + + // Finalize encryption or decryption. + int final_out_len; + success = encrypt ? EVP_EncryptFinal_ex(&ctx, out + offset, &final_out_len) : + EVP_DecryptFinal_ex(&ctx, out + offset, &final_out_len); + if (success != 1) { + return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal"); + } + // Again safe due to CFB with no padding + DCHECK_EQ(final_out_len, 0); + return Status::OK(); +} +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/util/openssl-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/openssl-util.h b/be/src/util/openssl-util.h new file mode 100644 index 0000000..b214ee9 --- /dev/null +++ b/be/src/util/openssl-util.h @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef IMPALA_UTIL_OPENSSL_UTIL_H +#define IMPALA_UTIL_OPENSSL_UTIL_H + +#include <openssl/aes.h> +#include <openssl/sha.h> + +#include "common/status.h" + +namespace impala { + +/// Add entropy from the system RNG to OpenSSL's global RNG. Called at system startup +/// and again periodically to add new entropy. +void SeedOpenSSLRNG(); + +/// The hash of a data buffer used for checking integrity. A SHA256 hash is used +/// internally. +class IntegrityHash { + public: + /// Computes the hash of the data in a buffer and stores it in this object. + void Compute(const uint8_t* data, int64_t len); + + /// Verify that the data in a buffer matches this hash. Returns true on match, false + /// otherwise. + bool Verify(const uint8_t* data, int64_t len) const; + + private: + uint8_t hash_[SHA256_DIGEST_LENGTH]; +}; + +/// The key and initialization vector (IV) required to encrypt and decrypt a buffer of +/// data. This should be regenerated for each buffer of data. +/// +/// We use AES with a 256-bit key and CFB cipher block mode, which gives us a stream +/// cipher that can support arbitrary-length ciphertexts. The IV is used as an input to +/// the cipher as the "block to supply before the first block of plaintext". This is +/// required because all ciphers (except the weak ECB) are built such that each block +/// depends on the output from the previous block. Since the first block doesn't have +/// a previous block, we supply this IV. Think of it as starting off the chain of +/// encryption. +class EncryptionKey { + public: + EncryptionKey() : initialized_(false) {} + + /// Initialize a key for temporary use with randomly generated data. Reinitializes with + /// new random values if the key was already initialized. We use AES-CFB mode so key/IV + /// pairs should not be reused. This function automatically reseeds the RNG + /// periodically, so callers do not need to do it. + void InitializeRandom(); + + /// Encrypts a buffer of input data 'data' of length 'len' into an output buffer 'out'. + /// Exactly 'len' bytes will be written to 'out'. This key must be initialized before + /// calling. Operates in-place if 'in' == 'out', otherwise the buffers must not overlap. + Status Encrypt(const uint8_t* data, int64_t len, uint8_t* out) const; + + /// Decrypts a buffer of input data 'data' of length 'len' that was encrypted with this + /// key into an output buffer 'out'. Exactly 'len' bytes will be written to 'out'. + /// This key must be initialized before calling. Operates in-place if 'in' == 'out', + /// otherwise the buffers must not overlap. + Status Decrypt(const uint8_t* data, int64_t len, uint8_t* out) const; + + private: + /// Helper method that encrypts/decrypts if 'encrypt' is true/false respectively. + /// A buffer of input data 'data' of length 'len' is encrypted/decrypted with this + /// key into an output buffer 'out'. Exactly 'len' bytes will be written to 'out'. + /// This key must be initialized before calling. Operates in-place if 'in' == 'out', + /// otherwise the buffers must not overlap. + Status EncryptInternal( + bool encrypt, const uint8_t* data, int64_t len, uint8_t* out) const; + + /// Track whether this key has been initialized, to avoid accidentally using + /// uninitialized keys. + bool initialized_; + + /// An AES 256-bit key. + uint8_t key_[32]; + + /// An initialization vector to feed as the first block to AES. + uint8_t iv_[AES_BLOCK_SIZE]; +}; +} + +#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/util/redactor-config-parser-test.cc ---------------------------------------------------------------------- diff --git a/be/src/util/redactor-config-parser-test.cc b/be/src/util/redactor-config-parser-test.cc index 582169a..bcc5ddf 100644 --- a/be/src/util/redactor-config-parser-test.cc +++ b/be/src/util/redactor-config-parser-test.cc @@ -20,10 +20,9 @@ #include "redactor-test-utils.h" #include "testutil/gtest-util.h" -namespace impala { +#include "common/names.h" -using std::string; -using strings::Substitute; +namespace impala { extern std::vector<Rule>* g_rules; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/757c68b2/be/src/util/redactor.cc ---------------------------------------------------------------------- diff --git a/be/src/util/redactor.cc b/be/src/util/redactor.cc index fae1549..77db8b7 100644 --- a/be/src/util/redactor.cc +++ b/be/src/util/redactor.cc @@ -35,17 +35,12 @@ #include "common/logging.h" +#include "common/names.h" + namespace impala { using rapidjson::Document; using rapidjson::Value; -using std::endl; -using std::map; -using std::ostream; -using std::ostringstream; -using std::string; -using std::vector; -using strings::Substitute; typedef re2::RE2 Regex;
