Repository: incubator-impala Updated Branches: refs/heads/master dde559b31 -> a4206d3f1
IMPALA-4820: avoid writing unencrypted data during write cancellation The bug was that unencrypted data could be written to disk if the write was cancelled before it completed. This bug was introduced after Impala 2.8.0 was branched in the commit "IMPALA-3202,IMPALA-2079: rework scratch file I/O", so does not appear in any released versions of Impala. The fix is to only start decrypting data after the write is complete. Testing: Added a regression test that reproduced the problem (after adding a delay to the write). Change-Id: I956bae0685e863f30be23634b29aa076394db184 Reviewed-on: http://gerrit.cloudera.org:8080/5788 Tested-by: Impala Public Jenkins Reviewed-by: Tim Armstrong <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/a4206d3f Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/a4206d3f Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/a4206d3f Branch: refs/heads/master Commit: a4206d3f1c5504c430e7d52255c673ae3de646c5 Parents: dde559b Author: Tim Armstrong <[email protected]> Authored: Tue Jan 24 15:41:02 2017 -0800 Committer: Tim Armstrong <[email protected]> Committed: Mon Jan 30 22:06:29 2017 +0000 ---------------------------------------------------------------------- be/src/common/global-flags.cc | 4 ++- be/src/runtime/disk-io-mgr.cc | 9 +++++ be/src/runtime/tmp-file-mgr-test.cc | 58 ++++++++++++++++++++++++++++++++ be/src/runtime/tmp-file-mgr.cc | 7 ++-- 4 files changed, 73 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a4206d3f/be/src/common/global-flags.cc ---------------------------------------------------------------------- diff --git a/be/src/common/global-flags.cc b/be/src/common/global-flags.cc index 5a3eff4..cc80011 100644 --- a/be/src/common/global-flags.cc +++ b/be/src/common/global-flags.cc @@ -91,7 +91,7 @@ DEFINE_bool(load_auth_to_local_rules, false, "If true, load auth_to_local config "hadoop.security.auth_to_local and applies them to translate the Kerberos principal " "to its corresponding local user name for authorization."); -// Stress option for testing failed memory allocation. Debug builds only. +// Stress options that are only enabled in debug builds for testing. #ifndef NDEBUG DEFINE_int32(stress_free_pool_alloc, 0, "A stress option which causes memory allocations " "to fail once every n allocations where n is the value of this flag. Effective in " @@ -106,6 +106,8 @@ DEFINE_int32(fault_injection_rpc_delay_ms, 0, "A fault injection option that cau "Effective in debug builds only."); DEFINE_int32(fault_injection_rpc_type, 0, "A fault injection option that specifies " "which rpc call will be injected with the delay. Effective in debug builds only."); +DEFINE_int32(stress_scratch_write_delay_ms, 0, "A stress option which causes writes to " + "scratch files to be to be delayed to simulate slow writes."); #endif // Used for testing the path where the Kudu client is stubbed. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a4206d3f/be/src/runtime/disk-io-mgr.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/disk-io-mgr.cc b/be/src/runtime/disk-io-mgr.cc index 3450046..5d9ced1 100644 --- a/be/src/runtime/disk-io-mgr.cc +++ b/be/src/runtime/disk-io-mgr.cc @@ -23,8 +23,12 @@ #include "gutil/bits.h" #include "gutil/strings/substitute.h" #include "util/hdfs-util.h" +#include "util/time.h" DECLARE_bool(disable_mem_pools); +#ifndef NDEBUG +DECLARE_int32(stress_scratch_write_delay_ms); +#endif #include "common/names.h" @@ -1197,6 +1201,11 @@ Status DiskIoMgr::WriteRangeHelper(FILE* file_handle, WriteRange* write_range) { write_range->file_, write_range->offset(), errno, GetStrErrMsg()))); } +#ifndef NDEBUG + if (FLAGS_stress_scratch_write_delay_ms > 0) { + SleepForMs(FLAGS_stress_scratch_write_delay_ms); + } +#endif int64_t bytes_written = fwrite(write_range->data_, 1, write_range->len_, file_handle); if (bytes_written < write_range->len_) { return Status(ErrorMsg(TErrorCode::RUNTIME_ERROR, http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a4206d3f/be/src/runtime/tmp-file-mgr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/tmp-file-mgr-test.cc b/be/src/runtime/tmp-file-mgr-test.cc index df495a8..59b5af4 100644 --- a/be/src/runtime/tmp-file-mgr-test.cc +++ b/be/src/runtime/tmp-file-mgr-test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include <cstdio> #include <cstdlib> #include <boost/filesystem.hpp> @@ -38,6 +39,11 @@ using boost::filesystem::path; +DECLARE_bool(disk_spill_encryption); +#ifndef NDEBUG +DECLARE_int32(stress_scratch_write_delay_ms); +#endif + namespace impala { class TmpFileMgrTest : public ::testing::Test { @@ -47,6 +53,12 @@ class TmpFileMgrTest : public ::testing::Test { profile_ = obj_pool_.Add(new RuntimeProfile(&obj_pool_, "tmp-file-mgr-test")); test_env_.reset(new TestEnv); cb_counter_ = 0; + + // Reset query options that are modified by tests. + FLAGS_disk_spill_encryption = false; +#ifndef NDEBUG + FLAGS_stress_scratch_write_delay_ms = 0; +#endif } virtual void TearDown() { @@ -436,6 +448,52 @@ TEST_F(TmpFileMgrTest, TestProcessMemLimitExceeded) { file_group.Close(); test_env_->TearDownQueries(); } + +// Regression test for IMPALA-4820 - encrypted data can get written to disk. +TEST_F(TmpFileMgrTest, TestEncryptionDuringCancellation) { + FLAGS_disk_spill_encryption = true; + // A delay is required for this to reproduce the issue, since writes are often buffered + // in memory by the OS. The test should succeed regardless of the delay. +#ifndef NDEBUG + FLAGS_stress_scratch_write_delay_ms = 1000; +#endif + TUniqueId id; + TmpFileMgr::FileGroup file_group(test_env_->tmp_file_mgr(), io_mgr(), profile_, id); + + // Make the data fairly large so that we have a better chance of cancelling while the + // write is in flight + const int DATA_SIZE = 8 * 1024 * 1024; + string data(DATA_SIZE, ' '); + MemRange data_mem_range(reinterpret_cast<uint8_t*>(&data[0]), DATA_SIZE); + + // Write out a string repeatedly. We don't want to see this written unencypted to disk. + string plaintext("the quick brown fox jumped over the lazy dog"); + for (int pos = 0; pos + plaintext.size() < DATA_SIZE; pos += plaintext.size()) { + memcpy(&data[pos], &plaintext[0], plaintext.size()); + } + + // Start a write in flight, which should encrypt the data and write it to disk. + unique_ptr<TmpFileMgr::WriteHandle> handle; + DiskIoMgr::WriteRange::WriteDoneCallback callback = + bind(mem_fn(&TmpFileMgrTest::SignalCallback), this, _1); + ASSERT_OK(file_group.Write(data_mem_range, callback, &handle)); + string file_path = handle->TmpFilePath(); + + // Cancel the write - prior to the IMPALA-4820 fix decryption could race with the write. + ASSERT_OK(file_group.CancelWriteAndRestoreData(move(handle), data_mem_range)); + WaitForCallbacks(1); + + // Read the data from the scratch file and check that the plaintext isn't present. + FILE* file = fopen(file_path.c_str(), "r"); + ASSERT_EQ(DATA_SIZE, fread(&data[0], 1, DATA_SIZE, file)); + for (int pos = 0; pos + plaintext.size() < DATA_SIZE; pos += plaintext.size()) { + ASSERT_NE(0, memcmp(&data[pos], &plaintext[0], plaintext.size())) + << file_path << "@" << pos; + } + fclose(file); + file_group.Close(); + test_env_->TearDownQueries(); +} } int main(int argc, char** argv) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a4206d3f/be/src/runtime/tmp-file-mgr.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/tmp-file-mgr.cc b/be/src/runtime/tmp-file-mgr.cc index 347db96..0416597 100644 --- a/be/src/runtime/tmp-file-mgr.cc +++ b/be/src/runtime/tmp-file-mgr.cc @@ -418,14 +418,13 @@ Status TmpFileMgr::FileGroup::CancelWriteAndRestoreData( DCHECK_EQ(handle->len(), buffer.len()); handle->Cancel(); - // Decrypt regardless of whether the write is still in flight or not. An in-flight - // write may write bogus data to disk but this lets us get some work done while the - // write is being cancelled. + handle->WaitForWrite(); + // Decrypt after the write is finished, so that we don't accidentally write decrypted + // data to disk. Status status; if (FLAGS_disk_spill_encryption) { status = handle->CheckHashAndDecrypt(buffer); } - handle->WaitForWrite(); RecycleFileRange(move(handle)); return status; }
