IMPALA-4842: BufferedBlockMgrTest.WriteError is flaky The test should allow Unpin() to fail with a scratch allocation error to handle the case where the first write fails and blacklists the scratch disk around the same time that the second write starts. Usually either the second write succeeds because it started before the first write failed or it fails with CANCELLED because the BufferedBlockMgr::is_cancelled_ flag is set. There is a small window for a race after the disk is blacklisted in TmpFileMgr but before BufferedBlockMgr::WriteComplete() is called.
Testing: I was able to reproduce the problem locally by adding some delays to the test. I added a variant of the WriteError test that more reliably reproduces the bug. Ran both WriteError tests in a loop locally to try to flush out flakiness. Change-Id: I9878d7000b03a64ee06c2088a8c30e318fe1d2a3 Reviewed-on: http://gerrit.cloudera.org:8080/5940 Tested-by: Impala Public Jenkins Reviewed-by: Michael Ho <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1335af36 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1335af36 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1335af36 Branch: refs/heads/master Commit: 1335af3684ec54b63a9602816b2cda776b40ce2f Parents: 77208d3 Author: Tim Armstrong <[email protected]> Authored: Tue Feb 7 17:47:10 2017 -0800 Committer: Dan Hecht <[email protected]> Committed: Fri Feb 10 01:28:16 2017 +0000 ---------------------------------------------------------------------- be/src/runtime/buffered-block-mgr-test.cc | 52 ++++++++++++++++++++------ be/src/runtime/tmp-file-mgr.cc | 7 +--- common/thrift/generate_error_codes.py | 6 +++ 3 files changed, 47 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1335af36/be/src/runtime/buffered-block-mgr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/buffered-block-mgr-test.cc b/be/src/runtime/buffered-block-mgr-test.cc index 188d9e9..48b2f6a 100644 --- a/be/src/runtime/buffered-block-mgr-test.cc +++ b/be/src/runtime/buffered-block-mgr-test.cc @@ -216,16 +216,27 @@ class BufferedBlockMgrTest : public ::testing::Test { } // Pin all blocks. By default, expect no errors from Unpin() calls. If - // expect_cancelled is true, returning cancelled is allowed. + // expected_error_codes is non-NULL, returning one of the error codes is + // also allowed. void UnpinBlocks(const vector<BufferedBlockMgr::Block*>& blocks, - bool expect_cancelled = false) { + const vector<TErrorCode::type>* expected_error_codes = nullptr, + int delay_between_unpins_ms = 0) { for (int i = 0; i < blocks.size(); ++i) { Status status = blocks[i]->Unpin(); - if (expect_cancelled) { - ASSERT_TRUE(status.ok() || status.IsCancelled()) << status.msg().msg(); + if (!status.ok() && expected_error_codes != nullptr) { + // Check if it's one of the expected errors. + bool is_expected_error = false; + for (TErrorCode::type code : *expected_error_codes) { + if (status.code() == code) { + is_expected_error = true; + break; + } + } + ASSERT_TRUE(is_expected_error) << status.msg().msg(); } else { ASSERT_TRUE(status.ok()) << status.msg().msg(); } + if (delay_between_unpins_ms > 0) SleepForMs(delay_between_unpins_ms); } } @@ -581,6 +592,8 @@ class BufferedBlockMgrTest : public ::testing::Test { /// before destroying block mgr. void TestRuntimeStateTeardown(bool write_error, bool wait_for_writes); + void TestWriteError(int write_delay_ms); + scoped_ptr<TestEnv> test_env_; ObjectPool pool_; vector<string> created_tmp_dirs_; @@ -916,7 +929,8 @@ void BufferedBlockMgrTest::TestRuntimeStateTeardown( // Unpin will initiate writes. If the write error propagates fast enough, some Unpin() // calls may see a cancelled block mgr. - UnpinBlocks(blocks, write_error); + vector<TErrorCode::type> cancelled_code = {TErrorCode::CANCELLED}; + UnpinBlocks(blocks, write_error ? &cancelled_code : nullptr); // Tear down while writes are in flight. The block mgr may outlive the runtime state // because it may be referenced by other runtime states. This test simulates this @@ -982,12 +996,13 @@ static int remove_scratch_perms() { // Test that the block manager behaves correctly after a write error. Delete the scratch // directory before an operation that would cause a write and test that subsequent API // calls return 'CANCELLED' correctly. -TEST_F(BufferedBlockMgrTest, WriteError) { +void BufferedBlockMgrTest::TestWriteError(int write_delay_ms) { int max_num_buffers = 2; const int block_size = 1024; BufferedBlockMgr* block_mgr; BufferedBlockMgr::Client* client; block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, false, &client); + block_mgr->set_debug_write_delay_ms(write_delay_ms); vector<BufferedBlockMgr::Block*> blocks; AllocateBlocks(block_mgr, client, max_num_buffers, &blocks); @@ -999,7 +1014,11 @@ TEST_F(BufferedBlockMgrTest, WriteError) { // Remove the backing storage so that future writes will fail int num_files = remove_scratch_perms(); ASSERT_GT(num_files, 0); - UnpinBlocks(blocks, true); + vector<TErrorCode::type> expected_error_codes = {TErrorCode::CANCELLED, + TErrorCode::SCRATCH_ALLOCATION_FAILED}; + // Give the first write a chance to fail before the second write starts. + int interval_ms = 10; + UnpinBlocks(blocks, &expected_error_codes, interval_ms); WaitForWrites(block_mgr); // Subsequent calls should fail. DeleteBlocks(blocks); @@ -1010,6 +1029,16 @@ TEST_F(BufferedBlockMgrTest, WriteError) { TearDownMgrs(); } +TEST_F(BufferedBlockMgrTest, WriteError) { + TestWriteError(0); +} + +// Regression test for IMPALA-4842 - inject a delay in the write to +// reproduce the issue. +TEST_F(BufferedBlockMgrTest, WriteErrorWriteDelay) { + TestWriteError(100); +} + // Test block manager error handling when temporary file space cannot be allocated to // back an unpinned buffer. TEST_F(BufferedBlockMgrTest, TmpFileAllocateError) { @@ -1216,11 +1245,10 @@ TEST_F(BufferedBlockMgrTest, NoDirsAllocationError) { stringstream error_string; PrintErrorMap(&error_string, error_log); LOG(INFO) << "Errors: " << error_string.str(); - ASSERT_NE( - string::npos, error_string.str().find("No usable scratch files: space could " - "not be allocated in any of the configured " - "scratch directories (--scratch_dirs)")) - << error_string.str(); + // SCRATCH_ALLOCATION_FAILED error should exist in the error log. + ErrorLogMap::const_iterator it = error_log.find(TErrorCode::SCRATCH_ALLOCATION_FAILED); + ASSERT_NE(it, error_log.end()); + ASSERT_GT(it->second.count, 0); DeleteBlocks(blocks); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1335af36/be/src/runtime/tmp-file-mgr.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/tmp-file-mgr.cc b/be/src/runtime/tmp-file-mgr.cc index 0416597..c939d88 100644 --- a/be/src/runtime/tmp-file-mgr.cc +++ b/be/src/runtime/tmp-file-mgr.cc @@ -337,12 +337,7 @@ Status TmpFileMgr::FileGroup::AllocateSpace( << ". Will try another scratch file."; scratch_errors_.push_back(status); } - // TODO: IMPALA-4697: the merged errors do not show up in the query error log, - // so we must point users to the impalad error log. - Status err_status( - "No usable scratch files: space could not be allocated in any of " - "the configured scratch directories (--scratch_dirs). See logs for previous " - "errors that may have caused this."); + Status err_status(TErrorCode::SCRATCH_ALLOCATION_FAILED); // Include all previous errors that may have caused the failure. for (Status& err : scratch_errors_) err_status.MergeStatus(err); return err_status; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1335af36/common/thrift/generate_error_codes.py ---------------------------------------------------------------------- diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py index 2e45f2b..fe001e5 100755 --- a/common/thrift/generate_error_codes.py +++ b/common/thrift/generate_error_codes.py @@ -306,6 +306,12 @@ error_codes = ( ("PARQUET_TIMESTAMP_OUT_OF_RANGE", 100, "Parquet file '$0' column '$1' contains an out of range timestamp. " "The valid date range is 1400-01-01..9999-12-31."), + + # TODO: IMPALA-4697: the merged errors do not show up in the query error log, + # so we must point users to the impalad error log. + ("SCRATCH_ALLOCATION_FAILED", 102, "Could not create files in any configured scratch " + "directories (--scratch_dirs). See logs for previous errors that may have prevented " + "creating or writing scratch files."), ) import sys
