log block manager: fix when append exceeds preallocated space In commit abea8c6 I added a DCHECK to enforce the assumption that a container's preallocation offset is always ahead of its append offset. This turns out to be wrong when very large values are written, or when the preallocation size is tuned down (as it is in disk_reservation-itest.cc). The effect: disk_reservation-itest.cc failed from time to time because the tserver crashed too early.
So let's throw that away, and let's also adjust the preallocation algorithm slightly: when we preallocate, we take the write offset into account. If it's greater than the preallocation offset, we preallocate again, and we do so from the write offset. The new test crashes 100% of the time without the fix. Change-Id: I99460d35e355abf843c965d3c2b4309cba24eadd Reviewed-on: http://gerrit.cloudera.org:8080/5024 Reviewed-by: Jean-Daniel Cryans <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/af9fc13c Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/af9fc13c Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/af9fc13c Branch: refs/heads/master Commit: af9fc13cfc277faa0078b2502f4baa25fd0d9102 Parents: 388908c Author: Adar Dembo <[email protected]> Authored: Wed Nov 9 14:22:25 2016 -0800 Committer: Adar Dembo <[email protected]> Committed: Wed Nov 9 23:48:59 2016 +0000 ---------------------------------------------------------------------- src/kudu/fs/block_manager-test.cc | 19 +++++++++++++++++++ src/kudu/fs/log_block_manager.cc | 18 ++++++++++-------- 2 files changed, 29 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/af9fc13c/src/kudu/fs/block_manager-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/block_manager-test.cc b/src/kudu/fs/block_manager-test.cc index 9aeb29e..fc65fd1 100644 --- a/src/kudu/fs/block_manager-test.cc +++ b/src/kudu/fs/block_manager-test.cc @@ -1007,6 +1007,25 @@ TEST_F(LogBlockManagerTest, TestMetadataTruncation) { false)); } +// Regression test for a crash when a container's append offset exceeded its +// preallocation offset. +TEST_F(LogBlockManagerTest, TestAppendExceedsPreallocation) { + RETURN_NOT_LOG_BLOCK_MANAGER(); + + FLAGS_log_container_preallocate_bytes = 1; + + // Create a container, preallocate it by one byte, and append more than one. + gscoped_ptr<WritableBlock> writer; + ASSERT_OK(bm_->CreateBlock(&writer)); + ASSERT_OK(writer->Append("hello world")); + ASSERT_OK(writer->Close()); + + // On second append, don't crash just because the append offset is ahead of + // the preallocation offset! + ASSERT_OK(bm_->CreateBlock(&writer)); + ASSERT_OK(writer->Append("hello world")); +} + TYPED_TEST(BlockManagerTest, TestDiskSpaceCheck) { // Reopen the block manager with metrics enabled. MetricRegistry registry; http://git-wip-us.apache.org/repos/asf/kudu/blob/af9fc13c/src/kudu/fs/log_block_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc index 129766b..91609da 100644 --- a/src/kudu/fs/log_block_manager.cc +++ b/src/kudu/fs/log_block_manager.cc @@ -576,8 +576,10 @@ Status LogBlockContainer::AppendData(const Slice& data) { } total_bytes_written_ += data.size(); - if (PREDICT_FALSE(!FLAGS_log_container_preallocate_bytes)) { - // Without preallocation, every call grows the container. + // This append may have changed the container size if: + // 1. It was large enough that it blew out the preallocated space. + // 2. Preallocation was disabled. + if (total_bytes_written_ > preallocated_offset_) { RETURN_NOT_OK(data_dir_->RefreshIsFull(DataDir::RefreshMode::ALWAYS)); } return Status::OK(); @@ -630,12 +632,12 @@ Status LogBlockContainer::EnsurePreallocated(size_t next_append_bytes) { if (!FLAGS_log_container_preallocate_bytes) { return Status::OK(); } - DCHECK_GE(preallocated_offset_, total_bytes_written_); - // If the next write exceeds the remaining preallocated window, we need to - // preallocate another chunk. - if (next_append_bytes > preallocated_offset_ - total_bytes_written_) { - int64_t off = preallocated_offset_; + // If the last write blew out the preallocation window, or if the next write + // exceeds it, we need to preallocate another chunk. + if (total_bytes_written_ > preallocated_offset_ || + next_append_bytes > preallocated_offset_ - total_bytes_written_) { + int64_t off = std::max(preallocated_offset_, total_bytes_written_); int64_t len = FLAGS_log_container_preallocate_bytes; { std::lock_guard<Mutex> l(data_file_lock_); @@ -645,7 +647,7 @@ Status LogBlockContainer::EnsurePreallocated(size_t next_append_bytes) { VLOG(2) << Substitute("Preallocated $0 bytes at offset $1 in container $2", len, off, ToString()); - preallocated_offset_ += len; + preallocated_offset_ = off + len; } return Status::OK();
