log block manager: fix when append exceeds preallocated space

In commit abea8c6 I added a DCHECK to enforce the assumption that a
container's preallocation offset is always ahead of its append offset. This
turns out to be wrong when very large values are written, or when the
preallocation size is tuned down (as it is in disk_reservation-itest.cc).
The effect: disk_reservation-itest.cc failed from time to time because the
tserver crashed too early.

So let's throw that away, and let's also adjust the preallocation algorithm
slightly: when we preallocate, we take the write offset into account. If
it's greater than the preallocation offset, we preallocate again, and we do
so from the write offset.

The new test crashes 100% of the time without the fix.

Change-Id: I99460d35e355abf843c965d3c2b4309cba24eadd
Reviewed-on: http://gerrit.cloudera.org:8080/5024
Reviewed-by: Jean-Daniel Cryans <[email protected]>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/af9fc13c
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/af9fc13c
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/af9fc13c

Branch: refs/heads/master
Commit: af9fc13cfc277faa0078b2502f4baa25fd0d9102
Parents: 388908c
Author: Adar Dembo <[email protected]>
Authored: Wed Nov 9 14:22:25 2016 -0800
Committer: Adar Dembo <[email protected]>
Committed: Wed Nov 9 23:48:59 2016 +0000

----------------------------------------------------------------------
 src/kudu/fs/block_manager-test.cc | 19 +++++++++++++++++++
 src/kudu/fs/log_block_manager.cc  | 18 ++++++++++--------
 2 files changed, 29 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/af9fc13c/src/kudu/fs/block_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/block_manager-test.cc 
b/src/kudu/fs/block_manager-test.cc
index 9aeb29e..fc65fd1 100644
--- a/src/kudu/fs/block_manager-test.cc
+++ b/src/kudu/fs/block_manager-test.cc
@@ -1007,6 +1007,25 @@ TEST_F(LogBlockManagerTest, TestMetadataTruncation) {
                                      false));
 }
 
+// Regression test for a crash when a container's append offset exceeded its
+// preallocation offset.
+TEST_F(LogBlockManagerTest, TestAppendExceedsPreallocation) {
+  RETURN_NOT_LOG_BLOCK_MANAGER();
+
+  FLAGS_log_container_preallocate_bytes = 1;
+
+  // Create a container, preallocate it by one byte, and append more than one.
+  gscoped_ptr<WritableBlock> writer;
+  ASSERT_OK(bm_->CreateBlock(&writer));
+  ASSERT_OK(writer->Append("hello world"));
+  ASSERT_OK(writer->Close());
+
+  // On second append, don't crash just because the append offset is ahead of
+  // the preallocation offset!
+  ASSERT_OK(bm_->CreateBlock(&writer));
+  ASSERT_OK(writer->Append("hello world"));
+}
+
 TYPED_TEST(BlockManagerTest, TestDiskSpaceCheck) {
   // Reopen the block manager with metrics enabled.
   MetricRegistry registry;

http://git-wip-us.apache.org/repos/asf/kudu/blob/af9fc13c/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 129766b..91609da 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -576,8 +576,10 @@ Status LogBlockContainer::AppendData(const Slice& data) {
   }
   total_bytes_written_ += data.size();
 
-  if (PREDICT_FALSE(!FLAGS_log_container_preallocate_bytes)) {
-    // Without preallocation, every call grows the container.
+  // This append may have changed the container size if:
+  // 1. It was large enough that it blew out the preallocated space.
+  // 2. Preallocation was disabled.
+  if (total_bytes_written_ > preallocated_offset_) {
     RETURN_NOT_OK(data_dir_->RefreshIsFull(DataDir::RefreshMode::ALWAYS));
   }
   return Status::OK();
@@ -630,12 +632,12 @@ Status LogBlockContainer::EnsurePreallocated(size_t 
next_append_bytes) {
   if (!FLAGS_log_container_preallocate_bytes) {
     return Status::OK();
   }
-  DCHECK_GE(preallocated_offset_, total_bytes_written_);
 
-  // If the next write exceeds the remaining preallocated window, we need to
-  // preallocate another chunk.
-  if (next_append_bytes > preallocated_offset_ - total_bytes_written_) {
-    int64_t off = preallocated_offset_;
+  // If the last write blew out the preallocation window, or if the next write
+  // exceeds it, we need to preallocate another chunk.
+  if (total_bytes_written_ > preallocated_offset_ ||
+      next_append_bytes > preallocated_offset_ - total_bytes_written_) {
+    int64_t off = std::max(preallocated_offset_, total_bytes_written_);
     int64_t len = FLAGS_log_container_preallocate_bytes;
     {
       std::lock_guard<Mutex> l(data_file_lock_);
@@ -645,7 +647,7 @@ Status LogBlockContainer::EnsurePreallocated(size_t 
next_append_bytes) {
     VLOG(2) << Substitute("Preallocated $0 bytes at offset $1 in container $2",
                           len, off, ToString());
 
-    preallocated_offset_ += len;
+    preallocated_offset_ = off + len;
   }
 
   return Status::OK();

Reply via email to