Repository: kudu Updated Branches: refs/heads/master 7034bffc1 -> c658c8696
KUDU-1931: fix SIGSEGV in file cache RandomAccessFile::memory_footprint() TIL that the pointer returned by std::make_shared().get() or *std::make_shared() isn't the base of the underlying heap memory allocation, because the shared_ptr control block _precedes_ the object pointer. This means that kudu_malloc_usable_size(this) is totally unsafe if 'this' was allocated via std::make_shared(). With glibc's malloc, it caused a crash. Thankfully tcmalloc is more forgiving: it finds its allocations' bookkeeping information by converting addresses into their underlying pages, so a crash would only occur if there was a page boundary between the shared_ptr control block and the object's pointer address (I don't even know if this is possible). I found this bug by accident when I ran an in-progress unit test with --block_manager=file. This speaks to our lack of FBM coverage. For now, I've added some coverage but we really should be running at least one end-to-end test on all block managers; I filed KUDU-1932 to track that. Change-Id: I9e57c06dd35252d03729c10ff64fe43d79288513 Reviewed-on: http://gerrit.cloudera.org:8080/6359 Reviewed-by: David Ribeiro Alves <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/c658c869 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/c658c869 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/c658c869 Branch: refs/heads/master Commit: c658c869689b9589e653e8a24568b9f95d6ff65d Parents: 7034bff Author: Adar Dembo <[email protected]> Authored: Sun Mar 12 13:21:01 2017 -0700 Committer: Adar Dembo <[email protected]> Committed: Mon Mar 13 23:32:05 2017 +0000 ---------------------------------------------------------------------- src/kudu/fs/block_manager-test.cc | 4 ++++ src/kudu/util/file_cache-test.cc | 15 +++++++++++++++ src/kudu/util/file_cache.cc | 20 +++++++++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/c658c869/src/kudu/fs/block_manager-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/block_manager-test.cc b/src/kudu/fs/block_manager-test.cc index a58f27f..600ce05 100644 --- a/src/kudu/fs/block_manager-test.cc +++ b/src/kudu/fs/block_manager-test.cc @@ -435,6 +435,10 @@ TYPED_TEST(BlockManagerTest, EndToEndTest) { ASSERT_OK(read_block->Read(0, test_data.length(), &data, scratch.get())); ASSERT_EQ(test_data, data); + // We don't actually do anything with the result of this call; we just want + // to make sure it doesn't trigger a crash (see KUDU-1931). + LOG(INFO) << "Block memory footprint: " << read_block->memory_footprint(); + // Delete the block. ASSERT_OK(this->bm_->DeleteBlock(written_block->id())); ASSERT_TRUE(this->bm_->OpenBlock(written_block->id(), nullptr) http://git-wip-us.apache.org/repos/asf/kudu/blob/c658c869/src/kudu/util/file_cache-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/file_cache-test.cc b/src/kudu/util/file_cache-test.cc index 2347ba9..d61a0fd 100644 --- a/src/kudu/util/file_cache-test.cc +++ b/src/kudu/util/file_cache-test.cc @@ -288,4 +288,19 @@ TYPED_TEST(FileCacheTest, TestNoRecursiveDeadlock) { } } +class RandomAccessFileCacheTest : public FileCacheTest<RandomAccessFile> { +}; + +TEST_F(RandomAccessFileCacheTest, TestMemoryFootprintDoesNotCrash) { + const string kFile = this->GetTestPath("foo"); + ASSERT_OK(this->WriteTestFile(kFile, "test data")); + + shared_ptr<RandomAccessFile> f; + ASSERT_OK(this->cache_->OpenExistingFile(kFile, &f)); + + // This used to crash due to a kudu_malloc_usable_size() call on a memory + // address that wasn't the start of an actual heap allocation. + LOG(INFO) << f->memory_footprint(); +} + } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/c658c869/src/kudu/util/file_cache.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc index 7e3519a..6d4e5d6 100644 --- a/src/kudu/util/file_cache.cc +++ b/src/kudu/util/file_cache.cc @@ -337,7 +337,25 @@ class Descriptor<RandomAccessFile> : public RandomAccessFile { } size_t memory_footprint() const override { - return kudu_malloc_usable_size(this) + + // Normally we would use kudu_malloc_usable_size(this). However, that's + // not safe because 'this' was allocated via std::make_shared(), which + // means it isn't necessarily the base of the memory allocation; it may be + // preceded by the shared_ptr control block. + // + // It doesn't appear possible to get the base of the allocation via any + // shared_ptr APIs, so we'll use sizeof(*this) + 16 instead. The 16 bytes + // represent the shared_ptr control block. Overall the object size is still + // undercounted as it doesn't account for any internal heap fragmentation, + // but at least it's safe. + // + // Some anecdotal memory measurements taken inside gdb: + // - glibc 2.23 malloc_usable_size() on make_shared<FileType>: 88 bytes. + // - tcmalloc malloc_usable_size() on make_shared<FileType>: 96 bytes. + // - sizeof(std::_Sp_counted_base<>) with libstdc++ 5.4: 16 bytes. + // - sizeof(std::__1::__shared_ptr_emplace<>) with libc++ 3.9: 16 bytes. + // - sizeof(*this): 72 bytes. + return sizeof(*this) + + 16 + // shared_ptr control block once_.memory_footprint_excluding_this() + base_.filename().capacity(); }
