Alexey Serbin created KUDU-3570:
-----------------------------------
Summary: Use-after-free and data race in MajorDeltaCompactionOp
when AlterTablet is running
Key: KUDU-3570
URL: https://issues.apache.org/jira/browse/KUDU-3570
Project: Kudu
Issue Type: Bug
Components: tserver
Reporter: Alexey Serbin
Running {{alter_table-randomized-test}} under TSAN produced heap-use-after-free
and data race warnings like the ones below, indicating that the corresponding
conditions might be hit when a major delta compaction (MajorDeltaCompactionOp)
maintenance operation runs while a table is being altered.
In addition to the TSAN warnings, running {{alter_table-randomized-test}} in
DEBUG/ASAN/TSAN builds would crash with SIGABRT and fatal messages like the one
below due to a triggered DCHECK constraint. In a RELEASE build, that condition
might lead to silent data corruption or a crash.
The DCHECK triggers a crash with SIGABRT and reports nonsensical size values:
{noformat}
F20240426 14:25:15.006683 245509 schema.h:584] Check failed: cols_.size() ==
name_to_index_.size() (5270498306772959232 vs. 643461730718517486)
*** Check failure stack trace: ***
@ 0x7f2006677390 google::LogMessage::Flush()
@ 0x7f200667c4cb google::LogMessageFatal::~LogMessageFatal()
@ 0x4eefff kudu::Schema::num_columns()
@ 0x7f200dd18529 kudu::tablet::DeltaPreparer<>::Start()
@ 0x7f200dcde94f kudu::tablet::DeltaFileIterator<>::PrepareBatch()
@ 0x7f200dd07d81 kudu::tablet::DeltaIteratorMerger::PrepareBatch()
@ 0x7f200dd012b1
kudu::tablet::MajorDeltaCompaction::FlushRowSetAndDeltas()
@ 0x7f200dd02fa1 kudu::tablet::MajorDeltaCompaction::Compact()
@ 0x7f200dc1f85d
kudu::tablet::DiskRowSet::MajorCompactDeltaStoresWithColumnIds()
@ 0x7f200dc1f504 kudu::tablet::DiskRowSet::MajorCompactDeltaStores()
@ 0x7f200dae1cc3 kudu::tablet::Tablet::CompactWorstDeltas()
@ 0x7f200db74cd7 kudu::tablet::MajorDeltaCompactionOp::Perform()
@ 0x7f2007498827 kudu::MaintenanceManager::LaunchOp()
@ 0x7f200749c773
kudu::MaintenanceManager::RunSchedulerThread()::$_3::operator()()
{noformat}
TSAN warning on use-after-free:
{noformat}
WARNING: ThreadSanitizer: heap-use-after-free (pid=3392364)
Read of size 8 at 0x7b4400100060 by thread T23 (mutexes: write
M236855935862339888, write M206456689917755456):
#0 std::__1::vector<kudu::ColumnSchema,
std::__1::allocator<kudu::ColumnSchema> >::size() const
thirdparty/installed/tsan/include/c++/v1/vector:658:46 (kudu+0x4ee25b)
#1 kudu::Schema::num_columns() const src/kudu/common/schema.h:584:5
(kudu+0x4eef50)
#2
kudu::tablet::DeltaPreparer<kudu::tablet::DeltaFilePreparerTraits<(kudu::tablet::DeltaType)0>
>::Start(unsigned long, int) src/kudu/tablet/delta_store.cc:204:46
(libtablet.so+0x578488)
#3
kudu::tablet::DeltaFileIterator<(kudu::tablet::DeltaType)0>::PrepareBatch(unsigned
long, int) src/kudu/tablet/deltafile.cc:608:13 (libtablet.so+0x53e8ae)
#4 kudu::tablet::DeltaIteratorMerger::PrepareBatch(unsigned long, int)
src/kudu/tablet/delta_iterator_merger.cc:66:5 (libtablet.so+0x567ce0)
#5
kudu::tablet::MajorDeltaCompaction::FlushRowSetAndDeltas(kudu::fs::IOContext
const*) src/kudu/tablet/delta_compaction.cc:155:5 (libtablet.so+0x561210)
#6 kudu::tablet::MajorDeltaCompaction::Compact(kudu::fs::IOContext const*)
src/kudu/tablet/delta_compaction.cc:340:3 (libtablet.so+0x562f00)
#7
kudu::tablet::DiskRowSet::MajorCompactDeltaStoresWithColumnIds(std::__1::vector<kudu::ColumnId,
std::__1::allocator<kudu::ColumnId> > const&, kudu::fs::IOContext const*,
kudu::tablet::HistoryGcOpts) src/kudu/tablet/diskrowset.cc:588:3
(libtablet.so+0x47f7bc)
#8 kudu::tablet::DiskRowSet::MajorCompactDeltaStores(kudu::fs::IOContext
const*, kudu::tablet::HistoryGcOpts) src/kudu/tablet/diskrowset.cc:572:10
(libtablet.so+0x47f463)
#9
kudu::tablet::Tablet::CompactWorstDeltas(kudu::tablet::RowSet::DeltaCompactionType)
src/kudu/tablet/tablet.cc:2881:5 (libtablet.so+0x341c92)
#10 kudu::tablet::MajorDeltaCompactionOp::Perform()
src/kudu/tablet/tablet_mm_ops.cc:364:3 (libtablet.so+0x3d4ca6)
#11 kudu::MaintenanceManager::LaunchOp(kudu::MaintenanceOp*)
src/kudu/util/maintenance_manager.cc:640:9 (libkudu_util.so+0x38c826)
#12 kudu::MaintenanceManager::RunSchedulerThread()::$_3::operator()() const
src/kudu/util/maintenance_manager.cc:422:5 (libkudu_util.so+0x390772)
...
Previous write of size 8 at 0x7b4400100060 by thread T158:
#0 operator delete(void*)
thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_new_delete.cpp:126
(kudu+0x4dd4e9)
#1 std::__1::_DeallocateCaller::__do_call(void*)
thirdparty/installed/tsan/include/c++/v1/new:334:12 (kudu+0x4e9389)
#2 std::__1::_DeallocateCaller::__do_deallocate_handle_size(void*, unsigned
long) thirdparty/installed/tsan/include/c++/v1/new:292:12 (kudu+0x4e9329)
#3 std::__1::_DeallocateCaller::__do_deallocate_handle_size_align(void*,
unsigned long, unsigned long)
thirdparty/installed/tsan/include/c++/v1/new:268:14 (libtablet.so+0x35fe42)
#4 std::__1::__libcpp_deallocate(void*, unsigned long, unsigned long)
thirdparty/installed/tsan/include/c++/v1/new:340:3 (libtablet.so+0x35fdd9)
#5 std::__1::allocator<std::__1::__shared_ptr_emplace<kudu::Schema,
std::__1::allocator<kudu::Schema> >
>::deallocate(std::__1::__shared_ptr_emplace<kudu::Schema,
std::__1::allocator<kudu::Schema> >*, unsigned long)
thirdparty/installed/tsan/include/c++/v1/memory:1798:10 (libtablet.so+0x39e68d)
#6 std::__1::__shared_ptr_emplace<kudu::Schema,
std::__1::allocator<kudu::Schema> >::__on_zero_shared_weak()
thirdparty/installed/tsan/include/c++/v1/memory:3515:9 (libtablet.so+0x39e210)
#7 std::__1::__shared_weak_count::__release_weak()
thirdparty/src/llvm-11.0.0.src/projects/libcxx/src/memory.cpp
(libc++.so.1+0xc1ec6)
#8 std::__1::__shared_weak_count::__release_shared()
thirdparty/installed/tsan/include/c++/v1/memory:3384:9 (kudu+0x4ee635)
#9 std::__1::shared_ptr<kudu::Schema>::~shared_ptr()
thirdparty/installed/tsan/include/c++/v1/memory:4098:19 (kudu+0x549d68)
#10
kudu::tablet::TabletMetadata::SetSchema(std::__1::shared_ptr<kudu::Schema>
const&, unsigned int) src/kudu/tablet/tablet_metadata.cc:957:1
(libtablet.so+0x4eccb2)
#11 kudu::tablet::Tablet::AlterSchema(kudu::tablet::AlterSchemaOpState*)
src/kudu/tablet/tablet.cc:1727:14 (libtablet.so+0x33b66a)
#12 kudu::tablet::AlterSchemaOp::Apply(kudu::consensus::CommitMsg**)
src/kudu/tablet/ops/alter_schema_op.cc:127:3 (libtablet.so+0x415858)
...
SUMMARY: ThreadSanitizer: heap-use-after-free
thirdparty/installed/tsan/include/c++/v1/vector:658:46 in
std::__1::vector<kudu::ColumnSchema, std::__1::allocator<kudu::ColumnSchema>
>::size() const
{noformat}
TSAN warning on a data race:
{noformat}
WARNING: ThreadSanitizer: data race (pid=205486)
Write of size 8 at 0x7b440011ad20 by thread T131:
#0 std::__1::__vector_base<kudu::ColumnSchema,
std::__1::allocator<kudu::ColumnSchema>
>::__destruct_at_end(kudu::ColumnSchema*)
thirdparty/installed/tsan/include/c++/v1/vector:429:12 (kudu+0x4ee4c0)
#1 std::__1::__vector_base<kudu::ColumnSchema,
std::__1::allocator<kudu::ColumnSchema> >::clear()
thirdparty/installed/tsan/include/c++/v1/vector:371:29 (kudu+0x4ee3d4)
#2 std::__1::__vector_base<kudu::ColumnSchema,
std::__1::allocator<kudu::ColumnSchema> >::~__vector_base()
thirdparty/installed/tsan/include/c++/v1/vector:465:9 (kudu+0x4ee16b)
#3 std::__1::vector<kudu::ColumnSchema,
std::__1::allocator<kudu::ColumnSchema> >::~vector()
thirdparty/installed/tsan/include/c++/v1/vector:557:5 (kudu+0x4eac41)
#4 kudu::Schema::~Schema() src/kudu/common/schema.h:491:7 (kudu+0x4e5da0)
#5 std::__1::__shared_ptr_emplace<kudu::Schema,
std::__1::allocator<kudu::Schema> >::__on_zero_shared()
thirdparty/installed/tsan/include/c++/v1/memory:3503:23 (libtablet.so+0x39e1d5)
#6 std::__1::__shared_count::__release_shared()
thirdparty/installed/tsan/include/c++/v1/memory:3341:9 (kudu+0x4ee685)
#7 std::__1::__shared_weak_count::__release_shared()
thirdparty/installed/tsan/include/c++/v1/memory:3383:27 (kudu+0x4ee629)
#8 std::__1::shared_ptr<kudu::Schema>::~shared_ptr()
thirdparty/installed/tsan/include/c++/v1/memory:4098:19 (kudu+0x549d68)
#9
kudu::tablet::TabletMetadata::SetSchema(std::__1::shared_ptr<kudu::Schema>
const&, unsigned int) src/kudu/tablet/tablet_metadata.cc:957:1
(libtablet.so+0x4ecd52)
#10 kudu::tablet::Tablet::AlterSchema(kudu::tablet::AlterSchemaOpState*)
src/kudu/tablet/tablet.cc:1727:14 (libtablet.so+0x33b69a)
#11 kudu::tablet::AlterSchemaOp::Apply(kudu::consensus::CommitMsg**)
src/kudu/tablet/ops/alter_schema_op.cc:127:3 (libtablet.so+0x415888)
...
Previous read of size 8 at 0x7b440011ad20 by thread T19 (mutexes: write
M153539342756080912, write M120888296998031872):
#0 std::__1::vector<kudu::ColumnSchema,
std::__1::allocator<kudu::ColumnSchema> >::size() const
thirdparty/installed/tsan/include/c++/v1/vector:658:46 (kudu+0x4ee25b)
#1 kudu::Schema::num_columns() const src/kudu/common/schema.h:584:5
(kudu+0x4eef50)
#2
kudu::tablet::DeltaPreparer<kudu::tablet::DeltaFilePreparerTraits<(kudu::tablet::DeltaType)0>
>::Start(unsigned long, int) src/kudu/tablet/delta_store.cc:204:46
(libtablet.so+0x578528)
#3
kudu::tablet::DeltaFileIterator<(kudu::tablet::DeltaType)0>::PrepareBatch(unsigned
long, int) src/kudu/tablet/deltafile.cc:608:13 (libtablet.so+0x53e94e)
#4 kudu::tablet::DeltaIteratorMerger::PrepareBatch(unsigned long, int)
src/kudu/tablet/delta_iterator_merger.cc:66:5 (libtablet.so+0x567d80)
#5
kudu::tablet::MajorDeltaCompaction::FlushRowSetAndDeltas(kudu::fs::IOContext
const*) src/kudu/tablet/delta_compaction.cc:155:5 (libtablet.so+0x5612b0)
#6 kudu::tablet::MajorDeltaCompaction::Compact(kudu::fs::IOContext const*)
src/kudu/tablet/delta_compaction.cc:340:3 (libtablet.so+0x562fa0)
#7
kudu::tablet::DiskRowSet::MajorCompactDeltaStoresWithColumnIds(std::__1::vector<kudu::ColumnId,
std::__1::allocator<kudu::ColumnId> > const&, kudu::fs::IOContext const*,
kudu::tablet::HistoryGcOpts) src/kudu/tablet/diskrowset.cc:588:3
(libtablet.so+0x47f85c)
#8 kudu::tablet::DiskRowSet::MajorCompactDeltaStores(kudu::fs::IOContext
const*, kudu::tablet::HistoryGcOpts) src/kudu/tablet/diskrowset.cc:572:10
(libtablet.so+0x47f503)
#9
kudu::tablet::Tablet::CompactWorstDeltas(kudu::tablet::RowSet::DeltaCompactionType)
src/kudu/tablet/tablet.cc:2881:5 (libtablet.so+0x341cc2)
#10 kudu::tablet::MajorDeltaCompactionOp::Perform()
src/kudu/tablet/tablet_mm_ops.cc:364:3 (libtablet.so+0x3d4cd6)
#11 kudu::MaintenanceManager::LaunchOp(kudu::MaintenanceOp*)
src/kudu/util/maintenance_manager.cc:640:9 (libkudu_util.so+0x38c826)
#12 kudu::MaintenanceManager::RunSchedulerThread()::$_3::operator()() const
src/kudu/util/maintenance_manager.cc:422:5 (libkudu_util.so+0x390772)
{noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)