Joe McDonnell created KUDU-3461:
-----------------------------------
Summary: Kudu client can blow the stack with infinite recursions
between PickLeader() and LookupTabletByKey()
Key: KUDU-3461
URL: https://issues.apache.org/jira/browse/KUDU-3461
Project: Kudu
Issue Type: Bug
Components: client
Affects Versions: 1.17.0
Reporter: Joe McDonnell
In an Impala cluster, we ran into a scenario that caused Impala to crash with a
SIGSEGV. When reproducing it under gdb, we see the stack overflow with this
recursion:
{noformat}
#0 0x00007f983e031a1c in clock_gettime ()
#1 0x00007f983bfda0b5 in __GI___clock_gettime (clock_id=clock_id@entry=1,
tp=0x7f967bd8b070) at ../sysdeps/unix/sysv/linux/clock_gettime.c:38
#2 0x00007f983c9f8e48 in kudu::Stopwatch::GetTimes (times=0x7f967bd8b1b0,
this=<optimized out>, this=<optimized out>) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:294
#3 0x00007f983ca09829 in kudu::Stopwatch::stop (this=0x7f967bd8b320) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:218
#4 kudu::Stopwatch::stop (this=0x7f967bd8b320) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:213
#5 kudu::sw_internal::LogTiming::Print (max_expected_millis=50,
this=0x7f967bd8b320) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:359
#6 kudu::sw_internal::LogTiming::~LogTiming (this=0x7f967bd8b320,
__in_chrg=<optimized out>) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:329
#7 0x00007f983c9fe32c in
kudu::client::internal::MetaCache::LookupEntryByKeyFastPath (this=<optimized
out>, table=<optimized out>, partition_key=..., entry=0x7f967bd8b4c0) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/locks.h:99
#8 0x00007f983c9fe656 in kudu::client::internal::MetaCache::DoFastPathLookup
(this=0xde431e0, table=0xf899300, partition_key=0x7f967bd8b700,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1243
#9 0x00007f983ca05731 in
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1405
#10 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#11 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8b8c0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#12
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#13 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#14 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8bad0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#15
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#16 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#17 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8bce0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#18
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#19 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#20 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8bef0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#21
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#22 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#23 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8c100) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#24
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#25 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#26 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8c310) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#27
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
... continues ...
#47617 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#47618 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967c589290) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47619
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#47620 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
--Type <RET> for more, q to quit, c to continue without paging--
#47621 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967c5894a0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47622
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#47623 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#47624 0x00007f983ca066a7 in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0xca50918)
at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47625
kudu::client::internal::LookupRpc::SendRpcCb (this=0xca50800, status=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:966
#47626 0x00007f983c9db65c in
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}::operator()()
const (this=<optimized out>, this=<optimized out>)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/status.h:230
#47627
std::__invoke_impl<void,
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&>(std::__invoke_other,
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&)
(__f=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/invoke.h:60
#47628 std::__invoke_r<void,
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&>(void&&,
(kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&)...)
(__fn=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/invoke.h:110
#47629 std::_Function_handler<void (),
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}>::_M_invoke(std::_Any_data
const&) (__functor=...)
at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:291
#47630 0x00007f983cac860b in std::function<void ()>::operator()() const
(this=0xee3f9c0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47631 kudu::rpc::OutboundCall::CallCallback (this=0xee3f840) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/outbound_call.cc:309
#47632 0x00007f983cabb763 in kudu::rpc::Connection::HandleCallResponse
(this=0xcd00700, transfer=...) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/unique_ptr.h:172
#47633 0x00007f983cabc215 in kudu::rpc::Connection::ReadHandler
(this=0xcd00700, watcher=..., revents=<optimized out>) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/unique_ptr.h:172
#47634
0x00007f983cdb3ffb in ev_invoke_pending (loop=0xcc99b00) at
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3155
#47635 0x00007f983ca97cc8 in kudu::rpc::ReactorThread::InvokePendingCb
(loop=0xcc99b00) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/reactor.cc:202
#47636 0x00007f983cdb73f7 in ev_run (flags=0, loop=0xcc99b00) at
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3555
#47637 ev_run (loop=0xcc99b00, flags=0) at
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3402
#47638 0x00007f983ca98bd9 in ev::loop_ref::run (flags=0, this=0xef75be0) at
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/installed/uninstrumented/include/ev++.h:211
#47639
kudu::rpc::ReactorThread::RunThread (this=0xef75bd8) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/reactor.cc:503
#47640 0x00007f983cc2d36c in std::function<void ()>::operator()() const
(this=0xec68358) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47641 kudu::Thread::SuperviseThread (arg=0xec68300) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/thread.cc:691
#47642 0x00007f983dfec609 in start_thread (arg=<optimized out>) at
pthread_create.c:477
#47643 0x00007f983c01c133 in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:95
{noformat}
It hits a SIGSEGV because the stack gets blown out.
Here are the steps to reproduce it from Impala:
{noformat}
/** 1. Create table **/
drop table if exists impala_crash;
create table if not exists impala_crash (
dt string,
col string,
primary key(dt)
)
partition by range(dt) (
partition values <= '00000000'
)
stored as kudu;
/** 2. alter and insert **/
alter table impala_crash drop if exists range partition value='20230301';
alter table impala_crash add if not exists range partition value='20230301';
insert into impala_crash values ('20230301','abc');
/* normal */
/** 3. Run the same queries again and impala daemon crashes **/
alter table impala_crash drop if exists range partition value='20230301';
alter table impala_crash add if not exists range partition value='20230301';
insert into impala_crash values ('20230301','abc');
{noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)