Ashwani Raina created KUDU-3544:
-----------------------------------
Summary: CLONE - Investigate Kudu client behavior for read/scan
when metacache tablet entry becomes invalid
Key: KUDU-3544
URL: https://issues.apache.org/jira/browse/KUDU-3544
Project: Kudu
Issue Type: Bug
Components: client
Affects Versions: 1.17.0
Reporter: Ashwani Raina
Assignee: Ashwani Raina
This Jira ticket has been raised to track and investigate whether there is a
need to fix the known Impala crash issue (due to infinite recursion) in the
case of read/scan operations.
For more details, refer to [https://gerrit.cloudera.org/#/c/20270/8//COMMIT_MSG]
and https://issues.apache.org/jira/browse/KUDU-3461
The change linked above is the fix for handling such a scenario when the
Impala shell is used to write to a tablet that has a stale entry in the client
metacache.
The same investigation needs to be done for scan operations and, if required,
the fix should be made as part of this Jira.
In an Impala cluster, we ran into a scenario that causes Impala to crash with a
SIGSEGV. When reproducing while running in gdb, we see the stack get blown out
with this recursion:
{noformat}
#0 0x00007f983e031a1c in clock_gettime ()
#1 0x00007f983bfda0b5 in __GI___clock_gettime (clock_id=clock_id@entry=1,
tp=0x7f967bd8b070) at ../sysdeps/unix/sysv/linux/clock_gettime.c:38
#2 0x00007f983c9f8e48 in kudu::Stopwatch::GetTimes (times=0x7f967bd8b1b0,
this=<optimized out>, this=<optimized out>) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:294
#3 0x00007f983ca09829 in kudu::Stopwatch::stop (this=0x7f967bd8b320) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:218
#4 kudu::Stopwatch::stop (this=0x7f967bd8b320) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:213
#5 kudu::sw_internal::LogTiming::Print (max_expected_millis=50,
this=0x7f967bd8b320) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:359
#6 kudu::sw_internal::LogTiming::~LogTiming (this=0x7f967bd8b320,
__in_chrg=<optimized out>) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:329
#7 0x00007f983c9fe32c in
kudu::client::internal::MetaCache::LookupEntryByKeyFastPath (this=<optimized
out>, table=<optimized out>, partition_key=..., entry=0x7f967bd8b4c0) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/locks.h:99
#8 0x00007f983c9fe656 in kudu::client::internal::MetaCache::DoFastPathLookup
(this=0xde431e0, table=0xf899300, partition_key=0x7f967bd8b700,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1243
#9 0x00007f983ca05731 in
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1405
#10 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#11 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8b8c0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#12
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#13 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#14 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8bad0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#15
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#16 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#17 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8bce0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#18
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#19 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#20 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8bef0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#21
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#22 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#23 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8c100) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#24
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#25 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#26 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967bd8c310) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#27
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
... continues ...
#47617 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#47618 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967c589290) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47619
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#47620 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
--Type <RET> for more, q to quit, c to continue without paging--
#47621 0x00007f983ca0575f in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=...,
this=0x7f967c5894a0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47622
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable
const*, kudu::PartitionKey, kudu::MonoTime const&,
kudu::client::internal::MetaCache::LookupType,
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300,
partition_key=..., deadline=...,
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint,
remote_tablet=0x0, callback=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#47623 0x00007f983ca0598c in
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&,
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#47624 0x00007f983ca066a7 in std::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0xca50918)
at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47625
kudu::client::internal::LookupRpc::SendRpcCb (this=0xca50800, status=...) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:966
#47626 0x00007f983c9db65c in
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}::operator()()
const (this=<optimized out>, this=<optimized out>)
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/status.h:230
#47627
std::__invoke_impl<void,
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&>(std::__invoke_other,
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&)
(__f=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/invoke.h:60
#47628 std::__invoke_r<void,
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&>(void&&,
(kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&)...)
(__fn=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/invoke.h:110
#47629 std::_Function_handler<void (),
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}>::_M_invoke(std::_Any_data
const&) (__functor=...)
at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:291
#47630 0x00007f983cac860b in std::function<void ()>::operator()() const
(this=0xee3f9c0) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47631 kudu::rpc::OutboundCall::CallCallback (this=0xee3f840) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/outbound_call.cc:309
#47632 0x00007f983cabb763 in kudu::rpc::Connection::HandleCallResponse
(this=0xcd00700, transfer=...) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/unique_ptr.h:172
#47633 0x00007f983cabc215 in kudu::rpc::Connection::ReadHandler
(this=0xcd00700, watcher=..., revents=<optimized out>) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/unique_ptr.h:172
#47634
0x00007f983cdb3ffb in ev_invoke_pending (loop=0xcc99b00) at
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3155
#47635 0x00007f983ca97cc8 in kudu::rpc::ReactorThread::InvokePendingCb
(loop=0xcc99b00) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/reactor.cc:202
#47636 0x00007f983cdb73f7 in ev_run (flags=0, loop=0xcc99b00) at
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3555
#47637 ev_run (loop=0xcc99b00, flags=0) at
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3402
#47638 0x00007f983ca98bd9 in ev::loop_ref::run (flags=0, this=0xef75be0) at
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/installed/uninstrumented/include/ev++.h:211
#47639
kudu::rpc::ReactorThread::RunThread (this=0xef75bd8) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/reactor.cc:503
#47640 0x00007f983cc2d36c in std::function<void ()>::operator()() const
(this=0xec68358) at
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47641 kudu::Thread::SuperviseThread (arg=0xec68300) at
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/thread.cc:691
#47642 0x00007f983dfec609 in start_thread (arg=<optimized out>) at
pthread_create.c:477
#47643 0x00007f983c01c133 in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:95
{noformat}
It hits a SIGSEGV because the stack gets blown out.
Here are the steps to reproduce it from Impala:
{noformat}
/** 1. Create table **/
drop table if exists impala_crash;
create table if not exists impala_crash (
dt string,
col string,
primary key(dt)
)
partition by range(dt) (
partition values <= '00000000'
)
stored as kudu;
/** 2. alter and insert **/
alter table impala_crash drop if exists range partition value='20230301';
alter table impala_crash add if not exists range partition value='20230301';
insert into impala_crash values ('20230301','abc');
/* normal */
/** 3. Run the same queries again and impala daemon crashes **/
alter table impala_crash drop if exists range partition value='20230301';
alter table impala_crash add if not exists range partition value='20230301';
insert into impala_crash values ('20230301','abc');
{noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)