[
https://issues.apache.org/jira/browse/IMPALA-7714?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17177287#comment-17177287
]
Tim Armstrong commented on IMPALA-7714:
---------------------------------------
I looked at the stack trace and I don't really understand how we can hit this -
we are calling SetLastTopicVersionProcessed() with an entry from topic_deltas
and it appears to not be finding the topic in the subscribed topics map, but
topic_deltas was constructed by iterating over the subscribed topic maps and we
don't mutate the maps after they are constructed.
I guess one possibility, since we think this is happening as the process is
being torn down, is that the topic id is being compared against these strings
around the time they are getting destructed, so the lookup picks out the wrong
topic map:
{noformat}
const string Statestore::IMPALA_MEMBERSHIP_TOPIC("impala-membership");
const string Statestore::IMPALA_REQUEST_QUEUE_TOPIC("impala-request-queue");
{noformat}
> Statestore::Subscriber::SetLastTopicVersionProcessed() crashed in
> AtomicInt64::Store()
> --------------------------------------------------------------------------------------
>
> Key: IMPALA-7714
> URL: https://issues.apache.org/jira/browse/IMPALA-7714
> Project: IMPALA
> Issue Type: Bug
> Components: Distributed Exec
> Affects Versions: Impala 3.1.0
> Reporter: Michael Ho
> Assignee: Tim Armstrong
> Priority: Blocker
> Labels: broken-build
> Fix For: Impala 3.1.0
>
> Attachments: d67bcce1-ebf0-4927-991dc191-612d675d.dmp_dumped,
> dbfd9687-09a9-4ab0-dcd7128b-41a2c5b3.dmp.resolved
>
>
> When running one of the custom cluster tests,
> {{Statestore::Subscriber::SetLastTopicVersionProcessed()}} most likely
> crashed at the following line. It could be a race or something but I didn't
> have time to dig more into it.
> {noformat}
> void Statestore::Subscriber::SetLastTopicVersionProcessed(const TopicId& topic_id,
> TopicEntry::Version version) {
> // Safe to call concurrently for different topics because 'subscribed_topics' is not
> // modified.
> Topics* subscribed_topics = GetTopicsMapForId(topic_id);
> Topics::iterator topic_it = subscribed_topics->find(topic_id);
> DCHECK(topic_it != subscribed_topics->end());
> topic_it->second.last_version.Store(version); <<-----
> }
> {noformat}
> {noformat}
> Error Message
> Minidump generated:
> /data/jenkins/workspace/impala-asf-master-exhaustive-release/repos/Impala/logs/custom_cluster_tests/minidumps/statestored/336d9ca9-88dc-4360-6a5adf97-936db5c0.dmp
> Standard Error
> Operating system: Linux
> 0.0.0 Linux 3.10.0-693.5.2.el7.x86_64 #1 SMP Fri Oct 20
> 20:32:50 UTC 2017 x86_64
> CPU: amd64
> family 6 model 85 stepping 4
> 1 CPU
> GPU: UNKNOWN
> Crash reason: SIGSEGV
> Crash address: 0x28
> Process uptime: not available
> Thread 18 (crashed)
> 0
> impalad!impala::Statestore::Subscriber::SetLastTopicVersionProcessed(std::string
> const&, long) [atomicops-internals-x86.h : 300 + 0x0]
> rax = 0x0000000000000000 rdx = 0xc34174ed00000000
> rcx = 0x0022c65a25a97b5b rbx = 0x0000000004624e38
> rsi = 0x0000000000000070 rdi = 0x0000000004906a79
> rbp = 0x00007fd582d81320 rsp = 0x00007fd582d812e0
> r8 = 0x000000009e3779b9 r9 = 0x0000000000000000
> r10 = 0x0000000000000000 r11 = 0x00007fd58da31a90
> r12 = 0x83bfbe948682e9da r13 = 0x0000000004593e20
> r14 = 0x000000000000000f r15 = 0x000000000000000a
> rip = 0x0000000001022a65
> Found by: given as instruction pointer in context
> 1
> impalad!impala::Statestore::SendTopicUpdate(impala::Statestore::Subscriber*,
> impala::Statestore::UpdateKind, bool*) [statestore.cc : 704 + 0x12]
> rbx = 0x00007fd582d813d0 rbp = 0x00007fd582d81580
> rsp = 0x00007fd582d81330 r12 = 0x0000000004593e00
> r13 = 0x0000000004624dd0 r14 = 0x00007fd582d81508
> r15 = 0x00007fd582d814f0 rip = 0x00000000010283da
> Found by: call frame info
> 2
> impalad!impala::Statestore::DoSubscriberUpdate(impala::Statestore::UpdateKind,
> int, impala::Statestore::ScheduledSubscriberUpdate const&) [statestore.cc :
> 933 + 0x23]
> rbx = 0x0000000000000000 rbp = 0x00007fd582d817d0
> rsp = 0x00007fd582d81590 r12 = 0x00007fd582d81840
> r13 = 0x20c49ba5e353f7cf r14 = 0x000001667beb277f
> r15 = 0x00007ffc38ca1080 rip = 0x0000000001029064
> Found by: call frame info
> 3
> impalad!impala::ThreadPool<impala::Statestore::ScheduledSubscriberUpdate>::WorkerThread(int)
> [function_template.hpp : 767 + 0x10]
> rbx = 0x00007ffc38ca1500 rbp = 0x00007fd582d818a0
> rsp = 0x00007fd582d817e0 r12 = 0x00007ffc38ca1720
> r13 = 0x00007fd582d81830 r14 = 0x00007fd582d81840
> r15 = 0x0000000000000000 rip = 0x0000000001030bdc
> Found by: call frame info
> 4 impalad!impala::Thread::SuperviseThread(std::string const&, std::string
> const&, boost::function<void ()>, impala::ThreadDebugInfo const*,
> impala::Promise<long, (impala::PromiseMode)0>*) [function_template.hpp : 767
> + 0x7]
> rbx = 0x00007fd582d81980 rbp = 0x00007fd582d81bf0
> rsp = 0x00007fd582d818b0 r12 = 0x0000000000000000
> r13 = 0x0000000004658300 r14 = 0x00007fd58e6af6a0
> r15 = 0x00007ffc38ca07a0 rip = 0x00000000010fec72
> Found by: call frame info
> 5 impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void
> (*)(std::string const&, std::string const&, boost::function<void ()>,
> impala::ThreadDebugInfo const*, impala::Promise<long,
> (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>,
> boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >,
> boost::_bi::value<impala::ThreadDebugInfo*>,
> boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >
> >::run() [bind.hpp : 525 + 0x6]
> rbx = 0x00000000045f0600 rbp = 0x00007fd582d81c50
> rsp = 0x00007fd582d81c00 r12 = 0x00007fd582d81c10
> r13 = 0x00000000010fe980 r14 = 0x00007fd582d82700
> r15 = 0x00007fd58e6af6a0 rip = 0x00000000010ff7ba
> Found by: call frame info
> 6 impalad!thread_proxy + 0xda
> rbx = 0x0000000000000000 rbp = 0x0000000000000000
> rsp = 0x00007fd582d81c60 r12 = 0x0000000000000000
> r13 = 0x00007fd582d829c0 r14 = 0x00007fd582d82700
> r15 = 0x00007fd58e6af6a0 rip = 0x00000000016a06fa
> Found by: call frame info
> 7 libpthread-2.17.so + 0x7e25
> rbx = 0x0000000000000000 rbp = 0x0000000000000000
> rsp = 0x00007fd582d81ca0 r12 = 0x0000000000000000
> r13 = 0x00007fd582d829c0 r14 = 0x00007fd582d82700
> r15 = 0x00007fd58e6af6a0 rip = 0x00007fd58dc78e25
> Found by: call frame info
> 8 libc-2.17.so + 0xf834d
> rsp = 0x00007fd582d81d40 rip = 0x00007fd58d9a634d
> Found by: stack scanning
> Thread 0
> 0 libjvm.so + 0xa7aa0f
> rax = 0x00007fd5910e94c0 rdx = 0x00007fd590c049f0
> rcx = 0x0000000000000003 rbx = 0x00007fd591169f50
> rsi = 0x0000000000000000 rdi = 0x00007fd591169ee0
> rbp = 0x00007ffc38c9fbb0 rsp = 0x00007ffc38c9fba0
> r8 = 0x0000000000030878 r9 = 0x0000000003ddd000
> r10 = 0x00007ffc38c9efa0 r11 = 0x00000000028d1ab0
> r12 = 0x00000000045b4d10 r13 = 0x0000000000000000
> r14 = 0x00000000045b4d00 r15 = 0x00000000000007f1
> rip = 0x00007fd590c04a0f
> Found by: given as instruction pointer in context
> 1 libc-2.17.so + 0x38dda
> rsp = 0x00007ffc38c9fbc0 rip = 0x00007fd58d8e6dda
> Found by: stack scanning
> 2 libjvm.so + 0x220066
> rsp = 0x00007ffc38c9fc00 rip = 0x00007fd5903aa066
> Found by: stack scanning
> 3 libjvm.so + 0xafae51
> rsp = 0x00007ffc38c9fc20 rip = 0x00007fd590c84e51
> Found by: stack scanning
> 4 ld-2.17.so + 0xfb58
> rsp = 0x00007ffc38c9fc30 rip = 0x00007fd5915b0b58
> Found by: stack scanning
> 5 ld-2.17.so + 0xf9fd
> rsp = 0x00007ffc38c9fd50 rip = 0x00007fd5915b09fd
> Found by: stack scanning
> 6 libc-2.17.so + 0x38a69
> rsp = 0x00007ffc38c9fdc0 rip = 0x00007fd58d8e6a69
> Found by: stack scanning
> {noformat}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]