Zoram Thanga created IMPALA-6764:
------------------------------------

             Summary: Codegen'd UnionNode::MaterializeBatch() causes memory 
corruption crash of Impalad
                 Key: IMPALA-6764
                 URL: https://issues.apache.org/jira/browse/IMPALA-6764
             Project: IMPALA
          Issue Type: Bug
          Components: Backend
    Affects Versions: Impala 2.11.0
            Reporter: Zoram Thanga
            Assignee: Zoram Thanga


A CTAS statement involving UNION ALL with LEFT JOIN children is reliably 
crashing with a stack trace similar to the following:

{noformat}
(gdb) bt
#0  0x00007fb85fdf11f7 in raise () from ./debug-stuff/lib64/libc.so.6
#1  0x00007fb85fdf28e8 in abort () from ./debug-stuff/lib64/libc.so.6
#2  0x00007fb862106f35 in os::abort(bool) () from 
./debug-stuff/usr/java/jdk1.8.0_162/jre/lib/amd64/server/libjvm.so
#3  0x00007fb8622aaf33 in VMError::report_and_die() () from 
./debug-stuff/usr/java/jdk1.8.0_162/jre/lib/amd64/server/libjvm.so
#4  0x00007fb86210d22f in JVM_handle_linux_signal () from 
./debug-stuff/usr/java/jdk1.8.0_162/jre/lib/amd64/server/libjvm.so
#5  0x00007fb862103253 in signalHandler(int, siginfo*, void*) () from 
./debug-stuff/usr/java/jdk1.8.0_162/jre/lib/amd64/server/libjvm.so
#6  <signal handler called>
#7  0x00007fb85ff08706 in __memcpy_ssse3_back () from 
./debug-stuff/lib64/libc.so.6
#8  0x00007fb840700d73 in 
impala::UnionNode::MaterializeBatch(impala::RowBatch*, unsigned char**) [clone 
.588] ()
#9  0x0000000001001806 in impala::UnionNode::GetNextMaterialized 
(this=this@entry=0x8280000, state=state@entry=0x848ed00, 
row_batch=row_batch@entry=0xcef9950)
    at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/union-node.cc:228
#10 0x0000000001001b5c in impala::UnionNode::GetNext (this=0x8280000, 
state=0x848ed00, row_batch=0xcef9950, eos=0x7fb7fe9a987e)
    at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/union-node.cc:294
#11 0x0000000000b724d2 in impala::FragmentInstanceState::ExecInternal 
(this=this@entry=0x4c030c0)
    at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/runtime/fragment-instance-state.cc:270
#12 0x0000000000b74e42 in impala::FragmentInstanceState::Exec 
(this=this@entry=0x4c030c0) at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/runtime/fragment-instance-state.cc:89
#13 0x0000000000b64488 in impala::QueryState::ExecFInstance (this=0x8559200, 
fis=0x4c030c0) at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/runtime/query-state.cc:382
#14 0x0000000000d13613 in boost::function0<void>::operator() 
(this=0x7fb7fe9a9c60)
    at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:767
#15 impala::Thread::SuperviseThread(std::string const&, std::string const&, 
boost::function<void ()>, impala::Promise<long>*) (name=..., category=..., 
functor=..., 
    thread_started=0x7fb7f999f0f0) at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/util/thread.cc:352
#16 0x0000000000d13d54 in 
boost::_bi::list4<boost::_bi::value<std::basic_string<char, 
std::char_traits<char>, std::allocator<char> > >, 
boost::_bi::value<std::basic_string<char, std::char_traits<char>, 
std::allocator<char> > >, boost::_bi::value<boost::function<void()> >, 
boost::_bi::value<impala::Promise<long int>*> >::operator()<void (*)(const 
std::basic_string<char>&, const std::basic_string<char>&, 
boost::function<void()>, impala::Promise<long int>*), boost::_bi::list0> (
    f=@0x808bfb8: 0xd13460 <impala::Thread::SuperviseThread(std::string const&, 
std::string const&, boost::function<void ()>, impala::Promise<long>*)>, 
a=<synthetic pointer>, 
    this=0x808bfc0) at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/toolchain/boost-1.57.0-p3/include/boost/bind/bind.hpp:457
#17 boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, 
boost::function<void ()>, impala::Promise<long>*), 
boost::_bi::list4<boost::_bi::value<std::string>, 
boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, 
boost::_bi::value<impala::Promise<long>*> > >::operator()() (this=0x808bfb8)
    at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/toolchain/boost-1.57.0-p3/include/boost/bind/bind_template.hpp:20
#18 boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string 
const&, std::string const&, boost::function<void ()>, impala::Promise<long>*), 
boost::_bi::list4<boost::_bi::value<std::string>, 
boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, 
boost::_bi::value<impala::Promise<long>*> > > >::run() (this=0x808be00)
    at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/toolchain/boost-1.57.0-p3/include/boost/thread/detail/thread.hpp:116
#19 0x000000000128e8ea in thread_proxy ()
#20 0x00007fb860186e25 in start_thread () from 
./debug-stuff/lib64/libpthread.so.0
#21 0x00007fb85feb434d in clone () from ./debug-stuff/lib64/libc.so.6

{noformat}

The exact location and cause of the crash vary: sometimes we crash 
while accessing the source address of memcpy, other times while accessing 
the destination address. In this particular instance, we see:


{noformat}
   0x00007fb85ff086e4 <+6676>:  add    %rdx,%rsi
   0x00007fb85ff086e7 <+6679>:  add    %rdx,%rdi
   0x00007fb85ff086ea <+6682>:  lea    0x375df(%rip),%r11        # 
0x7fb85ff3fcd0
   0x00007fb85ff086f1 <+6689>:  movslq (%r11,%rdx,4),%rdx
   0x00007fb85ff086f5 <+6693>:  lea    (%r11,%rdx,1),%rdx
   0x00007fb85ff086f9 <+6697>:  jmpq   *%rdx
   0x00007fb85ff086fb <+6699>:  ud2    
   0x00007fb85ff086fd <+6701>:  nopl   (%rax)
   0x00007fb85ff08700 <+6704>:  add    %rdx,%rsi
   0x00007fb85ff08703 <+6707>:  add    %rdx,%rdi
=> 0x00007fb85ff08706 <+6710>:  movdqu -0x10(%rsi),%xmm0
   0x00007fb85ff0870b <+6715>:  lea    -0x10(%rdi),%r8
   0x00007fb85ff0870f <+6719>:  mov    %rdi,%r9
   0x00007fb85ff08712 <+6722>:  and    $0xfffffffffffffff0,%rdi
   0x00007fb85ff08716 <+6726>:  sub    %rdi,%r9
   0x00007fb85ff08719 <+6729>:  sub    %r9,%rsi
   0x00007fb85ff0871c <+6732>:  sub    %r9,%rdx
   0x00007fb85ff0871f <+6735>:  mov    0x26fb0a(%rip),%rcx        # 
0x7fb860178230 <__x86_64_shared_cache_size_half>
   0x00007fb85ff08726 <+6742>:  cmp    %rcx,%rdx

{noformat}

which suggests that the source address is invalid: the faulting instruction 
reads through %rsi, the source pointer of memcpy.

Setting DISABLE_CODEGEN=TRUE for the statement avoids the crash, which suggests 
that the generated code is somehow using invalid pointers.

The crash has been reproduced on RHEL/CentOS 6 and 7.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to