Tim Armstrong created IMPALA-6370:
-------------------------------------

             Summary: Crash when querying nested data in partitioned Parquet table
                 Key: IMPALA-6370
                 URL: https://issues.apache.org/jira/browse/IMPALA-6370
             Project: IMPALA
          Issue Type: Bug
          Components: Backend
    Affects Versions: Impala 2.11.0
            Reporter: Tim Armstrong
            Assignee: Tim Armstrong
            Priority: Critical


Header of hs_err file:
{noformat}
#
# A fatal error has been detected by the Java Runtime Environment:
#
#  SIGSEGV (0xb) at pc=0x00007f908452d782, pid=1223, tid=0x00007f8fc0d1e700
#
# JRE version: Java(TM) SE Runtime Environment (8.0_111-b14) (build 
1.8.0_111-b14)
# Java VM: Java HotSpot(TM) 64-Bit Server VM (25.111-b14 mixed mode linux-amd64 
compressed oops)
# Problematic frame:
# C  [libc.so.6+0x89782]  memcpy+0xd2
#
# Core dump written. Default location: /data/1/cores/core or core.1223
#
# If you would like to submit a bug report, please visit:
#   http://bugreport.java.com/bugreport/crash.jsp
#

---------------  T H R E A D  ---------------

Current thread is native thread

siginfo: si_signo: 11 (SIGSEGV), si_code: 1 (SEGV_MAPERR), si_addr: 
0x0000000000000000

Registers:
RAX=0x00000000276a9000, RBX=0x000000001ae72f00, RCX=0x0000000000000019, 
RDX=0x0000000000000341
RSP=0x00007f8fc0d1d058, RBP=0x00007f8fc0d1d150, RSI=0x0000000000000000, 
RDI=0x00000000276a9000
R8 =0x00007f8fc0d1d108, R9 =0x00007f8fc0d1d110, R10=0x0000000000000000, 
R11=0x00007f9084527e5d
R12=0x000000001b06e418, R13=0x000000002485d2e8, R14=0x0000000000000001, 
R15=0x000000002485d2a0
RIP=0x00007f908452d782, EFLAGS=0x0000000000010202, CSGSFS=0x0000000000000033, 
ERR=0x0000000000000004
  TRAPNO=0x000000000000000e

{noformat}

Backtrace from core:
{noformat}
(gdb) bt
#0  0x00007f90844d6495 in raise () from sysroot/lib64/libc.so.6
#1  0x00007f90844d7c75 in abort () from sysroot/lib64/libc.so.6
#2  0x00007f90867419d5 in os::abort(bool) ()
   from sysroot/usr/java/jdk1.8.0_111/jre/lib/amd64/server/libjvm.so
#3  0x00007f90868e4033 in VMError::report_and_die() ()
   from sysroot/usr/java/jdk1.8.0_111/jre/lib/amd64/server/libjvm.so
#4  0x00007f90867471ff in JVM_handle_linux_signal ()
   from sysroot/usr/java/jdk1.8.0_111/jre/lib/amd64/server/libjvm.so
#5  0x00007f908673d993 in signalHandler(int, siginfo*, void*) ()
   from sysroot/usr/java/jdk1.8.0_111/jre/lib/amd64/server/libjvm.so
#6  <signal handler called>
#7  0x00007f908452d782 in memcpy () from sysroot/lib64/libc.so.6
#8  0x0000000000dbb61b in InitTupleFromTemplate (this=<optimized out>, 
    tuple_byte_size=<optimized out>, tuple=<optimized out>, template_tuple=0x0)
    at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/hdfs-scanner.h:446
#9  InitTuple (tuple=<optimized out>, template_tuple=0x0, 
    desc=<optimized out>, this=0x1ae72f00)
    at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/hdfs-scanner.h:432
#10 impala::HdfsParquetScanner::AssembleCollection (this=<optimized out>, 
    column_readers=..., new_collection_rep_level=<optimized out>, 
    coll_value_builder=coll_value_builder@entry=0x7f8fc0d1d160)
    at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/hdfs-parquet-scanner.cc:1249
#11 0x0000000000de3ba1 in ReadSlot (pool=0x1b06e418, tuple=<optimized out>, 
    this=0x1ae72f00)
    at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/parquet-column-readers.cc:1280
#12 impala::CollectionColumnReader::ReadValue (this=0x1ae72f00, 
    pool=0x1b06e418, tuple=<optimized out>)
    at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/parquet-column-readers.cc:1256
#13 0x0000000040bcc682 in ?? ()
#14 0x0000000027b91c20 in ?? ()
#15 0x000000001b06e418 in ?? ()
#16 0x000000003f565000 in ?? ()
#17 0x0000000000000044 in ?? ()
#18 0x00007f8fc0d1d2e0 in ?? ()
#19 0x000000003f565000 in ?? ()
#20 0x0000000000000003 in ?? ()
#21 0x0000000000000400 in ?? ()
#22 0x000000001b06e3d0 in ?? ()
#23 0x000000001b06e418 in ?? ()
#24 0x0000000000de36b1 in impala::ParquetColumnReader::ReadValueBatch (
    this=0x2485d2a0, pool=0x2485d2e8, max_values=1086113410, tuple_size=1, 
    tuple_mem=0x40bcc000 "", num_values=0x1b06e418)
    at 
/usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/parquet-column-readers.cc:802
#25 0x0000000002de8c50 in vtable for parquet::Statistics ()
#26 0x0000000040bcc000 in ?? ()
#27 0x000000001ae72f00 in ?? ()
#28 0x000000001ae72f00 in ?? ()
#29 0x00000000ffffffff in ?? ()
#30 0x0000000000000000 in ?? ()
{noformat}

I have a repro:
{code}
impala-shell.sh -q "
DROP TABLE IF EXISTS complextypes_partitioned;
CREATE TABLE complextypes_partitioned (
  id BIGINT,
  int_array ARRAY<INT>,
  int_array_array ARRAY<ARRAY<INT>>,
  int_map MAP<STRING,INT>,
  int_map_array ARRAY<MAP<STRING,INT>>,
  nested_struct 
STRUCT<a:INT,b:ARRAY<INT>,c:STRUCT<d:ARRAY<ARRAY<STRUCT<e:INT,f:STRING>>>>,g:MAP<STRING,STRUCT<h:STRUCT<i:ARRAY<DOUBLE>>>>>
)
PARTITIONED BY (
  part int
)
STORED AS PARQUET;
alter table complextypes_partitioned add partition (part=1);
"

hdfs dfs -cp "/test-warehouse/complextypestbl_parquet/*" /test-warehouse/complextypes_partitioned/part=1

impala-shell.sh -q "
REFRESH complextypes_partitioned;
select id, part, a.item from complextypes_partitioned t, t.int_array a
"
{code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to