László Pintér created HIVE-26318:
------------------------------------

             Summary: Select on migrated iceberg table fails with NPE
                 Key: HIVE-26318
                 URL: https://issues.apache.org/jira/browse/HIVE-26318
             Project: Hive
          Issue Type: Bug
            Reporter: László Pintér
            Assignee: László Pintér


Enable vectorization:

{code:sql}
set hive.vectorized.execution.enabled=true;
{code}

Create a Hive table with the following schema:
{code:sql}
CREATE EXTERNAL TABLE tbl_complex (
a int, 
arrayofprimitives array<string>, 
arrayofarrays array<array<string>>,
arrayofmaps array<map<string, string>>,
arrayofstructs array<struct<something:string, someone:string, 
somewhere:string>>,
mapofprimitives map<string, string>,
mapofarrays map<string, array<string>>,
mapofmaps map<string, map<string, string>>,
mapofstructs map<string, struct<something:string, someone:string, 
somewhere:string>>,
structofprimitives struct<something:string, somewhere:string>, 
structofarrays struct<names:array<string>, birthdays:array<string>>, 
structofmaps struct<map1:map<string, string>, map2:map<string, string>>
) STORED AS PARQUET;
{code}

Insert some data:
{code:sql}
INSERT INTO tbl_complex VALUES (
        1, 
        array('a','b','c'), 
        array(array('a'), array('b', 'c')), 
        array(map('a','b'), map('e','f')), 
        array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), 
        named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')), 
        map('a', 'b'), 
        map('a', array('b','c')), 
        map('a', map('b','c')), 
        map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 
'd')), 
        named_struct('something', 'a', 'somewhere', 'b'), 
        named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 
'e')), 
        named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')) 
 )
{code}

Migrate the table to Iceberg:

{code:sql}
ALTER TABLE tbl_complex SET TBLPROPERTIES 
('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler');
{code}

Run a simple query: 

{code:sql}
SELECT * FROM tbl_complex ORDER BY a;
{code}

It will fail with:

{code:txt}
TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : 
attempt_1655110825475_0001_3_00_000000_1:java.lang.RuntimeException: 
java.lang.RuntimeException: java.io.IOException: java.lang.NullPointerException
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276)
        at 
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39)
        at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
        at 
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: java.io.IOException: 
java.lang.NullPointerException
        at 
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:200)
        at 
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:139)
        at 
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:105)
        at 
org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:164)
        at 
org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83)
        at 
org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:706)
        at 
org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:665)
        at 
org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150)
        at 
org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114)
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:520)
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:173)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:292)
        ... 16 more
Caused by: java.io.IOException: java.lang.NullPointerException
        at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
        at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
        at 
org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:458)
        at 
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:197)
        ... 27 more
Caused by: java.lang.NullPointerException
        at 
org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:101)
        at 
org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:38)
        at 
org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:52)
        at 
org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155)
        at 
org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:83)
        at 
org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155)
        at 
org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitFields(TypeWithSchemaVisitor.java:169)
        at 
org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:47)
        at 
org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.parquetRecordReader(HiveVectorizedReader.java:203)
        at 
org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.reader(HiveVectorizedReader.java:138)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.apache.iceberg.common.DynMethods$UnboundMethod.invokeChecked(DynMethods.java:65)
        at 
org.apache.iceberg.common.DynMethods$UnboundMethod.invoke(DynMethods.java:77)
        at 
org.apache.iceberg.common.DynMethods$StaticMethod.invoke(DynMethods.java:196)
        at 
org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.newParquetIterable(IcebergInputFormat.java:417)
        at 
org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.openTask(IcebergInputFormat.java:336)
        at 
org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.open(IcebergInputFormat.java:353)
        at 
org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.nextTask(IcebergInputFormat.java:263)
        at 
org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.initialize(IcebergInputFormat.java:259)
        at 
org.apache.iceberg.mr.mapred.AbstractMapredIcebergRecordReader.<init>(AbstractMapredIcebergRecordReader.java:40)
        at 
org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.<init>(HiveIcebergVectorizedRecordReader.java:41)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
        at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
        at 
org.apache.iceberg.common.DynConstructors$Ctor.newInstanceChecked(DynConstructors.java:60)
        at 
org.apache.iceberg.common.DynConstructors$Ctor.newInstance(DynConstructors.java:73)
        at 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat.getRecordReader(HiveIcebergInputFormat.java:163)
        at 
org.apache.hadoop.hive.ql.io.RecordReaderWrapper.create(RecordReaderWrapper.java:72)
        at 
org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:449)
        ... 28 more
{code}







--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to