László Pintér created HIVE-26318: ------------------------------------ Summary: Select on migrated iceberg table fails with NPE Key: HIVE-26318 URL: https://issues.apache.org/jira/browse/HIVE-26318 Project: Hive Issue Type: Bug Reporter: László Pintér Assignee: László Pintér
Enable vectorization: {code:sql} set hive.vectorized.execution.enabled=true; {code} Create a Hive table with the following schema: {code:sql} CREATE EXTERNAL TABLE tbl_complex ( a int, arrayofprimitives array<string>, arrayofarrays array<array<string>>, arrayofmaps array<map<string, string>>, arrayofstructs array<struct<something:string, someone:string, somewhere:string>>, mapofprimitives map<string, string>, mapofarrays map<string, array<string>>, mapofmaps map<string, map<string, string>>, mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>, structofprimitives struct<something:string, somewhere:string>, structofarrays struct<names:array<string>, birthdays:array<string>>, structofmaps struct<map1:map<string, string>, map2:map<string, string>> ) STORED AS PARQUET; {code} Insert some data: {code:sql} INSERT INTO tbl_complex VALUES ( 1, array('a','b','c'), array(array('a'), array('b', 'c')), array(map('a','b'), map('e','f')), array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')), map('a', 'b'), map('a', array('b','c')), map('a', map('b','c')), map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')), named_struct('something', 'a', 'somewhere', 'b'), named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')), named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')) ); {code} Migrate the table to Iceberg: {code:sql} ALTER TABLE tbl_complex SET TBLPROPERTIES ('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'); {code} Run a simple query: {code:sql} SELECT * FROM tbl_complex ORDER BY a; {code} The query fails with the following error: {code:txt} TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_1655110825475_0001_3_00_000000_1:java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: java.lang.NullPointerException at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108) at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41) at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.RuntimeException: java.io.IOException: java.lang.NullPointerException at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:200) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:139) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:105) at 
org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:164) at org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83) at org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:706) at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:665) at org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:520) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:173) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:292) ... 16 more Caused by: java.io.IOException: java.lang.NullPointerException at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:458) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:197) ... 
27 more Caused by: java.lang.NullPointerException at org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:101) at org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:38) at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:52) at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155) at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:83) at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155) at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitFields(TypeWithSchemaVisitor.java:169) at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:47) at org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.parquetRecordReader(HiveVectorizedReader.java:203) at org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.reader(HiveVectorizedReader.java:138) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.iceberg.common.DynMethods$UnboundMethod.invokeChecked(DynMethods.java:65) at org.apache.iceberg.common.DynMethods$UnboundMethod.invoke(DynMethods.java:77) at org.apache.iceberg.common.DynMethods$StaticMethod.invoke(DynMethods.java:196) at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.newParquetIterable(IcebergInputFormat.java:417) at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.openTask(IcebergInputFormat.java:336) at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.open(IcebergInputFormat.java:353) at 
org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.nextTask(IcebergInputFormat.java:263) at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.initialize(IcebergInputFormat.java:259) at org.apache.iceberg.mr.mapred.AbstractMapredIcebergRecordReader.<init>(AbstractMapredIcebergRecordReader.java:40) at org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.<init>(HiveIcebergVectorizedRecordReader.java:41) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at org.apache.iceberg.common.DynConstructors$Ctor.newInstanceChecked(DynConstructors.java:60) at org.apache.iceberg.common.DynConstructors$Ctor.newInstance(DynConstructors.java:73) at org.apache.iceberg.mr.hive.HiveIcebergInputFormat.getRecordReader(HiveIcebergInputFormat.java:163) at org.apache.hadoop.hive.ql.io.RecordReaderWrapper.create(RecordReaderWrapper.java:72) at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:449) ... 28 more {code} -- This message was sent by Atlassian Jira (v8.20.7#820007)