[ https://issues.apache.org/jira/browse/HIVE-18553?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16357248#comment-16357248 ]
Hive QA commented on HIVE-18553: -------------------------------- Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12909816/HIVE-18553.6.patch {color:green}SUCCESS:{color} +1 due to 5 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 118 failed/errored test(s), 12954 tests executed *Failed tests:* {noformat} TestSparkCliDriver - did not produce a TEST-*.xml file (likely timed out) (batchId=107) [join_cond_pushdown_unqual4.q,union_remove_7.q,join13.q,join_vc.q,groupby_cube1.q,parquet_vectorization_2.q,bucket_map_join_spark2.q,sample3.q,smb_mapjoin_19.q,union23.q,union.q,union31.q,cbo_udf_udaf.q,ptf_decimal.q,bucketmapjoin2.q] TestSparkCliDriver - did not produce a TEST-*.xml file (likely timed out) (batchId=116) [skewjoinopt3.q,skewjoinopt19.q,timestamp_comparison.q,join_merge_multi_expressions.q,union5.q,insert_into1.q,vectorization_4.q,parquet_vectorization_10.q,vector_left_outer_join.q,decimal_1_1.q,semijoin.q,skewjoinopt9.q,smb_mapjoin_3.q,stats10.q,rcfile_bigdata.q] TestSparkCliDriver - did not produce a TEST-*.xml file (likely timed out) (batchId=144) [groupby2_noskew_multi_distinct.q,load_dyn_part12.q,scriptfile1.q,join15.q,auto_join17.q,subquery_multiinsert.q,join_hive_626.q,tez_join_tests.q,parquet_vectorization_16.q,auto_join21.q,join_view.q,join_cond_pushdown_4.q,vectorization_0.q,union_null.q,auto_join3.q] org.apache.hadoop.hive.cli.TestAccumuloCliDriver.testCliDriver[accumulo_queries] (batchId=240) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_complex_types_vectorization] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_map_type_vectorization] (batchId=85) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_types_vectorization] (batchId=14) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_0] (batchId=16) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_10] (batchId=23) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_11] (batchId=38) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_12] (batchId=23) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_13] (batchId=52) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_14] (batchId=39) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_15] (batchId=87) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_16] (batchId=82) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_17] (batchId=29) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_2] (batchId=3) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_5] (batchId=71) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_6] (batchId=42) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_7] (batchId=85) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_8] (batchId=14) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_9] (batchId=30) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_not] (batchId=79) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_part] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_part_project] (batchId=36) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_part_varchar] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ppd_join5] (batchId=36) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[row__id] (batchId=79) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[schema_evol_par_vec_table_non_dictionary_encoding] (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vectorized_parquet_types] (batchId=67) org.apache.hadoop.hive.cli.TestEncryptedHDFSCliDriver.testCliDriver[encryption_move_tbl] (batchId=175) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[llap_smb] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_complex_types_vectorization] (batchId=151) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_map_type_vectorization] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_types_vectorization] (batchId=148) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_values_orig_table_use_metadata] (batchId=167) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[llap_acid] (batchId=171) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[llap_acid_fast] (batchId=162) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[resourceplan] (batchId=164) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[sysdb] (batchId=161) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_partitioned_date_time] (batchId=170) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorization_input_format_excludes] (batchId=163) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_parquet] (batchId=165) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_0] (batchId=113) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_11] (batchId=123) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_12] (batchId=117) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_13] (batchId=130) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_14] (batchId=123) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_15] (batchId=146) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_17] (batchId=119) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_5] (batchId=138) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_6] (batchId=125) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_7] (batchId=145) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_8] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_9] (batchId=120) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_decimal_date] (batchId=120) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_limit] (batchId=117) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_not] (batchId=142) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_part] (batchId=139) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_part_project] (batchId=122) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_part_varchar] (batchId=139) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[ppd_join5] (batchId=122) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[sample1] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[skewjoin] (batchId=117) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[smb_mapjoin_16] (batchId=125) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[smb_mapjoin_18] (batchId=120) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[smb_mapjoin_22] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats15] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats1] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats6] (batchId=119) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats_only_null] (batchId=119) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_exists] (batchId=125) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_nested_subquery] (batchId=120) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_notin] (batchId=138) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_views] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[table_access_keys_stats] (batchId=139) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[temp_table] (batchId=146) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[timestamp_3] (batchId=123) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[timestamp_udf] (batchId=142) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[transform1] (batchId=123) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union16] (batchId=146) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union17] (batchId=139) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union22] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union30] (batchId=142) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union32] (batchId=119) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union6] (batchId=120) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union9] (batchId=125) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_10] (batchId=117) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_13] (batchId=146) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_20] (batchId=145) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_22] (batchId=142) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_6_subq] (batchId=123) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_9] (batchId=139) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_view] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vector_count_distinct] (batchId=117) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vector_orderby_5] (batchId=125) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vectorization_14] (batchId=112) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vectorization_3] (batchId=142) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vectorization_input_format_excludes] (batchId=127) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vectorized_timestamp_funcs] (batchId=120) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[windowing] (batchId=130) org.apache.hadoop.hive.cli.control.TestDanglingQOuts.checkDanglingQOut (batchId=221) org.apache.hadoop.hive.metastore.client.TestTablesGetExists.testGetAllTablesCaseInsensitive[Embedded] (batchId=206) org.apache.hadoop.hive.ql.exec.TestOperators.testNoConditionalTaskSizeForLlap (batchId=282) org.apache.hadoop.hive.ql.io.TestDruidRecordWriter.testWrite (batchId=256) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedDictionaryEncodingColumnReader.testBinaryRead (batchId=269) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testListReadEqualOneBatch (batchId=270) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testListReadLessOneBatch (batchId=270) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testListReadMoreOneBatch (batchId=270) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testUnrepeatedStringWithoutNullListRead (batchId=270) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testVectorizedRowBatchSizeChange (batchId=270) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedMapColumnReader.testMapReadEqualOneBatch (batchId=271) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedMapColumnReader.testMapReadLessOneBatch (batchId=271) org.apache.hadoop.hive.ql.io.parquet.TestVectorizedMapColumnReader.testMapReadMoreOneBatch (batchId=271) org.apache.hive.beeline.cli.TestHiveCli.testNoErrorDB (batchId=188) org.apache.hive.jdbc.TestSSL.testConnectionMismatch (batchId=234) org.apache.hive.jdbc.TestSSL.testConnectionWrongCertCN (batchId=234) org.apache.hive.jdbc.TestSSL.testMetastoreConnectionWrongCertCN (batchId=234) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/9096/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/9096/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-9096/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.YetusPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 118 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12909816 - PreCommit-HIVE-Build > VectorizedParquetReader fails after adding a new column to table > ---------------------------------------------------------------- > > Key: HIVE-18553 > URL: https://issues.apache.org/jira/browse/HIVE-18553 > Project: Hive > Issue Type: Sub-task > Affects Versions: 3.0.0, 2.4.0, 2.3.2 > Reporter: Vihang Karajgaonkar > Assignee: Ferdinand Xu > Priority: Major > Attachments: HIVE-18553.2.patch, HIVE-18553.3.patch, > HIVE-18553.4.patch, HIVE-18553.5.patch, HIVE-18553.6.patch, HIVE-18553.patch, > test_result_based_on_HIVE-18553.xlsx > > > VectorizedParquetReader throws an exception when trying to reading from a > parquet table on which new columns are added. Steps to reproduce below: > {code} > 0: jdbc:hive2://localhost:10000/default> desc test_p; > +-----------+------------+----------+ > | col_name | data_type | comment | > +-----------+------------+----------+ > | t1 | tinyint | | > | t2 | tinyint | | > | i1 | int | | > | i2 | int | | > +-----------+------------+----------+ > 0: jdbc:hive2://localhost:10000/default> set hive.fetch.task.conversion=none; > 0: jdbc:hive2://localhost:10000/default> set > hive.vectorized.execution.enabled=true; > 0: jdbc:hive2://localhost:10000/default> alter table test_p add columns (ts > timestamp); > 0: jdbc:hive2://localhost:10000/default> select * from test_p; > Error: Error while processing statement: FAILED: Execution Error, return code > 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask (state=08S01,code=2) > {code} > Following exception is seen in the logs > {code} > Caused by: java.lang.IllegalArgumentException: [ts] BINARY is not in the > store: [[i1] INT32, [i2] INT32, [t1] INT32, [t2] INT32] 3 > at > org.apache.parquet.hadoop.ColumnChunkPageReadStore.getPageReader(ColumnChunkPageReadStore.java:160) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.buildVectorizedParquetReader(VectorizedParquetRecordReader.java:479) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:432) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:393) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.next(VectorizedParquetRecordReader.java:345) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.next(VectorizedParquetRecordReader.java:88) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:360) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.doNext(CombineHiveRecordReader.java:167) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.doNext(CombineHiveRecordReader.java:52) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:116) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.doNextWithExceptionHandler(HadoopShimsSecure.java:229) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.next(HadoopShimsSecure.java:142) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.mapred.MapTask$TrackedRecordReader.moveToNext(MapTask.java:199) > ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?] > at > org.apache.hadoop.mapred.MapTask$TrackedRecordReader.next(MapTask.java:185) > ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?] > at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:52) > ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?] > at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:459) > ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?] > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) > ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?] > at > org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271) > ~[hadoop-mapreduce-client-common-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?] > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > ~[?:1.8.0_121] > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ~[?:1.8.0_121] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > ~[?:1.8.0_121] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > ~[?:1.8.0_121] > at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_121] > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)