[
https://issues.apache.org/jira/browse/HIVE-18553?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16357248#comment-16357248
]
Hive QA commented on HIVE-18553:
--------------------------------
Here are the results of testing the latest attachment:
https://issues.apache.org/jira/secure/attachment/12909816/HIVE-18553.6.patch
{color:green}SUCCESS:{color} +1 due to 5 test(s) being added or modified.
{color:red}ERROR:{color} -1 due to 118 failed/errored test(s), 12954 tests
executed
*Failed tests:*
{noformat}
TestSparkCliDriver - did not produce a TEST-*.xml file (likely timed out)
(batchId=107)
[join_cond_pushdown_unqual4.q,union_remove_7.q,join13.q,join_vc.q,groupby_cube1.q,parquet_vectorization_2.q,bucket_map_join_spark2.q,sample3.q,smb_mapjoin_19.q,union23.q,union.q,union31.q,cbo_udf_udaf.q,ptf_decimal.q,bucketmapjoin2.q]
TestSparkCliDriver - did not produce a TEST-*.xml file (likely timed out)
(batchId=116)
[skewjoinopt3.q,skewjoinopt19.q,timestamp_comparison.q,join_merge_multi_expressions.q,union5.q,insert_into1.q,vectorization_4.q,parquet_vectorization_10.q,vector_left_outer_join.q,decimal_1_1.q,semijoin.q,skewjoinopt9.q,smb_mapjoin_3.q,stats10.q,rcfile_bigdata.q]
TestSparkCliDriver - did not produce a TEST-*.xml file (likely timed out)
(batchId=144)
[groupby2_noskew_multi_distinct.q,load_dyn_part12.q,scriptfile1.q,join15.q,auto_join17.q,subquery_multiinsert.q,join_hive_626.q,tez_join_tests.q,parquet_vectorization_16.q,auto_join21.q,join_view.q,join_cond_pushdown_4.q,vectorization_0.q,union_null.q,auto_join3.q]
org.apache.hadoop.hive.cli.TestAccumuloCliDriver.testCliDriver[accumulo_queries]
(batchId=240)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_complex_types_vectorization]
(batchId=73)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_map_type_vectorization]
(batchId=85)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_types_vectorization]
(batchId=14)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_0]
(batchId=16)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_10]
(batchId=23)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_11]
(batchId=38)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_12]
(batchId=23)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_13]
(batchId=52)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_14]
(batchId=39)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_15]
(batchId=87)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_16]
(batchId=82)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_17]
(batchId=29)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_2]
(batchId=3)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_5]
(batchId=71)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_6]
(batchId=42)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_7]
(batchId=85)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_8]
(batchId=14)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_9]
(batchId=30)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_not]
(batchId=79)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_part]
(batchId=73)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_part_project]
(batchId=36)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[parquet_vectorization_part_varchar]
(batchId=73)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ppd_join5] (batchId=36)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[row__id] (batchId=79)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[schema_evol_par_vec_table_non_dictionary_encoding]
(batchId=50)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vectorized_parquet_types]
(batchId=67)
org.apache.hadoop.hive.cli.TestEncryptedHDFSCliDriver.testCliDriver[encryption_move_tbl]
(batchId=175)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[llap_smb]
(batchId=152)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_complex_types_vectorization]
(batchId=151)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_map_type_vectorization]
(batchId=152)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_types_vectorization]
(batchId=148)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_values_orig_table_use_metadata]
(batchId=167)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[llap_acid]
(batchId=171)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[llap_acid_fast]
(batchId=162)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[resourceplan]
(batchId=164)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[sysdb]
(batchId=161)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_partitioned_date_time]
(batchId=170)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorization_input_format_excludes]
(batchId=163)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_parquet]
(batchId=165)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_0]
(batchId=113)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_11]
(batchId=123)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_12]
(batchId=117)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_13]
(batchId=130)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_14]
(batchId=123)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_15]
(batchId=146)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_17]
(batchId=119)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_5]
(batchId=138)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_6]
(batchId=125)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_7]
(batchId=145)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_8]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_9]
(batchId=120)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_decimal_date]
(batchId=120)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_limit]
(batchId=117)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_not]
(batchId=142)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_part]
(batchId=139)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_part_project]
(batchId=122)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[parquet_vectorization_part_varchar]
(batchId=139)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[ppd_join5]
(batchId=122)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[sample1]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[skewjoin]
(batchId=117)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[smb_mapjoin_16]
(batchId=125)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[smb_mapjoin_18]
(batchId=120)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[smb_mapjoin_22]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats15]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats1]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats6]
(batchId=119)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats_only_null]
(batchId=119)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_exists]
(batchId=125)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_nested_subquery]
(batchId=120)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_notin]
(batchId=138)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_views]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[table_access_keys_stats]
(batchId=139)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[temp_table]
(batchId=146)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[timestamp_3]
(batchId=123)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[timestamp_udf]
(batchId=142)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[transform1]
(batchId=123)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union16]
(batchId=146)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union17]
(batchId=139)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union22]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union30]
(batchId=142)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union32]
(batchId=119)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union6]
(batchId=120)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union9]
(batchId=125)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_10]
(batchId=117)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_13]
(batchId=146)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_20]
(batchId=145)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_22]
(batchId=142)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_6_subq]
(batchId=123)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_9]
(batchId=139)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_view]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vector_count_distinct]
(batchId=117)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vector_orderby_5]
(batchId=125)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vectorization_14]
(batchId=112)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vectorization_3]
(batchId=142)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vectorization_input_format_excludes]
(batchId=127)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vectorized_timestamp_funcs]
(batchId=120)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[windowing]
(batchId=130)
org.apache.hadoop.hive.cli.control.TestDanglingQOuts.checkDanglingQOut
(batchId=221)
org.apache.hadoop.hive.metastore.client.TestTablesGetExists.testGetAllTablesCaseInsensitive[Embedded]
(batchId=206)
org.apache.hadoop.hive.ql.exec.TestOperators.testNoConditionalTaskSizeForLlap
(batchId=282)
org.apache.hadoop.hive.ql.io.TestDruidRecordWriter.testWrite (batchId=256)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedDictionaryEncodingColumnReader.testBinaryRead
(batchId=269)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testListReadEqualOneBatch
(batchId=270)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testListReadLessOneBatch
(batchId=270)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testListReadMoreOneBatch
(batchId=270)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testUnrepeatedStringWithoutNullListRead
(batchId=270)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedListColumnReader.testVectorizedRowBatchSizeChange
(batchId=270)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedMapColumnReader.testMapReadEqualOneBatch
(batchId=271)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedMapColumnReader.testMapReadLessOneBatch
(batchId=271)
org.apache.hadoop.hive.ql.io.parquet.TestVectorizedMapColumnReader.testMapReadMoreOneBatch
(batchId=271)
org.apache.hive.beeline.cli.TestHiveCli.testNoErrorDB (batchId=188)
org.apache.hive.jdbc.TestSSL.testConnectionMismatch (batchId=234)
org.apache.hive.jdbc.TestSSL.testConnectionWrongCertCN (batchId=234)
org.apache.hive.jdbc.TestSSL.testMetastoreConnectionWrongCertCN (batchId=234)
{noformat}
Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/9096/testReport
Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/9096/console
Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-9096/
Messages:
{noformat}
Executing org.apache.hive.ptest.execution.TestCheckPhase
Executing org.apache.hive.ptest.execution.PrepPhase
Executing org.apache.hive.ptest.execution.YetusPhase
Executing org.apache.hive.ptest.execution.ExecutionPhase
Executing org.apache.hive.ptest.execution.ReportingPhase
Tests exited with: TestsFailedException: 118 tests failed
{noformat}
This message is automatically generated.
ATTACHMENT ID: 12909816 - PreCommit-HIVE-Build
> VectorizedParquetReader fails after adding a new column to table
> ----------------------------------------------------------------
>
> Key: HIVE-18553
> URL: https://issues.apache.org/jira/browse/HIVE-18553
> Project: Hive
> Issue Type: Sub-task
> Affects Versions: 3.0.0, 2.4.0, 2.3.2
> Reporter: Vihang Karajgaonkar
> Assignee: Ferdinand Xu
> Priority: Major
> Attachments: HIVE-18553.2.patch, HIVE-18553.3.patch,
> HIVE-18553.4.patch, HIVE-18553.5.patch, HIVE-18553.6.patch, HIVE-18553.patch,
> test_result_based_on_HIVE-18553.xlsx
>
>
> VectorizedParquetReader throws an exception when trying to read from a
> parquet table to which new columns have been added. Steps to reproduce below:
> {code}
> 0: jdbc:hive2://localhost:10000/default> desc test_p;
> +-----------+------------+----------+
> | col_name | data_type | comment |
> +-----------+------------+----------+
> | t1 | tinyint | |
> | t2 | tinyint | |
> | i1 | int | |
> | i2 | int | |
> +-----------+------------+----------+
> 0: jdbc:hive2://localhost:10000/default> set hive.fetch.task.conversion=none;
> 0: jdbc:hive2://localhost:10000/default> set
> hive.vectorized.execution.enabled=true;
> 0: jdbc:hive2://localhost:10000/default> alter table test_p add columns (ts
> timestamp);
> 0: jdbc:hive2://localhost:10000/default> select * from test_p;
> Error: Error while processing statement: FAILED: Execution Error, return code
> 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask (state=08S01,code=2)
> {code}
> The following exception is seen in the logs:
> {code}
> Caused by: java.lang.IllegalArgumentException: [ts] BINARY is not in the
> store: [[i1] INT32, [i2] INT32, [t1] INT32, [t2] INT32] 3
> at
> org.apache.parquet.hadoop.ColumnChunkPageReadStore.getPageReader(ColumnChunkPageReadStore.java:160)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.buildVectorizedParquetReader(VectorizedParquetRecordReader.java:479)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:432)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:393)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.next(VectorizedParquetRecordReader.java:345)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.next(VectorizedParquetRecordReader.java:88)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:360)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.doNext(CombineHiveRecordReader.java:167)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.doNext(CombineHiveRecordReader.java:52)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:116)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.doNextWithExceptionHandler(HadoopShimsSecure.java:229)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.next(HadoopShimsSecure.java:142)
> ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT]
> at
> org.apache.hadoop.mapred.MapTask$TrackedRecordReader.moveToNext(MapTask.java:199)
> ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?]
> at
> org.apache.hadoop.mapred.MapTask$TrackedRecordReader.next(MapTask.java:185)
> ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?]
> at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:52)
> ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?]
> at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:459)
> ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?]
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
> ~[hadoop-mapreduce-client-core-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?]
> at
> org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271)
> ~[hadoop-mapreduce-client-common-3.0.0-alpha3-cdh6.x-SNAPSHOT.jar:?]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> ~[?:1.8.0_121]
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> ~[?:1.8.0_121]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> ~[?:1.8.0_121]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> ~[?:1.8.0_121]
> at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_121]
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)