This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 658a9bcccc8 HIVE-28224: Upgrade Orc version in Hive to 1.9.4 (#5218). (Dmitriy Fingerman, reviewed by Ayush Saxena) 658a9bcccc8 is described below commit 658a9bcccc865dc2bb33e85e3af8edd0ffcb5beb Author: Dmitriy Fingerman <dmitriy.finger...@gmail.com> AuthorDate: Thu Aug 8 10:35:33 2024 -0400 HIVE-28224: Upgrade Orc version in Hive to 1.9.4 (#5218). (Dmitriy Fingerman, reviewed by Ayush Saxena) --- .../ql/txn/compactor/TestCrudCompactorOnTez.java | 42 +++++++++++----------- pom.xml | 2 +- .../io/orc/encoded/EncodedTreeReaderFactory.java | 1 + .../acid_bloom_filter_orc_file_dump.q | 1 + .../materialized_view_create_rewrite_10.q | 2 +- .../queries/clientpositive/orc_llap_nonvector.q | 1 + ql/src/test/queries/clientpositive/orc_merge12.q | 1 + .../llap/acid_bloom_filter_orc_file_dump.q.out | 4 +-- .../llap/materialized_view_create_rewrite_10.q.out | 8 ++--- .../clientpositive/llap/orc_llap_nonvector.q.out | 24 ++++++------- .../results/clientpositive/tez/orc_merge12.q.out | 2 +- standalone-metastore/pom.xml | 2 +- 12 files changed, 47 insertions(+), 43 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java index a6b327ae174..148a7cb3225 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java @@ -178,22 +178,22 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest { "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":4}\t13\t13", }, { - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t6\t4", + "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t4\t4", "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4", - 
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t2\t4", + "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t2\t4", + "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t5\t4", }, { - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t2\t3", + "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t6\t4", + "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t5\t3", "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t3\t3", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t6\t3", + "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t2\t3", "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t4\t3", }, { - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t5\t3", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t6\t2", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t5\t2", + "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t6\t3", + "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t5\t2", + "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t6\t2", }, }; verifyRebalance(testDataProvider, tableName, null, expectedBuckets, @@ -234,22 +234,22 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest { }, { "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":5}\t12\t12", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t6\t4", + "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t4\t4", "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t2\t4", + "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t2\t4", + "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t5\t4", }, { - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t2\t3", + "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t6\t4", + 
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t5\t3", "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t3\t3", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t6\t3", + "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t2\t3", "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t4\t3", }, { - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t5\t3", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t6\t2", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t5\t2", + "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t6\t3", + "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t5\t2", + "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t6\t2", }, }; verifyRebalance(testDataProvider, tableName, null, expectedBuckets, @@ -525,6 +525,8 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t6\t2", "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t6\t3", "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t6\t4", + "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t5\t2", + "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t5\t3", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t12\t12", "{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t13\t13", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t14\t14", @@ -533,9 +535,7 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest { "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17", }, { - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t5\t2", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t5\t3", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t2\t4", + "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t4", }, { "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t3\t3", diff --git a/pom.xml b/pom.xml index da80147e90c..852fb234b78 100644 --- a/pom.xml +++ b/pom.xml @@ -182,7 +182,7 @@ <postgres.version>42.7.3</postgres.version> 
<oracle.version>21.3.0.0</oracle.version> <opencsv.version>5.9</opencsv.version> - <orc.version>1.8.5</orc.version> + <orc.version>1.9.4</orc.version> <mockito-core.version>3.4.4</mockito-core.version> <mockito-inline.version>4.11.0</mockito-inline.version> <mina.version>2.0.0-M5</mina.version> diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java index 46e5a3c3be8..869528fc889 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java @@ -1078,6 +1078,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { FilterContext filterContext, ReadPhase readPhase) throws IOException { if (vectors == null) { super.nextVector(previousVector, isNull, batchSize, filterContext, readPhase); + previousVector.isRepeating = false; return; } vectors.get(vectorIndex++).shallowCopyTo(previousVector); diff --git a/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q b/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q index d5bb1c65b7f..37a80098a44 100644 --- a/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q +++ b/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q @@ -1,4 +1,5 @@ --! qt:replace:/(File Version:)(.+)/$1#Masked#/ +--! 
qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/ -- SORT_QUERY_RESULTS SET hive.vectorized.execution.enabled=FALSE; SET hive.mapred.mode=nonstrict; diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q index 417872c2727..ff99561a8cd 100644 --- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q +++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q @@ -1,6 +1,6 @@ -- Try to run incremental on a non-transactional MV in presence of delete operations -- Compiler should fall back to full rebuild. - +--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; diff --git a/ql/src/test/queries/clientpositive/orc_llap_nonvector.q b/ql/src/test/queries/clientpositive/orc_llap_nonvector.q index 95a0384ccaf..31a3d63c58a 100644 --- a/ql/src/test/queries/clientpositive/orc_llap_nonvector.q +++ b/ql/src/test/queries/clientpositive/orc_llap_nonvector.q @@ -1,4 +1,5 @@ --! qt:dataset:alltypesorc +--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; diff --git a/ql/src/test/queries/clientpositive/orc_merge12.q b/ql/src/test/queries/clientpositive/orc_merge12.q index 348c3a8f5e0..d1487d22098 100644 --- a/ql/src/test/queries/clientpositive/orc_merge12.q +++ b/ql/src/test/queries/clientpositive/orc_merge12.q @@ -1,4 +1,5 @@ --! qt:replace:/(File Version:)(.+)/$1#Masked#/ +--! 
qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; CREATE TABLE `alltypesorc3xcols`( diff --git a/ql/src/test/results/clientpositive/llap/acid_bloom_filter_orc_file_dump.q.out b/ql/src/test/results/clientpositive/llap/acid_bloom_filter_orc_file_dump.q.out index 96327ed0400..d68bfe05243 100644 --- a/ql/src/test/results/clientpositive/llap/acid_bloom_filter_orc_file_dump.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_bloom_filter_orc_file_dump.q.out @@ -182,7 +182,7 @@ Stripes: Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 -File length: 1217 bytes +File length: #Masked# bytes Padding length: 0 bytes Padding ratio: 0% @@ -299,7 +299,7 @@ Stripes: Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 -File length: 1211 bytes +File length: #Masked# bytes Padding length: 0 bytes Padding ratio: 0% diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out index 86cce82f092..5ff07b9fc54 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out @@ -79,17 +79,17 @@ STAGE PLANS: TableScan alias: t1 filterExpr: (b = 1) (type: boolean) - Statistics: Num rows: 69 Data size: 13710 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 70 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (b = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), 1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out index 669df679d82..8ef0f0dde97 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out @@ -94,17 +94,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector - Statistics: Num rows: 12288 Data size: 2942394 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), rdm (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 100 Data size: 24360 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,17 +249,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector - Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1025 - Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1354,17 +1354,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector_2 - Statistics: Num rows: 12288 Data size: 4468250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/orc_merge12.q.out b/ql/src/test/results/clientpositive/tez/orc_merge12.q.out index 73526e4e93e..3bda3727910 100644 --- a/ql/src/test/results/clientpositive/tez/orc_merge12.q.out +++ b/ql/src/test/results/clientpositive/tez/orc_merge12.q.out @@ -814,7 +814,7 @@ Stripes: Entry 0: count: 6889 hasNull: true true: 3402 positions: 0,0,0,0,0,0,0,0 Entry 1: count: 2284 hasNull: true true: 581 positions: 0,168,8,0,0,520,97,1 -File length: 3004637 bytes +File length: #Masked# bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml index 04deae9c166..c604d5020cf 100644 --- a/standalone-metastore/pom.xml +++ b/standalone-metastore/pom.xml @@ -91,7 +91,7 @@ <libthrift.version>0.16.0</libthrift.version> <log4j2.version>2.18.0</log4j2.version> <mockito-core.version>3.4.4</mockito-core.version> - <orc.version>1.8.5</orc.version> + <orc.version>1.9.4</orc.version> <protobuf.version>3.24.4</protobuf.version> <io.grpc.version>1.51.0</io.grpc.version> 
<sqlline.version>1.9.0</sqlline.version>