This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 658a9bcccc8 HIVE-28224: Upgrade Orc version in Hive to 1.9.4 (#5218). (Dmitriy Fingerman, reviewed by Ayush Saxena)
658a9bcccc8 is described below

commit 658a9bcccc865dc2bb33e85e3af8edd0ffcb5beb
Author: Dmitriy Fingerman <dmitriy.finger...@gmail.com>
AuthorDate: Thu Aug 8 10:35:33 2024 -0400

    HIVE-28224: Upgrade Orc version in Hive to 1.9.4 (#5218). (Dmitriy Fingerman, reviewed by Ayush Saxena)
---
 .../ql/txn/compactor/TestCrudCompactorOnTez.java   | 42 +++++++++++-----------
 pom.xml                                            |  2 +-
 .../io/orc/encoded/EncodedTreeReaderFactory.java   |  1 +
 .../acid_bloom_filter_orc_file_dump.q              |  1 +
 .../materialized_view_create_rewrite_10.q          |  2 +-
 .../queries/clientpositive/orc_llap_nonvector.q    |  1 +
 ql/src/test/queries/clientpositive/orc_merge12.q   |  1 +
 .../llap/acid_bloom_filter_orc_file_dump.q.out     |  4 +--
 .../llap/materialized_view_create_rewrite_10.q.out |  8 ++---
 .../clientpositive/llap/orc_llap_nonvector.q.out   | 24 ++++++-------
 .../results/clientpositive/tez/orc_merge12.q.out   |  2 +-
 standalone-metastore/pom.xml                       |  2 +-
 12 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index a6b327ae174..148a7cb3225 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -178,22 +178,22 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
             "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":4}\t13\t13",
         },
         {
-            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t6\t4",
+            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t4\t4",
             "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4",
-            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4",
-            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t2\t4",
+            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t2\t4",
+            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t5\t4",
         },
         {
-            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4",
-            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t2\t3",
+            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t6\t4",
+            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t5\t3",
             "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t3\t3",
-            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t6\t3",
+            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t2\t3",
             "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t4\t3",
         },
         {
-            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t5\t3",
-            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t6\t2",
-            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t5\t2",
+            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t6\t3",
+            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t5\t2",
+            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t6\t2",
         },
     };
     verifyRebalance(testDataProvider, tableName, null, expectedBuckets,
@@ -234,22 +234,22 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
         },
         {
             "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":5}\t12\t12",
-            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t6\t4",
+            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t4\t4",
             "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4",
-            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4",
-            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t2\t4",
+            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t2\t4",
+            "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t5\t4",
         },
         {
-            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4",
-            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t2\t3",
+            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t6\t4",
+            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t5\t3",
             "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t3\t3",
-            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t6\t3",
+            "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t2\t3",
             "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t4\t3",
         },
         {
-            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t5\t3",
-            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t6\t2",
-            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t5\t2",
+            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t6\t3",
+            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t5\t2",
+            "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t6\t2",
         },
     };
     verifyRebalance(testDataProvider, tableName, null, expectedBuckets,
@@ -525,6 +525,8 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
             "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t6\t2",
             "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t6\t3",
             "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t6\t4",
+            "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t5\t2",
+            "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t5\t3",
             "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t12\t12",
             "{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t13\t13",
             "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t14\t14",
@@ -533,9 +535,7 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
             "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17",
         },
         {
-            "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t5\t2",
-            "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t5\t3",
-            "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t2\t4",
+            "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t4",
         },
         {
             "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t3\t3",
diff --git a/pom.xml b/pom.xml
index da80147e90c..852fb234b78 100644
--- a/pom.xml
+++ b/pom.xml
@@ -182,7 +182,7 @@
     <postgres.version>42.7.3</postgres.version>
     <oracle.version>21.3.0.0</oracle.version>
     <opencsv.version>5.9</opencsv.version>
-    <orc.version>1.8.5</orc.version>
+    <orc.version>1.9.4</orc.version>
     <mockito-core.version>3.4.4</mockito-core.version>
     <mockito-inline.version>4.11.0</mockito-inline.version>
     <mina.version>2.0.0-M5</mina.version>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
index 46e5a3c3be8..869528fc889 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
@@ -1078,6 +1078,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
         FilterContext filterContext, ReadPhase readPhase) throws IOException {
       if (vectors == null) {
         super.nextVector(previousVector, isNull, batchSize, filterContext, readPhase);
+        previousVector.isRepeating = false;
         return;
       }
       vectors.get(vectorIndex++).shallowCopyTo(previousVector);
diff --git 
a/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q 
b/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q
index d5bb1c65b7f..37a80098a44 100644
--- a/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q
+++ b/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q
@@ -1,4 +1,5 @@
 --! qt:replace:/(File Version:)(.+)/$1#Masked#/
+--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/
 -- SORT_QUERY_RESULTS
 SET hive.vectorized.execution.enabled=FALSE;
 SET hive.mapred.mode=nonstrict;
diff --git 
a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q 
b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q
index 417872c2727..ff99561a8cd 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q
@@ -1,6 +1,6 @@
 -- Try to run incremental on a non-transactional MV in presence of delete operations
 -- Compiler should fall back to full rebuild.
-
+--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
 
diff --git a/ql/src/test/queries/clientpositive/orc_llap_nonvector.q 
b/ql/src/test/queries/clientpositive/orc_llap_nonvector.q
index 95a0384ccaf..31a3d63c58a 100644
--- a/ql/src/test/queries/clientpositive/orc_llap_nonvector.q
+++ b/ql/src/test/queries/clientpositive/orc_llap_nonvector.q
@@ -1,4 +1,5 @@
 --! qt:dataset:alltypesorc
+--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/
 
 set hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
diff --git a/ql/src/test/queries/clientpositive/orc_merge12.q 
b/ql/src/test/queries/clientpositive/orc_merge12.q
index 348c3a8f5e0..d1487d22098 100644
--- a/ql/src/test/queries/clientpositive/orc_merge12.q
+++ b/ql/src/test/queries/clientpositive/orc_merge12.q
@@ -1,4 +1,5 @@
 --! qt:replace:/(File Version:)(.+)/$1#Masked#/
+--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/
 set hive.vectorized.execution.enabled=false;
 
 CREATE TABLE `alltypesorc3xcols`(
diff --git 
a/ql/src/test/results/clientpositive/llap/acid_bloom_filter_orc_file_dump.q.out 
b/ql/src/test/results/clientpositive/llap/acid_bloom_filter_orc_file_dump.q.out
index 96327ed0400..d68bfe05243 100644
--- 
a/ql/src/test/results/clientpositive/llap/acid_bloom_filter_orc_file_dump.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/acid_bloom_filter_orc_file_dump.q.out
@@ -182,7 +182,7 @@ Stripes:
       Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 
0.0001 expectedFpp: 1.5953551E-25
       Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 
loadFactor: 0.0001 expectedFpp: 1.5953551E-25
 
-File length: 1217 bytes
+File length: #Masked# bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 
@@ -299,7 +299,7 @@ Stripes:
       Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 
0.0001 expectedFpp: 1.5953551E-25
       Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 
loadFactor: 0.0001 expectedFpp: 1.5953551E-25
 
-File length: 1211 bytes
+File length: #Masked# bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 
diff --git 
a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out
 
b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out
index 86cce82f092..5ff07b9fc54 100644
--- 
a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out
@@ -79,17 +79,17 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   filterExpr: (b = 1) (type: boolean)
-                  Statistics: Num rows: 69 Data size: 13710 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 70 Data size: #Masked# Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (b = 1) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 198 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: #Masked# Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: a (type: int), 1 (type: int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 198 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: #Masked# Basic stats: 
COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 1 Data size: 198 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: #Masked# Basic 
stats: COMPLETE Column stats: NONE
                         table:
                             input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                             output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out 
b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out
index 669df679d82..8ef0f0dde97 100644
--- a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out
@@ -94,17 +94,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: orc_llap_nonvector
-                  Statistics: Num rows: 12288 Data size: 2942394 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 12288 Data size: #Masked# Basic stats: 
COMPLETE Column stats: COMPLETE
                   Limit
                     Number of rows: 100
-                    Statistics: Num rows: 100 Data size: 24360 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 100 Data size: #Masked# Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean), rdm (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-                      Statistics: Num rows: 100 Data size: 24360 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 100 Data size: #Masked# Basic 
stats: COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 100 Data size: 24360 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 100 Data size: #Masked# Basic 
stats: COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -249,17 +249,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: orc_llap_nonvector
-                  Statistics: Num rows: 12288 Data size: 899146 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 12288 Data size: #Masked# Basic stats: 
COMPLETE Column stats: COMPLETE
                   Limit
                     Number of rows: 1025
-                    Statistics: Num rows: 1025 Data size: 75068 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1025 Data size: #Masked# Basic 
stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cint (type: int), cstring1 (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1025 Data size: 75068 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1025 Data size: #Masked# Basic 
stats: COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 1025 Data size: 75068 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1025 Data size: #Masked# Basic 
stats: COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1354,17 +1354,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: orc_llap_nonvector_2
-                  Statistics: Num rows: 12288 Data size: 4468250 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 12288 Data size: #Masked# Basic stats: 
COMPLETE Column stats: COMPLETE
                   Limit
                     Number of rows: 10
-                    Statistics: Num rows: 10 Data size: 80 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 10 Data size: #Masked# Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ROW__ID (type: 
struct<writeid:bigint,bucketid:int,rowid:bigint>)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 760 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 10 Data size: #Masked# Basic 
stats: COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 10 Data size: 760 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 10 Data size: #Masked# Basic 
stats: COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/tez/orc_merge12.q.out 
b/ql/src/test/results/clientpositive/tez/orc_merge12.q.out
index 73526e4e93e..3bda3727910 100644
--- a/ql/src/test/results/clientpositive/tez/orc_merge12.q.out
+++ b/ql/src/test/results/clientpositive/tez/orc_merge12.q.out
@@ -814,7 +814,7 @@ Stripes:
       Entry 0: count: 6889 hasNull: true true: 3402 positions: 0,0,0,0,0,0,0,0
       Entry 1: count: 2284 hasNull: true true: 581 positions: 
0,168,8,0,0,520,97,1
 
-File length: 3004637 bytes
+File length: #Masked# bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 
________________________________________________________________________________________________________________________
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index 04deae9c166..c604d5020cf 100644
--- a/standalone-metastore/pom.xml
+++ b/standalone-metastore/pom.xml
@@ -91,7 +91,7 @@
     <libthrift.version>0.16.0</libthrift.version>
     <log4j2.version>2.18.0</log4j2.version>
     <mockito-core.version>3.4.4</mockito-core.version>
-    <orc.version>1.8.5</orc.version>
+    <orc.version>1.9.4</orc.version>
     <protobuf.version>3.24.4</protobuf.version>
     <io.grpc.version>1.51.0</io.grpc.version>
     <sqlline.version>1.9.0</sqlline.version>

Reply via email to