This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 94b383c82a HIVE-26193: Fix Iceberg partitioned tables null bucket 
handling (Peter Vary reviewed by Marton Bod and Adam Szita) (#3261)
94b383c82a is described below

commit 94b383c82a7c5d2e619c3bab84dfca88b4591b4e
Author: pvary <[email protected]>
AuthorDate: Mon May 2 08:35:29 2022 +0200

    HIVE-26193: Fix Iceberg partitioned tables null bucket handling (Peter Vary 
reviewed by Marton Bod and Adam Szita) (#3261)
---
 .../iceberg/mr/hive/GenericUDFIcebergBucket.java   |  2 +-
 .../queries/positive/dynamic_partition_writes.q    |  4 +-
 .../positive/dynamic_partition_writes.q.out        | 43 ++++++++++++++--------
 .../hive/ql/exec/vector/VectorizedRowBatchCtx.java |  4 +-
 4 files changed, 32 insertions(+), 21 deletions(-)

diff --git 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/GenericUDFIcebergBucket.java
 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/GenericUDFIcebergBucket.java
index cab4bb11ab..52b0a1edbf 100644
--- 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/GenericUDFIcebergBucket.java
+++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/GenericUDFIcebergBucket.java
@@ -185,7 +185,7 @@ public class GenericUDFIcebergBucket extends GenericUDF {
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
 
     DeferredObject argument = arguments[0];
-    if (argument == null) {
+    if (argument == null || argument.get() == null) {
       return null;
     } else {
       evaluator.apply(argument);
diff --git 
a/iceberg/iceberg-handler/src/test/queries/positive/dynamic_partition_writes.q 
b/iceberg/iceberg-handler/src/test/queries/positive/dynamic_partition_writes.q
index 93094529f6..622e870350 100644
--- 
a/iceberg/iceberg-handler/src/test/queries/positive/dynamic_partition_writes.q
+++ 
b/iceberg/iceberg-handler/src/test/queries/positive/dynamic_partition_writes.q
@@ -10,9 +10,9 @@ drop table if exists tbl_target_mixed;
 
 
 create external table tbl_src (a int, b string, c bigint) stored by iceberg 
stored as orc;
-insert into tbl_src values (1, 'EUR', 10), (2, 'EUR', 10), (3, 'USD', 11), (4, 
'EUR', 12), (5, 'HUF', 30), (6, 'USD', 10), (7, 'USD', 100), (8, 'PLN', 20), 
(9, 'PLN', 11), (10, 'CZK', 5);
+insert into tbl_src values (1, 'EUR', 10), (2, 'EUR', 10), (3, 'USD', 11), (4, 
'EUR', 12), (5, 'HUF', 30), (6, 'USD', 10), (7, 'USD', 100), (8, 'PLN', 20), 
(9, 'PLN', 11), (10, 'CZK', 5), (12, NULL, NULL);
 --need at least 2 files to ensure ClusteredWriter encounters out-of-order 
records
-insert into tbl_src values (10, 'EUR', 12), (20, 'EUR', 11), (30, 'USD', 100), 
(40, 'EUR', 10), (50, 'HUF', 30), (60, 'USD', 12), (70, 'USD', 20), (80, 'PLN', 
100), (90, 'PLN', 18), (100, 'CZK', 12);
+insert into tbl_src values (10, 'EUR', 12), (20, 'EUR', 11), (30, 'USD', 100), 
(40, 'EUR', 10), (50, 'HUF', 30), (60, 'USD', 12), (70, 'USD', 20), (80, 'PLN', 
100), (90, 'PLN', 18), (100, 'CZK', 12), (110, NULL, NULL);
 
 create external table tbl_target_identity (a int) partitioned by (ccy string) 
stored by iceberg stored as orc;
 explain insert overwrite table tbl_target_identity select a, b from tbl_src;
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
index 36fb39c091..53be8c39ed 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
@@ -22,19 +22,19 @@ POSTHOOK: query: create external table tbl_src (a int, b 
string, c bigint) store
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@tbl_src
-PREHOOK: query: insert into tbl_src values (1, 'EUR', 10), (2, 'EUR', 10), (3, 
'USD', 11), (4, 'EUR', 12), (5, 'HUF', 30), (6, 'USD', 10), (7, 'USD', 100), 
(8, 'PLN', 20), (9, 'PLN', 11), (10, 'CZK', 5)
+PREHOOK: query: insert into tbl_src values (1, 'EUR', 10), (2, 'EUR', 10), (3, 
'USD', 11), (4, 'EUR', 12), (5, 'HUF', 30), (6, 'USD', 10), (7, 'USD', 100), 
(8, 'PLN', 20), (9, 'PLN', 11), (10, 'CZK', 5), (12, NULL, NULL)
 PREHOOK: type: QUERY
 PREHOOK: Input: _dummy_database@_dummy_table
 PREHOOK: Output: default@tbl_src
-POSTHOOK: query: insert into tbl_src values (1, 'EUR', 10), (2, 'EUR', 10), 
(3, 'USD', 11), (4, 'EUR', 12), (5, 'HUF', 30), (6, 'USD', 10), (7, 'USD', 
100), (8, 'PLN', 20), (9, 'PLN', 11), (10, 'CZK', 5)
+POSTHOOK: query: insert into tbl_src values (1, 'EUR', 10), (2, 'EUR', 10), 
(3, 'USD', 11), (4, 'EUR', 12), (5, 'HUF', 30), (6, 'USD', 10), (7, 'USD', 
100), (8, 'PLN', 20), (9, 'PLN', 11), (10, 'CZK', 5), (12, NULL, NULL)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@tbl_src
-PREHOOK: query: insert into tbl_src values (10, 'EUR', 12), (20, 'EUR', 11), 
(30, 'USD', 100), (40, 'EUR', 10), (50, 'HUF', 30), (60, 'USD', 12), (70, 
'USD', 20), (80, 'PLN', 100), (90, 'PLN', 18), (100, 'CZK', 12)
+PREHOOK: query: insert into tbl_src values (10, 'EUR', 12), (20, 'EUR', 11), 
(30, 'USD', 100), (40, 'EUR', 10), (50, 'HUF', 30), (60, 'USD', 12), (70, 
'USD', 20), (80, 'PLN', 100), (90, 'PLN', 18), (100, 'CZK', 12), (110, NULL, 
NULL)
 PREHOOK: type: QUERY
 PREHOOK: Input: _dummy_database@_dummy_table
 PREHOOK: Output: default@tbl_src
-POSTHOOK: query: insert into tbl_src values (10, 'EUR', 12), (20, 'EUR', 11), 
(30, 'USD', 100), (40, 'EUR', 10), (50, 'HUF', 30), (60, 'USD', 12), (70, 
'USD', 20), (80, 'PLN', 100), (90, 'PLN', 18), (100, 'CZK', 12)
+POSTHOOK: query: insert into tbl_src values (10, 'EUR', 12), (20, 'EUR', 11), 
(30, 'USD', 100), (40, 'EUR', 10), (50, 'HUF', 30), (60, 'USD', 12), (70, 
'USD', 20), (80, 'PLN', 100), (90, 'PLN', 18), (100, 'CZK', 12), (110, NULL, 
NULL)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@tbl_src
@@ -76,9 +76,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:_col1
-                    Select Operator [SEL_12] (rows=20 width=91)
+                    Select Operator [SEL_12] (rows=22 width=87)
                       Output:["_col0","_col1"]
-                      TableScan [TS_0] (rows=20 width=91)
+                      TableScan [TS_0] (rows=22 width=87)
                         
default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -90,7 +90,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=400)
                         
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)"]
-                        Select Operator [SEL_14] (rows=20 width=91)
+                        Select Operator [SEL_14] (rows=22 width=87)
                           Output:["a","ccy"]
                            Please refer to the previous Select Operator 
[SEL_12]
 
@@ -121,6 +121,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 9      PLN
 10     CZK
 10     EUR
+12     NULL
 20     EUR
 30     USD
 40     EUR
@@ -130,6 +131,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 80     PLN
 90     PLN
 100    CZK
+110    NULL
 PREHOOK: query: create external table tbl_target_bucket (a int, ccy string) 
partitioned by spec (bucket (2, ccy)) stored by iceberg stored as orc
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -168,9 +170,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:iceberg_bucket(_col1, 2)
-                    Select Operator [SEL_12] (rows=20 width=91)
+                    Select Operator [SEL_12] (rows=22 width=87)
                       Output:["_col0","_col1"]
-                      TableScan [TS_0] (rows=20 width=91)
+                      TableScan [TS_0] (rows=22 width=87)
                         
default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -182,7 +184,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=400)
                         
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)"]
-                        Select Operator [SEL_14] (rows=20 width=91)
+                        Select Operator [SEL_14] (rows=22 width=87)
                           Output:["a","ccy"]
                            Please refer to the previous Select Operator 
[SEL_12]
 
@@ -213,6 +215,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 9      PLN
 10     CZK
 10     EUR
+12     NULL
 20     EUR
 30     USD
 40     EUR
@@ -222,6 +225,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 80     PLN
 90     PLN
 100    CZK
+110    NULL
 PREHOOK: query: create external table tbl_target_mixed (a int, ccy string, c 
bigint) partitioned by spec (ccy, bucket (3, c)) stored by iceberg stored as orc
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -260,9 +264,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:_col1, iceberg_bucket(_col2, 3)
-                    Select Operator [SEL_12] (rows=20 width=99)
+                    Select Operator [SEL_12] (rows=22 width=94)
                       Output:["_col0","_col1","_col2"]
-                      TableScan [TS_0] (rows=20 width=99)
+                      TableScan [TS_0] (rows=22 width=94)
                         
default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -274,7 +278,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=568)
                         
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)","min(c)","max(c)","count(c)","compute_bit_vector_hll(c)"]
-                        Select Operator [SEL_14] (rows=20 width=99)
+                        Select Operator [SEL_14] (rows=22 width=94)
                           Output:["a","ccy","c"]
                            Please refer to the previous Select Operator 
[SEL_12]
 
@@ -305,6 +309,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 9      PLN     11
 10     CZK     5
 10     EUR     12
+12     NULL    NULL
 20     EUR     11
 30     USD     100
 40     EUR     10
@@ -314,6 +319,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 80     PLN     100
 90     PLN     18
 100    CZK     12
+110    NULL    NULL
 PREHOOK: query: select * from default.tbl_target_mixed.partitions
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tbl_target_mixed
@@ -324,14 +330,15 @@ POSTHOOK: Input: default@tbl_target_mixed
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 {"ccy":"EUR","c_bucket":0}     1       1
 {"ccy":"EUR","c_bucket":1}     2       1
-{"ccy":"HUF","c_bucket":1}     2       1
 {"ccy":"EUR","c_bucket":2}     3       1
 {"ccy":"USD","c_bucket":1}     3       1
 {"ccy":"CZK","c_bucket":1}     1       1
 {"ccy":"USD","c_bucket":0}     2       1
 {"ccy":"USD","c_bucket":2}     1       1
 {"ccy":"CZK","c_bucket":2}     1       1
+{"ccy":"HUF","c_bucket":1}     2       1
 {"ccy":"PLN","c_bucket":2}     1       1
+{"ccy":null,"c_bucket":null}   2       1
 {"ccy":"PLN","c_bucket":0}     2       1
 {"ccy":"PLN","c_bucket":1}     1       1
 PREHOOK: query: select * from default.tbl_target_mixed.files
@@ -342,6 +349,7 @@ POSTHOOK: query: select * from 
default.tbl_target_mixed.files
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tbl_target_mixed
 POSTHOOK: Output: hdfs://### HDFS PATH ###
+0      hdfs://### HDFS PATH ###        ORC     0       
{"ccy":null,"c_bucket":null}    2       417     {1:7,2:5,3:5}   {1:2,2:2,3:2}   
{1:0,2:2,3:2}   {}      {1:

Reply via email to