This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new aa800efbdb7 HIVE-29275: Stats autogather calculates the min statistic incorrectly (#6194)
aa800efbdb7 is described below

commit aa800efbdb7f1aacfd382ae28123e3ec9db5ed89
Author: Thomas Rebele <[email protected]>
AuthorDate: Tue Nov 25 09:01:39 2025 +0100

    HIVE-29275: Stats autogather calculates the min statistic incorrectly (#6194)
---
 .../org/apache/hadoop/hive/ql/ddl/ShowUtils.java   |  9 +--
 .../llap/llap_decimal64_reader.q.out               | 28 ++++----
 .../clientpositive/llap/stats_histogram.q.out      |  2 +-
 .../clientpositive/llap/stats_histogram_null.q.out |  2 +-
 .../perf/tpcds30tb/tez/query28.q.out               | 60 ++++++++--------
 .../perf/tpcds30tb/tez/query48.q.out               | 16 ++---
 .../perf/tpcds30tb/tez/query49.q.out               | 62 ++++++++--------
 .../hive/metastore/api/utils/DecimalUtils.java     | 12 ++--
 .../merge/DecimalColumnStatsMerger.java            | 19 ++++-
 .../hive/metastore/utils/MetaStoreServerUtils.java | 10 +++
 .../merge/DecimalColumnStatsMergerTest.java        | 82 +++++++++++++---------
 11 files changed, 168 insertions(+), 134 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java
index 386bfd97748..e96288369ac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java
@@ -43,6 +43,7 @@
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -54,7 +55,6 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
-import java.math.BigInteger;
 import java.nio.charset.StandardCharsets;
 import java.time.ZoneId;
 import java.util.ArrayList;
@@ -233,12 +233,7 @@ public static String[] extractColumnValues(FieldSchema column, boolean isColumnS
   }
 
   public static String convertToString(Decimal val) {
-    if (val == null) {
-      return "";
-    }
-
-    HiveDecimal result = HiveDecimal.create(new BigInteger(val.getUnscaled()), val.getScale());
-    return (result != null) ? result.toString() : "";
+    return MetaStoreServerUtils.decimalToString(val);
   }
 
   public static String convertToString(org.apache.hadoop.hive.metastore.api.Date val) {
diff --git 
a/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out 
b/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
index a20cedd10dd..fcf8bf6892b 100644
--- a/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
@@ -136,25 +136,25 @@ STAGE PLANS:
                   Statistics: Num rows: 24576 Data size: 5505024 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 2752512 Basic 
stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 24576 Data size: 5505024 Basic 
stats: COMPLETE Column stats: COMPLETE
                     Top N Key Operator
                       sort order: ++
                       keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: 
decimal(38,5))
                       null sort order: zz
-                      Statistics: Num rows: 12288 Data size: 2752512 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 24576 Data size: 5505024 Basic 
stats: COMPLETE Column stats: COMPLETE
                       top n: 2
                       Group By Operator
                         keys: cdecimal1 (type: decimal(10,2)), cdecimal2 
(type: decimal(38,5))
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 1 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 2 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: decimal(10,2)), _col1 
(type: decimal(38,5))
                           null sort order: zz
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: 
decimal(10,2)), _col1 (type: decimal(38,5))
-                          Statistics: Num rows: 1 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 2 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -179,13 +179,13 @@ STAGE PLANS:
                 keys: KEY._col0 (type: decimal(10,2)), KEY._col1 (type: 
decimal(38,5))
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Limit
                   Number of rows: 2
-                  Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE 
Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 2 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -243,25 +243,25 @@ STAGE PLANS:
                   Statistics: Num rows: 24576 Data size: 5505024 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 2752512 Basic 
stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 24576 Data size: 5505024 Basic 
stats: COMPLETE Column stats: COMPLETE
                     Top N Key Operator
                       sort order: ++
                       keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: 
decimal(38,5))
                       null sort order: zz
-                      Statistics: Num rows: 12288 Data size: 2752512 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 24576 Data size: 5505024 Basic 
stats: COMPLETE Column stats: COMPLETE
                       top n: 2
                       Group By Operator
                         keys: cdecimal1 (type: decimal(10,2)), cdecimal2 
(type: decimal(38,5))
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 1 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 2 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: decimal(10,2)), _col1 
(type: decimal(38,5))
                           null sort order: zz
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: 
decimal(10,2)), _col1 (type: decimal(38,5))
-                          Statistics: Num rows: 1 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 2 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -287,13 +287,13 @@ STAGE PLANS:
                 keys: KEY._col0 (type: decimal(10,2)), KEY._col1 (type: 
decimal(38,5))
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Limit
                   Number of rows: 2
-                  Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE 
Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 2 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/stats_histogram.q.out 
b/ql/src/test/results/clientpositive/llap/stats_histogram.q.out
index 5ff94404746..439ebba09c5 100644
--- a/ql/src/test/results/clientpositive/llap/stats_histogram.q.out
+++ b/ql/src/test/results/clientpositive/llap/stats_histogram.q.out
@@ -361,7 +361,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@test_stats
 col_name               e                   
 data_type              decimal(5,2)        
-min                    -10.2               
+min                    -123.2              
 max                    12.2                
 num_nulls              1                   
 distinct_count         11                  
diff --git a/ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out 
b/ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out
index dbde9c67dae..07e48b5888a 100644
--- a/ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out
@@ -436,7 +436,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@test_stats
 col_name               e                   
 data_type              decimal(5,2)        
-min                    -12.3               
+min                    -123.2              
 max                    12.2                
 num_nulls              1                   
 distinct_count         15                  
diff --git 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query28.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query28.q.out
index d0e8628d188..025630093e3 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query28.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query28.q.out
@@ -34,129 +34,129 @@ STAGE PLANS:
                   Statistics: Num rows: 86404891377 Data size: 28054250053192 
Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (ss_quantity BETWEEN 0 AND 5 and (ss_list_price 
BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost 
BETWEEN 14 AND 34)) (type: boolean)
-                    Statistics: Num rows: 1965380184 Data size: 638126687968 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1955303836 Data size: 634855063288 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ss_list_price (type: decimal(7,2))
                       outputColumnNames: ss_list_price
-                      Statistics: Num rows: 1965380184 Data size: 638126687968 
Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1955303836 Data size: 634855063288 
Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(ss_list_price), count(ss_list_price)
                         keys: ss_list_price (type: decimal(7,2))
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 982690092 Data size: 
223025312544 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 977651918 Data size: 
221881879504 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: decimal(7,2))
                           null sort order: z
                           sort order: +
                           Map-reduce partition columns: _col0 (type: 
decimal(7,2))
-                          Statistics: Num rows: 982690092 Data size: 
223025312544 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 977651918 Data size: 
221881879504 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: decimal(17,2)), 
_col2 (type: bigint)
                   Filter Operator
                     predicate: (ss_quantity BETWEEN 16 AND 20 and 
(ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or 
ss_wholesale_cost BETWEEN 80 AND 100)) (type: boolean)
-                    Statistics: Num rows: 2571445780 Data size: 834906239572 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 2551512553 Data size: 828434247976 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ss_list_price (type: decimal(7,2))
                       outputColumnNames: ss_list_price
-                      Statistics: Num rows: 2571445780 Data size: 834906239572 
Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 2551512553 Data size: 828434247976 
Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(ss_list_price), count(ss_list_price)
                         keys: ss_list_price (type: decimal(7,2))
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 1285722890 Data size: 
291799776608 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1275756276 Data size: 
289537815088 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: decimal(7,2))
                           null sort order: z
                           sort order: +
                           Map-reduce partition columns: _col0 (type: 
decimal(7,2))
-                          Statistics: Num rows: 1285722890 Data size: 
291799776608 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1275756276 Data size: 
289537815088 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: decimal(17,2)), 
_col2 (type: bigint)
                   Filter Operator
                     predicate: (ss_quantity BETWEEN 21 AND 25 and 
(ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or 
ss_wholesale_cost BETWEEN 38 AND 58)) (type: boolean)
-                    Statistics: Num rows: 3432340414 Data size: 1114424597248 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3482601258 Data size: 1130743468324 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ss_list_price (type: decimal(7,2))
                       outputColumnNames: ss_list_price
-                      Statistics: Num rows: 3432340414 Data size: 
1114424597248 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 3482601258 Data size: 
1130743468324 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(ss_list_price), count(ss_list_price)
                         keys: ss_list_price (type: decimal(7,2))
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 1716170207 Data size: 
389491457960 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1741300629 Data size: 
395194904312 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: decimal(7,2))
                           null sort order: z
                           sort order: +
                           Map-reduce partition columns: _col0 (type: 
decimal(7,2))
-                          Statistics: Num rows: 1716170207 Data size: 
389491457960 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1741300629 Data size: 
395194904312 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: decimal(17,2)), 
_col2 (type: bigint)
                   Filter Operator
                     predicate: (ss_quantity BETWEEN 26 AND 30 and 
(ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or 
ss_wholesale_cost BETWEEN 42 AND 62)) (type: boolean)
-                    Statistics: Num rows: 2913592254 Data size: 945995583960 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 2894732905 Data size: 939872262804 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ss_list_price (type: decimal(7,2))
                       outputColumnNames: ss_list_price
-                      Statistics: Num rows: 2913592254 Data size: 945995583960 
Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 2894732905 Data size: 939872262804 
Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(ss_list_price), count(ss_list_price)
                         keys: ss_list_price (type: decimal(7,2))
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 1456796127 Data size: 
330625508552 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1447366452 Data size: 
328485407488 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: decimal(7,2))
                           null sort order: z
                           sort order: +
                           Map-reduce partition columns: _col0 (type: 
decimal(7,2))
-                          Statistics: Num rows: 1456796127 Data size: 
330625508552 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1447366452 Data size: 
328485407488 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: decimal(17,2)), 
_col2 (type: bigint)
                   Filter Operator
                     predicate: (ss_quantity BETWEEN 11 AND 15 and 
(ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or 
ss_wholesale_cost BETWEEN 4 AND 24)) (type: boolean)
-                    Statistics: Num rows: 2457715925 Data size: 797980022328 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 2445115019 Data size: 793888714924 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ss_list_price (type: decimal(7,2))
                       outputColumnNames: ss_list_price
-                      Statistics: Num rows: 2457715925 Data size: 797980022328 
Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 2445115019 Data size: 793888714924 
Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(ss_list_price), count(ss_list_price)
                         keys: ss_list_price (type: decimal(7,2))
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 1228857962 Data size: 
278894061568 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1222557509 Data size: 
277464149464 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: decimal(7,2))
                           null sort order: z
                           sort order: +
                           Map-reduce partition columns: _col0 (type: 
decimal(7,2))
-                          Statistics: Num rows: 1228857962 Data size: 
278894061568 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1222557509 Data size: 
277464149464 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: decimal(17,2)), 
_col2 (type: bigint)
                   Filter Operator
                     predicate: (ss_quantity BETWEEN 6 AND 10 and 
(ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or 
ss_wholesale_cost BETWEEN 32 AND 52)) (type: boolean)
-                    Statistics: Num rows: 3193426694 Data size: 1036853233656 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3178448591 Data size: 1031990089344 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ss_list_price (type: decimal(7,2))
                       outputColumnNames: ss_list_price
-                      Statistics: Num rows: 3193426694 Data size: 
1036853233656 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 3178448591 Data size: 
1031990089344 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: sum(ss_list_price), count(ss_list_price)
                         keys: ss_list_price (type: decimal(7,2))
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 1596713347 Data size: 
362380262200 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1589224295 Data size: 
360680592952 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: decimal(7,2))
                           null sort order: z
                           sort order: +
                           Map-reduce partition columns: _col0 (type: 
decimal(7,2))
-                          Statistics: Num rows: 1596713347 Data size: 
362380262200 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1589224295 Data size: 
360680592952 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: decimal(17,2)), 
_col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
@@ -168,7 +168,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: decimal(7,2))
                 mode: partial2
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1228857962 Data size: 278894061568 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1222557509 Data size: 277464149464 Basic 
stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: sum(_col1), count(_col2), count(_col0)
                   mode: partial2
@@ -204,7 +204,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: decimal(7,2))
                 mode: partial2
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1596713347 Data size: 362380262200 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1589224295 Data size: 360680592952 Basic 
stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: sum(_col1), count(_col2), count(_col0)
                   mode: partial2
@@ -240,7 +240,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: decimal(7,2))
                 mode: partial2
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 982690092 Data size: 223025312544 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 977651918 Data size: 221881879504 Basic 
stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: sum(_col1), count(_col2), count(_col0)
                   mode: partial2
@@ -276,7 +276,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: decimal(7,2))
                 mode: partial2
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1285722890 Data size: 291799776608 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1275756276 Data size: 289537815088 Basic 
stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: sum(_col1), count(_col2), count(_col0)
                   mode: partial2
@@ -312,7 +312,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: decimal(7,2))
                 mode: partial2
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1716170207 Data size: 389491457960 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1741300629 Data size: 395194904312 Basic 
stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: sum(_col1), count(_col2), count(_col0)
                   mode: partial2
@@ -394,7 +394,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: decimal(7,2))
                 mode: partial2
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1456796127 Data size: 330625508552 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1447366452 Data size: 328485407488 Basic 
stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: sum(_col1), count(_col2), count(_col0)
                   mode: partial2
diff --git 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query48.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query48.q.out
index 59b49bfa8fc..bd3b9e37e5a 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query48.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query48.q.out
@@ -16,15 +16,15 @@ STAGE PLANS:
                 TableScan
                   alias: store_sales
                   filterExpr: (ss_sales_price BETWEEN 50 AND 200 and 
ss_net_profit is not null and ss_cdemo_sk is not null and ss_addr_sk is not 
null and ss_store_sk is not null) (type: boolean)
-                  probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_66_container, 
bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:3.323733066508898E-4
+                  probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_66_container, 
bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:3.323843839779123E-4
                   Statistics: Num rows: 82510879939 Data size: 20962809999708 
Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (ss_sales_price BETWEEN 50 AND 200 and 
ss_net_profit is not null and ss_cdemo_sk is not null and ss_addr_sk is not 
null and ss_store_sk is not null) (type: boolean)
-                    Statistics: Num rows: 56248293349 Data size: 
14290506744864 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 56250168542 Data size: 
14290983158452 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ss_cdemo_sk (type: bigint), ss_addr_sk 
(type: bigint), ss_quantity (type: int), ss_sold_date_sk (type: bigint), 
ss_net_profit BETWEEN 0 AND 2000 (type: boolean), ss_net_profit BETWEEN 150 AND 
3000 (type: boolean), ss_net_profit BETWEEN 50 AND 25000 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6
-                      Statistics: Num rows: 56248293349 Data size: 
2223391490876 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 56250168542 Data size: 
2223465613804 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -34,7 +34,7 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col2, _col4, _col5, 
_col6
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 11304950271 Data size: 
335218165588 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 11305327153 Data size: 
335229341020 Basic stats: COMPLETE Column stats: COMPLETE
                         Map Join Operator
                           condition map:
                                Inner Join 0 to 1
@@ -44,7 +44,7 @@ STAGE PLANS:
                           outputColumnNames: _col1, _col2, _col4, _col5, _col6
                           input vertices:
                             1 Map 4
-                          Statistics: Num rows: 322998581 Data size: 
3875982984 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 323009350 Data size: 
3876112212 Basic stats: COMPLETE Column stats: COMPLETE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -54,14 +54,14 @@ STAGE PLANS:
                             outputColumnNames: _col2, _col4, _col5, _col6, 
_col10, _col11, _col12
                             input vertices:
                               1 Map 5
-                            Statistics: Num rows: 27424414 Data size: 
658185940 Basic stats: COMPLETE Column stats: COMPLETE
+                            Statistics: Num rows: 27425328 Data size: 
658207876 Basic stats: COMPLETE Column stats: COMPLETE
                             Filter Operator
                               predicate: ((_col10 and _col4) or (_col11 and 
_col5) or (_col12 and _col6)) (type: boolean)
-                              Statistics: Num rows: 20568309 Data size: 
493639420 Basic stats: COMPLETE Column stats: COMPLETE
+                              Statistics: Num rows: 20568996 Data size: 
493655908 Basic stats: COMPLETE Column stats: COMPLETE
                               Select Operator
                                 expressions: _col2 (type: int)
                                 outputColumnNames: _col2
-                                Statistics: Num rows: 20568309 Data size: 
493639420 Basic stats: COMPLETE Column stats: COMPLETE
+                                Statistics: Num rows: 20568996 Data size: 
493655908 Basic stats: COMPLETE Column stats: COMPLETE
                                 Group By Operator
                                   aggregations: sum(_col2)
                                   minReductionHashAggr: 0.99
diff --git 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query49.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query49.q.out
index 98ae7182489..276726ad496 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query49.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query49.q.out
@@ -41,11 +41,11 @@ STAGE PLANS:
                   Statistics: Num rows: 21594638446 Data size: 5441536184068 
Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((ws_quantity > 0) and (ws_net_profit > 1) and 
(ws_net_paid > 0)) (type: boolean)
-                    Statistics: Num rows: 14321294654 Data size: 3608758871252 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 14390903321 Data size: 3626299247340 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ws_item_sk (type: bigint), ws_order_number 
(type: bigint), ws_quantity (type: int), ws_net_paid (type: decimal(7,2)), 
ws_sold_date_sk (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col5
-                      Statistics: Num rows: 14321294654 Data size: 
2004773870004 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 14390903321 Data size: 
2014518075388 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -55,18 +55,18 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col2, _col3
                         input vertices:
                           1 Map 19
-                        Statistics: Num rows: 243129259 Data size: 31885680632 
Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 244310989 Data size: 32040660996 
Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: bigint), _col1 (type: 
bigint)
                           null sort order: zz
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: bigint), 
_col1 (type: bigint)
-                          Statistics: Num rows: 243129259 Data size: 
31885680632 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 244310989 Data size: 
32040660996 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col2 (type: int), _col3 (type: 
decimal(7,2))
                         Select Operator
                           expressions: _col0 (type: bigint), _col1 (type: 
bigint), hash(_col0,_col1) (type: int)
                           outputColumnNames: _col0, _col1, _col3
-                          Statistics: Num rows: 243129259 Data size: 
4862585180 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 244310989 Data size: 
4886219780 Basic stats: COMPLETE Column stats: COMPLETE
                           Group By Operator
                             aggregations: min(_col0), max(_col0), min(_col1), 
max(_col1), bloom_filter(_col3, expectedEntries=1000000)
                             minReductionHashAggr: 0.99
@@ -88,17 +88,17 @@ STAGE PLANS:
                   Statistics: Num rows: 2160007345 Data size: 273845125140 
Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((wr_return_amt > 10000) and wr_item_sk BETWEEN 
DynamicValue(RS[225]_col0) AND DynamicValue(RS[225]_col1) and wr_order_number 
BETWEEN DynamicValue(RS[225]_col2) AND DynamicValue(RS[225]_col3) and 
in_bloom_filter(hash(wr_item_sk,wr_order_number), DynamicValue(RS[225]_col4))) 
(type: boolean)
-                    Statistics: Num rows: 1418116903 Data size: 179788463076 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1420050734 Data size: 180033633704 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: wr_item_sk (type: bigint), wr_order_number 
(type: bigint), wr_return_quantity (type: int), wr_return_amt (type: 
decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 1418116903 Data size: 179788463076 
Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1420050734 Data size: 180033633704 
Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: bigint), _col1 (type: 
bigint)
                         null sort order: zz
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: bigint), 
_col1 (type: bigint)
-                        Statistics: Num rows: 1418116903 Data size: 
179788463076 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1420050734 Data size: 
180033633704 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col2 (type: int), _col3 (type: 
decimal(7,2))
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
@@ -110,11 +110,11 @@ STAGE PLANS:
                   Statistics: Num rows: 43005109025 Data size: 10824794628716 
Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((cs_quantity > 0) and (cs_net_profit > 1) and 
(cs_net_paid > 0)) (type: boolean)
-                    Statistics: Num rows: 28554178173 Data size: 7187358002848 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 28650456622 Data size: 7211592203468 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cs_item_sk (type: bigint), cs_order_number 
(type: bigint), cs_quantity (type: int), cs_net_paid (type: decimal(7,2)), 
cs_sold_date_sk (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col5
-                      Statistics: Num rows: 28554178173 Data size: 
3989290047472 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 28650456622 Data size: 
4002741061804 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -124,18 +124,18 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col2, _col3
                         input vertices:
                           1 Map 19
-                        Statistics: Num rows: 481330829 Data size: 55240772680 
Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 482953772 Data size: 55427032628 
Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: bigint), _col1 (type: 
bigint)
                           null sort order: zz
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: bigint), 
_col1 (type: bigint)
-                          Statistics: Num rows: 481330829 Data size: 
55240772680 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 482953772 Data size: 
55427032628 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col2 (type: int), _col3 (type: 
decimal(7,2))
                         Select Operator
                           expressions: _col0 (type: bigint), _col1 (type: 
bigint), hash(_col0,_col1) (type: int)
                           outputColumnNames: _col0, _col1, _col3
-                          Statistics: Num rows: 481330829 Data size: 
9626616580 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 482953772 Data size: 
9659075440 Basic stats: COMPLETE Column stats: COMPLETE
                           Group By Operator
                             aggregations: min(_col0), max(_col0), min(_col1), 
max(_col1), bloom_filter(_col3, expectedEntries=1000000)
                             minReductionHashAggr: 0.99
@@ -260,11 +260,11 @@ STAGE PLANS:
                   Statistics: Num rows: 82510879939 Data size: 20349734757316 
Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((ss_quantity > 0) and (ss_net_profit > 1) and 
(ss_net_paid > 0)) (type: boolean)
-                    Statistics: Num rows: 40994410513 Data size: 
10110489442160 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 41222412506 Data size: 
10166721784872 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ss_item_sk (type: bigint), ss_ticket_number 
(type: bigint), ss_quantity (type: int), ss_net_paid (type: decimal(7,2)), 
ss_sold_date_sk (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col5
-                      Statistics: Num rows: 40994410513 Data size: 
5627405668656 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 41222412506 Data size: 
5658704074760 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -274,18 +274,18 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col2, _col3
                         input vertices:
                           1 Map 19
-                        Statistics: Num rows: 695952488 Data size: 11135239924 
Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 699823225 Data size: 11197171716 
Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: bigint), _col1 (type: 
bigint)
                           null sort order: zz
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: bigint), 
_col1 (type: bigint)
-                          Statistics: Num rows: 695952488 Data size: 
11135239924 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 699823225 Data size: 
11197171716 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col2 (type: int), _col3 (type: 
decimal(7,2))
                         Select Operator
                           expressions: _col0 (type: bigint), _col1 (type: 
bigint), hash(_col0,_col1) (type: int)
                           outputColumnNames: _col0, _col1, _col3
-                          Statistics: Num rows: 695952488 Data size: 
13919049760 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 699823225 Data size: 
13996464500 Basic stats: COMPLETE Column stats: COMPLETE
                           Group By Operator
                             aggregations: min(_col0), max(_col0), min(_col1), 
max(_col1), bloom_filter(_col3, expectedEntries=1000000)
                             minReductionHashAggr: 0.99
@@ -307,17 +307,17 @@ STAGE PLANS:
                   Statistics: Num rows: 8634166995 Data size: 1104703724476 
Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((sr_return_amt > 10000) and sr_item_sk BETWEEN 
DynamicValue(RS[245]_col0) AND DynamicValue(RS[245]_col1) and sr_ticket_number 
BETWEEN DynamicValue(RS[245]_col2) AND DynamicValue(RS[245]_col3) and 
in_bloom_filter(hash(sr_item_sk,sr_ticket_number), DynamicValue(RS[245]_col4))) 
(type: boolean)
-                    Statistics: Num rows: 4166475379 Data size: 533082215324 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 4238623038 Data size: 542313191336 
Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: sr_item_sk (type: bigint), sr_ticket_number 
(type: bigint), sr_return_quantity (type: int), sr_return_amt (type: 
decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 4166475379 Data size: 533082215324 
Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 4238623038 Data size: 542313191336 
Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: bigint), _col1 (type: 
bigint)
                         null sort order: zz
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: bigint), 
_col1 (type: bigint)
-                        Statistics: Num rows: 4166475379 Data size: 
533082215324 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 4238623038 Data size: 
542313191336 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col2 (type: int), _col3 (type: 
decimal(7,2))
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
@@ -363,25 +363,25 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col2, _col3, _col9, _col10
                 input vertices:
                   1 Map 20
-                Statistics: Num rows: 506324466 Data size: 106684518508 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 508031682 Data size: 107066281820 Basic 
stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Select Operator
                   expressions: _col0 (type: bigint), if(_col9 is not null, 
_col9, 0) (type: int), if(_col2 is not null, _col2, 0) (type: int), if(_col10 
is not null, _col10, 0) (type: decimal(7,2)), if(_col3 is not null, _col3, 0) 
(type: decimal(7,2))
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 506324466 Data size: 106684518508 
Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 508031682 Data size: 107066281820 
Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: sum(_col1), sum(_col2), sum(_col3), 
sum(_col4)
                     keys: _col0 (type: bigint)
                     minReductionHashAggr: 0.99
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    Statistics: Num rows: 3247596 Data size: 805403808 Basic 
stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3263172 Data size: 809266656 Basic 
stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       null sort order: z
                       sort order: +
                       Map-reduce partition columns: _col0 (type: bigint)
-                      Statistics: Num rows: 3247596 Data size: 805403808 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 3263172 Data size: 809266656 Basic 
stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: bigint), _col2 (type: 
bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
         Reducer 15 
             Execution mode: vectorized, llap
@@ -509,25 +509,25 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col2, _col3, _col9, _col10
                 input vertices:
                   1 Map 12
-                Statistics: Num rows: 261773150 Data size: 55215206324 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 263404204 Data size: 55595556224 Basic 
stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Select Operator
                   expressions: _col0 (type: bigint), if(_col9 is not null, 
_col9, 0) (type: int), if(_col2 is not null, _col2, 0) (type: int), if(_col10 
is not null, _col10, 0) (type: decimal(7,2)), if(_col3 is not null, _col3, 0) 
(type: decimal(7,2))
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 261773150 Data size: 55215206324 Basic 
stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 263404204 Data size: 55595556224 Basic 
stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: sum(_col1), sum(_col2), sum(_col3), 
sum(_col4)
                     keys: _col0 (type: bigint)
                     minReductionHashAggr: 0.99
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    Statistics: Num rows: 1694304 Data size: 420187392 Basic 
stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1709992 Data size: 424078016 Basic 
stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       null sort order: z
                       sort order: +
                       Map-reduce partition columns: _col0 (type: bigint)
-                      Statistics: Num rows: 1694304 Data size: 420187392 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1709992 Data size: 424078016 Basic 
stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: bigint), _col2 (type: 
bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
         Reducer 22 
             Execution mode: vectorized, llap
@@ -541,12 +541,12 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col2, _col3, _col9, _col10
                 input vertices:
                   1 Map 27
-                Statistics: Num rows: 695952488 Data size: 69405573924 Basic 
stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 699823225 Data size: 69593030336 Basic 
stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Select Operator
                   expressions: _col0 (type: bigint), if(_col9 is not null, 
_col9, 0) (type: int), if(_col2 is not null, _col2, 0) (type: int), if(_col10 
is not null, _col10, 0) (type: decimal(7,2)), if(_col3 is not null, _col3, 0) 
(type: decimal(7,2))
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 695952488 Data size: 69405573924 Basic 
stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 699823225 Data size: 69593030336 Basic 
stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: sum(_col1), sum(_col2), sum(_col3), 
sum(_col4)
                     keys: _col0 (type: bigint)
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/api/utils/DecimalUtils.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/api/utils/DecimalUtils.java
index e5d8b0b18f5..8647db58faa 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/api/utils/DecimalUtils.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/api/utils/DecimalUtils.java
@@ -21,6 +21,8 @@
 import java.nio.ByteBuffer;
 import java.math.BigDecimal;
 import java.math.BigInteger;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.metastore.api.Decimal;
 
 /**
@@ -28,12 +30,6 @@
  */
 public class DecimalUtils {
 
-  public static Decimal getDecimal(int number, int scale) {
-    ByteBuffer bb = ByteBuffer.allocate(4);
-    bb.asIntBuffer().put(number);
-    return new Decimal((short) scale, bb);
-  }
-
   public static Decimal getDecimal(ByteBuffer unscaled, short scale) {
     return new Decimal((short) scale, unscaled);
   }
@@ -46,4 +42,8 @@ public static Decimal createThriftDecimal(String s) {
   public static String createJdoDecimalString(Decimal d) {
     return new BigDecimal(new BigInteger(d.getUnscaled()), 
d.getScale()).toString();
   }
+  
+  public static HiveDecimal getHiveDecimal(Decimal decimal) {
+    return HiveDecimal.create(new BigInteger(decimal.getUnscaled()), 
decimal.getScale());
+  }
 }
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index 523f848ba44..c47448c8f7d 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -21,15 +21,16 @@
 
 import org.apache.hadoop.hive.common.histogram.KllHistogramEstimator;
 import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils;
 import 
org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
 
 import com.google.common.base.MoreObjects;
 
 import static 
org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.decimalInspectorFromStats;
 
-import org.apache.commons.lang3.ObjectUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -82,7 +83,7 @@ public Decimal getHighValue(DecimalColumnStatsDataInspector 
data) {
   @Override
   public Decimal mergeLowValue(Decimal oldValue, Decimal newValue) {
     if (oldValue != null && newValue != null) {
-      return ObjectUtils.min(oldValue, newValue);
+      return compareDecimals(oldValue, newValue) < 0 ? oldValue : newValue;
     }
     if (oldValue != null || newValue != null) {
       return MoreObjects.firstNonNull(oldValue, newValue);
@@ -93,11 +94,23 @@ public Decimal mergeLowValue(Decimal oldValue, Decimal 
newValue) {
   @Override
   public Decimal mergeHighValue(Decimal oldValue, Decimal newValue) {
     if (oldValue != null && newValue != null) {
-      return ObjectUtils.max(oldValue, newValue);
+      return compareDecimals(oldValue, newValue) < 0 ? newValue : oldValue;
     }
     if (oldValue != null || newValue != null) {
       return MoreObjects.firstNonNull(oldValue, newValue);
     }
     return null;
   }
+
+  /**
+   * Compare two decimals.
+   * @param decimal1 a non-null decimal
+   * @param decimal2 a non-null decimal
+   * @return see {@link java.util.Comparator#compare(Object, Object)}
+   */
+  private int compareDecimals(Decimal decimal1, Decimal decimal2) {
+    HiveDecimal d1 = DecimalUtils.getHiveDecimal(decimal1);
+    HiveDecimal d2 = DecimalUtils.getHiveDecimal(decimal2);
+    return d1.compareTo(d2);
+  }
 }
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
index 84fee0c8fd8..92eb19899ea 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
@@ -68,6 +68,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.metastore.ColumnType;
 import org.apache.hadoop.hive.metastore.ExceptionHandler;
 import org.apache.hadoop.hive.metastore.HiveMetaStore;
@@ -252,6 +253,15 @@ public static double decimalToDouble(Decimal decimal) {
     return new BigDecimal(new BigInteger(decimal.getUnscaled()), 
decimal.getScale()).doubleValue();
   }
 
+  public static String decimalToString(Decimal val) {
+    if (val == null) {
+      return "";
+    }
+
+    HiveDecimal result = HiveDecimal.create(new BigInteger(val.getUnscaled()), 
val.getScale());
+    return (result != null) ? result.toString() : "";
+  }
+
   private static Pattern getPartitionValidationRegex(Configuration conf) {
     return Optional.ofNullable(
             MetastoreConf.getVar(conf, 
MetastoreConf.ConfVars.PARTITION_NAME_WHITELIST_PATTERN))
diff --git 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
index 7e19cbfcad3..90e7bbf811a 100644
--- 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
+++ 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
@@ -23,12 +23,15 @@
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Decimal;
-import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils;
 import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder;
 import 
org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
+import java.nio.ByteBuffer;
+import java.util.Objects;
+
 import static 
org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -37,27 +40,34 @@
 
 @Category(MetastoreUnitTest.class)
 public class DecimalColumnStatsMergerTest {
-
-  private static final Decimal DECIMAL_1 = DecimalUtils.getDecimal(1, 0);
-  private static final Decimal DECIMAL_3 = DecimalUtils.getDecimal(3, 0);
-  private static final Decimal DECIMAL_5 = DecimalUtils.getDecimal(5, 0);
-  private static final Decimal DECIMAL_20 = DecimalUtils.getDecimal(2, 1);
+  private static final Decimal DECIMAL_1 = getDecimal("1", 1, 0);
+  private static final Decimal DECIMAL_3 = getDecimal("3", 3, 0);
+  private static final Decimal DECIMAL_5 = getDecimal("5", 5, 0);
+  private static final Decimal DECIMAL_20 = getDecimal("20", 2, -1);
 
   private static final DecimalColumnStatsDataInspector DATA_3 = new 
DecimalColumnStatsDataInspector();
   private static final DecimalColumnStatsDataInspector DATA_5 = new 
DecimalColumnStatsDataInspector();
-  private static final DecimalColumnStatsDataInspector DATA_20 = new 
DecimalColumnStatsDataInspector();
 
   static {
     DATA_3.setLowValue(DECIMAL_3);
     DATA_3.setHighValue(DECIMAL_3);
     DATA_5.setLowValue(DECIMAL_5);
     DATA_5.setHighValue(DECIMAL_5);
-    DATA_20.setLowValue(DECIMAL_20);
-    DATA_20.setHighValue(DECIMAL_20);
   }
 
   private final DecimalColumnStatsMerger merger = new 
DecimalColumnStatsMerger();
 
+  /**
+   * Creates a decimal and checks its string representation.
+   */
+  private static Decimal getDecimal(String expected, int number, int scale) {
+    ByteBuffer bb = ByteBuffer.allocate(4);
+    bb.asIntBuffer().put(number);
+    Decimal d = new Decimal((short) scale, bb);
+    assertEquals(expected, MetaStoreServerUtils.decimalToString(d));
+    return d;
+  }
+
   @Test
   public void testMergeNullValues() {
     ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new 
ColStatsBuilder<>(Decimal.class)
@@ -180,47 +190,53 @@ public void testMergeNonNullValues() {
 
   @Test
   public void testDecimalCompareEqual() {
-    assertTrue(DECIMAL_3.equals(DECIMAL_3));
+    assertTrue(DECIMAL_3.equals(getDecimal("3", 3, 0)));
+    // the equals method does not check for numerical equality,
+    // e.g., DECIMAL_3 is not equal to getDecimal("3", 30, 1)
   }
 
   @Test
   public void testDecimalCompareDoesntEqual() {
     assertFalse(DECIMAL_3.equals(DECIMAL_5));
+    assertFalse(DECIMAL_3.equals(getDecimal("30", 3, -1)));
   }
 
-  @Test
-  public void testCompareSimple() {
-    DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3);
-    DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_5);
-    assertEquals(DECIMAL_5, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2)));
-  }
+  private void checkMergedValue(Decimal low, Decimal high) {
+    Objects.requireNonNull(low);
+    Objects.requireNonNull(high);
+    assertTrue(MetaStoreServerUtils.decimalToDouble(low) < MetaStoreServerUtils.decimalToDouble(high));
 
-  @Test
-  public void testCompareSimpleFlipped() {
-    DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_5);
-    DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3);
-    assertEquals(DECIMAL_5, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2)));
+    assertEquals(low, merger.mergeLowValue(low, high));
+    assertEquals(low, merger.mergeLowValue(high, low));
+    assertEquals(high, merger.mergeHighValue(low, high));
+    assertEquals(high, merger.mergeHighValue(high, low));
   }
 
   @Test
-  public void testCompareSimpleReversed() {
-    DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3);
-    DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_5);
-    assertEquals(DECIMAL_3, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2)));
+  public void testCompareSimple() {
+    checkMergedValue(DECIMAL_3, DECIMAL_5);
   }
 
   @Test
-  public void testCompareSimpleFlippedReversed() {
-    DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_5);
-    DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3);
-    assertEquals(DECIMAL_3, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2)));
+  public void testCompareUnscaledValue() {
+    checkMergedValue(DECIMAL_3, DECIMAL_20);
   }
 
   @Test
-  public void testCompareUnscaledValue() {
-    DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3);
-    DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_20);
-    assertEquals(DECIMAL_20, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2)));
+  public void testCompareScaledValue() {
+    checkMergedValue(
+        getDecimal("-123.2", -1232, 1),
+        getDecimal("-10.2", -102, 1));
+
+    checkMergedValue(
+        getDecimal("1.02", 102, 2),
+        getDecimal("123.2", 1232, 1)
+    );
+
+    checkMergedValue(
+        getDecimal("1.02", 102, 2),
+        getDecimal("1232000", 1232, -3)
+    );
   }
 
   @Test

Reply via email to