This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new aa800efbdb7 HIVE-29275: Stats autogather calculates the min statistic
incorrectly (#6194)
aa800efbdb7 is described below
commit aa800efbdb7f1aacfd382ae28123e3ec9db5ed89
Author: Thomas Rebele <[email protected]>
AuthorDate: Tue Nov 25 09:01:39 2025 +0100
HIVE-29275: Stats autogather calculates the min statistic incorrectly
(#6194)
---
.../org/apache/hadoop/hive/ql/ddl/ShowUtils.java | 9 +--
.../llap/llap_decimal64_reader.q.out | 28 ++++----
.../clientpositive/llap/stats_histogram.q.out | 2 +-
.../clientpositive/llap/stats_histogram_null.q.out | 2 +-
.../perf/tpcds30tb/tez/query28.q.out | 60 ++++++++--------
.../perf/tpcds30tb/tez/query48.q.out | 16 ++---
.../perf/tpcds30tb/tez/query49.q.out | 62 ++++++++--------
.../hive/metastore/api/utils/DecimalUtils.java | 12 ++--
.../merge/DecimalColumnStatsMerger.java | 19 ++++-
.../hive/metastore/utils/MetaStoreServerUtils.java | 10 +++
.../merge/DecimalColumnStatsMergerTest.java | 82 +++++++++++++---------
11 files changed, 168 insertions(+), 134 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java
index 386bfd97748..e96288369ac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java
@@ -43,6 +43,7 @@
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState;
@@ -54,7 +55,6 @@
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
-import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.time.ZoneId;
import java.util.ArrayList;
@@ -233,12 +233,7 @@ public static String[] extractColumnValues(FieldSchema column, boolean isColumnS
}
public static String convertToString(Decimal val) {
- if (val == null) {
- return "";
- }
-
- HiveDecimal result = HiveDecimal.create(new BigInteger(val.getUnscaled()), val.getScale());
- return (result != null) ? result.toString() : "";
+ return MetaStoreServerUtils.decimalToString(val);
}
public static String convertToString(org.apache.hadoop.hive.metastore.api.Date val) {
diff --git a/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out b/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
index a20cedd10dd..fcf8bf6892b 100644
--- a/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out
@@ -136,25 +136,25 @@ STAGE PLANS:
Statistics: Num rows: 24576 Data size: 5505024 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean)
- Statistics: Num rows: 12288 Data size: 2752512 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24576 Data size: 5505024 Basic
stats: COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: ++
keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type:
decimal(38,5))
null sort order: zz
- Statistics: Num rows: 12288 Data size: 2752512 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24576 Data size: 5505024 Basic
stats: COMPLETE Column stats: COMPLETE
top n: 2
Group By Operator
keys: cdecimal1 (type: decimal(10,2)), cdecimal2
(type: decimal(38,5))
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 224 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: decimal(10,2)), _col1
(type: decimal(38,5))
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type:
decimal(10,2)), _col1 (type: decimal(38,5))
- Statistics: Num rows: 1 Data size: 224 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -179,13 +179,13 @@ STAGE PLANS:
keys: KEY._col0 (type: decimal(10,2)), KEY._col1 (type:
decimal(38,5))
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE
Column stats: COMPLETE
Limit
Number of rows: 2
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE
Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 224 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -243,25 +243,25 @@ STAGE PLANS:
Statistics: Num rows: 24576 Data size: 5505024 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean)
- Statistics: Num rows: 12288 Data size: 2752512 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24576 Data size: 5505024 Basic
stats: COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: ++
keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type:
decimal(38,5))
null sort order: zz
- Statistics: Num rows: 12288 Data size: 2752512 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24576 Data size: 5505024 Basic
stats: COMPLETE Column stats: COMPLETE
top n: 2
Group By Operator
keys: cdecimal1 (type: decimal(10,2)), cdecimal2
(type: decimal(38,5))
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 224 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: decimal(10,2)), _col1
(type: decimal(38,5))
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type:
decimal(10,2)), _col1 (type: decimal(38,5))
- Statistics: Num rows: 1 Data size: 224 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -287,13 +287,13 @@ STAGE PLANS:
keys: KEY._col0 (type: decimal(10,2)), KEY._col1 (type:
decimal(38,5))
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE
Column stats: COMPLETE
Limit
Number of rows: 2
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE
Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 224 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 448 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/stats_histogram.q.out b/ql/src/test/results/clientpositive/llap/stats_histogram.q.out
index 5ff94404746..439ebba09c5 100644
--- a/ql/src/test/results/clientpositive/llap/stats_histogram.q.out
+++ b/ql/src/test/results/clientpositive/llap/stats_histogram.q.out
@@ -361,7 +361,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@test_stats
col_name e
data_type decimal(5,2)
-min -10.2
+min -123.2
max 12.2
num_nulls 1
distinct_count 11
diff --git a/ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out b/ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out
index dbde9c67dae..07e48b5888a 100644
--- a/ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out
@@ -436,7 +436,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@test_stats
col_name e
data_type decimal(5,2)
-min -12.3
+min -123.2
max 12.2
num_nulls 1
distinct_count 15
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query28.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query28.q.out
index d0e8628d188..025630093e3 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query28.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query28.q.out
@@ -34,129 +34,129 @@ STAGE PLANS:
Statistics: Num rows: 86404891377 Data size: 28054250053192
Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (ss_quantity BETWEEN 0 AND 5 and (ss_list_price
BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost
BETWEEN 14 AND 34)) (type: boolean)
- Statistics: Num rows: 1965380184 Data size: 638126687968
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1955303836 Data size: 634855063288
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_list_price (type: decimal(7,2))
outputColumnNames: ss_list_price
- Statistics: Num rows: 1965380184 Data size: 638126687968
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1955303836 Data size: 634855063288
Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(ss_list_price), count(ss_list_price)
keys: ss_list_price (type: decimal(7,2))
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 982690092 Data size:
223025312544 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 977651918 Data size:
221881879504 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: decimal(7,2))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type:
decimal(7,2))
- Statistics: Num rows: 982690092 Data size:
223025312544 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 977651918 Data size:
221881879504 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(17,2)),
_col2 (type: bigint)
Filter Operator
predicate: (ss_quantity BETWEEN 16 AND 20 and
(ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or
ss_wholesale_cost BETWEEN 80 AND 100)) (type: boolean)
- Statistics: Num rows: 2571445780 Data size: 834906239572
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2551512553 Data size: 828434247976
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_list_price (type: decimal(7,2))
outputColumnNames: ss_list_price
- Statistics: Num rows: 2571445780 Data size: 834906239572
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2551512553 Data size: 828434247976
Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(ss_list_price), count(ss_list_price)
keys: ss_list_price (type: decimal(7,2))
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1285722890 Data size:
291799776608 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1275756276 Data size:
289537815088 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: decimal(7,2))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type:
decimal(7,2))
- Statistics: Num rows: 1285722890 Data size:
291799776608 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1275756276 Data size:
289537815088 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(17,2)),
_col2 (type: bigint)
Filter Operator
predicate: (ss_quantity BETWEEN 21 AND 25 and
(ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or
ss_wholesale_cost BETWEEN 38 AND 58)) (type: boolean)
- Statistics: Num rows: 3432340414 Data size: 1114424597248
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3482601258 Data size: 1130743468324
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_list_price (type: decimal(7,2))
outputColumnNames: ss_list_price
- Statistics: Num rows: 3432340414 Data size:
1114424597248 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3482601258 Data size:
1130743468324 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(ss_list_price), count(ss_list_price)
keys: ss_list_price (type: decimal(7,2))
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1716170207 Data size:
389491457960 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1741300629 Data size:
395194904312 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: decimal(7,2))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type:
decimal(7,2))
- Statistics: Num rows: 1716170207 Data size:
389491457960 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1741300629 Data size:
395194904312 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(17,2)),
_col2 (type: bigint)
Filter Operator
predicate: (ss_quantity BETWEEN 26 AND 30 and
(ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or
ss_wholesale_cost BETWEEN 42 AND 62)) (type: boolean)
- Statistics: Num rows: 2913592254 Data size: 945995583960
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2894732905 Data size: 939872262804
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_list_price (type: decimal(7,2))
outputColumnNames: ss_list_price
- Statistics: Num rows: 2913592254 Data size: 945995583960
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2894732905 Data size: 939872262804
Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(ss_list_price), count(ss_list_price)
keys: ss_list_price (type: decimal(7,2))
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1456796127 Data size:
330625508552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1447366452 Data size:
328485407488 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: decimal(7,2))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type:
decimal(7,2))
- Statistics: Num rows: 1456796127 Data size:
330625508552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1447366452 Data size:
328485407488 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(17,2)),
_col2 (type: bigint)
Filter Operator
predicate: (ss_quantity BETWEEN 11 AND 15 and
(ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or
ss_wholesale_cost BETWEEN 4 AND 24)) (type: boolean)
- Statistics: Num rows: 2457715925 Data size: 797980022328
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2445115019 Data size: 793888714924
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_list_price (type: decimal(7,2))
outputColumnNames: ss_list_price
- Statistics: Num rows: 2457715925 Data size: 797980022328
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2445115019 Data size: 793888714924
Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(ss_list_price), count(ss_list_price)
keys: ss_list_price (type: decimal(7,2))
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1228857962 Data size:
278894061568 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1222557509 Data size:
277464149464 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: decimal(7,2))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type:
decimal(7,2))
- Statistics: Num rows: 1228857962 Data size:
278894061568 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1222557509 Data size:
277464149464 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(17,2)),
_col2 (type: bigint)
Filter Operator
predicate: (ss_quantity BETWEEN 6 AND 10 and
(ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or
ss_wholesale_cost BETWEEN 32 AND 52)) (type: boolean)
- Statistics: Num rows: 3193426694 Data size: 1036853233656
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3178448591 Data size: 1031990089344
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_list_price (type: decimal(7,2))
outputColumnNames: ss_list_price
- Statistics: Num rows: 3193426694 Data size:
1036853233656 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3178448591 Data size:
1031990089344 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(ss_list_price), count(ss_list_price)
keys: ss_list_price (type: decimal(7,2))
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1596713347 Data size:
362380262200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1589224295 Data size:
360680592952 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: decimal(7,2))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type:
decimal(7,2))
- Statistics: Num rows: 1596713347 Data size:
362380262200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1589224295 Data size:
360680592952 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(17,2)),
_col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
@@ -168,7 +168,7 @@ STAGE PLANS:
keys: KEY._col0 (type: decimal(7,2))
mode: partial2
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1228857962 Data size: 278894061568 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1222557509 Data size: 277464149464 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count(_col2), count(_col0)
mode: partial2
@@ -204,7 +204,7 @@ STAGE PLANS:
keys: KEY._col0 (type: decimal(7,2))
mode: partial2
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1596713347 Data size: 362380262200 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1589224295 Data size: 360680592952 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count(_col2), count(_col0)
mode: partial2
@@ -240,7 +240,7 @@ STAGE PLANS:
keys: KEY._col0 (type: decimal(7,2))
mode: partial2
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 982690092 Data size: 223025312544 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 977651918 Data size: 221881879504 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count(_col2), count(_col0)
mode: partial2
@@ -276,7 +276,7 @@ STAGE PLANS:
keys: KEY._col0 (type: decimal(7,2))
mode: partial2
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1285722890 Data size: 291799776608 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1275756276 Data size: 289537815088 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count(_col2), count(_col0)
mode: partial2
@@ -312,7 +312,7 @@ STAGE PLANS:
keys: KEY._col0 (type: decimal(7,2))
mode: partial2
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1716170207 Data size: 389491457960 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1741300629 Data size: 395194904312 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count(_col2), count(_col0)
mode: partial2
@@ -394,7 +394,7 @@ STAGE PLANS:
keys: KEY._col0 (type: decimal(7,2))
mode: partial2
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1456796127 Data size: 330625508552 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1447366452 Data size: 328485407488 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), count(_col2), count(_col0)
mode: partial2
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query48.q.out
index 59b49bfa8fc..bd3b9e37e5a 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query48.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query48.q.out
@@ -16,15 +16,15 @@ STAGE PLANS:
TableScan
alias: store_sales
filterExpr: (ss_sales_price BETWEEN 50 AND 200 and
ss_net_profit is not null and ss_cdemo_sk is not null and ss_addr_sk is not
null and ss_store_sk is not null) (type: boolean)
- probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_66_container,
bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:3.323733066508898E-4
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_66_container,
bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:3.323843839779123E-4
Statistics: Num rows: 82510879939 Data size: 20962809999708
Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (ss_sales_price BETWEEN 50 AND 200 and
ss_net_profit is not null and ss_cdemo_sk is not null and ss_addr_sk is not
null and ss_store_sk is not null) (type: boolean)
- Statistics: Num rows: 56248293349 Data size:
14290506744864 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 56250168542 Data size:
14290983158452 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_cdemo_sk (type: bigint), ss_addr_sk
(type: bigint), ss_quantity (type: int), ss_sold_date_sk (type: bigint),
ss_net_profit BETWEEN 0 AND 2000 (type: boolean), ss_net_profit BETWEEN 150 AND
3000 (type: boolean), ss_net_profit BETWEEN 50 AND 25000 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6
- Statistics: Num rows: 56248293349 Data size:
2223391490876 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 56250168542 Data size:
2223465613804 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -34,7 +34,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col4, _col5,
_col6
input vertices:
1 Map 3
- Statistics: Num rows: 11304950271 Data size:
335218165588 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 11305327153 Data size:
335229341020 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -44,7 +44,7 @@ STAGE PLANS:
outputColumnNames: _col1, _col2, _col4, _col5, _col6
input vertices:
1 Map 4
- Statistics: Num rows: 322998581 Data size:
3875982984 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 323009350 Data size:
3876112212 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -54,14 +54,14 @@ STAGE PLANS:
outputColumnNames: _col2, _col4, _col5, _col6,
_col10, _col11, _col12
input vertices:
1 Map 5
- Statistics: Num rows: 27424414 Data size:
658185940 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 27425328 Data size:
658207876 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((_col10 and _col4) or (_col11 and
_col5) or (_col12 and _col6)) (type: boolean)
- Statistics: Num rows: 20568309 Data size:
493639420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20568996 Data size:
493655908 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: int)
outputColumnNames: _col2
- Statistics: Num rows: 20568309 Data size:
493639420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20568996 Data size:
493655908 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col2)
minReductionHashAggr: 0.99
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query49.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query49.q.out
index 98ae7182489..276726ad496 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query49.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query49.q.out
@@ -41,11 +41,11 @@ STAGE PLANS:
Statistics: Num rows: 21594638446 Data size: 5441536184068
Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((ws_quantity > 0) and (ws_net_profit > 1) and
(ws_net_paid > 0)) (type: boolean)
- Statistics: Num rows: 14321294654 Data size: 3608758871252
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14390903321 Data size: 3626299247340
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ws_item_sk (type: bigint), ws_order_number
(type: bigint), ws_quantity (type: int), ws_net_paid (type: decimal(7,2)),
ws_sold_date_sk (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col5
- Statistics: Num rows: 14321294654 Data size:
2004773870004 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14390903321 Data size:
2014518075388 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -55,18 +55,18 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 19
- Statistics: Num rows: 243129259 Data size: 31885680632
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 244310989 Data size: 32040660996
Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type:
bigint)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: bigint),
_col1 (type: bigint)
- Statistics: Num rows: 243129259 Data size:
31885680632 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 244310989 Data size:
32040660996 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int), _col3 (type:
decimal(7,2))
Select Operator
expressions: _col0 (type: bigint), _col1 (type:
bigint), hash(_col0,_col1) (type: int)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 243129259 Data size:
4862585180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 244310989 Data size:
4886219780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col0), max(_col0), min(_col1),
max(_col1), bloom_filter(_col3, expectedEntries=1000000)
minReductionHashAggr: 0.99
@@ -88,17 +88,17 @@ STAGE PLANS:
Statistics: Num rows: 2160007345 Data size: 273845125140
Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((wr_return_amt > 10000) and wr_item_sk BETWEEN
DynamicValue(RS[225]_col0) AND DynamicValue(RS[225]_col1) and wr_order_number
BETWEEN DynamicValue(RS[225]_col2) AND DynamicValue(RS[225]_col3) and
in_bloom_filter(hash(wr_item_sk,wr_order_number), DynamicValue(RS[225]_col4)))
(type: boolean)
- Statistics: Num rows: 1418116903 Data size: 179788463076
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1420050734 Data size: 180033633704
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: wr_item_sk (type: bigint), wr_order_number
(type: bigint), wr_return_quantity (type: int), wr_return_amt (type:
decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1418116903 Data size: 179788463076
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1420050734 Data size: 180033633704
Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type:
bigint)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: bigint),
_col1 (type: bigint)
- Statistics: Num rows: 1418116903 Data size:
179788463076 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1420050734 Data size:
180033633704 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int), _col3 (type:
decimal(7,2))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
@@ -110,11 +110,11 @@ STAGE PLANS:
Statistics: Num rows: 43005109025 Data size: 10824794628716
Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((cs_quantity > 0) and (cs_net_profit > 1) and
(cs_net_paid > 0)) (type: boolean)
- Statistics: Num rows: 28554178173 Data size: 7187358002848
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28650456622 Data size: 7211592203468
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cs_item_sk (type: bigint), cs_order_number
(type: bigint), cs_quantity (type: int), cs_net_paid (type: decimal(7,2)),
cs_sold_date_sk (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col5
- Statistics: Num rows: 28554178173 Data size:
3989290047472 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28650456622 Data size:
4002741061804 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -124,18 +124,18 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 19
- Statistics: Num rows: 481330829 Data size: 55240772680
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 482953772 Data size: 55427032628
Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type:
bigint)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: bigint),
_col1 (type: bigint)
- Statistics: Num rows: 481330829 Data size:
55240772680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 482953772 Data size:
55427032628 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int), _col3 (type:
decimal(7,2))
Select Operator
expressions: _col0 (type: bigint), _col1 (type:
bigint), hash(_col0,_col1) (type: int)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 481330829 Data size:
9626616580 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 482953772 Data size:
9659075440 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col0), max(_col0), min(_col1),
max(_col1), bloom_filter(_col3, expectedEntries=1000000)
minReductionHashAggr: 0.99
@@ -260,11 +260,11 @@ STAGE PLANS:
Statistics: Num rows: 82510879939 Data size: 20349734757316
Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((ss_quantity > 0) and (ss_net_profit > 1) and
(ss_net_paid > 0)) (type: boolean)
- Statistics: Num rows: 40994410513 Data size:
10110489442160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 41222412506 Data size:
10166721784872 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_item_sk (type: bigint), ss_ticket_number
(type: bigint), ss_quantity (type: int), ss_net_paid (type: decimal(7,2)),
ss_sold_date_sk (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col5
- Statistics: Num rows: 40994410513 Data size:
5627405668656 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 41222412506 Data size:
5658704074760 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -274,18 +274,18 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 19
- Statistics: Num rows: 695952488 Data size: 11135239924
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 699823225 Data size: 11197171716
Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type:
bigint)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: bigint),
_col1 (type: bigint)
- Statistics: Num rows: 695952488 Data size:
11135239924 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 699823225 Data size:
11197171716 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int), _col3 (type:
decimal(7,2))
Select Operator
expressions: _col0 (type: bigint), _col1 (type:
bigint), hash(_col0,_col1) (type: int)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 695952488 Data size:
13919049760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 699823225 Data size:
13996464500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col0), max(_col0), min(_col1),
max(_col1), bloom_filter(_col3, expectedEntries=1000000)
minReductionHashAggr: 0.99
@@ -307,17 +307,17 @@ STAGE PLANS:
Statistics: Num rows: 8634166995 Data size: 1104703724476
Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((sr_return_amt > 10000) and sr_item_sk BETWEEN
DynamicValue(RS[245]_col0) AND DynamicValue(RS[245]_col1) and sr_ticket_number
BETWEEN DynamicValue(RS[245]_col2) AND DynamicValue(RS[245]_col3) and
in_bloom_filter(hash(sr_item_sk,sr_ticket_number), DynamicValue(RS[245]_col4)))
(type: boolean)
- Statistics: Num rows: 4166475379 Data size: 533082215324
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4238623038 Data size: 542313191336
Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: sr_item_sk (type: bigint), sr_ticket_number
(type: bigint), sr_return_quantity (type: int), sr_return_amt (type:
decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 4166475379 Data size: 533082215324
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4238623038 Data size: 542313191336
Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type:
bigint)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: bigint),
_col1 (type: bigint)
- Statistics: Num rows: 4166475379 Data size:
533082215324 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4238623038 Data size:
542313191336 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int), _col3 (type:
decimal(7,2))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
@@ -363,25 +363,25 @@ STAGE PLANS:
outputColumnNames: _col0, _col2, _col3, _col9, _col10
input vertices:
1 Map 20
- Statistics: Num rows: 506324466 Data size: 106684518508 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 508031682 Data size: 107066281820 Basic
stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Select Operator
expressions: _col0 (type: bigint), if(_col9 is not null,
_col9, 0) (type: int), if(_col2 is not null, _col2, 0) (type: int), if(_col10
is not null, _col10, 0) (type: decimal(7,2)), if(_col3 is not null, _col3, 0)
(type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 506324466 Data size: 106684518508
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 508031682 Data size: 107066281820
Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), sum(_col2), sum(_col3),
sum(_col4)
keys: _col0 (type: bigint)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 3247596 Data size: 805403808 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3263172 Data size: 809266656 Basic
stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 3247596 Data size: 805403808 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3263172 Data size: 809266656 Basic
stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type:
bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
Reducer 15
Execution mode: vectorized, llap
@@ -509,25 +509,25 @@ STAGE PLANS:
outputColumnNames: _col0, _col2, _col3, _col9, _col10
input vertices:
1 Map 12
- Statistics: Num rows: 261773150 Data size: 55215206324 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 263404204 Data size: 55595556224 Basic
stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Select Operator
expressions: _col0 (type: bigint), if(_col9 is not null,
_col9, 0) (type: int), if(_col2 is not null, _col2, 0) (type: int), if(_col10
is not null, _col10, 0) (type: decimal(7,2)), if(_col3 is not null, _col3, 0)
(type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 261773150 Data size: 55215206324 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 263404204 Data size: 55595556224 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), sum(_col2), sum(_col3),
sum(_col4)
keys: _col0 (type: bigint)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1694304 Data size: 420187392 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1709992 Data size: 424078016 Basic
stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 1694304 Data size: 420187392 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1709992 Data size: 424078016 Basic
stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type:
bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))
Reducer 22
Execution mode: vectorized, llap
@@ -541,12 +541,12 @@ STAGE PLANS:
outputColumnNames: _col0, _col2, _col3, _col9, _col10
input vertices:
1 Map 27
- Statistics: Num rows: 695952488 Data size: 69405573924 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 699823225 Data size: 69593030336 Basic
stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Select Operator
expressions: _col0 (type: bigint), if(_col9 is not null,
_col9, 0) (type: int), if(_col2 is not null, _col2, 0) (type: int), if(_col10
is not null, _col10, 0) (type: decimal(7,2)), if(_col3 is not null, _col3, 0)
(type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 695952488 Data size: 69405573924 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 699823225 Data size: 69593030336 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), sum(_col2), sum(_col3),
sum(_col4)
keys: _col0 (type: bigint)
diff --git
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/api/utils/DecimalUtils.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/api/utils/DecimalUtils.java
index e5d8b0b18f5..8647db58faa 100644
---
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/api/utils/DecimalUtils.java
+++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/api/utils/DecimalUtils.java
@@ -21,6 +21,8 @@
import java.nio.ByteBuffer;
import java.math.BigDecimal;
import java.math.BigInteger;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.metastore.api.Decimal;
/**
@@ -28,12 +30,6 @@
*/
public class DecimalUtils {
- public static Decimal getDecimal(int number, int scale) {
- ByteBuffer bb = ByteBuffer.allocate(4);
- bb.asIntBuffer().put(number);
- return new Decimal((short) scale, bb);
- }
-
public static Decimal getDecimal(ByteBuffer unscaled, short scale) {
return new Decimal((short) scale, unscaled);
}
@@ -46,4 +42,8 @@ public static Decimal createThriftDecimal(String s) {
public static String createJdoDecimalString(Decimal d) {
return new BigDecimal(new BigInteger(d.getUnscaled()),
d.getScale()).toString();
}
+
+ public static HiveDecimal getHiveDecimal(Decimal decimal) {
+ return HiveDecimal.create(new BigInteger(decimal.getUnscaled()),
decimal.getScale());
+ }
}
diff --git
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index 523f848ba44..c47448c8f7d 100644
---
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -21,15 +21,16 @@
import org.apache.hadoop.hive.common.histogram.KllHistogramEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils;
import
org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
import com.google.common.base.MoreObjects;
import static
org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.decimalInspectorFromStats;
-import org.apache.commons.lang3.ObjectUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -82,7 +83,7 @@ public Decimal getHighValue(DecimalColumnStatsDataInspector
data) {
@Override
public Decimal mergeLowValue(Decimal oldValue, Decimal newValue) {
if (oldValue != null && newValue != null) {
- return ObjectUtils.min(oldValue, newValue);
+ return compareDecimals(oldValue, newValue) < 0 ? oldValue : newValue;
}
if (oldValue != null || newValue != null) {
return MoreObjects.firstNonNull(oldValue, newValue);
@@ -93,11 +94,23 @@ public Decimal mergeLowValue(Decimal oldValue, Decimal
newValue) {
@Override
public Decimal mergeHighValue(Decimal oldValue, Decimal newValue) {
if (oldValue != null && newValue != null) {
- return ObjectUtils.max(oldValue, newValue);
+ return compareDecimals(oldValue, newValue) < 0 ? newValue : oldValue;
}
if (oldValue != null || newValue != null) {
return MoreObjects.firstNonNull(oldValue, newValue);
}
return null;
}
+
+ /**
+ * Compare two decimals.
+ * @param decimal1 a non-null decimal
+ * @param decimal2 a non-null decimal
+ * @return see {@link java.util.Comparator#compare(Object, Object)}
+ */
+ private int compareDecimals(Decimal decimal1, Decimal decimal2) {
+ HiveDecimal d1 = DecimalUtils.getHiveDecimal(decimal1);
+ HiveDecimal d2 = DecimalUtils.getHiveDecimal(decimal2);
+ return d1.compareTo(d2);
+ }
}
diff --git
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
index 84fee0c8fd8..92eb19899ea 100644
---
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
+++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
@@ -68,6 +68,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.metastore.ColumnType;
import org.apache.hadoop.hive.metastore.ExceptionHandler;
import org.apache.hadoop.hive.metastore.HiveMetaStore;
@@ -252,6 +253,15 @@ public static double decimalToDouble(Decimal decimal) {
return new BigDecimal(new BigInteger(decimal.getUnscaled()),
decimal.getScale()).doubleValue();
}
+ public static String decimalToString(Decimal val) {
+ if (val == null) {
+ return "";
+ }
+
+ HiveDecimal result = HiveDecimal.create(new BigInteger(val.getUnscaled()),
val.getScale());
+ return (result != null) ? result.toString() : "";
+ }
+
private static Pattern getPartitionValidationRegex(Configuration conf) {
return Optional.ofNullable(
MetastoreConf.getVar(conf,
MetastoreConf.ConfVars.PARTITION_NAME_WHITELIST_PATTERN))
diff --git
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
index 7e19cbfcad3..90e7bbf811a 100644
---
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
+++
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java
@@ -23,12 +23,15 @@
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Decimal;
-import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils;
import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder;
import
org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+
import static
org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -37,27 +40,34 @@
@Category(MetastoreUnitTest.class)
public class DecimalColumnStatsMergerTest {
-
- private static final Decimal DECIMAL_1 = DecimalUtils.getDecimal(1, 0);
- private static final Decimal DECIMAL_3 = DecimalUtils.getDecimal(3, 0);
- private static final Decimal DECIMAL_5 = DecimalUtils.getDecimal(5, 0);
- private static final Decimal DECIMAL_20 = DecimalUtils.getDecimal(2, 1);
+ private static final Decimal DECIMAL_1 = getDecimal("1", 1, 0);
+ private static final Decimal DECIMAL_3 = getDecimal("3", 3, 0);
+ private static final Decimal DECIMAL_5 = getDecimal("5", 5, 0);
+ private static final Decimal DECIMAL_20 = getDecimal("20", 2, -1);
private static final DecimalColumnStatsDataInspector DATA_3 = new
DecimalColumnStatsDataInspector();
private static final DecimalColumnStatsDataInspector DATA_5 = new
DecimalColumnStatsDataInspector();
- private static final DecimalColumnStatsDataInspector DATA_20 = new
DecimalColumnStatsDataInspector();
static {
DATA_3.setLowValue(DECIMAL_3);
DATA_3.setHighValue(DECIMAL_3);
DATA_5.setLowValue(DECIMAL_5);
DATA_5.setHighValue(DECIMAL_5);
- DATA_20.setLowValue(DECIMAL_20);
- DATA_20.setHighValue(DECIMAL_20);
}
private final DecimalColumnStatsMerger merger = new
DecimalColumnStatsMerger();
+ /**
+ * Creates a decimal and checks its string representation.
+ */
+ private static Decimal getDecimal(String expected, int number, int scale) {
+ ByteBuffer bb = ByteBuffer.allocate(4);
+ bb.asIntBuffer().put(number);
+ Decimal d = new Decimal((short) scale, bb);
+ assertEquals(expected, MetaStoreServerUtils.decimalToString(d));
+ return d;
+ }
+
@Test
public void testMergeNullValues() {
ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new
ColStatsBuilder<>(Decimal.class)
@@ -180,47 +190,53 @@ public void testMergeNonNullValues() {
@Test
public void testDecimalCompareEqual() {
- assertTrue(DECIMAL_3.equals(DECIMAL_3));
+ assertTrue(DECIMAL_3.equals(getDecimal("3", 3, 0)));
+ // the equals method does not check for numerical equality,
+ // e.g., DECIMAL_3 is not equal to getDecimal("3", 30, 1)
}
@Test
public void testDecimalCompareDoesntEqual() {
assertFalse(DECIMAL_3.equals(DECIMAL_5));
+ assertFalse(DECIMAL_3.equals(getDecimal("30", 3, -1)));
}
- @Test
- public void testCompareSimple() {
- DecimalColumnStatsDataInspector data1 = new
DecimalColumnStatsDataInspector(DATA_3);
- DecimalColumnStatsDataInspector data2 = new
DecimalColumnStatsDataInspector(DATA_5);
- assertEquals(DECIMAL_5, merger.mergeHighValue(merger.getHighValue(data1),
merger.getHighValue(data2)));
- }
+ private void checkMergedValue(Decimal low, Decimal high) {
+ Objects.requireNonNull(low);
+ Objects.requireNonNull(high);
+ assertTrue(MetaStoreServerUtils.decimalToDouble(low) <
MetaStoreServerUtils.decimalToDouble(high));
- @Test
- public void testCompareSimpleFlipped() {
- DecimalColumnStatsDataInspector data1 = new
DecimalColumnStatsDataInspector(DATA_5);
- DecimalColumnStatsDataInspector data2 = new
DecimalColumnStatsDataInspector(DATA_3);
- assertEquals(DECIMAL_5, merger.mergeHighValue(merger.getHighValue(data1),
merger.getHighValue(data2)));
+ assertEquals(low, merger.mergeLowValue(low, high));
+ assertEquals(low, merger.mergeLowValue(high, low));
+ assertEquals(high, merger.mergeHighValue(low, high));
+ assertEquals(high, merger.mergeHighValue(high, low));
}
@Test
- public void testCompareSimpleReversed() {
- DecimalColumnStatsDataInspector data1 = new
DecimalColumnStatsDataInspector(DATA_3);
- DecimalColumnStatsDataInspector data2 = new
DecimalColumnStatsDataInspector(DATA_5);
- assertEquals(DECIMAL_3, merger.mergeLowValue(merger.getLowValue(data1),
merger.getLowValue(data2)));
+ public void testCompareSimple() {
+ checkMergedValue(DECIMAL_3, DECIMAL_5);
}
@Test
- public void testCompareSimpleFlippedReversed() {
- DecimalColumnStatsDataInspector data1 = new
DecimalColumnStatsDataInspector(DATA_5);
- DecimalColumnStatsDataInspector data2 = new
DecimalColumnStatsDataInspector(DATA_3);
- assertEquals(DECIMAL_3, merger.mergeLowValue(merger.getLowValue(data1),
merger.getLowValue(data2)));
+ public void testCompareUnscaledValue() {
+ checkMergedValue(DECIMAL_3, DECIMAL_20);
}
@Test
- public void testCompareUnscaledValue() {
- DecimalColumnStatsDataInspector data1 = new
DecimalColumnStatsDataInspector(DATA_3);
- DecimalColumnStatsDataInspector data2 = new
DecimalColumnStatsDataInspector(DATA_20);
- assertEquals(DECIMAL_20, merger.mergeHighValue(merger.getHighValue(data1),
merger.getHighValue(data2)));
+ public void testCompareScaledValue() {
+ checkMergedValue(
+ getDecimal("-123.2", -1232, 1),
+ getDecimal("-10.2", -102, 1));
+
+ checkMergedValue(
+ getDecimal("1.02", 102, 2),
+ getDecimal("123.2", 1232, 1)
+ );
+
+ checkMergedValue(
+ getDecimal("1.02", 102, 2),
+ getDecimal("1232000", 1232, -3)
+ );
}
@Test