This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8572c12 HIVE-25805: Wrong result when rebuilding MV with count(col) incrementally (Krisztian Kasa, reviewed by Zoltan Haindrich)
8572c12 is described below
commit 8572c1201e1d483eb03c7e413f4ff7f9b6f4a3d2
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue Dec 21 14:37:34 2021 +0100
HIVE-25805: Wrong result when rebuilding MV with count(col) incrementally (Krisztian Kasa, reviewed by Zoltan Haindrich)
---
...regateInsertDeleteIncrementalRewritingRule.java | 18 +-
.../materialized_view_create_rewrite_6.q | 14 +-
.../llap/materialized_view_create_rewrite_6.q.out | 208 ++++++++++-----------
3 files changed, 118 insertions(+), 122 deletions(-)
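Summary of the fix: before this change, the incremental rebuild rewrote count(<col>) the same way as count(*), summing a literal 1 for every delta row, so NULL values of the counted column were overcounted. The rule now sums CASE WHEN <col> IS NULL THEN 0 ELSE 1 END instead. A minimal standalone Java sketch of that merge arithmetic (illustrative only, not Hive code; all names are made up):

    import java.util.Arrays;
    import java.util.List;

    public class CountColMergeSketch {
        // Contribution of each delta row to the stored count(col):
        // CASE WHEN col IS NULL THEN 0 ELSE 1 END, negated for deleted rows.
        static long delta(List<Integer> colValues, List<Boolean> deleted) {
            long d = 0;
            for (int i = 0; i < colValues.size(); i++) {
                int c = (colValues.get(i) == null) ? 0 : 1; // count(col) skips NULLs
                d += deleted.get(i) ? -c : c;               // deletes subtract
            }
            return d;
        }

        public static void main(String[] args) {
            long storedCount = 2; // count(b) already materialized for some group
            // One NULL insert (no effect on count(b)) and one non-NULL delete (-1):
            List<Integer> values = Arrays.asList(null, 5);
            List<Boolean> deleted = Arrays.asList(false, true);
            System.out.println(storedCount + delta(values, deleted)); // prints 1
        }
    }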
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveAggregateInsertDeleteIncrementalRewritingRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveAggregateInsertDeleteIncrementalRewritingRule.java
index d4d4bdd..601cbfc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveAggregateInsertDeleteIncrementalRewritingRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveAggregateInsertDeleteIncrementalRewritingRule.java
@@ -139,7 +139,14 @@ public class HiveAggregateInsertDeleteIncrementalRewritingRule extends HiveAggre
switch (aggregateCall.getAggregation().getKind()) {
case COUNT:
aggFunction = SqlStdOperatorTable.SUM;
- argument = relBuilder.literal(1);
+
+ // count(*)
+ if (aggregateCall.getArgList().isEmpty()) {
+ argument = relBuilder.literal(1);
+ } else {
+ // count(<column_name>)
+ argument = genArgumentForCountColumn(relBuilder, aggInput, aggregateCall.getArgList().get(0));
+ }
break;
case SUM:
aggFunction = SqlStdOperatorTable.SUM;
@@ -171,6 +178,15 @@ public class HiveAggregateInsertDeleteIncrementalRewritingRule extends HiveAggre
countIdx);
}
+ private RexNode genArgumentForCountColumn(RelBuilder relBuilder, RelNode aggInput, int argumentIdx) {
+ RexBuilder rexBuilder = relBuilder.getRexBuilder();
+ RexNode countArg = rexBuilder.makeInputRef(
+ aggInput.getRowType().getFieldList().get(argumentIdx).getType(), argumentIdx);
+ RexNode isNull = rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, countArg);
+ return rexBuilder.makeCall(
+ SqlStdOperatorTable.CASE, isNull, relBuilder.literal(0), relBuilder.literal(1));
+ }
+
/**
* Extend {@link HiveAggregateIncrementalRewritingRuleBase.IncrementalComputePlan} with countStarIndex. It is
* required for building the filter condition that distribute the rows to the insert/update/delete branches.
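Why the countStarIndex in the javadoc above matters: after the stored MV row is joined with the aggregated delta, the merged count(*) value is what routes each group to the insert, update, or delete branch (see the HiveFilter conditions in the expected plans further down). A rough illustration of that routing, again as standalone Java rather than Hive code:

    public class BranchRoutingSketch {
        enum Branch { INSERT, UPDATE, DELETE }

        // existsInView: the right outer join found a row already stored in the MV.
        // mergedCountStar: stored count(*) plus the delta computed from changed rows.
        static Branch route(boolean existsInView, long mergedCountStar) {
            if (existsInView && mergedCountStar == 0) {
                return Branch.DELETE; // all source rows of the group were deleted
            }
            return existsInView ? Branch.UPDATE : Branch.INSERT;
        }

        public static void main(String[] args) {
            System.out.println(route(true, 0));  // DELETE
            System.out.println(route(true, 3));  // UPDATE
            System.out.println(route(false, 2)); // INSERT
        }
    }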
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
index e7d3449..6024657 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
@@ -21,13 +21,13 @@ insert into t2(a, b) values ('sum0', 0);
-- Aggregate with count(*): incremental rebuild should be triggered even if there were deletes from source table
create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
-select t1.a, sum(t1.b), count(*) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
@@ -54,7 +54,7 @@ delete from t1 where a like '%add/remove';
-- view can not be used
explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
@@ -68,16 +68,12 @@ alter materialized view mat1 rebuild;
-- the view should be up to date and used
explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
drop materialized view mat1;
-
-select t1.a, sum(t1.b) from t1
-join t2 on (t1.a = t2.a)
-group by t1.a;
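Note on the expected output below: avg(t1.b) has no incremental merge of its own; the rebuilt average column is recomputed as the merged sum divided by the merged count(t1.b) (the /(CAST(...):DOUBLE, ...) expressions in the plans), which is why a correct count(col) also keeps avg(col) correct. Illustrative Java, not Hive code:

    public class AvgFromSumCountSketch {
        // The rebuilt avg column is the merged sum divided by the merged count(col).
        static double avg(long mergedSum, long mergedCount) {
            return (double) mergedSum / mergedCount;
        }

        public static void main(String[] args) {
            // Matches the 'update' row in the expected results: sum=7, count=2 -> 3.5
            System.out.println(avg(7, 2));
        }
    }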
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
index 19f810b..f299bbd 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
@@ -86,7 +86,7 @@ POSTHOOK: Output: default@t2
POSTHOOK: Lineage: t2.a SCRIPT []
POSTHOOK: Lineage: t2.b SCRIPT []
PREHOOK: query: create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
-select t1.a, sum(t1.b), count(*) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: CREATE_MATERIALIZED_VIEW
@@ -95,7 +95,7 @@ PREHOOK: Input: default@t2
PREHOOK: Output: database:default
PREHOOK: Output: default@mat1
POSTHOOK: query: create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
-select t1.a, sum(t1.b), count(*) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: CREATE_MATERIALIZED_VIEW
@@ -104,10 +104,12 @@ POSTHOOK: Input: default@t2
POSTHOOK: Output: database:default
POSTHOOK: Output: default@mat1
POSTHOOK: Lineage: mat1._c1 EXPRESSION [(t1)t1.FieldSchema(name:b, type:int, comment:null), ]
-POSTHOOK: Lineage: mat1._c2 EXPRESSION [(t1)t1.null, (t2)t2.null, ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c3 EXPRESSION [(t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c4 EXPRESSION [(t1)t1.null, (t2)t2.null, ]
POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
PREHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: QUERY
@@ -116,7 +118,7 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: QUERY
@@ -125,8 +127,7 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
CBO PLAN:
-HiveProject(a=[$0], _c1=[$1])
- HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
PREHOOK: query: delete from t1 where b = 1
PREHOOK: type: QUERY
@@ -202,7 +203,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@t1
PREHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: QUERY
@@ -210,7 +211,7 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: QUERY
@@ -218,14 +219,15 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
CBO PLAN:
-HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(a=[$0], b=[$1])
- HiveFilter(condition=[IS NOT NULL($0)])
- HiveTableScan(table=[[default, t1]], table:alias=[t1])
- HiveProject(a=[$0])
- HiveFilter(condition=[IS NOT NULL($0)])
- HiveTableScan(table=[[default, t2]], table:alias=[t2])
+HiveProject(a=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[/(CAST($1):DOUBLE, $2)], _o__c4=[$3])
+ HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)], agg#2=[count()])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0], b=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+ HiveProject(a=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t2]], table:alias=[t2])
PREHOOK: query: explain cbo
alter materialized view mat1 rebuild
@@ -246,14 +248,14 @@ POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
CBO PLAN:
-HiveProject(a0=[$4], $f1=[CASE(IS NULL($1), $5, IS NULL($5), $1, +($5, $1))], $f2=[CASE(IS NULL($2), $6, +($6, $2))])
- HiveFilter(condition=[OR(AND($3, OR(AND(IS NULL($2), =($6, 0)), AND(=(+($6, $2), 0), IS NOT NULL($2)))), AND(IS NULL($3), OR(AND(IS NULL($2), >($6, 0)), AND(>(+($6, $2), 0), IS NOT NULL($2)))), AND($3, OR(AND(IS NULL($2), >($6, 0)), AND(>(+($6, $2), 0), IS NOT NULL($2)))))])
- HiveJoin(condition=[IS NOT DISTINCT FROM($0, $4)], joinType=[right], algorithm=[none], cost=[not available])
- HiveProject(a=[$0], _c1=[$1], _c2=[$2], $f3=[true])
+HiveProject(a=[$5], _o__c1=[CASE(IS NULL($1), $6, IS NULL($6), $1, +($6, $1))], _o__c2=[CASE(IS NULL($2), $7, +($7, $2))], _o__c3=[/(CAST(CASE(IS NULL($1), $6, IS NULL($6), $1, +($6, $1))):DOUBLE, CASE(IS NULL($2), $7, +($7, $2)))], _o__c4=[CASE(IS NULL($3), $8, +($8, $3))])
+ HiveFilter(condition=[OR(AND($4, OR(AND(IS NULL($3), =($8, 0)), AND(=(+($8, $3), 0), IS NOT NULL($3)))), AND(IS NULL($4), OR(AND(IS NULL($3), >($8, 0)), AND(>(+($8, $3), 0), IS NOT NULL($3)))), AND($4, OR(AND(IS NULL($3), >($8, 0)), AND(>(+($8, $3), 0), IS NOT NULL($3)))))])
+ HiveJoin(condition=[IS NOT DISTINCT FROM($0, $5)], joinType=[right], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0], _c1=[$1], _c2=[$2], _c4=[$4], $f4=[true])
HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
- HiveProject(a=[$0], $f1=[$1], $f2=[$2])
- HiveAggregate(group=[{0}], agg#0=[SUM($1)], agg#1=[SUM($2)])
- HiveProject(a=[$0], $f3=[CASE(OR($2, $5), *(-1, $1), $1)], $f4=[CASE(OR($2, $5), -1, 1)])
+ HiveProject(a=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
+ HiveAggregate(group=[{0}], agg#0=[SUM($1)], agg#1=[SUM($2)], agg#2=[SUM($3)])
+ HiveProject(a=[$0], $f3=[CASE(OR($2, $5), *(-1, $1), $1)], $f4=[CASE(OR($2, $5), *(-1, CASE(IS NULL($1), 0, 1)), CASE(IS NULL($1), 0, 1))], $f5=[CASE(OR($2, $5), -1, 1)])
HiveJoin(condition=[AND(=($0, $4), OR($3, $6))], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(a=[$0], b=[$1], ROW__IS__DELETED=[$6], <=[<(3, $5.writeid)])
HiveFilter(condition=[IS NOT NULL($0)])
@@ -308,18 +310,18 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.mat1
- Statistics: Num rows: 5 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: a (type: char(15)), _c1 (type: bigint), _c2 (type: bigint), true (type: boolean), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 5 Data size: 932 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: a (type: char(15)), _c1 (type: bigint), _c2 (type: bigint), _c4 (type: bigint), true (type: boolean), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 5 Data size: 932 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean), _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 5 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: boolean), _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 6
@@ -380,13 +382,13 @@ STAGE PLANS:
0 _col0 (type: char(15))
1 _col0 (type: char(15))
nullSafes: [true]
- outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 5 Data size: 1017 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 5 Data size: 1097 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col3 and ((_col2 is null and (_col7 = 0L)) or (((_col7 + _col2) = 0) and _col2 is not null))) (type: boolean)
- Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (_col4 and ((_col3 is null and (_col9 = 0L)) or (((_col9 + _col3) = 0) and _col3 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
@@ -396,15 +398,15 @@ STAGE PLANS:
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col3 is null and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
- Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (_col4 is null and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col7) ELSE ((_col7 + _col2)) END (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col6 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col8) ELSE ((_col8 + _col2)) END (type: bigint), (UDFToDouble(CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END) / CASE WHEN (_col2 is null) THEN (_col8) ELSE ((_col8 + _col2)) END) (type: double), CASE WHEN (_col3 is null) THEN (_col9) [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -412,34 +414,34 @@ STAGE PLANS:
name: default.mat1
Write Type: INSERT
Select Operator
- expressions: _col0 (type: char(15)), _col1 (type: bigint), _col2 (type: bigint)
- outputColumnNames: a, _c1, _c2
- Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col0 (type: char(15)), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint)
+ outputColumnNames: a, _c1, _c2, _c3, _c4
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2)
+ aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2), min(_c3), max(_c3), count(_c3), compute_bit_vector_hll(_c3), min(_c4), max(_c4), count(_c4), compute_bit_vector_hll(_c4)
minReductionHashAggr: 0.4
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Statistics: Num rows: 1 Data size: 912 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
+ Statistics: Num rows: 1 Data size: 912 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary), _col17 (type: bigint), _col18 (typ [...]
Filter Operator
- predicate: (_col3 and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
- Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (_col4 and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col7) ELSE ((_col7 + _col2)) END (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col6 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col8) ELSE ((_col8 + _col2)) END (type: bigint), (UDFToDouble(CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END) / CASE WHEN (_col2 is null) THEN (_col8) ELSE ((_col8 + _co [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
null sort order: a
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: char(15)), _col2 (type: bigint), _col3 (type: bigint)
+ Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: char(15)), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -460,17 +462,17 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+ aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), comp [...]
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Statistics: Num rows: 1 Data size: 844 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint) [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint) [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -479,12 +481,12 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: char(15)), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: char(15)), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -504,39 +506,39 @@ STAGE PLANS:
residual filter predicates: {(_col3 or _col6)}
Statistics: Num rows: 6 Data size: 670 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: char(15)), CASE WHEN ((_col2 or _col5)) THEN ((-1 * _col1)) ELSE (_col1) END (type: int), CASE WHEN ((_col2 or _col5)) THEN (-1) ELSE (1) END (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: _col0 (type: char(15)), CASE WHEN ((_col2 or _col5)) THEN ((-1 * _col1)) ELSE (_col1) END (type: int), CASE WHEN ((_col2 or _col5)) THEN ((-1 * CASE WHEN (_col1 is null) THEN (0) ELSE (1) END)) ELSE (CASE WHEN (_col1 is null) THEN (0) ELSE (1) END) END (type: int), CASE WHEN ((_col2 or _col5)) THEN (-1) ELSE (1) END (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 6 Data size: 670 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(_col1), sum(_col2)
+ aggregations: sum(_col1), sum(_col2), sum(_col3)
keys: _col0 (type: char(15))
minReductionHashAggr: 0.4
mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
Reducer 8
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
keys: KEY._col0 (type: char(15))
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
Stage: Stage-4
Dependency Collection
@@ -591,8 +593,8 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
Column Stats Desc:
- Columns: a, _c1, _c2
- Column Types: char(15), bigint, bigint
+ Columns: a, _c1, _c2, _c3, _c4
+ Column Types: char(15), bigint, bigint, double, bigint
Table: default.mat1
PREHOOK: query: alter materialized view mat1 rebuild
@@ -612,10 +614,12 @@ POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
-POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c3 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), (mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), ]
+POSTHOOK: Lineage: mat1._c4 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c4, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
PREHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: QUERY
@@ -624,7 +628,7 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: QUERY
@@ -633,10 +637,9 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
CBO PLAN:
-HiveProject(a=[$0], _c1=[$1])
- HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
-PREHOOK: query: select t1.a, sum(t1.b) from t1
+PREHOOK: query: select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: QUERY
@@ -644,7 +647,7 @@ PREHOOK: Input: default@mat1
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
-POSTHOOK: query: select t1.a, sum(t1.b) from t1
+POSTHOOK: query: select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: QUERY
@@ -652,11 +655,11 @@ POSTHOOK: Input: default@mat1
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
-add 5
-null_add NULL
-null_update NULL
-sum0 0
-update 7
+add 5 1 5.0 1
+null_add NULL 0 NULL 1
+null_update NULL 0 NULL 2
+sum0 0 1 0.0 1
+update 7 2 3.5 2
PREHOOK: query: drop materialized view mat1
PREHOOK: type: DROP_MATERIALIZED_VIEW
PREHOOK: Input: default@mat1
@@ -665,22 +668,3 @@ POSTHOOK: query: drop materialized view mat1
POSTHOOK: type: DROP_MATERIALIZED_VIEW
POSTHOOK: Input: default@mat1
POSTHOOK: Output: default@mat1
-PREHOOK: query: select t1.a, sum(t1.b) from t1
-join t2 on (t1.a = t2.a)
-group by t1.a
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t2
-#### A masked pattern was here ####
-POSTHOOK: query: select t1.a, sum(t1.b) from t1
-join t2 on (t1.a = t2.a)
-group by t1.a
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t2
-#### A masked pattern was here ####
-add 5
-null_add NULL
-null_update NULL
-sum0 0
-update 7