This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8572c12 HIVE-25805: Wrong result when rebuilding MV with count(col) incrementally (Krisztian Kasa, reviewed by Zoltan Haindrich)
8572c12 is described below
commit 8572c1201e1d483eb03c7e413f4ff7f9b6f4a3d2
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue Dec 21 14:37:34 2021 +0100
HIVE-25805: Wrong result when rebuilding MV with count(col) incrementally (Krisztian Kasa, reviewed by Zoltan Haindrich)
---
...regateInsertDeleteIncrementalRewritingRule.java | 18 +-
.../materialized_view_create_rewrite_6.q | 14 +-
.../llap/materialized_view_create_rewrite_6.q.out | 208 ++++++++++-----------
3 files changed, 118 insertions(+), 122 deletions(-)
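Summary of the fix: before this change, the incremental rebuild rewrote count(<col>) the same way as count(*), summing a literal 1 for every delta row, so NULL values of the counted column were overcounted. The rule now sums CASE WHEN <col> IS NULL THEN 0 ELSE 1 END instead. A minimal standalone Java sketch of that merge arithmetic (illustrative only, not Hive code; all names are made up):

    import java.util.Arrays;
    import java.util.List;

    public class CountColMergeSketch {
        // Contribution of each delta row to the stored count(col):
        // CASE WHEN col IS NULL THEN 0 ELSE 1 END, negated for deleted rows.
        static long delta(List<Integer> colValues, List<Boolean> deleted) {
            long d = 0;
            for (int i = 0; i < colValues.size(); i++) {
                int c = (colValues.get(i) == null) ? 0 : 1; // count(col) skips NULLs
                d += deleted.get(i) ? -c : c;               // deletes subtract
            }
            return d;
        }

        public static void main(String[] args) {
            long storedCount = 2; // count(b) already materialized for some group
            // One NULL insert (no effect on count(b)) and one non-NULL delete (-1):
            List<Integer> values = Arrays.asList(null, 5);
            List<Boolean> deleted = Arrays.asList(false, true);
            System.out.println(storedCount + delta(values, deleted)); // prints 1
        }
    }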
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveAggregateInsertDeleteIncrementalRewritingRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveAggregateInsertDeleteIncrementalRewritingRule.java
index d4d4bdd..601cbfc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveAggregateInsertDeleteIncrementalRewritingRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveAggregateInsertDeleteIncrementalRewritingRule.java
@@ -139,7 +139,14 @@ public class HiveAggregateInsertDeleteIncrementalRewritingRule extends HiveAggre
switch (aggregateCall.getAggregation().getKind()) {
case COUNT:
aggFunction = SqlStdOperatorTable.SUM;
- argument = relBuilder.literal(1);
+
+ // count(*)
+ if (aggregateCall.getArgList().isEmpty()) {
+ argument = relBuilder.literal(1);
+ } else {
+ // count(<column_name>)
+ argument = genArgumentForCountColumn(relBuilder, aggInput, aggregateCall.getArgList().get(0));
+ }
break;
case SUM:
aggFunction = SqlStdOperatorTable.SUM;
@@ -171,6 +178,15 @@ public class HiveAggregateInsertDeleteIncrementalRewritingRule extends HiveAggre
countIdx);
}
+ private RexNode genArgumentForCountColumn(RelBuilder relBuilder, RelNode aggInput, int argumentIdx) {
+ RexBuilder rexBuilder = relBuilder.getRexBuilder();
+ RexNode countArg = rexBuilder.makeInputRef(
+ aggInput.getRowType().getFieldList().get(argumentIdx).getType(), argumentIdx);
+ RexNode isNull = rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, countArg);
+ return rexBuilder.makeCall(
+ SqlStdOperatorTable.CASE, isNull, relBuilder.literal(0), relBuilder.literal(1));
+ }
+
/**
* Extend {@link HiveAggregateIncrementalRewritingRuleBase.IncrementalComputePlan} with countStarIndex. It is
* required for building the filter condition that distribute the rows to the insert/update/delete branches.
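Why the countStarIndex in the javadoc above matters: after the stored MV row is joined with the aggregated delta, the merged count(*) value is what routes each group to the insert, update, or delete branch (see the HiveFilter conditions in the expected plans further down). A rough illustration of that routing, again as standalone Java rather than Hive code:

    public class BranchRoutingSketch {
        enum Branch { INSERT, UPDATE, DELETE }

        // existsInView: the right outer join found a row already stored in the MV.
        // mergedCountStar: stored count(*) plus the delta computed from changed rows.
        static Branch route(boolean existsInView, long mergedCountStar) {
            if (existsInView && mergedCountStar == 0) {
                return Branch.DELETE; // all source rows of the group were deleted
            }
            return existsInView ? Branch.UPDATE : Branch.INSERT;
        }

        public static void main(String[] args) {
            System.out.println(route(true, 0));  // DELETE
            System.out.println(route(true, 3));  // UPDATE
            System.out.println(route(false, 2)); // INSERT
        }
    }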
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
index e7d3449..6024657 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
@@ -21,13 +21,13 @@ insert into t2(a, b) values ('sum0', 0);
-- Aggregate with count(*): incremental rebuild should be triggered even if there were deletes from source table
create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
-select t1.a, sum(t1.b), count(*) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
@@ -54,7 +54,7 @@ delete from t1 where a like '%add/remove';
-- view can not be used
explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
@@ -68,16 +68,12 @@ alter materialized view mat1 rebuild;
-- the view should be up to date and used
explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a;
drop materialized view mat1;
-
-select t1.a, sum(t1.b) from t1
-join t2 on (t1.a = t2.a)
-group by t1.a;
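Note on the expected output below: avg(t1.b) has no incremental merge of its own; the rebuilt average column is recomputed as the merged sum divided by the merged count(t1.b) (the /(CAST(...):DOUBLE, ...) expressions in the plans), which is why a correct count(col) also keeps avg(col) correct. Illustrative Java, not Hive code:

    public class AvgFromSumCountSketch {
        // The rebuilt avg column is the merged sum divided by the merged count(col).
        static double avg(long mergedSum, long mergedCount) {
            return (double) mergedSum / mergedCount;
        }

        public static void main(String[] args) {
            // Matches the 'update' row in the expected results: sum=7, count=2 -> 3.5
            System.out.println(avg(7, 2));
        }
    }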
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
index 19f810b..f299bbd 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
@@ -86,7 +86,7 @@ POSTHOOK: Output: default@t2
POSTHOOK: Lineage: t2.a SCRIPT []
POSTHOOK: Lineage: t2.b SCRIPT []
PREHOOK: query: create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
-select t1.a, sum(t1.b), count(*) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: CREATE_MATERIALIZED_VIEW
@@ -95,7 +95,7 @@ PREHOOK: Input: default@t2
PREHOOK: Output: database:default
PREHOOK: Output: default@mat1
POSTHOOK: query: create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
-select t1.a, sum(t1.b), count(*) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: CREATE_MATERIALIZED_VIEW
@@ -104,10 +104,12 @@ POSTHOOK: Input: default@t2
POSTHOOK: Output: database:default
POSTHOOK: Output: default@mat1
POSTHOOK: Lineage: mat1._c1 EXPRESSION [(t1)t1.FieldSchema(name:b, type:int, comment:null), ]
-POSTHOOK: Lineage: mat1._c2 EXPRESSION [(t1)t1.null, (t2)t2.null, ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c3 EXPRESSION [(t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c4 EXPRESSION [(t1)t1.null, (t2)t2.null, ]
POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
PREHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: QUERY
@@ -116,7 +118,7 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: QUERY
@@ -125,8 +127,7 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
CBO PLAN:
-HiveProject(a=[$0], _c1=[$1])
- HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
PREHOOK: query: delete from t1 where b = 1
PREHOOK: type: QUERY
@@ -202,7 +203,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@t1
PREHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: QUERY
@@ -210,7 +211,7 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: QUERY
@@ -218,14 +219,15 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
CBO PLAN:
-HiveAggregate(group=[{0}], agg#0=[sum($1)])
- HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(a=[$0], b=[$1])
- HiveFilter(condition=[IS NOT NULL($0)])
- HiveTableScan(table=[[default, t1]], table:alias=[t1])
- HiveProject(a=[$0])
- HiveFilter(condition=[IS NOT NULL($0)])
- HiveTableScan(table=[[default, t2]], table:alias=[t2])
+HiveProject(a=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[/(CAST($1):DOUBLE, $2)], _o__c4=[$3])
+ HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)], agg#2=[count()])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0], b=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+ HiveProject(a=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t2]], table:alias=[t2])
PREHOOK: query: explain cbo
alter materialized view mat1 rebuild
@@ -246,14 +248,14 @@ POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
CBO PLAN:
-HiveProject(a0=[$4], $f1=[CASE(IS NULL($1), $5, IS NULL($5), $1, +($5, $1))], $f2=[CASE(IS NULL($2), $6, +($6, $2))])
- HiveFilter(condition=[OR(AND($3, OR(AND(IS NULL($2), =($6, 0)), AND(=(+($6, $2), 0), IS NOT NULL($2)))), AND(IS NULL($3), OR(AND(IS NULL($2), >($6, 0)), AND(>(+($6, $2), 0), IS NOT NULL($2)))), AND($3, OR(AND(IS NULL($2), >($6, 0)), AND(>(+($6, $2), 0), IS NOT NULL($2)))))])
- HiveJoin(condition=[IS NOT DISTINCT FROM($0, $4)], joinType=[right], algorithm=[none], cost=[not available])
- HiveProject(a=[$0], _c1=[$1], _c2=[$2], $f3=[true])
+HiveProject(a=[$5], _o__c1=[CASE(IS NULL($1), $6, IS NULL($6), $1, +($6, $1))], _o__c2=[CASE(IS NULL($2), $7, +($7, $2))], _o__c3=[/(CAST(CASE(IS NULL($1), $6, IS NULL($6), $1, +($6, $1))):DOUBLE, CASE(IS NULL($2), $7, +($7, $2)))], _o__c4=[CASE(IS NULL($3), $8, +($8, $3))])
+ HiveFilter(condition=[OR(AND($4, OR(AND(IS NULL($3), =($8, 0)), AND(=(+($8, $3), 0), IS NOT NULL($3)))), AND(IS NULL($4), OR(AND(IS NULL($3), >($8, 0)), AND(>(+($8, $3), 0), IS NOT NULL($3)))), AND($4, OR(AND(IS NULL($3), >($8, 0)), AND(>(+($8, $3), 0), IS NOT NULL($3)))))])
+ HiveJoin(condition=[IS NOT DISTINCT FROM($0, $5)], joinType=[right], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0], _c1=[$1], _c2=[$2], _c4=[$4], $f4=[true])
HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
- HiveProject(a=[$0], $f1=[$1], $f2=[$2])
- HiveAggregate(group=[{0}], agg#0=[SUM($1)], agg#1=[SUM($2)])
- HiveProject(a=[$0], $f3=[CASE(OR($2, $5), *(-1, $1), $1)], $f4=[CASE(OR($2, $5), -1, 1)])
+ HiveProject(a=[$0], $f1=[$1], $f2=[$2], $f3=[$3])
+ HiveAggregate(group=[{0}], agg#0=[SUM($1)], agg#1=[SUM($2)], agg#2=[SUM($3)])
+ HiveProject(a=[$0], $f3=[CASE(OR($2, $5), *(-1, $1), $1)], $f4=[CASE(OR($2, $5), *(-1, CASE(IS NULL($1), 0, 1)), CASE(IS NULL($1), 0, 1))], $f5=[CASE(OR($2, $5), -1, 1)])
HiveJoin(condition=[AND(=($0, $4), OR($3, $6))], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(a=[$0], b=[$1], ROW__IS__DELETED=[$6], <=[<(3, $5.writeid)])
HiveFilter(condition=[IS NOT NULL($0)])
@@ -308,18 +310,18 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default.mat1
- Statistics: Num rows: 5 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: a (type: char(15)), _c1 (type: bigint), _c2 (type: bigint), true (type: boolean), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 5 Data size: 932 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: a (type: char(15)), _c1 (type: bigint), _c2 (type: bigint), _c4 (type: bigint), true (type: boolean), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 5 Data size: 932 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean), _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 5 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: boolean), _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 6
@@ -380,13 +382,13 @@ STAGE PLANS:
0 _col0 (type: char(15))
1 _col0 (type: char(15))
nullSafes: [true]
- outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 5 Data size: 1017 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 5 Data size: 1097 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col3 and ((_col2 is null and (_col7 = 0L)) or (((_col7 + _col2) = 0) and _col2 is not null))) (type: boolean)
- Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (_col4 and ((_col3 is null and (_col9 = 0L)) or (((_col9 + _col3) = 0) and _col3 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
@@ -396,15 +398,15 @@ STAGE PLANS:
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col3 is null and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
- Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (_col4 is null and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col7) ELSE ((_col7 + _col2)) END (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col6 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col8) ELSE ((_col8 + _col2)) END (type: bigint), (UDFToDouble(CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END) / CASE WHEN (_col2 is null) THEN (_col8) ELSE ((_col8 + _col2)) END) (type: double), CASE WHEN (_col3 is null) THEN (_col9) [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -412,34 +414,34 @@ STAGE PLANS:
name: default.mat1
Write Type: INSERT
Select Operator
- expressions: _col0 (type: char(15)), _col1 (type: bigint), _col2 (type: bigint)
- outputColumnNames: a, _c1, _c2
- Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col0 (type: char(15)), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint)
+ outputColumnNames: a, _c1, _c2, _c3, _c4
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2)
+ aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2), min(_c3), max(_c3), count(_c3), compute_bit_vector_hll(_c3), min(_c4), max(_c4), count(_c4), compute_bit_vector_hll(_c4)
minReductionHashAggr: 0.4
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Statistics: Num rows: 1 Data size: 912 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
+ Statistics: Num rows: 1 Data size: 912 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary), _col17 (type: bigint), _col18 (typ [...]
Filter Operator
- predicate: (_col3 and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
- Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (_col4 and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col7) ELSE ((_col7 + _col2)) END (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col6 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col8) ELSE ((_col8 + _col2)) END (type: bigint), (UDFToDouble(CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END) / CASE WHEN (_col2 is null) THEN (_col8) ELSE ((_col8 + _co [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
null sort order: a
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: char(15)), _col2 (type: bigint), _col3 (type: bigint)
+ Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: char(15)), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -460,17 +462,17 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+ aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), comp [...]
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Statistics: Num rows: 1 Data size: 844 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint) [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint) [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -479,12 +481,12 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: char(15)), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: char(15)), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -504,39 +506,39 @@ STAGE PLANS:
residual filter predicates: {(_col3 or _col6)}
Statistics: Num rows: 6 Data size: 670 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: char(15)), CASE WHEN ((_col2 or _col5)) THEN ((-1 * _col1)) ELSE (_col1) END (type: int), CASE WHEN ((_col2 or _col5)) THEN (-1) ELSE (1) END (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: _col0 (type: char(15)), CASE WHEN ((_col2 or _col5)) THEN ((-1 * _col1)) ELSE (_col1) END (type: int), CASE WHEN ((_col2 or _col5)) THEN ((-1 * CASE WHEN (_col1 is null) THEN (0) ELSE (1) END)) ELSE (CASE WHEN (_col1 is null) THEN (0) ELSE (1) END) END (type: int), CASE WHEN ((_col2 or _col5)) THEN (-1) ELSE (1) END (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 6 Data size: 670 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(_col1), sum(_col2)
+ aggregations: sum(_col1), sum(_col2), sum(_col3)
keys: _col0 (type: char(15))
minReductionHashAggr: 0.4
mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
Reducer 8
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
keys: KEY._col0 (type: char(15))
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
Stage: Stage-4
Dependency Collection
@@ -591,8 +593,8 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
Column Stats Desc:
- Columns: a, _c1, _c2
- Column Types: char(15), bigint, bigint
+ Columns: a, _c1, _c2, _c3, _c4
+ Column Types: char(15), bigint, bigint, double, bigint
Table: default.mat1
PREHOOK: query: alter materialized view mat1 rebuild
@@ -612,10 +614,12 @@ POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
-POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c3 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), (mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), ]
+POSTHOOK: Lineage: mat1._c4 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c4, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
PREHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: QUERY
@@ -624,7 +628,7 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
-select t1.a, sum(t1.b) from t1
+select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: QUERY
@@ -633,10 +637,9 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
CBO PLAN:
-HiveProject(a=[$0], _c1=[$1])
- HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
-PREHOOK: query: select t1.a, sum(t1.b) from t1
+PREHOOK: query: select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
PREHOOK: type: QUERY
@@ -644,7 +647,7 @@ PREHOOK: Input: default@mat1
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
-POSTHOOK: query: select t1.a, sum(t1.b) from t1
+POSTHOOK: query: select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
join t2 on (t1.a = t2.a)
group by t1.a
POSTHOOK: type: QUERY
@@ -652,11 +655,11 @@ POSTHOOK: Input: default@mat1
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
-add 5
-null_add NULL
-null_update NULL
-sum0 0
-update 7
+add 5 1 5.0 1
+null_add NULL 0 NULL 1
+null_update NULL 0 NULL 2
+sum0 0 1 0.0 1
+update 7 2 3.5 2
PREHOOK: query: drop materialized view mat1
PREHOOK: type: DROP_MATERIALIZED_VIEW
PREHOOK: Input: default@mat1
@@ -665,22 +668,3 @@ POSTHOOK: query: drop materialized view mat1
POSTHOOK: type: DROP_MATERIALIZED_VIEW
POSTHOOK: Input: default@mat1
POSTHOOK: Output: default@mat1
-PREHOOK: query: select t1.a, sum(t1.b) from t1
-join t2 on (t1.a = t2.a)
-group by t1.a
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t2
-#### A masked pattern was here ####
-POSTHOOK: query: select t1.a, sum(t1.b) from t1
-join t2 on (t1.a = t2.a)
-group by t1.a
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t2
-#### A masked pattern was here ####
-add 5
-null_add NULL
-null_update NULL
-sum0 0
-update 7