This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ad3fe8c HIVE-19653: Incorrect predicate pushdown for groupby with grouping sets (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)
ad3fe8c is described below
commit ad3fe8c66dcece3a3ded3c99b019570603eca698
Author: dengzh <[email protected]>
AuthorDate: Fri Jun 12 08:42:08 2020 +0800
HIVE-19653: Incorrect predicate pushdown for groupby with grouping sets (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)
---
.../apache/hadoop/hive/ql/ppd/OpProcFactory.java | 99 ++++
.../hadoop/hive/ql/ppd/PredicatePushDown.java | 4 +
.../groupby_grouping_sets_pushdown1.q | 42 ++
.../llap/groupby_grouping_sets_pushdown1.q.out | 645 +++++++++++++++++++++
4 files changed, 790 insertions(+)
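
For readers skimming the patch: the bug is that predicate pushdown (PPD) could move a filter on a group-by key below a GroupByOperator with grouping sets, even though some grouping sets replace that key with NULL during null-expansion, so the filter sees the wrong rows and the query returns incorrect results. The fix only pushes a predicate down when every column it references is kept by every grouping set; otherwise the predicate (or its residual part) stays in a Filter above the group-by. Below is a minimal, self-contained sketch of that membership test in plain Java (hypothetical class and method names, plain long masks instead of Hive's javolution FastBitSet), assuming the encoding that GroupByOperator.groupingSet2BitSet unpacks: the first group-by key maps to the most significant of the n bits, and a 1-bit means the key is replaced by NULL in that grouping set:

// Hypothetical standalone sketch (not Hive code): decide whether a filter on
// a single group-by key may be pushed below GROUP BY ... GROUPING SETS.
public class GroupingSetPushdownSketch {

  // A key is safe to filter on early only if no grouping set nulls it out.
  static boolean canPushDown(int keyIndex, long[] groupingSetIds, int numKeys) {
    long keyBit = 1L << (numKeys - 1 - keyIndex); // MSB-first key encoding (assumed)
    for (long id : groupingSetIds) {
      if ((id & keyBit) != 0) {
        return false; // this grouping set emits NULL for the key
      }
    }
    return true;
  }

  public static void main(String[] args) {
    int numKeys = 2;                   // GROUP BY a, b  ->  a = 0, b = 1
    long[] allFour = {3L, 1L, 2L, 0L}; // GROUPING SETS ((), (a), (b), (a, b))
    long[] aKept   = {1L, 0L};         // GROUPING SETS ((a), (a, b))

    System.out.println(canPushDown(0, allFour, numKeys)); // false: () and (b) null out a
    System.out.println(canPushDown(0, aKept, numKeys));   // true: a survives every set
    System.out.println(canPushDown(1, aKept, numKeys));   // false: (a) nulls out b
  }
}

This matches the plans in the q.out below: the first EXPLAIN applies `_col0 is not null` to the group-by output, while the second pushes `a is not null` all the way into the TableScan.
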
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index b01f74d..6c66260 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -28,8 +28,10 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
+import javolution.util.FastBitSet;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -55,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -733,6 +736,98 @@ public final class OpProcFactory {
}
}
+  public static class GroupByPPD extends DefaultPPD implements SemanticNodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      super.process(nd, stack, procCtx, nodeOutputs);
+      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
+      GroupByDesc groupByDesc = ((GroupByOperator) nd).getConf();
+      ExprWalkerInfo prunedPred = owi.getPrunedPreds((Operator<? extends OperatorDesc>) nd);
+      if (prunedPred == null || !prunedPred.hasAnyCandidates() ||
+          !groupByDesc.isGroupingSetsPresent()) {
+        return null;
+      }
+
+      List<Long> groupingSets = groupByDesc.getListGroupingSets();
+      Map<String, List<ExprNodeDesc>> candidates = prunedPred.getFinalCandidates();
+      FastBitSet[] fastBitSets = new FastBitSet[groupingSets.size()];
+      int groupingSetPosition = groupByDesc.getGroupingSetPosition();
+      for (int pos = 0; pos < fastBitSets.length; pos++) {
+        fastBitSets[pos] = GroupByOperator.groupingSet2BitSet(groupingSets.get(pos),
+            groupingSetPosition);
+      }
+      List<ExprNodeDesc> groupByKeys = ((GroupByOperator) nd).getConf().getKeys();
+      Map<ExprNodeDesc, ExprNodeDesc> newToOldExprMap = prunedPred.getNewToOldExprMap();
+      Map<String, List<ExprNodeDesc>> nonFinalCandidates = new HashMap<String, List<ExprNodeDesc>>();
+      Iterator<Map.Entry<String, List<ExprNodeDesc>>> iter = candidates.entrySet().iterator();
+      while (iter.hasNext()) {
+        Map.Entry<String, List<ExprNodeDesc>> entry = iter.next();
+        List<ExprNodeDesc> residualExprs = new ArrayList<ExprNodeDesc>();
+        List<ExprNodeDesc> finalCandidates = new ArrayList<ExprNodeDesc>();
+        List<ExprNodeDesc> exprs = entry.getValue();
+        for (ExprNodeDesc expr : exprs) {
+          if (canPredPushdown(expr, groupByKeys, fastBitSets, groupingSetPosition)) {
+            finalCandidates.add(expr);
+          } else {
+            residualExprs.add(newToOldExprMap.get(expr));
+          }
+        }
+        if (!residualExprs.isEmpty()) {
+          nonFinalCandidates.put(entry.getKey(), residualExprs);
+        }
+
+        if (finalCandidates.isEmpty()) {
+          iter.remove();
+        } else {
+          exprs.clear();
+          exprs.addAll(finalCandidates);
+        }
+      }
+
+      if (!nonFinalCandidates.isEmpty()) {
+        createFilter((Operator) nd, nonFinalCandidates, owi);
+      }
+      return null;
+    }
+
+    private boolean canPredPushdown(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
+        FastBitSet[] bitSets, int groupingSetPosition) {
+      List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
+      extractCols(expr, columns);
+      for (ExprNodeDesc col : columns) {
+        int index = groupByKeys.indexOf(col);
+        assert index >= 0;
+        for (FastBitSet bitset : bitSets) {
+          int keyPos = bitset.nextClearBit(0);
+          while (keyPos < groupingSetPosition && keyPos != index) {
+            keyPos = bitset.nextClearBit(keyPos + 1);
+          }
+          // If the column has not been found in a grouping set, the expr should not be pushed down
+          if (keyPos != index) {
+            return false;
+          }
+        }
+      }
+      return true;
+    }
+
+    // Extract columns from the expression tree
+    private void extractCols(ExprNodeDesc expr, List<ExprNodeDesc> columns) {
+      if (expr instanceof ExprNodeColumnDesc) {
+        columns.add(expr);
+      }
+
+      if (expr instanceof ExprNodeGenericFuncDesc) {
+        List<ExprNodeDesc> children = expr.getChildren();
+        for (int i = 0; i < children.size(); ++i) {
+          extractCols(children.get(i), columns);
+        }
+      }
+    }
+  }
+
/**
* Default processor which just merges its children.
*/
@@ -1093,6 +1188,10 @@ public final class OpProcFactory {
return new ReduceSinkPPD();
}
+  public static SemanticNodeProcessor getGBYProc() {
+    return new GroupByPPD();
+  }
+
private OpProcFactory() {
// prevent instantiation
}
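
The clear-bit walk in canPredPushdown above is compact; here is a standalone restatement using java.util.BitSet in place of javolution's FastBitSet (a sketch only, under the same assumption as above: clear bits mark the keys a grouping set keeps, and groupingSetPosition is the number of group-by keys):

// Sketch only, not Hive code: the same walk as canPredPushdown, with
// java.util.BitSet standing in for javolution.util.FastBitSet.
import java.util.BitSet;

public class ClearBitWalkSketch {
  static boolean columnInGroupingSet(BitSet bitset, int index, int groupingSetPosition) {
    int keyPos = bitset.nextClearBit(0);
    while (keyPos < groupingSetPosition && keyPos != index) {
      keyPos = bitset.nextClearBit(keyPos + 1);
    }
    return keyPos == index; // found the column's position among the clear bits
  }

  public static void main(String[] args) {
    BitSet onlyA = new BitSet(); // grouping set (a) over keys [a, b]: b is nulled
    onlyA.set(1);
    System.out.println(columnInGroupingSet(onlyA, 0, 2)); // true: a is kept
    System.out.println(columnInGroupingSet(onlyA, 1, 2)); // false: b is nulled
  }
}
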
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
index 4cf86bb..22e79e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
@@ -25,6 +25,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
@@ -122,6 +123,9 @@ public class PredicatePushDown extends Transform {
opRules.put(new RuleRegExp("R10",
ReduceSinkOperator.getOperatorName() + "%"),
OpProcFactory.getRSProc());
+    opRules.put(new RuleRegExp("R11",
+      GroupByOperator.getOperatorName() + "%"),
+      OpProcFactory.getGBYProc());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
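
The new rule R11 simply routes every GroupByOperator the walker visits to GroupByPPD. A toy illustration of that name-pattern dispatch idea (not Hive's actual Dispatcher or RuleRegExp, which live in org.apache.hadoop.hive.ql.lib; "GBY" and "RS" stand in for the operator names):

// Toy illustration only, not Hive's dispatch machinery.
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Consumer;

public class RuleDispatchSketch {
  public static void main(String[] args) {
    Map<String, Consumer<String>> opRules = new LinkedHashMap<>();
    opRules.put("RS", op -> System.out.println("ReduceSinkPPD handles " + op));  // like R10
    opRules.put("GBY", op -> System.out.println("GroupByPPD handles " + op));    // like R11

    String node = "GBY_2"; // a GroupByOperator node encountered during the walk
    opRules.entrySet().stream()
        .filter(e -> node.startsWith(e.getKey()))
        .findFirst()
        .ifPresent(e -> e.getValue().accept(node)); // -> GroupByPPD handles GBY_2
  }
}
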
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
new file mode 100644
index 0000000..ce2c68c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
@@ -0,0 +1,42 @@
+SET hive.cbo.enable=false;
+
+CREATE TABLE T1(a STRING, b STRING, s BIGINT);
+INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456);
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b))
+) t WHERE a IS NOT NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE a IS NOT NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL AND b IS NOT NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL OR b IS NOT NULL;
+
+SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b WITH CUBE
+) t WHERE a IS NOT NULL OR b IS NOT NULL;
+
+SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE b IS NULL;
\ No newline at end of file
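
The last query above is the sharpest correctness case: with GROUPING SETS ((a), (a, b)), the row matching `b IS NULL` exists only after null-expansion, so pushing the filter below the group-by would return nothing. A hedged, self-contained demo of that ordering (Java 16+, hypothetical names, same MSB-first "1-bit = key nulled" encoding assumed earlier):

// Demo sketch, not Hive code: why "b IS NULL" must stay above the group-by
// for GROUPING SETS ((a), (a, b)).
import java.util.ArrayList;
import java.util.List;

public class NullExpansionDemo {
  record Row(String a, String b) {}

  // Expand one base row into one row per grouping set; a 1-bit nulls the key.
  static List<Row> expand(Row base, long[] groupingSetIds) {
    List<Row> out = new ArrayList<>();
    for (long id : groupingSetIds) {
      out.add(new Row((id & 2) == 0 ? base.a() : null,
                      (id & 1) == 0 ? base.b() : null));
    }
    return out;
  }

  public static void main(String[] args) {
    Row base = new Row("aaa", "bbb");
    long[] sets = {1L, 0L}; // GROUPING SETS ((a), (a, b))

    // Correct order: expand first, then filter b IS NULL -> 1 row (aaa, NULL).
    System.out.println(expand(base, sets).stream().filter(r -> r.b() == null).count());

    // Incorrectly pushed filter: applied to the base row first, it drops the
    // row, leaving nothing to expand -> 0 rows instead of 1.
    List<Row> pushed = base.b() == null ? expand(base, sets) : List.of();
    System.out.println(pushed.stream().filter(r -> r.b() == null).count());
  }
}

The expected single row matches the final golden result in the q.out below.
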
diff --git a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
new file mode 100644
index 0000000..2d71757
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
@@ -0,0 +1,645 @@
+PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, s BIGINT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, s BIGINT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.s SCRIPT []
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b))
+) t WHERE a IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b))
+) t WHERE a IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ expressions: a (type: string), b (type: string), s (type: bigint)
+ outputColumnNames: a, b, s
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE a IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE a IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL AND b IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL AND b IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col3 > 100L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL OR b IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL OR b IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ expressions: a (type: string), b (type: string), s (type: bigint)
+ outputColumnNames: a, b, s
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col0 is not null or _col1 is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col3 > 100L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b WITH CUBE
+) t WHERE a IS NOT NULL OR b IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b WITH CUBE
+) t WHERE a IS NOT NULL OR b IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL bbb 123456
+aaa bbb 123456
+aaa NULL 123456
+PREHOOK: query: SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE b IS NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE b IS NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+aaa NULL 123456