This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ad3fe8c HIVE-19653: Incorrect predicate pushdown for groupby with grouping sets (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)
ad3fe8c is described below
commit ad3fe8c66dcece3a3ded3c99b019570603eca698
Author: dengzh <[email protected]>
AuthorDate: Fri Jun 12 08:42:08 2020 +0800
HIVE-19653: Incorrect predicate pushdown for groupby with grouping sets (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)
---
.../apache/hadoop/hive/ql/ppd/OpProcFactory.java | 99 ++++
.../hadoop/hive/ql/ppd/PredicatePushDown.java | 4 +
.../groupby_grouping_sets_pushdown1.q | 42 ++
.../llap/groupby_grouping_sets_pushdown1.q.out | 645 +++++++++++++++++++++
4 files changed, 790 insertions(+)
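
For readers skimming the patch: the bug is that predicate pushdown (PPD) could move a filter on a group-by key below a GroupByOperator with grouping sets, even though some grouping sets replace that key with NULL during null-expansion, so the filter sees the wrong rows and the query returns incorrect results. The fix only pushes a predicate down when every column it references is kept by every grouping set; otherwise the predicate (or its residual part) stays in a Filter above the group-by. Below is a minimal, self-contained sketch of that membership test in plain Java (hypothetical class and method names, plain long masks instead of Hive's javolution FastBitSet), assuming the encoding that GroupByOperator.groupingSet2BitSet unpacks: the first group-by key maps to the most significant of the n bits, and a 1-bit means the key is replaced by NULL in that grouping set:

// Hypothetical standalone sketch (not Hive code): decide whether a filter on
// a single group-by key may be pushed below GROUP BY ... GROUPING SETS.
public class GroupingSetPushdownSketch {

  // A key is safe to filter on early only if no grouping set nulls it out.
  static boolean canPushDown(int keyIndex, long[] groupingSetIds, int numKeys) {
    long keyBit = 1L << (numKeys - 1 - keyIndex); // MSB-first key encoding (assumed)
    for (long id : groupingSetIds) {
      if ((id & keyBit) != 0) {
        return false; // this grouping set emits NULL for the key
      }
    }
    return true;
  }

  public static void main(String[] args) {
    int numKeys = 2;                   // GROUP BY a, b  ->  a = 0, b = 1
    long[] allFour = {3L, 1L, 2L, 0L}; // GROUPING SETS ((), (a), (b), (a, b))
    long[] aKept   = {1L, 0L};         // GROUPING SETS ((a), (a, b))

    System.out.println(canPushDown(0, allFour, numKeys)); // false: () and (b) null out a
    System.out.println(canPushDown(0, aKept, numKeys));   // true: a survives every set
    System.out.println(canPushDown(1, aKept, numKeys));   // false: (a) nulls out b
  }
}

This matches the plans in the q.out below: the first EXPLAIN applies `_col0 is not null` to the group-by output, while the second pushes `a is not null` all the way into the TableScan.
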
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index b01f74d..6c66260 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -28,8 +28,10 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
+import javolution.util.FastBitSet;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -55,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -733,6 +736,98 @@ public final class OpProcFactory {
}
}
+  public static class GroupByPPD extends DefaultPPD implements SemanticNodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      super.process(nd, stack, procCtx, nodeOutputs);
+      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
+      GroupByDesc groupByDesc = ((GroupByOperator) nd).getConf();
+      ExprWalkerInfo prunedPred = owi.getPrunedPreds((Operator<? extends OperatorDesc>) nd);
+      if (prunedPred == null || !prunedPred.hasAnyCandidates() ||
+          !groupByDesc.isGroupingSetsPresent()) {
+        return null;
+      }
+
+      List<Long> groupingSets = groupByDesc.getListGroupingSets();
+      Map<String, List<ExprNodeDesc>> candidates = prunedPred.getFinalCandidates();
+      FastBitSet[] fastBitSets = new FastBitSet[groupingSets.size()];
+      int groupingSetPosition = groupByDesc.getGroupingSetPosition();
+      for (int pos = 0; pos < fastBitSets.length; pos++) {
+        fastBitSets[pos] = GroupByOperator.groupingSet2BitSet(groupingSets.get(pos),
+            groupingSetPosition);
+      }
+      List<ExprNodeDesc> groupByKeys = ((GroupByOperator) nd).getConf().getKeys();
+      Map<ExprNodeDesc, ExprNodeDesc> newToOldExprMap = prunedPred.getNewToOldExprMap();
+      Map<String, List<ExprNodeDesc>> nonFinalCandidates = new HashMap<String, List<ExprNodeDesc>>();
+      Iterator<Map.Entry<String, List<ExprNodeDesc>>> iter = candidates.entrySet().iterator();
+      while (iter.hasNext()) {
+        Map.Entry<String, List<ExprNodeDesc>> entry = iter.next();
+        List<ExprNodeDesc> residualExprs = new ArrayList<ExprNodeDesc>();
+        List<ExprNodeDesc> finalCandidates = new ArrayList<ExprNodeDesc>();
+        List<ExprNodeDesc> exprs = entry.getValue();
+        for (ExprNodeDesc expr : exprs) {
+          if (canPredPushdown(expr, groupByKeys, fastBitSets, groupingSetPosition)) {
+            finalCandidates.add(expr);
+          } else {
+            residualExprs.add(newToOldExprMap.get(expr));
+          }
+        }
+        if (!residualExprs.isEmpty()) {
+          nonFinalCandidates.put(entry.getKey(), residualExprs);
+        }
+
+        if (finalCandidates.isEmpty()) {
+          iter.remove();
+        } else {
+          exprs.clear();
+          exprs.addAll(finalCandidates);
+        }
+      }
+
+      if (!nonFinalCandidates.isEmpty()) {
+        createFilter((Operator) nd, nonFinalCandidates, owi);
+      }
+      return null;
+    }
+
+    private boolean canPredPushdown(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
+        FastBitSet[] bitSets, int groupingSetPosition) {
+      List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
+      extractCols(expr, columns);
+      for (ExprNodeDesc col : columns) {
+        int index = groupByKeys.indexOf(col);
+        assert index >= 0;
+        for (FastBitSet bitset : bitSets) {
+          int keyPos = bitset.nextClearBit(0);
+          while (keyPos < groupingSetPosition && keyPos != index) {
+            keyPos = bitset.nextClearBit(keyPos + 1);
+          }
+          // If the column has not been found in a grouping set, the expr should not be pushed down
+          if (keyPos != index) {
+            return false;
+          }
+        }
+      }
+      return true;
+    }
+
+    // Extract columns from the expression tree
+    private void extractCols(ExprNodeDesc expr, List<ExprNodeDesc> columns) {
+      if (expr instanceof ExprNodeColumnDesc) {
+        columns.add(expr);
+      }
+
+      if (expr instanceof ExprNodeGenericFuncDesc) {
+        List<ExprNodeDesc> children = expr.getChildren();
+        for (int i = 0; i < children.size(); ++i) {
+          extractCols(children.get(i), columns);
+        }
+      }
+    }
+  }
+
/**
* Default processor which just merges its children.
*/
@@ -1093,6 +1188,10 @@ public final class OpProcFactory {
return new ReduceSinkPPD();
}
+  public static SemanticNodeProcessor getGBYProc() {
+    return new GroupByPPD();
+  }
+
private OpProcFactory() {
// prevent instantiation
}
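
The clear-bit walk in canPredPushdown above is compact; here is a standalone restatement using java.util.BitSet in place of javolution's FastBitSet (a sketch only, under the same assumption as above: clear bits mark the keys a grouping set keeps, and groupingSetPosition is the number of group-by keys):

// Sketch only, not Hive code: the same walk as canPredPushdown, with
// java.util.BitSet standing in for javolution.util.FastBitSet.
import java.util.BitSet;

public class ClearBitWalkSketch {
  static boolean columnInGroupingSet(BitSet bitset, int index, int groupingSetPosition) {
    int keyPos = bitset.nextClearBit(0);
    while (keyPos < groupingSetPosition && keyPos != index) {
      keyPos = bitset.nextClearBit(keyPos + 1);
    }
    return keyPos == index; // found the column's position among the clear bits
  }

  public static void main(String[] args) {
    BitSet onlyA = new BitSet(); // grouping set (a) over keys [a, b]: b is nulled
    onlyA.set(1);
    System.out.println(columnInGroupingSet(onlyA, 0, 2)); // true: a is kept
    System.out.println(columnInGroupingSet(onlyA, 1, 2)); // false: b is nulled
  }
}
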
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
index 4cf86bb..22e79e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java
@@ -25,6 +25,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
@@ -122,6 +123,9 @@ public class PredicatePushDown extends Transform {
opRules.put(new RuleRegExp("R10",
ReduceSinkOperator.getOperatorName() + "%"),
OpProcFactory.getRSProc());
+    opRules.put(new RuleRegExp("R11",
+      GroupByOperator.getOperatorName() + "%"),
+      OpProcFactory.getGBYProc());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
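
The new rule R11 simply routes every GroupByOperator the walker visits to GroupByPPD. A toy illustration of that name-pattern dispatch idea (not Hive's actual Dispatcher or RuleRegExp, which live in org.apache.hadoop.hive.ql.lib; "GBY" and "RS" stand in for the operator names):

// Toy illustration only, not Hive's dispatch machinery.
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Consumer;

public class RuleDispatchSketch {
  public static void main(String[] args) {
    Map<String, Consumer<String>> opRules = new LinkedHashMap<>();
    opRules.put("RS", op -> System.out.println("ReduceSinkPPD handles " + op));  // like R10
    opRules.put("GBY", op -> System.out.println("GroupByPPD handles " + op));    // like R11

    String node = "GBY_2"; // a GroupByOperator node encountered during the walk
    opRules.entrySet().stream()
        .filter(e -> node.startsWith(e.getKey()))
        .findFirst()
        .ifPresent(e -> e.getValue().accept(node)); // -> GroupByPPD handles GBY_2
  }
}
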
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
new file mode 100644
index 0000000..ce2c68c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
@@ -0,0 +1,42 @@
+SET hive.cbo.enable=false;
+
+CREATE TABLE T1(a STRING, b STRING, s BIGINT);
+INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456);
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b))
+) t WHERE a IS NOT NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE a IS NOT NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL AND b IS NOT NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL OR b IS NOT NULL;
+
+SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b WITH CUBE
+) t WHERE a IS NOT NULL OR b IS NOT NULL;
+
+SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE b IS NULL;
\ No newline at end of file
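
The last query above is the sharpest correctness case: with GROUPING SETS ((a), (a, b)), the row matching `b IS NULL` exists only after null-expansion, so pushing the filter below the group-by would return nothing. A hedged, self-contained demo of that ordering (Java 16+, hypothetical names, same MSB-first "1-bit = key nulled" encoding assumed earlier):

// Demo sketch, not Hive code: why "b IS NULL" must stay above the group-by
// for GROUPING SETS ((a), (a, b)).
import java.util.ArrayList;
import java.util.List;

public class NullExpansionDemo {
  record Row(String a, String b) {}

  // Expand one base row into one row per grouping set; a 1-bit nulls the key.
  static List<Row> expand(Row base, long[] groupingSetIds) {
    List<Row> out = new ArrayList<>();
    for (long id : groupingSetIds) {
      out.add(new Row((id & 2) == 0 ? base.a() : null,
                      (id & 1) == 0 ? base.b() : null));
    }
    return out;
  }

  public static void main(String[] args) {
    Row base = new Row("aaa", "bbb");
    long[] sets = {1L, 0L}; // GROUPING SETS ((a), (a, b))

    // Correct order: expand first, then filter b IS NULL -> 1 row (aaa, NULL).
    System.out.println(expand(base, sets).stream().filter(r -> r.b() == null).count());

    // Incorrectly pushed filter: applied to the base row first, it drops the
    // row, leaving nothing to expand -> 0 rows instead of 1.
    List<Row> pushed = base.b() == null ? expand(base, sets) : List.of();
    System.out.println(pushed.stream().filter(r -> r.b() == null).count());
  }
}

The expected single row matches the final golden result in the q.out below.
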
diff --git a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
new file mode 100644
index 0000000..2d71757
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
@@ -0,0 +1,645 @@
+PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, s BIGINT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, s BIGINT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.s SCRIPT []
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b))
+) t WHERE a IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b))
+) t WHERE a IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ expressions: a (type: string), b (type: string), s (type: bigint)
+ outputColumnNames: a, b, s
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE a IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE a IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL AND b IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL AND b IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col3 > 100L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL OR b IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING sum(s) > 100
+) t WHERE a IS NOT NULL OR b IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ expressions: a (type: string), b (type: string), s (type: bigint)
+ outputColumnNames: a, b, s
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col0 is not null or _col1 is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col3 > 100L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b WITH CUBE
+) t WHERE a IS NOT NULL OR b IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b WITH CUBE
+) t WHERE a IS NOT NULL OR b IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL bbb 123456
+aaa bbb 123456
+aaa NULL 123456
+PREHOOK: query: SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE b IS NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE b IS NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+aaa NULL 123456