hive git commit: HIVE-14002: Extend limit propagation to subsequent RS operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

jcamacho Thu, 16 Jun 2016 03:46:46 -0700

Repository: hive
Updated Branches:
  refs/heads/master 4c57ed35f -> 1ffa2429a



HIVE-14002: Extend limit propagation to subsequent RS operators (Jesus Camacho 
Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ffa2429
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ffa2429
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ffa2429

Branch: refs/heads/master
Commit: 1ffa2429ab367a43e18484abb80fd8d21ee285a9
Parents: 4c57ed3
Author: Jesus Camacho Rodriguez <[email protected]>
Authored: Thu Jun 16 11:44:31 2016 +0100
Committer: Jesus Camacho Rodriguez <[email protected]>
Committed: Thu Jun 16 11:44:31 2016 +0100

----------------------------------------------------------------------
 .../ql/optimizer/LimitPushdownOptimizer.java    |   77 +
 .../queries/clientpositive/limit_pushdown3.q    |   67 +
 .../clientpositive/limit_pushdown3.q.out        | 1395 ++++++++++++++++++
 3 files changed, 1539 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1ffa2429/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
index 4ca2d7d..644fa49 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
@@ -41,6 +41,8 @@ import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.LimitDesc;
 
 /**
@@ -96,6 +98,11 @@ public class LimitPushdownOptimizer extends Transform {
         ".*" +
         LimitOperator.getOperatorName() + "%"),
         new TopNReducer());
+    opRules.put(new RuleRegExp("R2",
+        ReduceSinkOperator.getOperatorName() + "%" +
+        ".*" +
+        ReduceSinkOperator.getOperatorName() + "%"),
+        new TopNPropagator());
 
     LimitPushdownContext context = new LimitPushdownContext(pctx.getConf());
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
@@ -143,6 +150,76 @@ public class LimitPushdownOptimizer extends Transform {
     }
   }
 
+  private static class TopNPropagator implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack,
+        NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+      ReduceSinkOperator cRS = (ReduceSinkOperator) nd;
+      if (cRS.getConf().getTopN() == -1) {
+        // No limit, nothing to propagate, we just bail out
+        return false;
+      }
+      ReduceSinkOperator pRS = null;
+      for (int i = stack.size() - 2 ; i >= 0; i--) {
+        Operator<?> operator = (Operator<?>) stack.get(i);
+        if (operator.getNumChild() != 1) {
+          return false; // multi-GBY single-RS (TODO)
+        }
+        if (operator instanceof ReduceSinkOperator) {
+          pRS = (ReduceSinkOperator) operator;
+          break;
+        }
+        if (!operator.acceptLimitPushdown()) {
+          return false;
+        }
+      }
+      if (pRS != null) {
+        if (OperatorUtils.findOperators(pRS, GroupByOperator.class).size() > 1){
+          // Not safe to continue for RS-GBY-GBY-LIM kind of pipelines. See HIVE-10607 for more.
+          return false;
+        }
+        if (!checkKeys(cRS.getConf().getKeyCols(), pRS.getConf().getKeyCols(), cRS, pRS)) {
+          // Keys are not the same; bail out
+          return false;
+        }
+        pRS.getConf().setTopN(cRS.getConf().getTopN());
+        pRS.getConf().setTopNMemoryUsage(cRS.getConf().getTopNMemoryUsage());
+        if (pRS.getNumChild() == 1 && pRS.getChildren().get(0) instanceof GroupByOperator) {
+          pRS.getConf().setMapGroupBy(true);
+        }
+      }
+      return true;
+    }
+  }
+
+  private static boolean checkKeys(List<ExprNodeDesc> cKeys, List<ExprNodeDesc> pKeys,
+      ReduceSinkOperator cRS, ReduceSinkOperator pRS) throws SemanticException {
+    if (cKeys == null || cKeys.isEmpty()) {
+      if (pKeys != null && !pKeys.isEmpty()) {
+        return false;
+      }
+      return true;
+    }
+    if (pKeys == null || pKeys.isEmpty()) {
+      return false;
+    }
+    if (cKeys.size() < pKeys.size()) {
+      return false;
+    }
+    for (int i = 0; i < pKeys.size(); i++) {
+      ExprNodeDesc expr = ExprNodeDescUtils.backtrack(cKeys.get(i), cRS, pRS);
+      if (expr == null) {
+        // cKey is not present in parent
+        return false;
+      }
+      if (!expr.isSame(pKeys.get(i))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   private static class LimitPushdownContext implements NodeProcessorCtx {
 
     private final float threshold;

http://git-wip-us.apache.org/repos/asf/hive/blob/1ffa2429/ql/src/test/queries/clientpositive/limit_pushdown3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/limit_pushdown3.q b/ql/src/test/queries/clientpositive/limit_pushdown3.q
new file mode 100644
index 0000000..bb76682
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/limit_pushdown3.q
@@ -0,0 +1,67 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.limit.pushdown.memory.usage=0.3f;
+set hive.optimize.reducededuplication.min.reducer=4;
+
+explain
+select key,value from src order by key limit 20;
+select key,value from src order by key limit 20;
+
+explain
+select key,value from src order by key desc limit 20;
+select key,value from src order by key desc limit 20;
+
+explain
+select value, sum(key + 1) as sum from src group by value order by value limit 20;
+select value, sum(key + 1) as sum from src group by value order by value limit 20;
+
+-- deduped RS
+explain
+select value,avg(key + 1) from src group by value order by value limit 20;
+select value,avg(key + 1) from src group by value order by value limit 20;
+
+-- distincts
+explain
+select distinct(cdouble) as dis from alltypesorc order by dis limit 20;
+select distinct(cdouble) as dis from alltypesorc order by dis limit 20;
+
+explain
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20;
+
+explain 
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 20;
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 20;
+
+-- multi distinct
+explain
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 20;
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 20;
+
+-- limit zero
+explain
+select key,value from src order by key limit 0;
+select key,value from src order by key limit 0;
+
+-- 2MR (applied to last RS)
+explain
+select value, sum(key) as sum from src group by value order by sum limit 20;
+select value, sum(key) as sum from src group by value order by sum limit 20;
+
+set hive.map.aggr=false;
+-- map aggregation disabled
+explain
+select value, sum(key) as sum from src group by value order by value limit 20;
+select value, sum(key) as sum from src group by value order by value limit 20;
+
+set hive.limit.pushdown.memory.usage=0.00002f;
+
+-- flush for order-by
+explain
+select key,value,value,value,value,value,value,value,value from src order by key limit 100;
+select key,value,value,value,value,value,value,value,value from src order by key limit 100;
+
+-- flush for group-by
+explain
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 100;
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 100;

http://git-wip-us.apache.org/repos/asf/hive/blob/1ffa2429/ql/src/test/results/clientpositive/limit_pushdown3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/limit_pushdown3.q.out b/ql/src/test/results/clientpositive/limit_pushdown3.q.out
new file mode 100644
index 0000000..215cd89
--- /dev/null
+++ b/ql/src/test/results/clientpositive/limit_pushdown3.q.out
@@ -0,0 +1,1395 @@
+PREHOOK: query: explain
+select key,value from src order by key limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key,value from src order by key limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 0.3
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key,value from src order by key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key,value from src order by key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      val_0
+0      val_0
+0      val_0
+10     val_10
+100    val_100
+100    val_100
+103    val_103
+103    val_103
+104    val_104
+104    val_104
+105    val_105
+11     val_11
+111    val_111
+113    val_113
+113    val_113
+114    val_114
+116    val_116
+118    val_118
+118    val_118
+119    val_119
+PREHOOK: query: explain
+select key,value from src order by key desc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key,value from src order by key desc limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: -
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 0.3
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key,value from src order by key desc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key,value from src order by key desc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98     val_98
+98     val_98
+97     val_97
+97     val_97
+96     val_96
+95     val_95
+95     val_95
+92     val_92
+90     val_90
+90     val_90
+90     val_90
+9      val_9
+87     val_87
+86     val_86
+85     val_85
+84     val_84
+84     val_84
+83     val_83
+83     val_83
+82     val_82
+PREHOOK: query: explain
+select value, sum(key + 1) as sum from src group by value order by value limit 
20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select value, sum(key + 1) as sum from src group by value order by value limit 
20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: value (type: string), (UDFToDouble(key) + 1.0) 
(type: double)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+              value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select value, sum(key + 1) as sum from src group by value 
order by value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(key + 1) as sum from src group by value 
order by value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0  3.0
+val_10 11.0
+val_100        202.0
+val_103        208.0
+val_104        210.0
+val_105        106.0
+val_11 12.0
+val_111        112.0
+val_113        228.0
+val_114        115.0
+val_116        117.0
+val_118        238.0
+val_119        360.0
+val_12 26.0
+val_120        242.0
+val_125        252.0
+val_126        127.0
+val_128        387.0
+val_129        260.0
+val_131        132.0
+PREHOOK: query: -- deduped RS
+explain
+select value,avg(key + 1) from src group by value order by value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- deduped RS
+explain
+select value,avg(key + 1) from src group by value order by value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: value (type: string), (UDFToDouble(key) + 1.0) 
(type: double)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: avg(_col1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+                  value expressions: _col1 (type: 
struct<count:bigint,sum:double,input:double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+              value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select value,avg(key + 1) from src group by value order by 
value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value,avg(key + 1) from src group by value order by 
value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0  1.0
+val_10 11.0
+val_100        101.0
+val_103        104.0
+val_104        105.0
+val_105        106.0
+val_11 12.0
+val_111        112.0
+val_113        114.0
+val_114        115.0
+val_116        117.0
+val_118        119.0
+val_119        120.0
+val_12 13.0
+val_120        121.0
+val_125        126.0
+val_126        127.0
+val_128        129.0
+val_129        130.0
+val_131        132.0
+PREHOOK: query: -- distincts
+explain
+select distinct(cdouble) as dis from alltypesorc order by dis limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- distincts
+explain
+select distinct(cdouble) as dis from alltypesorc order by dis limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cdouble (type: double)
+              outputColumnNames: cdouble
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                keys: cdouble (type: double)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: double)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: double)
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: double)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: double)
+              sort order: +
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: double)
+          outputColumnNames: _col0
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct(cdouble) as dis from alltypesorc order by dis 
limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct(cdouble) as dis from alltypesorc order by dis 
limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL
+-16379.0
+-16373.0
+-16372.0
+-16369.0
+-16355.0
+-16339.0
+-16324.0
+-16311.0
+-16310.0
+-16309.0
+-16307.0
+-16306.0
+-16305.0
+-16300.0
+-16296.0
+-16280.0
+-16277.0
+-16274.0
+-16269.0
+PREHOOK: query: explain
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint 
order by ctinyint limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint 
order by ctinyint limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cdouble (type: double)
+              outputColumnNames: ctinyint, cdouble
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT cdouble)
+                keys: ctinyint (type: tinyint), cdouble (type: double)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint), _col1 (type: double)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint)
+              sort order: +
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+              value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: 
bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc 
group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc 
group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL   2932
+-64    24
+-63    19
+-62    27
+-61    25
+-60    27
+-59    31
+-58    23
+-57    35
+-56    36
+-55    29
+-54    26
+-53    22
+-52    33
+-51    21
+-50    30
+-49    26
+-48    29
+-47    22
+-46    24
+PREHOOK: query: explain 
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from 
alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint 
limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from 
alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint 
limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cdouble (type: double)
+              outputColumnNames: ctinyint, cdouble
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                keys: ctinyint (type: tinyint), cdouble (type: double)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint), _col1 (type: double)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Group By Operator
+            aggregations: count(_col1)
+            keys: _col0 (type: tinyint)
+            mode: complete
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint)
+              sort order: +
+              Statistics: Num rows: 3072 Data size: 660491 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+              value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: 
bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint, count(cdouble) from (select ctinyint, cdouble 
from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by 
ctinyint limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(cdouble) from (select ctinyint, 
cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order 
by ctinyint limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL   2932
+-64    24
+-63    19
+-62    27
+-61    25
+-60    27
+-59    31
+-58    23
+-57    35
+-56    36
+-55    29
+-54    26
+-53    22
+-52    33
+-51    21
+-50    30
+-49    26
+-48    29
+-47    22
+-46    24
+PREHOOK: query: -- multi distinct
+explain
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from 
alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- multi distinct
+explain
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from 
alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cstring1 (type: string), 
cstring2 (type: string)
+              outputColumnNames: ctinyint, cstring1, cstring2
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT cstring1), count(DISTINCT 
cstring2)
+                keys: ctinyint (type: tinyint), cstring1 (type: string), 
cstring2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint), _col1 (type: 
string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT 
KEY._col1:1._col0)
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint)
+              sort order: +
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: 
bigint), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint, count(distinct(cstring1)), 
count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint 
limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(distinct(cstring1)), 
count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint 
limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL   3065    3
+-64    3       13
+-63    3       16
+-62    3       23
+-61    3       25
+-60    3       25
+-59    3       27
+-58    3       24
+-57    3       23
+-56    3       22
+-55    3       21
+-54    3       21
+-53    3       17
+-52    3       21
+-51    1012    1045
+-50    3       25
+-49    3       24
+-48    3       27
+-47    3       23
+-46    3       19
+PREHOOK: query: -- limit zero
+explain
+select key,value from src order by key limit 0
+PREHOOK: type: QUERY
+POSTHOOK: query: -- limit zero
+explain
+select key,value from src order by key limit 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 0
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key,value from src order by key limit 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key,value from src order by key limit 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+PREHOOK: query: -- 2MR (applied to last RS)
+explain
+select value, sum(key) as sum from src group by value order by sum limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 2MR (applied to last RS)
+explain
+select value, sum(key) as sum from src group by value order by sum limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: value (type: string), key (type: string)
+              outputColumnNames: value, key
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: sum(key)
+                keys: value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: double)
+              sort order: +
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+              value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: 
double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select value, sum(key) as sum from src group by value order by 
sum limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(key) as sum from src group by value order 
by sum limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0  0.0
+val_2  2.0
+val_4  4.0
+val_8  8.0
+val_9  9.0
+val_10 10.0
+val_11 11.0
+val_5  15.0
+val_17 17.0
+val_19 19.0
+val_20 20.0
+val_12 24.0
+val_27 27.0
+val_28 28.0
+val_30 30.0
+val_15 30.0
+val_33 33.0
+val_34 34.0
+val_18 36.0
+val_41 41.0
+PREHOOK: query: -- map aggregation disabled
+explain
+select value, sum(key) as sum from src group by value order by value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map aggregation disabled
+explain
+select value, sum(key) as sum from src group by value order by value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: value (type: string)
+                sort order: +
+                Map-reduce partition columns: value (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 0.3
+                value expressions: key (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+              value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select value, sum(key) as sum from src group by value order by 
value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(key) as sum from src group by value order 
by value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0  0.0
+val_10 10.0
+val_100        200.0
+val_103        206.0
+val_104        208.0
+val_105        105.0
+val_11 11.0
+val_111        111.0
+val_113        226.0
+val_114        114.0
+val_116        116.0
+val_118        236.0
+val_119        357.0
+val_12 24.0
+val_120        240.0
+val_125        250.0
+val_126        126.0
+val_128        384.0
+val_129        258.0
+val_131        131.0
+PREHOOK: query: -- flush for order-by
+explain
+select key,value,value,value,value,value,value,value,value from src order by 
key limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: -- flush for order-by
+explain
+select key,value,value,value,value,value,value,value,value from src order by 
key limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 2.0E-5
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
string), VALUE._col0 (type: string), VALUE._col0 (type: string), VALUE._col0 
(type: string), VALUE._col0 (type: string), VALUE._col0 (type: string), 
VALUE._col0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 100
+            Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key,value,value,value,value,value,value,value,value 
from src order by key limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key,value,value,value,value,value,value,value,value 
from src order by key limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      val_0   val_0   val_0   val_0   val_0   val_0   val_0   val_0
+0      val_0   val_0   val_0   val_0   val_0   val_0   val_0   val_0
+0      val_0   val_0   val_0   val_0   val_0   val_0   val_0   val_0
+10     val_10  val_10  val_10  val_10  val_10  val_10  val_10  val_10
+100    val_100 val_100 val_100 val_100 val_100 val_100 val_100 val_100
+100    val_100 val_100 val_100 val_100 val_100 val_100 val_100 val_100
+103    val_103 val_103 val_103 val_103 val_103 val_103 val_103 val_103
+103    val_103 val_103 val_103 val_103 val_103 val_103 val_103 val_103
+104    val_104 val_104 val_104 val_104 val_104 val_104 val_104 val_104
+104    val_104 val_104 val_104 val_104 val_104 val_104 val_104 val_104
+105    val_105 val_105 val_105 val_105 val_105 val_105 val_105 val_105
+11     val_11  val_11  val_11  val_11  val_11  val_11  val_11  val_11
+111    val_111 val_111 val_111 val_111 val_111 val_111 val_111 val_111
+113    val_113 val_113 val_113 val_113 val_113 val_113 val_113 val_113
+113    val_113 val_113 val_113 val_113 val_113 val_113 val_113 val_113
+114    val_114 val_114 val_114 val_114 val_114 val_114 val_114 val_114
+116    val_116 val_116 val_116 val_116 val_116 val_116 val_116 val_116
+118    val_118 val_118 val_118 val_118 val_118 val_118 val_118 val_118
+118    val_118 val_118 val_118 val_118 val_118 val_118 val_118 val_118
+119    val_119 val_119 val_119 val_119 val_119 val_119 val_119 val_119
+119    val_119 val_119 val_119 val_119 val_119 val_119 val_119 val_119
+119    val_119 val_119 val_119 val_119 val_119 val_119 val_119 val_119
+12     val_12  val_12  val_12  val_12  val_12  val_12  val_12  val_12
+12     val_12  val_12  val_12  val_12  val_12  val_12  val_12  val_12
+120    val_120 val_120 val_120 val_120 val_120 val_120 val_120 val_120
+120    val_120 val_120 val_120 val_120 val_120 val_120 val_120 val_120
+125    val_125 val_125 val_125 val_125 val_125 val_125 val_125 val_125
+125    val_125 val_125 val_125 val_125 val_125 val_125 val_125 val_125
+126    val_126 val_126 val_126 val_126 val_126 val_126 val_126 val_126
+128    val_128 val_128 val_128 val_128 val_128 val_128 val_128 val_128
+128    val_128 val_128 val_128 val_128 val_128 val_128 val_128 val_128
+128    val_128 val_128 val_128 val_128 val_128 val_128 val_128 val_128
+129    val_129 val_129 val_129 val_129 val_129 val_129 val_129 val_129
+129    val_129 val_129 val_129 val_129 val_129 val_129 val_129 val_129
+131    val_131 val_131 val_131 val_131 val_131 val_131 val_131 val_131
+133    val_133 val_133 val_133 val_133 val_133 val_133 val_133 val_133
+134    val_134 val_134 val_134 val_134 val_134 val_134 val_134 val_134
+134    val_134 val_134 val_134 val_134 val_134 val_134 val_134 val_134
+136    val_136 val_136 val_136 val_136 val_136 val_136 val_136 val_136
+137    val_137 val_137 val_137 val_137 val_137 val_137 val_137 val_137
+137    val_137 val_137 val_137 val_137 val_137 val_137 val_137 val_137
+138    val_138 val_138 val_138 val_138 val_138 val_138 val_138 val_138
+138    val_138 val_138 val_138 val_138 val_138 val_138 val_138 val_138
+138    val_138 val_138 val_138 val_138 val_138 val_138 val_138 val_138
+138    val_138 val_138 val_138 val_138 val_138 val_138 val_138 val_138
+143    val_143 val_143 val_143 val_143 val_143 val_143 val_143 val_143
+145    val_145 val_145 val_145 val_145 val_145 val_145 val_145 val_145
+146    val_146 val_146 val_146 val_146 val_146 val_146 val_146 val_146
+146    val_146 val_146 val_146 val_146 val_146 val_146 val_146 val_146
+149    val_149 val_149 val_149 val_149 val_149 val_149 val_149 val_149
+149    val_149 val_149 val_149 val_149 val_149 val_149 val_149 val_149
+15     val_15  val_15  val_15  val_15  val_15  val_15  val_15  val_15
+15     val_15  val_15  val_15  val_15  val_15  val_15  val_15  val_15
+150    val_150 val_150 val_150 val_150 val_150 val_150 val_150 val_150
+152    val_152 val_152 val_152 val_152 val_152 val_152 val_152 val_152
+152    val_152 val_152 val_152 val_152 val_152 val_152 val_152 val_152
+153    val_153 val_153 val_153 val_153 val_153 val_153 val_153 val_153
+155    val_155 val_155 val_155 val_155 val_155 val_155 val_155 val_155
+156    val_156 val_156 val_156 val_156 val_156 val_156 val_156 val_156
+157    val_157 val_157 val_157 val_157 val_157 val_157 val_157 val_157
+158    val_158 val_158 val_158 val_158 val_158 val_158 val_158 val_158
+160    val_160 val_160 val_160 val_160 val_160 val_160 val_160 val_160
+162    val_162 val_162 val_162 val_162 val_162 val_162 val_162 val_162
+163    val_163 val_163 val_163 val_163 val_163 val_163 val_163 val_163
+164    val_164 val_164 val_164 val_164 val_164 val_164 val_164 val_164
+164    val_164 val_164 val_164 val_164 val_164 val_164 val_164 val_164
+165    val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165
+165    val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165
+166    val_166 val_166 val_166 val_166 val_166 val_166 val_166 val_166
+167    val_167 val_167 val_167 val_167 val_167 val_167 val_167 val_167
+167    val_167 val_167 val_167 val_167 val_167 val_167 val_167 val_167
+167    val_167 val_167 val_167 val_167 val_167 val_167 val_167 val_167
+168    val_168 val_168 val_168 val_168 val_168 val_168 val_168 val_168
+169    val_169 val_169 val_169 val_169 val_169 val_169 val_169 val_169
+169    val_169 val_169 val_169 val_169 val_169 val_169 val_169 val_169
+169    val_169 val_169 val_169 val_169 val_169 val_169 val_169 val_169
+169    val_169 val_169 val_169 val_169 val_169 val_169 val_169 val_169
+17     val_17  val_17  val_17  val_17  val_17  val_17  val_17  val_17
+170    val_170 val_170 val_170 val_170 val_170 val_170 val_170 val_170
+172    val_172 val_172 val_172 val_172 val_172 val_172 val_172 val_172
+172    val_172 val_172 val_172 val_172 val_172 val_172 val_172 val_172
+174    val_174 val_174 val_174 val_174 val_174 val_174 val_174 val_174
+174    val_174 val_174 val_174 val_174 val_174 val_174 val_174 val_174
+175    val_175 val_175 val_175 val_175 val_175 val_175 val_175 val_175
+175    val_175 val_175 val_175 val_175 val_175 val_175 val_175 val_175
+176    val_176 val_176 val_176 val_176 val_176 val_176 val_176 val_176
+176    val_176 val_176 val_176 val_176 val_176 val_176 val_176 val_176
+177    val_177 val_177 val_177 val_177 val_177 val_177 val_177 val_177
+178    val_178 val_178 val_178 val_178 val_178 val_178 val_178 val_178
+179    val_179 val_179 val_179 val_179 val_179 val_179 val_179 val_179
+179    val_179 val_179 val_179 val_179 val_179 val_179 val_179 val_179
+18     val_18  val_18  val_18  val_18  val_18  val_18  val_18  val_18
+18     val_18  val_18  val_18  val_18  val_18  val_18  val_18  val_18
+180    val_180 val_180 val_180 val_180 val_180 val_180 val_180 val_180
+181    val_181 val_181 val_181 val_181 val_181 val_181 val_181 val_181
+183    val_183 val_183 val_183 val_183 val_183 val_183 val_183 val_183
+186    val_186 val_186 val_186 val_186 val_186 val_186 val_186 val_186
+187    val_187 val_187 val_187 val_187 val_187 val_187 val_187 val_187
+187    val_187 val_187 val_187 val_187 val_187 val_187 val_187 val_187
+187    val_187 val_187 val_187 val_187 val_187 val_187 val_187 val_187
+PREHOOK: query: -- flush for group-by
+explain
+select sum(key) as sum from src group by 
concat(key,value,value,value,value,value,value,value,value,value) order by sum 
limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: -- flush for group-by
+explain
+select sum(key) as sum from src group by 
concat(key,value,value,value,value,value,value,value,value,value) order by sum 
limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: concat(key, value, value, value, value, value, 
value, value, value, value) (type: string), key (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: double)
+              sort order: +
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              TopN Hash Memory Usage: 2.0E-5
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: double)
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 100
+            Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(key) as sum from src group by 
concat(key,value,value,value,value,value,value,value,value,value) order by sum 
limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(key) as sum from src group by 
concat(key,value,value,value,value,value,value,value,value,value) order by sum 
limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0.0
+2.0
+4.0
+8.0
+9.0
+10.0
+11.0
+15.0
+17.0
+19.0
+20.0
+24.0
+27.0
+28.0
+30.0
+30.0
+33.0
+34.0
+36.0
+41.0
+43.0
+44.0
+47.0
+48.0
+52.0
+53.0
+54.0
+57.0
+64.0
+65.0
+66.0
+69.0
+74.0
+74.0
+77.0
+78.0
+80.0
+82.0
+84.0
+85.0
+86.0
+87.0
+92.0
+96.0
+102.0
+105.0
+105.0
+111.0
+114.0
+116.0
+116.0
+126.0
+131.0
+133.0
+134.0
+136.0
+143.0
+144.0
+145.0
+150.0
+152.0
+153.0
+155.0
+156.0
+157.0
+158.0
+160.0
+162.0
+163.0
+166.0
+166.0
+168.0
+168.0
+170.0
+177.0
+178.0
+180.0
+181.0
+183.0
+186.0
+189.0
+190.0
+190.0
+192.0
+194.0
+194.0
+196.0
+196.0
+200.0
+201.0
+202.0
+206.0
+208.0
+210.0
+214.0
+218.0
+222.0
+226.0
+226.0
+228.0

hive git commit: HIVE-14002: Extend limit propagation to subsequent RS operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Reply via email to