[3/3] hive git commit: HIVE-20210 : Simple Fetch optimizer should lead to MapReduce when filter on non-partition column and conversion is minimal (Jeffrey Yan via Ashutosh Chauhan)

hashutosh Mon, 30 Jul 2018 15:37:05 -0700

HIVE-20210 : Simple Fetch optimizer should lead to MapReduce when filter on 
non-partition column and conversion is minimal (Jeffrey Yan via Ashutosh 
Chauhan)


Signed-off-by: Ashutosh Chauhan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/65f02d2f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/65f02d2f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/65f02d2f

Branch: refs/heads/master
Commit: 65f02d2f99b990cb28fd6a832fa3425042e60a04
Parents: 6fa9f63
Author: Jeffrey(Xilang) Yan <[email protected]>
Authored: Mon Jul 30 15:35:56 2018 -0700
Committer: Ashutosh Chauhan <[email protected]>
Committed: Mon Jul 30 15:35:56 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/SimpleFetchOptimizer.java |  31 +-
 .../test/queries/clientpositive/nonmr_fetch.q   |   4 +
 .../clientpositive/annotate_stats_part.q.out    | 117 ++++---
 .../clientpositive/cbo_rp_simple_select.q.out   | 232 +++++++++-----
 .../clientpositive/cbo_simple_select.q.out      | 232 +++++++++-----
 .../clientpositive/filter_in_or_dup.q.out       | 117 ++++---
 .../test/results/clientpositive/input42.q.out   | 148 ++++++---
 .../results/clientpositive/input_part9.q.out    |  75 +++--
 .../clientpositive/list_bucket_dml_1.q.out      |  72 +++--
 .../clientpositive/list_bucket_dml_11.q.out     |  72 +++--
 .../clientpositive/list_bucket_dml_12.q.out     | 144 ++++++---
 .../clientpositive/list_bucket_dml_13.q.out     |  72 +++--
 .../clientpositive/list_bucket_dml_2.q.out      |  72 +++--
 .../clientpositive/list_bucket_dml_3.q.out      |  72 +++--
 .../clientpositive/list_bucket_dml_4.q.out      |  72 +++--
 .../clientpositive/list_bucket_dml_5.q.out      |  75 +++--
 .../clientpositive/list_bucket_dml_6.q.out      |  75 +++--
 .../clientpositive/list_bucket_dml_7.q.out      |  75 +++--
 .../clientpositive/list_bucket_dml_8.q.out      |  75 +++--
 .../clientpositive/list_bucket_dml_9.q.out      |  72 +++--
 .../list_bucket_query_multiskew_1.q.out         | 288 ++++++++++++-----
 .../list_bucket_query_multiskew_2.q.out         | 316 +++++++++++++++----
 .../list_bucket_query_multiskew_3.q.out         | 216 +++++++++----
 .../list_bucket_query_oneskew_1.q.out           | 216 +++++++++----
 .../list_bucket_query_oneskew_2.q.out           | 144 ++++++---
 .../list_bucket_query_oneskew_3.q.out           | 166 ++++++++--
 .../results/clientpositive/nonmr_fetch.q.out    | 108 ++++++-
 .../results/clientpositive/ppr_pushdown3.q.out  |  39 ++-
 .../clientpositive/rand_partitionpruner3.q.out  | 142 ++++++---
 .../truncate_column_list_bucket.q.out           | 144 ++++++---
 .../results/clientpositive/union_view.q.out     | 117 ++++---
 31 files changed, 2814 insertions(+), 986 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index ffd47a2..89f6ee1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -211,11 +211,38 @@ public class SimpleFetchOptimizer extends Transform {
         bypassFilter = !pctx.getPrunedPartitions(alias, 
ts).hasUnknownPartitions();
       }
     }
-    if (!aggressive && !bypassFilter) {
+
+    boolean onlyPruningFilter = bypassFilter;
+    Operator<?> op = ts;
+    while (onlyPruningFilter) {
+      if (op instanceof FileSinkOperator || op.getChildOperators() == null) {
+        break;
+      } else if (op.getChildOperators().size() != 1) {
+        onlyPruningFilter = false;
+        break;
+      } else {
+        op = op.getChildOperators().get(0);
+      }
+
+      if (op instanceof FilterOperator) {
+        ExprNodeDesc predicate = ((FilterOperator) 
op).getConf().getPredicate();
+        if (predicate instanceof ExprNodeConstantDesc
+                && "boolean".equals(predicate.getTypeInfo().getTypeName())) {
+          continue;
+        } else if (PartitionPruner.onlyContainsPartnCols(table, predicate)) {
+          continue;
+        } else {
+          onlyPruningFilter = false;
+        }
+      }
+    }
+
+    if (!aggressive && !onlyPruningFilter) {
       return null;
     }
+
     PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts);
-    FetchData fetch = new FetchData(ts, parent, table, partitions, 
splitSample, bypassFilter);
+    FetchData fetch = new FetchData(ts, parent, table, partitions, 
splitSample, onlyPruningFilter);
     return checkOperators(fetch, aggressive, bypassFilter);
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/queries/clientpositive/nonmr_fetch.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/nonmr_fetch.q 
b/ql/src/test/queries/clientpositive/nonmr_fetch.q
index 1b5ab56..5b7e61e 100644
--- a/ql/src/test/queries/clientpositive/nonmr_fetch.q
+++ b/ql/src/test/queries/clientpositive/nonmr_fetch.q
@@ -15,6 +15,10 @@ select * from srcpart where ds='2008-04-08' AND hr='11' 
limit 10;
 explain select key from src limit 10;
 select key from src limit 10;
 
+-- negative, filter on partition column and non-partition column
+explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10;
+select * from srcpart where ds='2008-04-08' AND key > 100 limit 10;
+
 -- negative, filter on non-partition column
 explain select * from srcpart where key > 100 limit 10;
 select * from srcpart where key > 100 limit 10;

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 29ef214..bafc6de 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -441,73 +441,112 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain select locid from loc_orc_n4 where locid>0 and 
year='2001'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc_n4
+            filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
+            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
+            Filter Operator
+              predicate: (locid > 0) (type: boolean)
+              Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: locid (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: loc_orc_n4
-          filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
-          Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
-          Filter Operator
-            predicate: (locid > 0) (type: boolean)
-            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
-            Select Operator
-              expressions: locid (type: int)
-              outputColumnNames: _col0
-              Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and 
year='2001'
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and 
year='2001'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc_n4
+            filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
+            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
+            Filter Operator
+              predicate: (locid > 0) (type: boolean)
+              Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: locid (type: int), '2001' (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: loc_orc_n4
-          filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
-          Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
-          Filter Operator
-            predicate: (locid > 0) (type: boolean)
-            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
-            Select Operator
-              expressions: locid (type: int), '2001' (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: explain select * from (select locid,year from loc_orc_n4) test 
where locid>0 and year='2001'
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from (select locid,year from loc_orc_n4) 
test where locid>0 and year='2001'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc_n4
+            filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
+            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
+            Filter Operator
+              predicate: (locid > 0) (type: boolean)
+              Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: locid (type: int), '2001' (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: loc_orc_n4
-          filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
-          Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
-          Filter Operator
-            predicate: (locid > 0) (type: boolean)
-            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column 
stats: COMPLETE
-            Select Operator
-              expressions: locid (type: int), '2001' (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out 
b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
index d12b5f6..2e7d796 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
@@ -746,25 +746,38 @@ PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN 
(cbo_t2.c_int)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int = c_int) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int = c_int) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int = c_int) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int = c_int) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: key, value, c_int, c_float, c_boolean, dt
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- c_int is not null
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int)
@@ -773,25 +786,38 @@ POSTHOOK: query: -- c_int is not null
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int = (2 * c_int)) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int = (2 * c_int)) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int = (2 * c_int)) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int = (2 * c_int)) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: key, value, c_int, c_float, c_boolean, dt
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- c_int is 0
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int)
@@ -800,25 +826,38 @@ POSTHOOK: query: -- c_int is 0
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int = c_int) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int = c_int) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int = c_int) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int = c_int) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: key, value, c_int, c_float, c_boolean, dt
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- c_int is not null
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL)
@@ -827,25 +866,38 @@ POSTHOOK: query: -- c_int is not null
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int = null) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int = null) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), null 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+                Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3624 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int = null) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int = null) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), null 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: key, value, c_int, c_float, c_boolean, dt
-              Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- rewrite to NULL
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 
2*cbo_t2.c_int)
@@ -854,25 +906,37 @@ POSTHOOK: query: -- rewrite to NULL
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 
2*cbo_t2.c_int)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: key, value, c_int, c_float, c_boolean, dt
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- no rewrite
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0)
@@ -881,25 +945,37 @@ POSTHOOK: query: -- no rewrite
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int) IN (c_int, 0) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int) IN (c_int, 0) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int) IN (c_int, 0) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int) IN (c_int, 0) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: key, value, c_int, c_float, c_boolean, dt
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- no rewrite
 

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/cbo_simple_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_simple_select.q.out 
b/ql/src/test/results/clientpositive/cbo_simple_select.q.out
index 588d924..33f0e71 100644
--- a/ql/src/test/results/clientpositive/cbo_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/cbo_simple_select.q.out
@@ -746,25 +746,38 @@ PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN 
(cbo_t2.c_int)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int = c_int) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int = c_int) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int = c_int) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int = c_int) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- c_int is not null
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int)
@@ -773,25 +786,38 @@ POSTHOOK: query: -- c_int is not null
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int = (2 * c_int)) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int = (2 * c_int)) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int = (2 * c_int)) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int = (2 * c_int)) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- c_int is 0
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int)
@@ -800,25 +826,38 @@ POSTHOOK: query: -- c_int is 0
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int = c_int) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int = c_int) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int = c_int) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int = c_int) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- c_int is not null
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL)
@@ -827,25 +866,38 @@ POSTHOOK: query: -- c_int is not null
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int = null) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int = null) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), null 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3624 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int = null) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int = null) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), null 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-              Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- rewrite to NULL
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 
2*cbo_t2.c_int)
@@ -854,25 +906,37 @@ POSTHOOK: query: -- rewrite to NULL
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 
2*cbo_t2.c_int)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- no rewrite
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0)
@@ -881,25 +945,37 @@ POSTHOOK: query: -- no rewrite
 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t2
+            filterExpr: (c_int) IN (c_int, 0) (type: boolean)
+            Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Filter Operator
+              predicate: (c_int) IN (c_int, 0) (type: boolean)
+              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 3660 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: cbo_t2
-          filterExpr: (c_int) IN (c_int, 0) (type: boolean)
-          Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE 
Column stats: COMPLETE
-          Filter Operator
-            predicate: (c_int) IN (c_int, 0) (type: boolean)
-            Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-            Select Operator
-              expressions: key (type: string), value (type: string), c_int 
(type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-              Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE 
Column stats: COMPLETE
-              ListSink
+        ListSink
 
 PREHOOK: query: -- no rewrite
 

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out 
b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
index b821717..b50027d 100644
--- a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
+++ b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
@@ -11,25 +11,38 @@ WHERE (f.key = '1' OR f.key='2')
 AND f.key IN ('1', '2')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: f
+            filterExpr: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (key) IN ('1', '2') (type: boolean)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 131 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: f
-          filterExpr: (key) IN ('1', '2') (type: boolean)
-          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column 
stats: NONE
-          Filter Operator
-            predicate: (key) IN ('1', '2') (type: boolean)
-            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: EXPLAIN
 SELECT f.key
@@ -44,25 +57,38 @@ WHERE (f.key = '1' OR f.key = '2')
 AND f.key IN ('1', '2', '3')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: f
+            filterExpr: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (key) IN ('1', '2') (type: boolean)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 131 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: f
-          filterExpr: (key) IN ('1', '2') (type: boolean)
-          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column 
stats: NONE
-          Filter Operator
-            predicate: (key) IN ('1', '2') (type: boolean)
-            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: EXPLAIN
 SELECT f.key
@@ -77,23 +103,36 @@ WHERE (f.key = '1' OR f.key='2' OR f.key='3')
 AND f.key IN ('1', '2')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: f
+            filterExpr: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (key) IN ('1', '2') (type: boolean)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 10 Data size: 131 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: f
-          filterExpr: (key) IN ('1', '2') (type: boolean)
-          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column 
stats: NONE
-          Filter Operator
-            predicate: (key) IN ('1', '2') (type: boolean)
-            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/input42.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input42.q.out 
b/ql/src/test/results/clientpositive/input42.q.out
index df98800..98c2fd0 100644
--- a/ql/src/test/results/clientpositive/input42.q.out
+++ b/ql/src/test/results/clientpositive/input42.q.out
@@ -1143,14 +1143,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' 
AS STRING) AS `ds`, `hr`
 FROM `default`.`srcpart`
 WHERE `ds` = '2008-04-08' AND `key` < 200
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Partition Description:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) < 200.0D)) 
(type: boolean)
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (UDFToDouble(key) < 200.0D) (type: boolean)
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string), 
'2008-04-08' (type: string), hr (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 333 Data size: 3537 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3
+                        columns.types string:string:string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
           Partition
+            base file name: hr=11
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             partition values:
@@ -1197,7 +1238,9 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.srcpart
             name: default.srcpart
+#### A masked pattern was here ####
           Partition
+            base file name: hr=12
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             partition values:
@@ -1244,21 +1287,15 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.srcpart
             name: default.srcpart
+      Truncated Path -> Alias:
+        /srcpart/ds=2008-04-08/hr=11 [a]
+        /srcpart/ds=2008-04-08/hr=12 [a]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
       Processor Tree:
-        TableScan
-          alias: a
-          filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) < 200.0D)) 
(type: boolean)
-          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
-          GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (UDFToDouble(key) < 200.0D) (type: boolean)
-            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string), 
'2008-04-08' (type: string), hr (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and key < 200
 PREHOOK: type: QUERY
@@ -1660,14 +1697,54 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' 
AS STRING) AS `ds`, `hr`
 FROM `default`.`srcpart`
 WHERE `ds` = '2008-04-08' AND RAND(100) < 0.1
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Partition Description:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (rand(100) < 0.1D) (type: boolean)
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string), 
'2008-04-08' (type: string), hr (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 333 Data size: 3537 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3
+                        columns.types string:string:string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
           Partition
+            base file name: hr=11
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             partition values:
@@ -1714,7 +1791,9 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.srcpart
             name: default.srcpart
+#### A masked pattern was here ####
           Partition
+            base file name: hr=12
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             partition values:
@@ -1761,20 +1840,15 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.srcpart
             name: default.srcpart
+      Truncated Path -> Alias:
+        /srcpart/ds=2008-04-08/hr=11 [a]
+        /srcpart/ds=2008-04-08/hr=12 [a]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
       Processor Tree:
-        TableScan
-          alias: a
-          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
-          GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (rand(100) < 0.1D) (type: boolean)
-            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string), 
'2008-04-08' (type: string), hr (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) 
< 0.1
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/input_part9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input_part9.q.out 
b/ql/src/test/results/clientpositive/input_part9.q.out
index 9440167..5ecaeb1 100644
--- a/ql/src/test/results/clientpositive/input_part9.q.out
+++ b/ql/src/test/results/clientpositive/input_part9.q.out
@@ -8,14 +8,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS 
STRING) AS `ds`, `hr`
 FROM `default`.`srcpart`
 WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Partition Description:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: x
+            filterExpr: ((ds = '2008-04-08') and key is not null) (type: 
boolean)
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string), 
'2008-04-08' (type: string), hr (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3
+                        columns.types string:string:string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
           Partition
+            base file name: hr=11
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             partition values:
@@ -62,7 +103,9 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.srcpart
             name: default.srcpart
+#### A masked pattern was here ####
           Partition
+            base file name: hr=12
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             partition values:
@@ -109,21 +152,15 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.srcpart
             name: default.srcpart
+      Truncated Path -> Alias:
+        /srcpart/ds=2008-04-08/hr=11 [x]
+        /srcpart/ds=2008-04-08/hr=12 [x]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
       Processor Tree:
-        TableScan
-          alias: x
-          filterExpr: ((ds = '2008-04-08') and key is not null) (type: boolean)
-          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
-          GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: key is not null (type: boolean)
-            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string), 
'2008-04-08' (type: string), hr (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: SELECT x.* FROM SRCPART x WHERE key IS NOT NULL AND ds = 
'2008-04-08'
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out 
b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
index d13edd6..226e778 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
@@ -409,14 +409,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, 
`value`
 FROM `default`.`list_bucketing_dynamic_part_n0`
 WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484'
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Partition Description:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: list_bucketing_dynamic_part_n0
+            filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = 
'484')) (type: boolean)
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (key = '484') (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: '484' (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
           Partition
+            base file name: key=484
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             partition values:
@@ -463,21 +504,14 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.list_bucketing_dynamic_part_n0
             name: default.list_bucketing_dynamic_part_n0
+      Truncated Path -> Alias:
+        /list_bucketing_dynamic_part_n0/ds=2008-04-08/hr=11/key=484 
[list_bucketing_dynamic_part_n0]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
       Processor Tree:
-        TableScan
-          alias: list_bucketing_dynamic_part_n0
-          filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) 
(type: boolean)
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-          GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (key = '484') (type: boolean)
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: '484' (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: select key, value from list_bucketing_dynamic_part_n0 where 
ds='2008-04-08' and hr='11' and key = "484"
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out 
b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
index 44b712b..11f7dc1 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
@@ -290,14 +290,55 @@ OPTIMIZED SQL: SELECT `key`, CAST('val_466' AS STRING) AS 
`value`
 FROM `default`.`list_bucketing_static_part_n3`
 WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `value` = 'val_466'
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Partition Description:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: list_bucketing_static_part_n3
+            filterExpr: ((ds = '2008-04-08') and (hr = '11') and (value = 
'val_466')) (type: boolean)
+            Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (value = 'val_466') (type: boolean)
+              Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: key (type: string), 'val_466' (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2406 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 250 Data size: 2406 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
           Partition
+            base file name: value=val_466
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -344,21 +385,14 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_static_part_n3
             name: default.list_bucketing_static_part_n3
+      Truncated Path -> Alias:
+        /list_bucketing_static_part_n3/ds=2008-04-08/hr=11/value=val_466 
[list_bucketing_static_part_n3]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
       Processor Tree:
-        TableScan
-          alias: list_bucketing_static_part_n3
-          filterExpr: ((ds = '2008-04-08') and (hr = '11') and (value = 
'val_466')) (type: boolean)
-          Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE 
Column stats: NONE
-          GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (value = 'val_466') (type: boolean)
-            Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: key (type: string), 'val_466' (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: select key, value from list_bucketing_static_part_n3 where 
ds='2008-04-08' and hr='11' and value = "val_466"
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out 
b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
index f5e643e..f1c20e5 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
@@ -298,14 +298,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('466' AS STRING) AS 
`col2`, `col3`, CAST('val
 FROM `default`.`list_bucketing_mul_col_n0`
 WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '466' AND `col4` = 
'val_466'
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Partition Description:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: list_bucketing_mul_col_n0
+            filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
+            Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((col2 = '466') and (col4 = 'val_466')) (type: 
boolean)
+              Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: col1 (type: string), '466' (type: string), col3 
(type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' 
(type: string), '11' (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
+                Statistics: Num rows: 125 Data size: 1578 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 125 Data size: 1578 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+                        columns.types 
string:string:string:string:string:string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
           Partition
+            base file name: col4=val_466
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -352,21 +393,14 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_mul_col_n0
             name: default.list_bucketing_mul_col_n0
+      Truncated Path -> Alias:
+        /list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=466/col4=val_466 
[list_bucketing_mul_col_n0]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
       Processor Tree:
-        TableScan
-          alias: list_bucketing_mul_col_n0
-          filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
-          Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE 
Column stats: NONE
-          GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
-            Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: col1 (type: string), '466' (type: string), col3 
(type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' 
(type: string), '11' (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
-              Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: select * from list_bucketing_mul_col_n0 
 where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
@@ -395,14 +429,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('382' AS STRING) AS 
`col2`, `col3`, CAST('val
 FROM `default`.`list_bucketing_mul_col_n0`
 WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '382' AND `col4` = 
'val_382'
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Partition Description:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: list_bucketing_mul_col_n0
+            filterExpr: ((col2 = '382') and (col4 = 'val_382')) (type: boolean)
+            Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: ((col2 = '382') and (col4 = 'val_382')) (type: 
boolean)
+              Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: col1 (type: string), '382' (type: string), col3 
(type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' 
(type: string), '11' (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
+                Statistics: Num rows: 125 Data size: 1578 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 125 Data size: 1578 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+                        columns.types 
string:string:string:string:string:string:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
           Partition
+            base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -449,21 +524,14 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_mul_col_n0
             name: default.list_bucketing_mul_col_n0
+      Truncated Path -> Alias:
+        
/list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
 [list_bucketing_mul_col_n0]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
       Processor Tree:
-        TableScan
-          alias: list_bucketing_mul_col_n0
-          filterExpr: ((col2 = '382') and (col4 = 'val_382')) (type: boolean)
-          Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE 
Column stats: NONE
-          GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean)
-            Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: col1 (type: string), '382' (type: string), col3 
(type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' 
(type: string), '11' (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
-              Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE 
Column stats: NONE
-              ListSink
+        ListSink
 
 PREHOOK: query: select * from list_bucketing_mul_col_n0 
 where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"

[3/3] hive git commit: HIVE-20210 : Simple Fetch optimizer should lead to MapReduce when filter on non-partition column and conversion is minimal (Jeffery Yan via Ashutosh Chauhan)

Reply via email to