hive git commit: HIVE-13287: Add logic to estimate stats for IN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

jcamacho Fri, 15 Apr 2016 04:04:02 -0700

Repository: hive
Updated Branches:
  refs/heads/master 833a7d158 -> 3fec161da



HIVE-13287: Add logic to estimate stats for IN operator (Jesus Camacho 
Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3fec161d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3fec161d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3fec161d

Branch: refs/heads/master
Commit: 3fec161dad40860d493dff203f9da3925226bb8e
Parents: 833a7d1
Author: Jesus Camacho Rodriguez <[email protected]>
Authored: Wed Mar 23 21:00:41 2016 +0000
Committer: Jesus Camacho Rodriguez <[email protected]>
Committed: Fri Apr 15 12:01:26 2016 +0100

----------------------------------------------------------------------
 .../stats/annotation/StatsRulesProcFactory.java | 107 ++++++++++++++++++-
 .../clientpositive/filter_cond_pushdown.q.out   |   6 +-
 .../groupby_multi_single_reducer3.q.out         |   8 +-
 .../llap/dynamic_partition_pruning_2.q.out      |  30 +++---
 ql/src/test/results/clientpositive/pcs.q.out    |   6 +-
 .../results/clientpositive/perf/query17.q.out   |   8 +-
 .../results/clientpositive/perf/query29.q.out   |   8 +-
 .../results/clientpositive/perf/query46.q.out   |  10 +-
 .../results/clientpositive/perf/query89.q.out   |   4 +-
 .../results/clientpositive/pointlookup.q.out    |  12 +--
 .../results/clientpositive/pointlookup2.q.out   |  16 +--
 .../results/clientpositive/pointlookup3.q.out   |   8 +-
 .../spark/groupby_multi_single_reducer3.q.out   |   8 +-
 .../tez/dynamic_partition_pruning_2.q.out       |  30 +++---
 14 files changed, 180 insertions(+), 81 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index c4fc5ca..320dc10 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -24,6 +24,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 import java.util.Stack;
 
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -53,6 +54,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
@@ -64,6 +66,7 @@ import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
@@ -76,19 +79,24 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
 
 public class StatsRulesProcFactory {
 
   private static final Logger LOG = 
LoggerFactory.getLogger(StatsRulesProcFactory.class.getName());
   private static final boolean isDebugEnabled = LOG.isDebugEnabled();
 
+
   /**
    * Collect basic statistics like number of rows, data size and column level 
statistics from the
    * table. Also sets the state of the available statistics. Basic and column 
statistics can have
@@ -299,7 +307,7 @@ public class StatsRulesProcFactory {
 
     private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        FilterOperator fop, long evaluatedRowCount) throws 
CloneNotSupportedException {
+        FilterOperator fop, long evaluatedRowCount) throws 
CloneNotSupportedException, SemanticException {
       long newNumRows = 0;
       Statistics andStats = null;
 
@@ -338,6 +346,9 @@ public class StatsRulesProcFactory {
               evaluatedRowCount = newNumRows;
             }
           }
+        } else if (udf instanceof GenericUDFIn) {
+          // for IN clause
+          newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop);
         } else if (udf instanceof GenericUDFOPNot) {
           newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
         } else if (udf instanceof GenericUDFOPNotNull) {
@@ -375,9 +386,97 @@ public class StatsRulesProcFactory {
       return newNumRows;
     }
 
+    /**
+     * Estimates how many rows satisfy an IN predicate.
+     *
+     * Two predicate shapes are analyzed:
+     * a single column reference, e.g. {@code col IN (c1, c2, ...)}, and a
+     * STRUCT of column references compared against struct constants, e.g.
+     * {@code struct(a, b) IN (const struct(..), const struct(..))}.
+     *
+     * For every column the selectivity is taken as
+     * (number of distinct constants listed for that column) / (number of
+     * distinct values of the column), capped at 1. When no distinct-value
+     * count is available for a column, a selectivity of 0.5 is assumed for it.
+     * The per-column selectivities are multiplied together.
+     *
+     * Whenever the predicate cannot be analyzed (left side is not a plain
+     * column reference, a referenced column is not in {@code neededCols}, a
+     * list element is not a constant, or a struct constant does not match the
+     * column list), half of the input rows is returned.
+     *
+     * @param stats statistics of the input, providing row count and column statistics
+     * @param pred the IN predicate; must be an {@link ExprNodeGenericFuncDesc}
+     * @param aspCtx annotation context (not used by this method)
+     * @param neededCols columns read by the scan, or null to skip the check
+     * @param fop the filter operator owning the predicate (not used by this method)
+     * @return estimated number of rows passing the predicate
+     * @throws SemanticException declared for consistency with the other evaluators
+     */
+    private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
+            List<String> neededCols, FilterOperator fop) throws SemanticException {
+
+      final long numRows = stats.getNumRows();
+      // Estimate returned whenever the predicate shape cannot be analyzed
+      final long defaultNumRows = numRows / 2;
+
+      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
+
+      // 1. It is an IN operator, check if it uses STRUCT
+      List<ExprNodeDesc> children = fd.getChildren();
+      ExprNodeDesc columnsChild = children.get(0);
+      final boolean multiColumn = columnsChild instanceof ExprNodeGenericFuncDesc
+          && ((ExprNodeGenericFuncDesc) columnsChild).getGenericUDF() instanceof GenericUDFStruct;
+      List<ExprNodeDesc> columnChildren;
+      if (multiColumn) {
+        columnChildren = columnsChild.getChildren();
+      } else {
+        columnChildren = Lists.newArrayList();
+        columnChildren.add(columnsChild);
+      }
+
+      // One statistics entry and one set of distinct constants per column,
+      // kept in the same order as the columns on the left side of IN
+      List<ColStatistics> columnStats = Lists.newArrayList();
+      List<Set<ExprNodeDescEqualityWrapper>> values = Lists.newArrayList();
+      for (ExprNodeDesc columnChild : columnChildren) {
+        // If column is not a column reference, we bail out
+        if (!(columnChild instanceof ExprNodeColumnDesc)) {
+          return defaultNumRows;
+        }
+        final String columnName = ((ExprNodeColumnDesc) columnChild).getColumn();
+        // if column name is not contained in needed column list then it
+        // is a partition column. We do not need to evaluate partition columns
+        // in filter expression since it will be taken care by partition pruner
+        if (neededCols != null && !neededCols.contains(columnName)) {
+          return defaultNumRows;
+        }
+        columnStats.add(stats.getColumnStatisticsFromColName(columnName));
+        values.add(Sets.<ExprNodeDescEqualityWrapper>newHashSet());
+      }
+
+      // 2. Extract the distinct constants listed for every column
+      for (int i = 1; i < children.size(); i++) {
+        ExprNodeDesc child = children.get(i);
+        // If value is not a constant, we bail out
+        if (!(child instanceof ExprNodeConstantDesc)) {
+          return defaultNumRows;
+        }
+        if (multiColumn) {
+          ExprNodeConstantDesc constantChild = (ExprNodeConstantDesc) child;
+          Object structValue =
+              constantChild.getWritableObjectInspector().getWritableConstantValue();
+          TypeInfo structType = constantChild.getTypeInfo();
+          // Fall back instead of failing with a ClassCastException when the
+          // constant is not a struct value
+          if (!(structValue instanceof List) || !(structType instanceof StructTypeInfo)) {
+            return defaultNumRows;
+          }
+          List<?> items = (List<?>) structValue;
+          List<TypeInfo> structTypes = ((StructTypeInfo) structType).getAllStructFieldTypeInfos();
+          // The struct constant must provide exactly one field per column,
+          // otherwise the positional indexing below would run out of bounds
+          if (structTypes.size() != values.size() || items.size() != values.size()) {
+            return defaultNumRows;
+          }
+          for (int j = 0; j < structTypes.size(); j++) {
+            ExprNodeConstantDesc constant = new ExprNodeConstantDesc(structTypes.get(j), items.get(j));
+            values.get(j).add(new ExprNodeDescEqualityWrapper(constant));
+          }
+        } else {
+          values.get(0).add(new ExprNodeDescEqualityWrapper(child));
+        }
+      }
+
+      // 3. Calculate IN selectivity
+      double factor = 1d;
+      for (int i = 0; i < columnStats.size(); i++) {
+        ColStatistics colStats = columnStats.get(i);
+        long dvs = colStats == null ? 0 : colStats.getCountDistint();
+        // ( num of distinct vals for col in IN clause / num of distinct vals for col ),
+        // capped at 1 because a filter can never produce more rows than it receives
+        double columnFactor = dvs <= 0
+            ? 0.5d
+            : Math.min(1d, (double) values.get(i).size() / dvs);
+        factor *= columnFactor;
+      }
+      return Math.round(numRows * factor);
+    }
+
     private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator 
fop)
-        throws CloneNotSupportedException {
+        throws CloneNotSupportedException, SemanticException {
 
       long numRows = stats.getNumRows();
 
@@ -676,7 +775,7 @@ public class StatsRulesProcFactory {
 
     private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        FilterOperator fop, long evaluatedRowCount) throws 
CloneNotSupportedException {
+        FilterOperator fop, long evaluatedRowCount) throws 
CloneNotSupportedException, SemanticException {
 
       long numRows = stats.getNumRows();
 
@@ -761,7 +860,7 @@ public class StatsRulesProcFactory {
         } else if (udf instanceof GenericUDFOPNull) {
           return evaluateColEqualsNullExpr(stats, genFunc);
         } else if (udf instanceof GenericUDFOPAnd || udf instanceof 
GenericUDFOPOr
-            || udf instanceof GenericUDFOPNot) {
+            || udf instanceof GenericUDFIn || udf instanceof GenericUDFOPNot) {
           return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, 
evaluatedRowCount);
         }
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index f48a5a4..132b590 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -442,14 +442,14 @@ STAGE PLANS:
           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
           Filter Operator
             predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col1) 
IN ('2008-04-08', '2008-04-09') and (_col3 = '2008-04-10')) or (_col3 = 
'2008-04-08')) (type: boolean)
-            Statistics: Num rows: 343 Data size: 3643 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE 
Column stats: NONE
             Select Operator
               expressions: _col0 (type: string), _col1 (type: string), _col3 
(type: string)
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 343 Data size: 3643 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE 
Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 343 Data size: 3643 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 344 Data size: 3654 Basic stats: 
COMPLETE Column stats: NONE
                 table:
                     input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
index 5362390..c5488de 100644
--- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
+++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
@@ -72,7 +72,7 @@ STAGE PLANS:
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
           Filter Operator
             predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and 
(KEY._col0) IN (100, 150, 200)) (type: boolean)
-            Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE 
Column stats: NONE
             Group By Operator
               aggregations: count()
               keys: KEY._col0 (type: string)
@@ -93,7 +93,7 @@ STAGE PLANS:
                       name: default.e1
           Filter Operator
             predicate: ((VALUE._col0) IN ('val_400', 'val_500') and 
(KEY._col0) IN (400, 450)) (type: boolean)
-            Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE 
Column stats: NONE
             Group By Operator
               aggregations: count()
               keys: KEY._col0 (type: string)
@@ -404,7 +404,7 @@ STAGE PLANS:
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
           Filter Operator
             predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and 
(KEY._col0) IN (100, 150, 200)) (type: boolean)
-            Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE 
Column stats: NONE
             Group By Operator
               aggregations: count()
               keys: KEY._col0 (type: string)
@@ -425,7 +425,7 @@ STAGE PLANS:
                       name: default.e1
           Filter Operator
             predicate: ((VALUE._col0) IN ('val_400', 'val_500') and 
(KEY._col0) IN (400, 450)) (type: boolean)
-            Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE 
Column stats: NONE
             Group By Operator
               aggregations: count()
               keys: KEY._col0 (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out
index 6f93b6a..db3b85d 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out
@@ -208,31 +208,31 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
                     predicate: ((label) IN ('foo', 'bar') and id is not null) 
(type: boolean)
-                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: id (type: int), label (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
                       Select Operator
                         expressions: _col0 (type: int)
                         outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           keys: _col0 (type: int)
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                           Dynamic Partitioning Event Operator
                             Target column: dim_shops_id (int)
                             Target Input: agg
                             Partition key expr: dim_shops_id
-                            Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                            Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                             Target Vertex: Map 1
             Execution mode: llap
             LLAP IO: no inputs
@@ -382,16 +382,16 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
                     predicate: ((label) IN ('foo', 'bar') and id is not null) 
(type: boolean)
-                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: id (type: int), label (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -757,31 +757,31 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
                     predicate: ((label) IN ('foo', 'bar') and id is not null) 
(type: boolean)
-                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: id (type: int), label (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
                       Select Operator
                         expressions: _col0 (type: int)
                         outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           keys: _col0 (type: int)
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                           Dynamic Partitioning Event Operator
                             Target column: dim_shops_id (int)
                             Target Input: agg
                             Partition key expr: dim_shops_id
-                            Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                            Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                             Target Vertex: Map 1
             Execution mode: llap
             LLAP IO: no inputs

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
index a1382f1..d6d2431 100644
--- a/ql/src/test/results/clientpositive/pcs.q.out
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -921,17 +921,17 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (struct(_col2,_col0,_col8)) IN (const 
struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) 
(type: boolean)
-            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column 
stats: NONE
             Select Operator
               expressions: _col2 (type: string), _col6 (type: int)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE 
Column stats: NONE
               File Output Operator
                 compressed: false
                 GlobalTableId: 0
 #### A masked pattern was here ####
                 NumFilesPerFileSink: 1
-                Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE 
Column stats: NONE
 #### A masked pattern was here ####
                 table:
                     input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query17.q.out b/ql/src/test/results/clientpositive/perf/query17.q.out
index f98ed99..1b5a640 100644
--- a/ql/src/test/results/clientpositive/perf/query17.q.out
+++ b/ql/src/test/results/clientpositive/perf/query17.q.out
@@ -71,9 +71,9 @@ Stage-0
                                 <-Map 15 [SIMPLE_EDGE]
                                   SHUFFLE [RS_37]
                                     PartitionCols:_col0
-                                    Select Operator [SEL_17] (rows=36524 
width=1119)
+                                    Select Operator [SEL_17] (rows=36525 
width=1119)
                                       Output:["_col0"]
-                                      Filter Operator [FIL_95] (rows=36524 
width=1119)
+                                      Filter Operator [FIL_95] (rows=36525 
width=1119)
                                         predicate:((d_quarter_name) IN 
('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null)
                                         TableScan [TS_15] (rows=73049 
width=1119)
                                           
default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"]
@@ -85,9 +85,9 @@ Stage-0
                                     <-Map 14 [SIMPLE_EDGE]
                                       SHUFFLE [RS_34]
                                         PartitionCols:_col0
-                                        Select Operator [SEL_14] (rows=36524 
width=1119)
+                                        Select Operator [SEL_14] (rows=36525 
width=1119)
                                           Output:["_col0"]
-                                          Filter Operator [FIL_94] (rows=36524 
width=1119)
+                                          Filter Operator [FIL_94] (rows=36525 
width=1119)
                                             predicate:((d_quarter_name) IN 
('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null)
                                             TableScan [TS_12] (rows=73049 
width=1119)
                                               
default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"]

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query29.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query29.q.out b/ql/src/test/results/clientpositive/perf/query29.q.out
index 0f4116a..39aca92 100644
--- a/ql/src/test/results/clientpositive/perf/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/query29.q.out
@@ -52,7 +52,7 @@ Stage-0
                         <-Reducer 7 [SIMPLE_EDGE]
                           SHUFFLE [RS_42]
                             PartitionCols:_col1
-                            Merge Join Operator [MERGEJOIN_102] (rows=44193 
width=1119)
+                            Merge Join Operator [MERGEJOIN_102] (rows=44194 
width=1119)
                               
Conds:RS_39._col3=RS_40._col0(Inner),Output:["_col1","_col5","_col10","_col14","_col24","_col25"]
                             <-Map 16 [SIMPLE_EDGE]
                               SHUFFLE [RS_40]
@@ -66,14 +66,14 @@ Stage-0
                             <-Reducer 6 [SIMPLE_EDGE]
                               SHUFFLE [RS_39]
                                 PartitionCols:_col3
-                                Merge Join Operator [MERGEJOIN_101] 
(rows=40176 width=1119)
+                                Merge Join Operator [MERGEJOIN_101] 
(rows=40177 width=1119)
                                   
Conds:RS_36._col11=RS_37._col0(Inner),Output:["_col1","_col3","_col5","_col10","_col14"]
                                 <-Map 15 [SIMPLE_EDGE]
                                   SHUFFLE [RS_37]
                                     PartitionCols:_col0
-                                    Select Operator [SEL_17] (rows=36524 
width=1119)
+                                    Select Operator [SEL_17] (rows=36525 
width=1119)
                                       Output:["_col0"]
-                                      Filter Operator [FIL_94] (rows=36524 
width=1119)
+                                      Filter Operator [FIL_94] (rows=36525 
width=1119)
                                         predicate:((d_year) IN (2000, 2001, 
2002) and d_date_sk is not null)
                                         TableScan [TS_15] (rows=73049 
width=1119)
                                           
default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query46.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query46.q.out b/ql/src/test/results/clientpositive/perf/query46.q.out
index 2bd87aa..11804c9 100644
--- a/ql/src/test/results/clientpositive/perf/query46.q.out
+++ b/ql/src/test/results/clientpositive/perf/query46.q.out
@@ -83,7 +83,7 @@ Stage-0
                                     <-Reducer 4 [SIMPLE_EDGE]
                                       SHUFFLE [RS_24]
                                         PartitionCols:_col3
-                                        Merge Join Operator [MERGEJOIN_87] 
(rows=24305 width=1119)
+                                        Merge Join Operator [MERGEJOIN_87] 
(rows=24306 width=1119)
                                           
Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"]
                                         <-Map 12 [SIMPLE_EDGE]
                                           SHUFFLE [RS_22]
@@ -97,7 +97,7 @@ Stage-0
                                         <-Reducer 3 [SIMPLE_EDGE]
                                           SHUFFLE [RS_21]
                                             PartitionCols:_col2
-                                            Merge Join Operator [MERGEJOIN_86] 
(rows=22096 width=1119)
+                                            Merge Join Operator [MERGEJOIN_86] 
(rows=22097 width=1119)
                                               
Conds:RS_18._col4=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"]
                                             <-Map 11 [SIMPLE_EDGE]
                                               SHUFFLE [RS_19]
@@ -111,7 +111,7 @@ Stage-0
                                             <-Reducer 2 [SIMPLE_EDGE]
                                               SHUFFLE [RS_18]
                                                 PartitionCols:_col4
-                                                Merge Join Operator 
[MERGEJOIN_85] (rows=20088 width=1119)
+                                                Merge Join Operator 
[MERGEJOIN_85] (rows=20089 width=1119)
                                                   
Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
                                                 <-Map 1 [SIMPLE_EDGE]
                                                   SHUFFLE [RS_15]
@@ -125,9 +125,9 @@ Stage-0
                                                 <-Map 10 [SIMPLE_EDGE]
                                                   SHUFFLE [RS_16]
                                                     PartitionCols:_col0
-                                                    Select Operator [SEL_5] 
(rows=18262 width=1119)
+                                                    Select Operator [SEL_5] 
(rows=18263 width=1119)
                                                       Output:["_col0"]
-                                                      Filter Operator [FIL_79] 
(rows=18262 width=1119)
+                                                      Filter Operator [FIL_79] 
(rows=18263 width=1119)
                                                         predicate:((d_dow) IN 
(6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null)
                                                         TableScan [TS_3] 
(rows=73049 width=1119)
                                                           
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"]

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query89.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query89.q.out 
b/ql/src/test/results/clientpositive/perf/query89.q.out
index 75f7385..de91d9b 100644
--- a/ql/src/test/results/clientpositive/perf/query89.q.out
+++ b/ql/src/test/results/clientpositive/perf/query89.q.out
@@ -117,9 +117,9 @@ Stage-0
                                         <-Map 9 [SIMPLE_EDGE]
                                           SHUFFLE [RS_16]
                                             PartitionCols:_col0
-                                            Select Operator [SEL_8] 
(rows=36524 width=1119)
+                                            Select Operator [SEL_8] 
(rows=36525 width=1119)
                                               Output:["_col0","_col2"]
-                                              Filter Operator [FIL_49] 
(rows=36524 width=1119)
+                                              Filter Operator [FIL_49] 
(rows=36525 width=1119)
                                                 predicate:((d_year) IN (2000) 
and d_date_sk is not null)
                                                 TableScan [TS_6] (rows=73049 
width=1119)
                                                   
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out 
b/ql/src/test/results/clientpositive/pointlookup.q.out
index 460cc74..78dd7bc 100644
--- a/ql/src/test/results/clientpositive/pointlookup.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup.q.out
@@ -111,14 +111,14 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (struct(key,value)) IN (const struct('0','8'), const 
struct('1','5'), const struct('2','6'), const struct('3','8'), const 
struct('4','1'), const struct('5','6'), const struct('6','1'), const 
struct('7','1'), const struct('8','1'), const struct('9','1'), const 
struct('10','3')) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -177,14 +177,14 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (struct(key,value)) IN (const struct('0','8'), const 
struct('1','5'), const struct('2','6'), const struct('3','8'), const 
struct('4','1'), const struct('5','6'), const struct('6','1'), const 
struct('7','1'), const struct('8','1'), const struct('9','1'), const 
struct('10','3')) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out 
b/ql/src/test/results/clientpositive/pointlookup2.q.out
index 869e4cd..6fc6e7f 100644
--- a/ql/src/test/results/clientpositive/pointlookup2.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -1169,7 +1169,7 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), 
const struct('2000-04-09',2)) (type: boolean)
-            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -1197,7 +1197,7 @@ STAGE PLANS:
               key expressions: _col4 (type: int), _col5 (type: string), _col2 
(type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
               tag: -1
               value expressions: _col0 (type: int), _col1 (type: string), 
_col3 (type: string)
               auto parallelism: false
@@ -1231,13 +1231,13 @@ STAGE PLANS:
         Select Operator
           expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), 
KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), 
KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1590,7 +1590,7 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (struct(_col0,_col3)) IN (const struct(1,'2000-04-08'), 
const struct(2,'2000-04-09')) (type: boolean)
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column 
stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -1618,7 +1618,7 @@ STAGE PLANS:
               key expressions: _col0 (type: int), _col1 (type: string), _col3 
(type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE 
Column stats: NONE
               tag: -1
               value expressions: _col2 (type: string), _col4 (type: int), 
_col5 (type: string)
               auto parallelism: false
@@ -1652,13 +1652,13 @@ STAGE PLANS:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 
(type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), 
VALUE._col1 (type: int), VALUE._col2 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column 
stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out 
b/ql/src/test/results/clientpositive/pointlookup3.q.out
index e98ba76..2b25b39 100644
--- a/ql/src/test/results/clientpositive/pointlookup3.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -1337,7 +1337,7 @@ STAGE PLANS:
           Filter Operator
             isSamplingPred: false
             predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), 
const struct('2000-04-09',2)) (type: boolean)
-            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -1365,7 +1365,7 @@ STAGE PLANS:
               key expressions: _col4 (type: int), _col5 (type: string), _col2 
(type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
               tag: -1
               value expressions: _col0 (type: int), _col1 (type: string), 
_col3 (type: string), _col6 (type: string), _col7 (type: string)
               auto parallelism: false
@@ -1399,13 +1399,13 @@ STAGE PLANS:
         Select Operator
           expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), 
KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), 
KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 
(type: string), VALUE._col4 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7
-          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out 
b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
index 7bb3ff2..982d719 100644
--- 
a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
+++ 
b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
@@ -78,7 +78,7 @@ STAGE PLANS:
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: ((VALUE._col0) IN ('val_100', 'val_200', 
'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean)
-                  Statistics: Num rows: 62 Data size: 658 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 63 Data size: 669 Basic stats: 
COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
                     keys: KEY._col0 (type: string)
@@ -99,7 +99,7 @@ STAGE PLANS:
                             name: default.e1
                 Filter Operator
                   predicate: ((VALUE._col0) IN ('val_400', 'val_500') and 
(KEY._col0) IN (400, 450)) (type: boolean)
-                  Statistics: Num rows: 62 Data size: 658 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 63 Data size: 669 Basic stats: 
COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
                     keys: KEY._col0 (type: string)
@@ -422,7 +422,7 @@ STAGE PLANS:
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: ((VALUE._col0) IN ('val_100', 'val_200', 
'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean)
-                  Statistics: Num rows: 62 Data size: 658 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 63 Data size: 669 Basic stats: 
COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
                     keys: KEY._col0 (type: string)
@@ -443,7 +443,7 @@ STAGE PLANS:
                             name: default.e1
                 Filter Operator
                   predicate: ((VALUE._col0) IN ('val_400', 'val_500') and 
(KEY._col0) IN (400, 450)) (type: boolean)
-                  Statistics: Num rows: 62 Data size: 658 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 63 Data size: 669 Basic stats: 
COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
                     keys: KEY._col0 (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out 
b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
index e129795..71b7ee3 100644
--- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
@@ -206,31 +206,31 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
                     predicate: ((label) IN ('foo', 'bar') and id is not null) 
(type: boolean)
-                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: id (type: int), label (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
                       Select Operator
                         expressions: _col0 (type: int)
                         outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           keys: _col0 (type: int)
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                           Dynamic Partitioning Event Operator
                             Target column: dim_shops_id (int)
                             Target Input: agg
                             Partition key expr: dim_shops_id
-                            Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                            Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                             Target Vertex: Map 1
         Reducer 2 
             Reduce Operator Tree:
@@ -374,16 +374,16 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
                     predicate: ((label) IN ('foo', 'bar') and id is not null) 
(type: boolean)
-                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: id (type: int), label (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
         Reducer 2 
             Reduce Operator Tree:
@@ -735,31 +735,31 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
                     predicate: ((label) IN ('foo', 'bar') and id is not null) 
(type: boolean)
-                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: id (type: int), label (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
                       Select Operator
                         expressions: _col0 (type: int)
                         outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           keys: _col0 (type: int)
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                           Dynamic Partitioning Event Operator
                             Target column: dim_shops_id (int)
                             Target Input: agg
                             Partition key expr: dim_shops_id
-                            Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
+                            Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE
                             Target Vertex: Map 1
         Reducer 2 
             Reduce Operator Tree:

hive git commit: HIVE-13287: Add logic to estimate stats for IN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Reply via email to