[2/2] hive git commit: HIVE-17465 Statistics: Drill-down filters don't reduce row-counts progressively (Vineet Garg, reviewed by Ashutosh Chauhan)

vgarg Mon, 18 Sep 2017 13:21:21 -0700

HIVE-17465 Statistics: Drill-down filters don't reduce row-counts progressively (Vineet Garg, reviewed by Ashutosh Chauhan)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/20b84523
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/20b84523
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/20b84523

Branch: refs/heads/master
Commit: 20b84523d7ad277bd711d4dbe081cc5d2315a9dd
Parents: 527d13b
Author: Vineet Garg <vg...@apache.com>
Authored: Mon Sep 18 13:20:31 2017 -0700
Committer: Vineet Garg <vg...@apache.com>
Committed: Mon Sep 18 13:20:31 2017 -0700

----------------------------------------------------------------------
 data/files/filterCard.txt                       | 101 +++++++++++++++++++
 .../stats/annotation/StatsRulesProcFactory.java |  89 ++++++++--------
 .../annotate_stats_deep_filters.q.out           |   4 +-
 .../annotate_stats_join_pkfk.q.out              |  48 ++++-----
 .../clientpositive/llap/explainuser_1.q.out     |  54 +++++-----
 .../llap/vector_decimal_cast.q.out              |   4 +-
 .../clientpositive/llap/vector_if_expr.q.out    |  10 +-
 .../clientpositive/llap/vectorization_0.q.out   |  12 +--
 .../clientpositive/llap/vectorization_10.q.out  |   6 +-
 .../clientpositive/llap/vectorization_17.q.out  |  10 +-
 .../clientpositive/llap/vectorization_7.q.out   |  16 +--
 .../llap/vectorization_short_regress.q.out      |   8 +-
 .../llap/vectorized_nested_mapjoin.q.out        |  12 +--
 .../results/clientpositive/perf/query23.q.out   |   2 +-
 .../clientpositive/spark/vectorization_10.q.out |   6 +-
 .../clientpositive/spark/vectorization_12.q.out |  18 ++--
 .../clientpositive/spark/vectorization_17.q.out |  10 +-
 .../clientpositive/vectorization_10.q.out       |   6 +-
 .../clientpositive/vectorization_12.q.out       |  18 ++--
 .../clientpositive/vectorization_17.q.out       |  10 +-
 .../clientpositive/vectorization_7.q.out        |  16 +--
 21 files changed, 280 insertions(+), 180 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/data/files/filterCard.txt
----------------------------------------------------------------------
diff --git a/data/files/filterCard.txt b/data/files/filterCard.txt
new file mode 100644
index 0000000..6246bfb
--- /dev/null
+++ b/data/files/filterCard.txt
@@ -0,0 +1,101 @@
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+10,10,10
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20
+20,20,20

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 458e8b3..a4f60ac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -282,7 +282,7 @@ public class StatsRulesProcFactory {
 
           // evaluate filter expression and update statistics
           long newNumRows = evaluateExpression(parentStats, pred, aspCtx,
-              neededCols, fop, 0);
+              neededCols, fop, parentStats.getNumRows());
           Statistics st = parentStats.clone();
 
           if (satisfyPrecondition(parentStats)) {
@@ -320,13 +320,13 @@ public class StatsRulesProcFactory {
 
     protected long evaluateExpression(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        Operator<?> op, long evaluatedRowCount) throws 
CloneNotSupportedException, SemanticException {
+        Operator<?> op, long currNumRows) throws CloneNotSupportedException, 
SemanticException {
       long newNumRows = 0;
       Statistics andStats = null;
 
-      if (stats.getNumRows() <= 1 || stats.getDataSize() <= 0) {
+      if (currNumRows <= 1 || stats.getDataSize() <= 0) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Estimating row count for " + pred + " Original num rows: 
" + stats.getNumRows() +
+          LOG.debug("Estimating row count for " + pred + " Original num rows: 
" + currNumRows +
               " Original data size: " + stats.getDataSize() + " New num rows: 
1");
         }
         return 1;
@@ -342,41 +342,40 @@ public class StatsRulesProcFactory {
           aspCtx.setAndExprStats(andStats);
 
           // evaluate children
+          long evaluatedRowCount = currNumRows;
           for (ExprNodeDesc child : genFunc.getChildren()) {
-            newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child,
+            evaluatedRowCount = evaluateChildExpr(aspCtx.getAndExprStats(), 
child,
                 aspCtx, neededCols, op, evaluatedRowCount);
-            if (satisfyPrecondition(aspCtx.getAndExprStats())) {
-              updateStats(aspCtx.getAndExprStats(), newNumRows, true, op);
-            } else {
-              updateStats(aspCtx.getAndExprStats(), newNumRows, false, op);
-            }
+          }
+          newNumRows = evaluatedRowCount;
+          if (satisfyPrecondition(aspCtx.getAndExprStats())) {
+            updateStats(aspCtx.getAndExprStats(), newNumRows, true, op);
+          } else {
+            updateStats(aspCtx.getAndExprStats(), newNumRows, false, op);
           }
         } else if (udf instanceof GenericUDFOPOr) {
           // for OR condition independently compute and update stats.
           for (ExprNodeDesc child : genFunc.getChildren()) {
-            // early exit if OR evaluation yields more rows than input rows
-            if (evaluatedRowCount >= stats.getNumRows()) {
-              evaluatedRowCount = stats.getNumRows();
-            } else {
               newNumRows = StatsUtils.safeAdd(
-                  evaluateChildExpr(stats, child, aspCtx, neededCols, op, 
evaluatedRowCount),
+                  evaluateChildExpr(stats, child, aspCtx, neededCols, op, 
currNumRows),
                   newNumRows);
-              evaluatedRowCount = newNumRows;
-            }
+          }
+          if(newNumRows > currNumRows) {
+            newNumRows = currNumRows;
           }
         } else if (udf instanceof GenericUDFIn) {
           // for IN clause
-          newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, op);
+          newNumRows = evaluateInExpr(stats, pred, currNumRows, aspCtx, 
neededCols, op);
         } else if (udf instanceof GenericUDFBetween) {
           // for BETWEEN clause
-          newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, 
op);
+          newNumRows = evaluateBetweenExpr(stats, pred, currNumRows, aspCtx, 
neededCols, op);
         } else if (udf instanceof GenericUDFOPNot) {
-          newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, op);
+          newNumRows = evaluateNotExpr(stats, pred, currNumRows, aspCtx, 
neededCols, op);
         } else if (udf instanceof GenericUDFOPNotNull) {
-          return evaluateNotNullExpr(stats, genFunc);
+          return evaluateNotNullExpr(stats, genFunc, currNumRows);
         } else {
           // single predicate condition
-          newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op, 
evaluatedRowCount);
+          newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, 
op,currNumRows);
         }
       } else if (pred instanceof ExprNodeColumnDesc) {
 
@@ -415,10 +414,10 @@ public class StatsRulesProcFactory {
       return newNumRows;
     }
 
-    private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, 
AnnotateStatsProcCtx aspCtx,
+    private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, long 
currNumRows, AnnotateStatsProcCtx aspCtx,
             List<String> neededCols, Operator<?> op) throws SemanticException {
 
-      long numRows = stats.getNumRows();
+      long numRows = currNumRows;
 
       ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
 
@@ -505,7 +504,7 @@ public class StatsRulesProcFactory {
       return Math.round( (double) numRows * factor * inFactor);
     }
 
-    private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, 
AnnotateStatsProcCtx aspCtx,
+    private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long 
currNumRows, AnnotateStatsProcCtx aspCtx,
             List<String> neededCols, Operator<?> op) throws SemanticException, 
CloneNotSupportedException {
       final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
       final boolean invert = Boolean.TRUE.equals(
@@ -517,7 +516,7 @@ public class StatsRulesProcFactory {
       // Short circuit and return the current number of rows if this is a
       // synthetic predicate with dynamic values
       if (leftExpression instanceof ExprNodeDynamicValueDesc) {
-        return stats.getNumRows();
+        return currNumRows;
       }
 
       // We transform the BETWEEN clause to AND clause (with NOT on top in 
invert is true).
@@ -534,14 +533,14 @@ public class StatsRulesProcFactory {
           new GenericUDFOPNot(), Lists.newArrayList(newExpression));
       }
 
-      return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, 
0);
+      return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, 
currNumRows);
     }
 
-    private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
+    private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, long 
currNumRows,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator<?> op)
         throws CloneNotSupportedException, SemanticException {
 
-      long numRows = stats.getNumRows();
+      long numRows = currNumRows;
 
       // if the evaluate yields true then pass all rows else pass 0 rows
       if (pred instanceof ExprNodeGenericFuncDesc) {
@@ -553,7 +552,7 @@ public class StatsRulesProcFactory {
             long newNumRows = 0;
             for (ExprNodeDesc child : genFunc.getChildren()) {
               newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols,
-                  op, 0);
+                  op, numRows);
             }
             return numRows - newNumRows;
           } else if (leaf instanceof ExprNodeConstantDesc) {
@@ -585,9 +584,9 @@ public class StatsRulesProcFactory {
       return numRows / 2;
     }
 
-    private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc 
pred) {
+    private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc 
pred, long currNumRows) {
 
-      long numRows = stats.getNumRows();
+      long numRows = currNumRows;
 
       if (pred instanceof ExprNodeGenericFuncDesc) {
 
@@ -609,9 +608,9 @@ public class StatsRulesProcFactory {
       return numRows / 2;
     }
 
-    private long evaluateNotNullExpr(Statistics parentStats, 
ExprNodeGenericFuncDesc pred) {
+    private long evaluateNotNullExpr(Statistics parentStats, 
ExprNodeGenericFuncDesc pred, long currNumRows) {
       long noOfNulls = getMaxNulls(parentStats, pred);
-      long parentCardinality = parentStats.getNumRows();
+      long parentCardinality = currNumRows;
       long newPredCardinality = parentCardinality;
 
       if (parentCardinality > noOfNulls) {
@@ -661,8 +660,8 @@ public class StatsRulesProcFactory {
       return maxNoNulls;
     }
 
-    private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc 
genFunc) {
-      long numRows = stats.getNumRows();
+    private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc 
genFunc, long currNumRows) {
+      long numRows = currNumRows;
       GenericUDF udf = genFunc.getGenericUDF();
 
       ExprNodeColumnDesc columnDesc;
@@ -838,9 +837,9 @@ public class StatsRulesProcFactory {
 
     private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        Operator<?> op, long evaluatedRowCount) throws 
CloneNotSupportedException, SemanticException {
+        Operator<?> op, long currNumRows) throws CloneNotSupportedException, 
SemanticException {
 
-      long numRows = stats.getNumRows();
+      long numRows = currNumRows;
 
       if (child instanceof ExprNodeGenericFuncDesc) {
 
@@ -917,15 +916,15 @@ public class StatsRulesProcFactory {
                 || udf instanceof GenericUDFOPEqualOrLessThan
                 || udf instanceof GenericUDFOPGreaterThan
                 || udf instanceof GenericUDFOPLessThan) {
-          return evaluateComparator(stats, genFunc);
+          return evaluateComparator(stats, genFunc, numRows);
         } else if (udf instanceof GenericUDFOPNotNull) {
-          return evaluateNotNullExpr(stats, genFunc);
+          return evaluateNotNullExpr(stats, genFunc, numRows);
         } else if (udf instanceof GenericUDFOPNull) {
-          return evaluateColEqualsNullExpr(stats, genFunc);
+          return evaluateColEqualsNullExpr(stats, genFunc, numRows);
         } else if (udf instanceof GenericUDFOPAnd || udf instanceof 
GenericUDFOPOr
                 || udf instanceof GenericUDFIn || udf instanceof 
GenericUDFBetween
                 || udf instanceof GenericUDFOPNot) {
-          return evaluateExpression(stats, genFunc, aspCtx, neededCols, op, 
evaluatedRowCount);
+          return evaluateExpression(stats, genFunc, aspCtx, neededCols, op, 
numRows);
         } else if (udf instanceof GenericUDFInBloomFilter) {
           if (genFunc.getChildren().get(1) instanceof 
ExprNodeDynamicValueDesc) {
             // Synthetic predicates from semijoin opt should not affect stats.
@@ -936,7 +935,7 @@ public class StatsRulesProcFactory {
         if (Boolean.FALSE.equals(((ExprNodeConstantDesc) child).getValue())) {
           return 0;
         } else {
-          return stats.getNumRows();
+          return numRows;
         }
       }
 
@@ -1584,7 +1583,7 @@ public class StatsRulesProcFactory {
           // evaluate filter expression and update statistics
           try {
             newNumRows = evaluateExpression(stats, pred,
-                aspCtx, jop.getSchema().getColumnNames(), jop, 0);
+                aspCtx, jop.getSchema().getColumnNames(), jop, 
stats.getNumRows());
           } catch (CloneNotSupportedException e) {
             throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
           }
@@ -1673,7 +1672,7 @@ public class StatsRulesProcFactory {
           // evaluate filter expression and update statistics
           try {
             newNumRows = evaluateExpression(wcStats, pred,
-                aspCtx, jop.getSchema().getColumnNames(), jop, 0);
+                aspCtx, jop.getSchema().getColumnNames(), jop, 
wcStats.getNumRows());
           } catch (CloneNotSupportedException e) {
             throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
           }

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out
index 6e2975e..da1d03c 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out
@@ -121,9 +121,9 @@ STAGE PLANS:
             Statistics: Num rows: 2098 Data size: 16744 Basic stats: COMPLETE 
Column stats: COMPLETE
             Filter Operator
               predicate: (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or 
((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or 
((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or 
((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 
13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and 
(si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) 
and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 
52) and (si = 53))) (type: boolean)
-              Statistics: Num rows: 160 Data size: 1280 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
               Select Operator
-                Statistics: Num rows: 160 Data size: 1280 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Group By Operator
                   aggregations: count()
                   mode: hash

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
index e04c1c6..d88819a 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
@@ -577,16 +577,16 @@ STAGE PLANS:
             Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE 
Column stats: COMPLETE
             Filter Operator
               predicate: ((ss_quantity > 10) and ss_store_sk is not null) 
(type: boolean)
-              Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE 
Column stats: COMPLETE
               Select Operator
                 expressions: ss_store_sk (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 321 Data size: 2468 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 297 Data size: 2284 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 321 Data size: 2468 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 297 Data size: 2284 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -595,10 +595,10 @@ STAGE PLANS:
             0 _col0 (type: int)
             1 _col0 (type: int)
           outputColumnNames: _col0
-          Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE 
Column stats: PARTIAL
           File Output Operator
             compressed: false
-            Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE 
Column stats: PARTIAL
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -707,16 +707,16 @@ STAGE PLANS:
             Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE 
Column stats: COMPLETE
             Filter Operator
               predicate: ((ss_quantity > 10) and ss_store_sk is not null) 
(type: boolean)
-              Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE 
Column stats: COMPLETE
               Select Operator
                 expressions: ss_store_sk (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 321 Data size: 2468 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 297 Data size: 2284 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 321 Data size: 2468 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 297 Data size: 2284 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -725,10 +725,10 @@ STAGE PLANS:
             0 _col0 (type: int)
             1 _col0 (type: int)
           outputColumnNames: _col0
-          Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE 
Column stats: COMPLETE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE 
Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1015,16 +1015,16 @@ STAGE PLANS:
             Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE 
Column stats: COMPLETE
             Filter Operator
               predicate: ((ss_quantity > 10) and ss_store_sk is not null) 
(type: boolean)
-              Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE 
Column stats: COMPLETE
               Select Operator
                 expressions: ss_store_sk (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 321 Data size: 2468 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 297 Data size: 2284 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 321 Data size: 2468 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 297 Data size: 2284 Basic stats: 
COMPLETE Column stats: COMPLETE
           TableScan
             alias: s
             Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE
@@ -1065,14 +1065,14 @@ STAGE PLANS:
             1 _col0 (type: int)
             2 _col0 (type: int)
           outputColumnNames: _col2
-          Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: _col2 (type: int)
             outputColumnNames: _col0
-            Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1102,16 +1102,16 @@ STAGE PLANS:
             Statistics: Num rows: 1000 Data size: 7664 Basic stats: COMPLETE 
Column stats: COMPLETE
             Filter Operator
               predicate: (ss_store_sk is not null and ss_addr_sk is not null) 
(type: boolean)
-              Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 914 Data size: 7004 Basic stats: COMPLETE 
Column stats: COMPLETE
               Select Operator
                 expressions: ss_addr_sk (type: int), ss_store_sk (type: int)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 916 Data size: 7020 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 914 Data size: 7004 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col1 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col1 (type: int)
-                  Statistics: Num rows: 916 Data size: 7020 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 914 Data size: 7004 Basic stats: 
COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int)
           TableScan
             alias: s
@@ -1136,7 +1136,7 @@ STAGE PLANS:
             0 _col1 (type: int)
             1 _col0 (type: int)
           outputColumnNames: _col0, _col2
-          Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 914 Data size: 7132 Basic stats: COMPLETE 
Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1152,7 +1152,7 @@ STAGE PLANS:
               key expressions: _col0 (type: int)
               sort order: +
               Map-reduce partition columns: _col0 (type: int)
-              Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 914 Data size: 7132 Basic stats: COMPLETE 
Column stats: COMPLETE
               value expressions: _col2 (type: int)
           TableScan
             alias: ca
@@ -1177,14 +1177,14 @@ STAGE PLANS:
             0 _col0 (type: int)
             1 _col0 (type: int)
           outputColumnNames: _col2
-          Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: _col2 (type: int)
             outputColumnNames: _col0
-            Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 48c02a2..03ebe37 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -388,25 +388,25 @@ Stage-0
                             <-Map 5 [SIMPLE_EDGE] llap
                               SHUFFLE [RS_7]
                                 PartitionCols:_col0, _col1, _col2
-                                Group By Operator [GBY_6] (rows=2 width=101)
+                                Group By Operator [GBY_6] (rows=1 width=101)
                                   
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, 
c_int, c_float
-                                  Filter Operator [FIL_39] (rows=5 width=93)
+                                  Filter Operator [FIL_39] (rows=4 width=93)
                                     predicate:(((c_int + 1) >= 0) and ((c_int 
> 0) or (c_float >= 0)) and key is not null)
                                     TableScan [TS_3] (rows=20 width=88)
                                       
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
                       <-Reducer 8 [SIMPLE_EDGE] llap
                         SHUFFLE [RS_21]
                           PartitionCols:_col0
-                          Select Operator [SEL_17] (rows=2 width=89)
+                          Select Operator [SEL_17] (rows=1 width=89)
                             Output:["_col0","_col1"]
-                            Group By Operator [GBY_16] (rows=2 width=93)
+                            Group By Operator [GBY_16] (rows=1 width=93)
                               Output:["_col0","_col1","_col2"],keys:KEY._col0, 
KEY._col1, KEY._col2
                             <-Map 7 [SIMPLE_EDGE] llap
                               SHUFFLE [RS_15]
                                 PartitionCols:_col0, _col1, _col2
-                                Group By Operator [GBY_14] (rows=2 width=93)
+                                Group By Operator [GBY_14] (rows=1 width=93)
                                   Output:["_col0","_col1","_col2"],keys:key, 
c_int, c_float
-                                  Filter Operator [FIL_40] (rows=5 width=93)
+                                  Filter Operator [FIL_40] (rows=4 width=93)
                                     predicate:(((c_int + 1) >= 0) and ((c_int 
> 0) or (c_float >= 0)) and key is not null)
                                     TableScan [TS_11] (rows=20 width=88)
                                       
default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1088,25 +1088,25 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_14]
-        Select Operator [SEL_13] (rows=40 width=101)
+        Select Operator [SEL_13] (rows=48 width=101)
           Output:["_col0","_col1","_col2","_col3","_col4"]
-          Merge Join Operator [MERGEJOIN_24] (rows=40 width=101)
+          Merge Join Operator [MERGEJOIN_24] (rows=48 width=101)
             
Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"],residual
 filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or (_col6 >= 0))}
           <-Map 1 [SIMPLE_EDGE] llap
             SHUFFLE [RS_9]
               PartitionCols:_col0
-              Select Operator [SEL_2] (rows=9 width=93)
+              Select Operator [SEL_2] (rows=8 width=93)
                 Output:["_col0","_col1","_col2"]
-                Filter Operator [FIL_21] (rows=9 width=93)
+                Filter Operator [FIL_21] (rows=8 width=93)
                   predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 
0)) and key is not null)
                   TableScan [TS_0] (rows=20 width=88)
                     
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
           <-Map 3 [SIMPLE_EDGE] llap
             SHUFFLE [RS_10]
               PartitionCols:_col0
-              Select Operator [SEL_5] (rows=9 width=89)
+              Select Operator [SEL_5] (rows=8 width=89)
                 Output:["_col0","_col1"]
-                Filter Operator [FIL_22] (rows=9 width=93)
+                Filter Operator [FIL_22] (rows=8 width=93)
                   predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 
0)) and key is not null)
                   TableScan [TS_3] (rows=20 width=88)
                     
default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1135,25 +1135,25 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_14]
-        Select Operator [SEL_13] (rows=40 width=101)
+        Select Operator [SEL_13] (rows=48 width=101)
           Output:["_col0","_col1","_col2","_col3","_col4"]
-          Merge Join Operator [MERGEJOIN_24] (rows=40 width=101)
+          Merge Join Operator [MERGEJOIN_24] (rows=48 width=101)
             
Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"],residual
 filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or (_col6 >= 0))}
           <-Map 1 [SIMPLE_EDGE] llap
             SHUFFLE [RS_9]
               PartitionCols:_col0
-              Select Operator [SEL_2] (rows=9 width=93)
+              Select Operator [SEL_2] (rows=8 width=93)
                 Output:["_col0","_col1","_col2"]
-                Filter Operator [FIL_21] (rows=9 width=93)
+                Filter Operator [FIL_21] (rows=8 width=93)
                   predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 
0)) and key is not null)
                   TableScan [TS_0] (rows=20 width=88)
                     
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
           <-Map 3 [SIMPLE_EDGE] llap
             SHUFFLE [RS_10]
               PartitionCols:_col0
-              Select Operator [SEL_5] (rows=9 width=89)
+              Select Operator [SEL_5] (rows=8 width=89)
                 Output:["_col0","_col1"]
-                Filter Operator [FIL_22] (rows=9 width=93)
+                Filter Operator [FIL_22] (rows=8 width=93)
                   predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 
0)) and key is not null)
                   TableScan [TS_3] (rows=20 width=88)
                     
default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1416,16 +1416,16 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_12]
-        Select Operator [SEL_11] (rows=9 width=4)
+        Select Operator [SEL_11] (rows=8 width=4)
           Output:["_col0"]
-          Merge Join Operator [MERGEJOIN_17] (rows=9 width=4)
+          Merge Join Operator [MERGEJOIN_17] (rows=8 width=4)
             Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"]
           <-Map 1 [SIMPLE_EDGE] llap
             SHUFFLE [RS_8]
               PartitionCols:_col0
-              Select Operator [SEL_2] (rows=9 width=93)
+              Select Operator [SEL_2] (rows=8 width=93)
                 Output:["_col0","_col1"]
-                Filter Operator [FIL_15] (rows=9 width=93)
+                Filter Operator [FIL_15] (rows=8 width=93)
                   predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 
0)) and key is not null)
                   TableScan [TS_0] (rows=20 width=88)
                     
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1463,20 +1463,20 @@ Stage-0
           <-Map 1 [SIMPLE_EDGE] llap
             SHUFFLE [RS_13]
               PartitionCols:_col0
-              Select Operator [SEL_2] (rows=9 width=93)
+              Select Operator [SEL_2] (rows=8 width=93)
                 Output:["_col0","_col1","_col2"]
-                Filter Operator [FIL_25] (rows=9 width=93)
+                Filter Operator [FIL_25] (rows=8 width=93)
                   predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 
0)) and key is not null)
                   TableScan [TS_0] (rows=20 width=88)
                     
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
           <-Map 3 [SIMPLE_EDGE] llap
             SHUFFLE [RS_14]
               PartitionCols:_col0
-              Group By Operator [GBY_10] (rows=4 width=85)
+              Group By Operator [GBY_10] (rows=3 width=85)
                 Output:["_col0"],keys:_col0
-                Select Operator [SEL_5] (rows=9 width=85)
+                Select Operator [SEL_5] (rows=8 width=85)
                   Output:["_col0"]
-                  Filter Operator [FIL_26] (rows=9 width=93)
+                  Filter Operator [FIL_26] (rows=8 width=93)
                     predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float 
>= 0)) and key is not null)
                     TableScan [TS_3] (rows=20 width=88)
                       
default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
index c45210e..8fc23c4 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out
@@ -29,7 +29,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 5) -> boolean, SelectColumnIsNotNull(col 2) -> 
boolean, SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 8) 
-> boolean) -> boolean
                     predicate: (cdouble is not null and cint is not null and 
cboolean1 is not null and ctimestamp1 is not null) (type: boolean)
-                    Statistics: Num rows: 5112 Data size: 265564 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 2945 Data size: 152996 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cdouble (type: double), cint (type: int), 
cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS 
decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: 
decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( 
ctimestamp1 AS decimal(15,0)) (type: decimal(15,0))
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7
@@ -38,7 +38,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [5, 2, 10, 8, 12, 13, 14, 15]
                           selectExpressions: CastDoubleToDecimal(col 5) -> 
12:decimal(20,10), CastLongToDecimal(col 2) -> 13:decimal(23,14), 
CastLongToDecimal(col 10) -> 14:decimal(5,2), CastTimestampToDecimal(col 8) -> 
15:decimal(15,0)
-                      Statistics: Num rows: 5112 Data size: 2410700 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 2945 Data size: 1388804 Basic 
stats: COMPLETE Column stats: COMPLETE
                       Limit
                         Number of rows: 10
                         Limit Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out 
b/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
index 32d1001..b1e0b14 100644
--- a/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
@@ -34,7 +34,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsTrue(col 10) -> boolean, SelectColumnIsNotNull(col 10) -> 
boolean) -> boolean
                     predicate: (cboolean1 and cboolean1 is not null) (type: 
boolean)
-                    Statistics: Num rows: 4587 Data size: 13704 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3030 Data size: 9052 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cboolean1 (type: boolean), if(cboolean1, 
'first', 'second') (type: string)
                       outputColumnNames: _col0, _col1
@@ -43,7 +43,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [10, 12]
                           selectExpressions: 
IfExprStringScalarStringScalar(col 10, val first, val second) -> 12:String
-                      Statistics: Num rows: 4587 Data size: 857712 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 3030 Data size: 566572 Basic 
stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: boolean)
                         sort order: +
@@ -51,7 +51,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 4587 Data size: 857712 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 3030 Data size: 566572 Basic 
stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -80,13 +80,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1]
-                Statistics: Num rows: 4587 Data size: 857712 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 3030 Data size: 566572 Basic stats: 
COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 4587 Data size: 857712 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 3030 Data size: 566572 Basic stats: 
COMPLETE Column stats: COMPLETE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index db76eaa..2307843 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -30611,17 +30611,17 @@ STAGE PLANS:
                   Filter Operator
                     isSamplingPred: false
                     predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 
47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean)
-                    Statistics: Num rows: 6 Data size: 1630 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 310 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 6 Data size: 1630 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 310 Basic stats: 
COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
                         GlobalTableId: 0
 #### A masked pattern was here ####
                         NumFilesPerFileSink: 1
-                        Statistics: Num rows: 6 Data size: 1630 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 310 Basic stats: 
COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30841,17 +30841,17 @@ STAGE PLANS:
                   Filter Operator
                     isSamplingPred: false
                     predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 
47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean)
-                    Statistics: Num rows: 27 Data size: 6990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 310 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 27 Data size: 6990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 310 Basic stats: 
COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
                         GlobalTableId: 0
 #### A masked pattern was here ####
                         NumFilesPerFileSink: 1
-                        Statistics: Num rows: 27 Data size: 6990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 310 Basic stats: 
COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out
index f06c2db..1a0e846 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out
@@ -73,7 +73,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: 
FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 
5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, 
FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: 
CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 
6981.0) -> boolean, FilterExprOrExpr(children: 
FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: 
CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, 
FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> 
boolean) -> boolean
                     predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) 
> cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 
6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 
like '%a')))) (type: boolean)
-                    Statistics: Num rows: 5461 Data size: 1107444 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 9557 Data size: 1937820 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cdouble (type: double), ctimestamp1 (type: 
timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 
(type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) 
(type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- 
cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), 
(UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: 
double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: 
bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- 
(- cdouble)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -82,13 +82,13 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 
17, 19, 20, 18, 22, 23, 25]
                           selectExpressions: DoubleColUnaryMinus(col 5) -> 
12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: 
CastLongToDouble(col 1) -> 15:double) -> 16:double, 
DoubleColModuloDoubleScalar(col 17, val 33.0)(children: 
DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 
15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, 
DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 
18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) 
-> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, 
LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, 
DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: 
DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 
23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: 
DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double
-                      Statistics: Num rows: 5461 Data size: 1082056 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9557 Data size: 1893568 Basic 
stats: COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 5461 Data size: 1082056 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 9557 Data size: 1893568 Basic 
stats: COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out
index 1c07962..a66ea36 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out
@@ -76,7 +76,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: 
FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, 
FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 
988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val 
-863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) 
-> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 
0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 
3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 
5)(children: col 4) -> boolean) -> boolean) -> boolean
                     predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or 
(CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or 
(UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: 
boolean)
-                    Statistics: Num rows: 4778 Data size: 640688 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 4096 Data size: 549274 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cfloat (type: float), cstring1 (type: 
string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: 
double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) 
(type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: 
double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: 
double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: 
double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: 
decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -85,7 +85,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 
13, 16, 18, 19, 21, 17]
                           selectExpressions: DoubleColDivideDoubleColumn(col 
4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, 
LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, 
DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 
17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, 
CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, 
DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 
17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: 
DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, 
DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: 
CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), 
DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: 
DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) 
-> 22:double) -> 17:double
-                      Statistics: Num rows: 4778 Data size: 1414848 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 4096 Data size: 1212930 Basic 
stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col5 (type: bigint), _col0 (type: 
float)
                         sort order: ++
@@ -95,7 +95,7 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             valueColumns: [6, 2, 8, 5, 14, 15, 13, 16, 18, 19, 
21, 17]
-                        Statistics: Num rows: 4778 Data size: 1414848 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 4096 Data size: 1212930 Basic 
stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), 
_col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: 
double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: 
double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -136,13 +136,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 
11, 12, 13]
-                Statistics: Num rows: 4778 Data size: 1414848 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 4096 Data size: 1212930 Basic stats: 
COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 4778 Data size: 1414848 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 4096 Data size: 1212930 Basic stats: 
COMPLETE Column stats: COMPLETE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
index 6c32ccf..9e13ea6 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
@@ -82,7 +82,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: 
FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, 
FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 
0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, 
FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, 
FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, 
FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 
5) -> boolean, FilterExprAndExpr(children: 
FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, 
FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> 
boolean) -> boolean) -> boolean
                     predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) 
<= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and 
((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= 
cdouble)))) (type: boolean)
-                    Statistics: Num rows: 7281 Data size: 1789382 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 5461 Data size: 1342196 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cboolean1 (type: boolean), cbigint (type: 
bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 
(type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), 
(UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), 
(- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), 
(cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % 
UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- 
ctinyint) % ctinyint) (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -91,7 +91,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 
15, 16, 18, 19, 17, 20, 22]
                           selectExpressions: LongColAddLongColumn(col 3, col 
3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 
14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 
16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, 
LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, 
LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, 
LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 
0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
-                      Statistics: Num rows: 7281 Data size: 1231410 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 5461 Data size: 923616 Basic 
stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: boolean), _col1 (type: 
bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: 
timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), 
_col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 
(type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: 
tinyint)
                         sort order: +++++++++++++++
@@ -101,7 +101,7 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             valueColumns: []
-                        Statistics: Num rows: 7281 Data size: 1231410 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 5461 Data size: 923616 Basic 
stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -142,7 +142,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 9, 14]
-                Statistics: Num rows: 7281 Data size: 1231410 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 5461 Data size: 923616 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 25
                   Limit Vectorization:
@@ -331,7 +331,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: 
FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, 
FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 
0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, 
FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, 
FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, 
FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 
5) -> boolean, FilterExprAndExpr(children: 
FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, 
FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> 
boolean) -> boolean) -> boolean
                     predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) 
<= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and 
((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and 
(3569.0 >= cdouble)))) (type: boolean)
-                    Statistics: Num rows: 7281 Data size: 1789382 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 5461 Data size: 1342196 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cboolean1 (type: boolean), cbigint (type: 
bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 
(type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), 
(UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), 
(- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), 
(cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % 
UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- 
ctinyint) % ctinyint) (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -340,7 +340,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 
15, 16, 18, 19, 17, 20, 22]
                           selectExpressions: LongColAddLongColumn(col 3, col 
3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 
14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 
16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, 
LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, 
LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, 
LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 
0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
-                      Statistics: Num rows: 7281 Data size: 1231410 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 5461 Data size: 923616 Basic 
stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: boolean), _col1 (type: 
bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: 
timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), 
_col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 
(type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: 
tinyint)
                         sort order: +++++++++++++++
@@ -348,7 +348,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 7281 Data size: 1231410 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 5461 Data size: 923616 Basic 
stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -377,7 +377,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 9, 14]
-                Statistics: Num rows: 7281 Data size: 1231410 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 5461 Data size: 923616 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 25
                   Limit Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 66764cf..fe4a30f 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -622,7 +622,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: 
FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, 
FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, 
FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, 
FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 
3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) 
-> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 
10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, 
FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) 
-> boolean
                     predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) 
or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) 
or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) 
(type: boolean)
-                    Statistics: Num rows: 12288 Data size: 3093170 Basic 
stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 11346 Data size: 2856120 Basic 
stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double)
                       outputColumnNames: ctinyint, csmallint, cint, cbigint, 
cdouble
@@ -630,7 +630,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 3, 5]
-                      Statistics: Num rows: 12288 Data size: 3093170 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 11346 Data size: 2856120 Basic 
stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: var_pop(cbigint), count(), 
max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), 
count(ctinyint), avg(ctinyint)
                         Group By Vectorization:
@@ -3206,7 +3206,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: 
FilterExprOrExpr(children: FilterExprAndExpr(children: 
FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 
1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> 
boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: 
CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, 
FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val 
-257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, 
FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, 
FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, 
FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 
0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> 
boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean
                     predicate: ((((cdouble < UDFToDouble(csmallint)) and 
(cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or 
((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 
regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) 
and cboolean1 is not null) (type: boolean)
-                    Statistics: Num rows: 7845 Data size: 1661020 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 7153 Data size: 1514550 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cboolean1 (type: boolean)
                       outputColumnNames: ctinyint, csmallint, cint, cbigint, 
cfloat, cdouble, cboolean1
@@ -3214,7 +3214,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 3, 4, 5, 10]
-                      Statistics: Num rows: 7845 Data size: 1661020 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 7153 Data size: 1514550 Basic 
stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: max(cfloat), sum(cbigint), 
var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), 
stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint)
                         Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
index 2a95065..eb6eabe 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
                         outputColumnNames: _col2, _col3
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 661228 Data size: 7913928 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 585044 Data size: 7002120 Basic 
stats: COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col2 (type: smallint), _col3 (type: 
double)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 661228 Data size: 7913928 
Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 585044 Data size: 7002120 
Basic stats: COMPLETE Column stats: COMPLETE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -54,7 +54,7 @@ STAGE PLANS:
                             outputColumnNames: _col1
                             input vertices:
                               1 Map 4
-                            Statistics: Num rows: 1452263 Data size: 11604232 
Basic stats: COMPLETE Column stats: COMPLETE
+                            Statistics: Num rows: 1284939 Data size: 10267240 
Basic stats: COMPLETE Column stats: COMPLETE
                             Group By Operator
                               aggregations: sum(_col1)
                               mode: hash
@@ -81,16 +81,16 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 146796 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (ctinyint is not null and csmallint is not 
null) (type: boolean)
-                    Statistics: Num rows: 6848 Data size: 81820 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 6059 Data size: 72396 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cdouble (type: double)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 6848 Data size: 81820 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 6059 Data size: 72396 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 6848 Data size: 81820 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 6059 Data size: 72396 Basic 
stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: smallint), _col2 
(type: double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/perf/query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out 
b/ql/src/test/results/clientpositive/perf/query23.q.out
index dde707d..5925869 100644
--- a/ql/src/test/results/clientpositive/perf/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/query23.q.out
@@ -1,5 +1,5 @@
-Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 25' is a cross product
 Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 30' is a cross product
+Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 25' is a cross product
 PREHOOK: query: explain
 with frequent_ss_items as 
  (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date 
solddate,count(*) cnt

http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/spark/vectorization_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out
index 1f95357..ed0319d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out
@@ -73,7 +73,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: 
FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 
5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, 
FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: 
CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 
6981.0) -> boolean, FilterExprOrExpr(children: 
FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: 
CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, 
FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> 
boolean) -> boolean
                     predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) 
> cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 
6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 
like '%a')))) (type: boolean)
-                    Statistics: Num rows: 5461 Data size: 1174134 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9557 Data size: 2054789 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdouble (type: double), ctimestamp1 (type: 
timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 
(type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) 
(type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- 
cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), 
(UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: 
double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: 
bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- 
(- cdouble)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -82,13 +82,13 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 
17, 19, 20, 18, 22, 23, 25]
                           selectExpressions: DoubleColUnaryMinus(col 5) -> 
12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: 
CastLongToDouble(col 1) -> 15:double) -> 16:double, 
DoubleColModuloDoubleScalar(col 17, val 33.0)(children: 
DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 
15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, 
DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 
18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) 
-> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, 
LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, 
DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: 
DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 
23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: 
DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double
-                      Statistics: Num rows: 5461 Data size: 1174134 Basic 
stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 9557 Data size: 2054789 Basic 
stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 5461 Data size: 1174134 Basic 
stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 9557 Data size: 2054789 Basic 
stats: COMPLETE Column stats: NONE
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

[2/2] hive git commit: HIVE-17465 Statistics: Drill-down filters don't reduce row-counts progressively (Vineet Garg, reviewed by Ashutosh Chauhan)

Reply via email to