HIVE-11405: Add early termination for recursion in StatsRulesProcFactory.evaluateExpression for OR expression (Prasanth Jayachandran reviewed by Gopal V, Gunther Hagleitner)
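
The patch works by threading a running row-count estimate (evaluatedRowCount) through the recursive OR evaluation and bailing out once that estimate reaches the input row count, since an OR predicate can never select more rows than its input. Below is a minimal, self-contained sketch of that pattern; the class and method names here (EarlyExitOrEstimate, estimateOr, the LongUnaryOperator child estimators) are illustrative assumptions for this note, not the actual StatsRulesProcFactory API, which appears in the diff that follows.

// EarlyExitOrEstimate.java -- illustrative sketch only, not the Hive implementation.
import java.util.Collections;
import java.util.List;
import java.util.function.LongUnaryOperator;

public class EarlyExitOrEstimate {

  // Overflow-safe addition, mirroring the intent of StatsUtils.safeAdd.
  static long safeAdd(long a, long b) {
    long sum = a + b;
    return (sum < a) ? Long.MAX_VALUE : sum;
  }

  // Each child estimator receives the rows already accounted for by earlier
  // disjuncts and returns its own estimate. Once the running total reaches the
  // input row count, the remaining children are skipped: an OR can never
  // select more rows than its input.
  static long estimateOr(List<LongUnaryOperator> children, long inputRows) {
    long evaluatedRowCount = 0;
    for (LongUnaryOperator child : children) {
      if (evaluatedRowCount >= inputRows) {
        return inputRows;                       // early termination
      }
      evaluatedRowCount = safeAdd(evaluatedRowCount, child.applyAsLong(evaluatedRowCount));
    }
    return Math.min(evaluatedRowCount, inputRows);
  }

  public static void main(String[] args) {
    // 2098 input rows (as in the new annotate_stats_deep_filters.q test) and
    // 20 disjuncts, each crudely estimated at 150 rows: the loop stops after
    // 14 children instead of walking all 20, and the result is capped at 2098.
    List<LongUnaryOperator> disjuncts =
        Collections.nCopies(20, (LongUnaryOperator) prev -> 150L);
    System.out.println(estimateOr(disjuncts, 2098L)); // prints 2098
  }
}
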
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/96f59144
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/96f59144
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/96f59144

Branch: refs/heads/llap
Commit: 96f5914461e5782a952ec62a3d0bf16037b8cfda
Parents: d3a879a
Author: Prasanth Jayachandran <[email protected]>
Authored: Wed Aug 5 12:43:01 2015 -0700
Committer: Prasanth Jayachandran <[email protected]>
Committed: Wed Aug 5 12:43:01 2015 -0700

----------------------------------------------------------------------
 .../stats/annotation/StatsRulesProcFactory.java |  46 ++--
 .../annotate_stats_deep_filters.q               |  66 +++++
 .../annotate_stats_deep_filters.q.out           | 244 +++++++++++++++++++
 .../test/results/clientpositive/ppd_join2.q.out |  18 +-
 .../test/results/clientpositive/ppd_join3.q.out |  46 ++--
 .../clientpositive/spark/ppd_join2.q.out        |  18 +-
 .../clientpositive/spark/ppd_join3.q.out        |  46 ++--
 .../clientpositive/spark/vectorization_17.q.out |  10 +-
 .../clientpositive/tez/vectorization_17.q.out   |  10 +-
 .../clientpositive/vectorization_17.q.out       |  10 +-
 10 files changed, 415 insertions(+), 99 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 1663b88..6b3e715 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -18,8 +18,13 @@
 package org.apache.hadoop.hive.ql.optimizer.stats.annotation;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.lang.reflect.Field;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Stack;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -72,14 +77,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import java.lang.reflect.Field;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.Stack;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 public class StatsRulesProcFactory {
@@ -259,7 +258,7 @@ public class StatsRulesProcFactory {
       // evaluate filter expression and update statistics
       long newNumRows = evaluateExpression(parentStats, pred, aspCtx,
-          neededCols, fop);
+          neededCols, fop, 0);
       Statistics st = parentStats.clone();
       if (satisfyPrecondition(parentStats)) {
@@ -297,7 +296,7 @@ public class StatsRulesProcFactory {
     private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        FilterOperator fop) throws CloneNotSupportedException {
+        FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
       long newNumRows = 0;
       Statistics andStats = null;
@@ -316,7 +315,7 @@ public class StatsRulesProcFactory {
         // evaluate children
         for (ExprNodeDesc child : genFunc.getChildren()) {
           newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child,
-              aspCtx, neededCols, fop);
+              aspCtx, neededCols, fop, evaluatedRowCount);
           if (satisfyPrecondition(aspCtx.getAndExprStats())) {
             updateStats(aspCtx.getAndExprStats(), newNumRows, true, fop);
           } else {
@@ -324,17 +323,24 @@
           }
         }
       } else if (udf instanceof GenericUDFOPOr) {
-        // for OR condition independently compute and update stats
+        // for OR condition independently compute and update stats.
         for (ExprNodeDesc child : genFunc.getChildren()) {
-          newNumRows = StatsUtils.safeAdd(
-              evaluateChildExpr(stats, child, aspCtx, neededCols, fop), newNumRows);
+          // early exit if OR evaluation yields more rows than input rows
+          if (evaluatedRowCount >= stats.getNumRows()) {
+            evaluatedRowCount = stats.getNumRows();
+          } else {
+            newNumRows = StatsUtils.safeAdd(
+                evaluateChildExpr(stats, child, aspCtx, neededCols, fop, evaluatedRowCount),
+                newNumRows);
+            evaluatedRowCount = newNumRows;
+          }
         }
       } else if (udf instanceof GenericUDFOPNot) {
         newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
       } else {
         // single predicate condition
-        newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop);
+        newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop, evaluatedRowCount);
       }
     } else if (pred instanceof ExprNodeColumnDesc) {
@@ -381,7 +387,7 @@ public class StatsRulesProcFactory {
         long newNumRows = 0;
         for (ExprNodeDesc child : genFunc.getChildren()) {
           newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols,
-              fop);
+              fop, 0);
         }
         return numRows - newNumRows;
       } else if (leaf instanceof ExprNodeConstantDesc) {
@@ -439,7 +445,7 @@ public class StatsRulesProcFactory {
     private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        FilterOperator fop) throws CloneNotSupportedException {
+        FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
       long numRows = stats.getNumRows();
@@ -525,7 +531,7 @@ public class StatsRulesProcFactory {
           return evaluateColEqualsNullExpr(stats, genFunc);
         } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr
             || udf instanceof GenericUDFOPNot) {
-          return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop);
+          return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount);
        }
      }

http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q b/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q
new file mode 100644
index 0000000..c027532
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q
@@ -0,0 +1,66 @@
+create table over1k(
+t tinyint,
+si smallint,
+i int,
+b bigint,
+f float,
+d double,
+bo boolean,
+s string,
+ts timestamp,
+dec decimal(4,2),
+bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+load data local inpath '../../data/files/over1k' overwrite into table over1k;
+load data local inpath '../../data/files/over1k' into table over1k;
+
+analyze table over1k compute statistics;
+analyze table over1k compute statistics for columns;
+
+set hive.stats.fetch.column.stats=true;
+explain select count(*) from over1k
where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)); + +set hive.stats.fetch.column.stats=false; +explain select count(*) from over1k where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out new file mode 100644 index 0000000..788d6c8 --- /dev/null +++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -0,0 +1,244 @@ +PREHOOK: query: create table over1k( +t tinyint, +si smallint, +i int, +b bigint, +f float, +d double, +bo boolean, +s string, +ts timestamp, +dec decimal(4,2), +bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k +POSTHOOK: query: create table over1k( +t tinyint, +si smallint, +i int, +b bigint, +f float, +d double, +bo boolean, +s string, +ts timestamp, +dec decimal(4,2), +bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k +PREHOOK: query: load data local inpath '../../data/files/over1k' overwrite into table over1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k +POSTHOOK: query: load data local inpath '../../data/files/over1k' overwrite into table over1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k +PREHOOK: query: load data local inpath '../../data/files/over1k' into table over1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k +POSTHOOK: query: load data local inpath '../../data/files/over1k' into table over1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k +PREHOOK: query: analyze table over1k compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +PREHOOK: Output: default@over1k +POSTHOOK: query: analyze table over1k compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@over1k +PREHOOK: query: analyze table over1k compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: analyze table over1k compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### +PREHOOK: query: explain select count(*) from over1k where ( +(t=1 and si=2) +or (t=2 and si=3) 
+or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)) +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from over1k where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((t = 1) and (si = 2)) or (((t = 2) and (si = 3)) or (((t = 3) and (si = 4)) or (((t = 4) and (si = 5)) or (((t = 5) and (si = 6)) or (((t = 6) and (si = 7)) or (((t = 7) and (si = 8)) or (((t = 9) and (si = 10)) or (((t = 10) and (si = 11)) or (((t = 11) and (si = 12)) or (((t = 12) and (si = 13)) or (((t = 13) and (si = 14)) or (((t = 14) and (si = 15)) or (((t = 15) and (si = 16)) or (((t = 16) and (si = 17)) or (((t = 17) and (si = 18)) or (((t = 27) and (si = 28)) or (((t = 37) and (si = 38)) or (((t = 47) and (si = 48)) or ((t = 52) and (si = 53))))))))))))))))))))) (type: boolean) + Statistics: Num rows: 280 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 280 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(*) from over1k where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)) +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from over1k where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 
and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((t = 1) and (si = 2)) or (((t = 2) and (si = 3)) or (((t = 3) and (si = 4)) or (((t = 4) and (si = 5)) or (((t = 5) and (si = 6)) or (((t = 6) and (si = 7)) or (((t = 7) and (si = 8)) or (((t = 9) and (si = 10)) or (((t = 10) and (si = 11)) or (((t = 11) and (si = 12)) or (((t = 12) and (si = 13)) or (((t = 13) and (si = 14)) or (((t = 14) and (si = 15)) or (((t = 15) and (si = 16)) or (((t = 16) and (si = 17)) or (((t = 17) and (si = 18)) or (((t = 27) and (si = 28)) or (((t = 37) and (si = 38)) or (((t = 47) and (si = 48)) or ((t = 52) and (si = 53))))))))))))))))))))) (type: boolean) + Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/results/clientpositive/ppd_join2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/ppd_join2.q.out b/ql/src/test/results/clientpositive/ppd_join2.q.out index 27a8ddf..335d995 100644 --- a/ql/src/test/results/clientpositive/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/ppd_join2.q.out @@ -40,25 +40,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic 
stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '311') and (((_col1 <> 'val_50') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 74 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 74 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src @@ -1755,16 +1755,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/results/clientpositive/ppd_join3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out index abc286c..d658cfb 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -40,29 +40,29 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 > 
'0') and (((_col1 <> 'val_500') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '12') and (_col0 <> '4')) (type: boolean) - Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -96,20 +96,20 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -149,7 +149,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator @@ -1817,16 +1817,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1851,14 +1851,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) - Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1889,7 +1889,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator @@ -1899,14 +1899,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col3 - Statistics: Num rows: 44 Data size: 475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 44 Data size: 475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 44 Data size: 475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/results/clientpositive/spark/ppd_join2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out index a98ab13..6fed2dc 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -72,25 +72,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) 
(type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '311') and (((_col1 <> 'val_50') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '305') and (_col0 <> '14')) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 74 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 74 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Map 5 Map Operator Tree: @@ -1777,16 +1777,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((((((key <> '302') and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key < '400')) and (key <> '305')) and (key <> '14')) and key is not null) and value is not null) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Map 5 Map Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/results/clientpositive/spark/ppd_join3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out index 9b5e0af..661d9d1 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -71,29 +71,29 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num 
rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 > '0') and (((_col1 <> 'val_500') or (_col0 > '1')) and (_col0 < '400'))) (type: boolean) - Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '12') and (_col0 <> '4')) (type: boolean) - Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -151,25 +151,25 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 <> '13') and (_col0 <> '1')) (type: boolean) - Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Stage: Stage-0 @@ -1839,16 +1839,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((((((((key <> '11') and (key > '0')) and ((value <> 'val_500') or (key > '1'))) 
and (key < '400')) and (key <> '12')) and (key <> '4')) and key is not null) and (key <> '13')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 37 Data size: 393 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -1876,14 +1876,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col3 - Statistics: Num rows: 44 Data size: 475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 44 Data size: 475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 44 Data size: 475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1897,19 +1897,19 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 > '10') or (_col0 <> '10')) (type: boolean) - Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 40 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Stage: Stage-0 http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/results/clientpositive/spark/vectorization_17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out index 472d98f..ea5b0da 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -68,15 +68,15 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator 
predicate: (((cbigint > -23) and ((cdouble <> 988888.0) or (UDFToDouble(cint) > -863.257))) and ((ctinyint >= 33) or ((UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)))) (type: boolean) - Statistics: Num rows: 6370 Data size: 195556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % UDFToDouble(cbigint)) (type: double), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6370 Data size: 195556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ - Statistics: Num rows: 6370 Data size: 195556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double) Execution mode: vectorized Reducer 2 @@ -84,10 +84,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6370 Data size: 195556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6370 Data size: 195556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/results/clientpositive/tez/vectorization_17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vectorization_17.q.out b/ql/src/test/results/clientpositive/tez/vectorization_17.q.out index 50f7647..b5c71a4 100644 --- a/ql/src/test/results/clientpositive/tez/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_17.q.out @@ -68,15 +68,15 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column 
stats: NONE Filter Operator predicate: (((cbigint > -23) and ((cdouble <> 988888.0) or (UDFToDouble(cint) > -863.257))) and ((ctinyint >= 33) or ((UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)))) (type: boolean) - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % UDFToDouble(cbigint)) (type: double), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double) Execution mode: vectorized Reducer 2 @@ -84,10 +84,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/96f59144/ql/src/test/results/clientpositive/vectorization_17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vectorization_17.q.out b/ql/src/test/results/clientpositive/vectorization_17.q.out index b2fe45e..ece918c 100644 --- a/ql/src/test/results/clientpositive/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/vectorization_17.q.out @@ -63,25 +63,25 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE Filter Operator predicate: (((cbigint > -23) and ((cdouble <> 988888.0) or (UDFToDouble(cint) > -863.257))) and ((ctinyint >= 33) or ((UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)))) (type: boolean) - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % UDFToDouble(cbigint)) (type: double), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double) Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6370 Data size: 1369572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
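
A note on the golden-file churn above: every updated plan moves in the same direction, with Filter Operator row estimates shrinking (for example 56 -> 42 in ppd_join2.q.out and 6370 -> 4778 in vectorization_17.q.out), which is the expected side effect of capping the summed OR estimate at the input row count. Assuming the standard Hive qtest workflow of this period (the invocation below comes from the general developer documentation, not from this commit), such .q.out files are typically regenerated with something like mvn test -Dtest=TestCliDriver -Dqfile=annotate_stats_deep_filters.q -Dtest.output.overwrite=true run from itests/qtest, substituting the spark or tez CLI driver for the spark/ and tez/ result variants.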
