http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index 82d9600..79a627c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -19,10 +19,10 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; import java.util.EnumSet; +import java.util.HashSet; import java.util.List; import java.util.Set; -import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; @@ -30,7 +30,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.RelFactories.FilterFactory; import org.apache.calcite.rel.core.TableScan; -import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; @@ -40,6 +39,7 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import com.google.common.collect.ImmutableList; @@ -47,31 +47,24 @@ import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; - public class HivePreFilteringRule extends RelOptRule { - protected static final Logger LOG = LoggerFactory - .getLogger(HivePreFilteringRule.class.getName()); - - - public static final HivePreFilteringRule INSTANCE = - new HivePreFilteringRule(); + protected static final Logger LOG = LoggerFactory + .getLogger(HivePreFilteringRule.class + .getName()); - private final FilterFactory filterFactory; + public static final HivePreFilteringRule INSTANCE = new HivePreFilteringRule(); + private final FilterFactory filterFactory; - private static final Set<SqlKind> COMPARISON = EnumSet.of( - SqlKind.EQUALS, - SqlKind.GREATER_THAN_OR_EQUAL, - SqlKind.LESS_THAN_OR_EQUAL, - SqlKind.GREATER_THAN, - SqlKind.LESS_THAN, - SqlKind.NOT_EQUALS); - + private static final Set<SqlKind> COMPARISON = EnumSet.of(SqlKind.EQUALS, + SqlKind.GREATER_THAN_OR_EQUAL, + SqlKind.LESS_THAN_OR_EQUAL, + SqlKind.GREATER_THAN, SqlKind.LESS_THAN, + SqlKind.NOT_EQUALS); private HivePreFilteringRule() { - super(operand(Filter.class, - operand(RelNode.class, any()))); + super(operand(Filter.class, operand(RelNode.class, any()))); this.filterFactory = HiveFilter.DEFAULT_FILTER_FACTORY; } @@ -86,8 +79,7 @@ public class HivePreFilteringRule extends RelOptRule { return false; } - HiveRulesRegistry registry = call.getPlanner(). 
- getContext().unwrap(HiveRulesRegistry.class); + HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); // If this operator has been visited already by the rule, // we do not need to apply the optimization @@ -103,69 +95,95 @@ public class HivePreFilteringRule extends RelOptRule { final Filter filter = call.rel(0); // 0. Register that we have visited this operator in this rule - HiveRulesRegistry registry = call.getPlanner(). - getContext().unwrap(HiveRulesRegistry.class); + HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); if (registry != null) { registry.registerVisited(this, filter); } final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); - final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + // 1. Recompose filter possibly by pulling out common elements from DNF + // expressions + RexNode topFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + + // 2. We extract possible candidates to be pushed down + List<RexNode> operandsToPushDown = new ArrayList<>(); + List<RexNode> deterministicExprs = new ArrayList<>(); + List<RexNode> nonDeterministicExprs = new ArrayList<>(); + + switch (topFilterCondition.getKind()) { + case AND: + ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) topFilterCondition) + .getOperands()); + Set<String> operandsToPushDownDigest = new HashSet<String>(); + List<RexNode> extractedCommonOperands = null; + + for (RexNode operand : operands) { + if (operand.getKind() == SqlKind.OR) { + extractedCommonOperands = extractCommonOperands(rexBuilder, operand); + for (RexNode extractedExpr : extractedCommonOperands) { + if (operandsToPushDownDigest.add(extractedExpr.toString())) { + operandsToPushDown.add(extractedExpr); + } + } + } - // 1. We extract possible candidates to be pushed down - List<RexNode> commonOperands = new ArrayList<>(); - switch (condition.getKind()) { - case AND: - ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) condition).getOperands()); - for (RexNode operand: operands) { - if (operand.getKind() == SqlKind.OR) { - commonOperands.addAll(extractCommonOperands(rexBuilder,operand)); + // TODO: Make expr traversal recursive. Extend to traverse inside + // elements of DNF/CNF & extract more deterministic pieces out. + if (HiveCalciteUtil.isDeterministic(operand)) { + deterministicExprs.add(operand); + } else { + nonDeterministicExprs.add(operand); + } + } + + // Pull out Deterministic exprs from non-deterministic and push down + // deterministic expressions as a separate filter + // NOTE: Hive by convention doesn't pushdown non deterministic expressions + if (nonDeterministicExprs.size() > 0) { + for (RexNode expr : deterministicExprs) { + if (!operandsToPushDownDigest.contains(expr.toString())) { + operandsToPushDown.add(expr); + operandsToPushDownDigest.add(expr.toString()); } } - break; - case OR: - commonOperands = extractCommonOperands(rexBuilder,condition); - break; - default: - return; + + topFilterCondition = RexUtil.pullFactors(rexBuilder, + RexUtil.composeConjunction(rexBuilder, nonDeterministicExprs, false)); + } + + break; + + case OR: + operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition); + break; + default: + return; } // 2. If we did not generate anything for the new predicate, we bail out - if (commonOperands.isEmpty()) { + if (operandsToPushDown.isEmpty()) { return; } // 3. 
If the new conjuncts are already present in the plan, we bail out - final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter.getInput()); - final List<RexNode> newConjuncts = new ArrayList<>(); - for (RexNode commonOperand : commonOperands) { - boolean found = false; - for (RexNode conjunct : predicates.pulledUpPredicates) { - if (commonOperand.toString().equals(conjunct.toString())) { - found = true; - break; - } - } - if (!found) { - newConjuncts.add(commonOperand); - } - } + final List<RexNode> newConjuncts = HiveCalciteUtil.getPredsNotPushedAlready(filter.getInput(), + operandsToPushDown); if (newConjuncts.isEmpty()) { return; } // 4. Otherwise, we create a new condition - final RexNode newCondition = RexUtil.pullFactors(rexBuilder, - RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); + final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder, + RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); // 5. We create the new filter that might be pushed down - RelNode newFilter = filterFactory.createFilter(filter.getInput(), newCondition); - RelNode newTopFilter = filterFactory.createFilter(newFilter, condition); + RelNode newChildFilter = filterFactory.createFilter(filter.getInput(), newChildFilterCondition); + RelNode newTopFilter = filterFactory.createFilter(newChildFilter, topFilterCondition); // 6. We register both so we do not fire the rule on them again if (registry != null) { - registry.registerVisited(this, newFilter); + registry.registerVisited(this, newChildFilter); registry.registerVisited(this, newTopFilter); } @@ -175,13 +193,15 @@ public class HivePreFilteringRule extends RelOptRule { private static List<RexNode> extractCommonOperands(RexBuilder rexBuilder, RexNode condition) { assert condition.getKind() == SqlKind.OR; - Multimap<String,RexNode> reductionCondition = LinkedHashMultimap.create(); + Multimap<String, RexNode> reductionCondition = LinkedHashMultimap.create(); - // Data structure to control whether a certain reference is present in every operand + // Data structure to control whether a certain reference is present in every + // operand Set<String> refsInAllOperands = null; - // 1. We extract the information necessary to create the predicate for the new - // filter; currently we support comparison functions, in and between + // 1. 
We extract the information necessary to create the predicate for the + // new + // filter; currently we support comparison functions, in and between ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands()); for (int i = 0; i < operands.size(); i++) { final RexNode operand = operands.get(i); @@ -190,27 +210,27 @@ public class HivePreFilteringRule extends RelOptRule { final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF); Set<String> refsInCurrentOperand = Sets.newHashSet(); - for (RexNode conjunction: conjunctions) { + for (RexNode conjunction : conjunctions) { // We do not know what it is, we bail out for safety - if (!(conjunction instanceof RexCall)) { + if (!(conjunction instanceof RexCall) || !HiveCalciteUtil.isDeterministic(conjunction)) { return new ArrayList<>(); } RexCall conjCall = (RexCall) conjunction; RexNode ref = null; - if(COMPARISON.contains(conjCall.getOperator().getKind())) { - if (conjCall.operands.get(0) instanceof RexInputRef && - conjCall.operands.get(1) instanceof RexLiteral) { + if (COMPARISON.contains(conjCall.getOperator().getKind())) { + if (conjCall.operands.get(0) instanceof RexInputRef + && conjCall.operands.get(1) instanceof RexLiteral) { ref = conjCall.operands.get(0); - } else if (conjCall.operands.get(1) instanceof RexInputRef && - conjCall.operands.get(0) instanceof RexLiteral) { + } else if (conjCall.operands.get(1) instanceof RexInputRef + && conjCall.operands.get(0) instanceof RexLiteral) { ref = conjCall.operands.get(1); } else { // We do not know what it is, we bail out for safety return new ArrayList<>(); } - } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) { + } else if (conjCall.getOperator().getKind().equals(SqlKind.IN)) { ref = conjCall.operands.get(0); - } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { + } else if (conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { ref = conjCall.operands.get(1); } else { // We do not know what it is, we bail out for safety @@ -228,7 +248,8 @@ public class HivePreFilteringRule extends RelOptRule { } else { refsInAllOperands = Sets.intersection(refsInAllOperands, refsInCurrentOperand); } - // If we did not add any factor or there are no common factors, we can bail out + // If we did not add any factor or there are no common factors, we can + // bail out if (refsInAllOperands.isEmpty()) { return new ArrayList<>(); } @@ -237,7 +258,8 @@ public class HivePreFilteringRule extends RelOptRule { // 2. We gather the common factors and return them List<RexNode> commonOperands = new ArrayList<>(); for (String ref : refsInAllOperands) { - commonOperands.add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false)); + commonOperands + .add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false)); } return commonOperands; }
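For readers skimming the patch, the transformation HivePreFilteringRule aims for can be shown on a toy model: factor out the conjuncts that occur in every branch of a disjunction so they can be applied as a separate child filter below the original condition (e.g. "((R1.x=R2.x) and R1.y<10) or ((R1.x=R2.x) and R1.z=10)" yields the pushable factor "R1.x=R2.x", the example also cited in the CalcitePlanner comments below). The sketch that follows only illustrates that idea on plain strings, not the rule itself; the class and method names are invented.

    import java.util.ArrayList;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Set;

    public class PreFilteringSketch {
      // Each inner list holds the conjuncts of one OR branch; the result is the set of
      // conjuncts shared by every branch, i.e. the candidates for a new child filter.
      static List<String> extractCommonOperands(List<List<String>> orBranches) {
        if (orBranches.isEmpty()) {
          return new ArrayList<>();
        }
        Set<String> common = new LinkedHashSet<>(orBranches.get(0));
        for (int i = 1; i < orBranches.size(); i++) {
          common.retainAll(orBranches.get(i));
          if (common.isEmpty()) {
            break; // nothing is shared by all branches, so nothing can be pre-filtered
          }
        }
        return new ArrayList<>(common);
      }

      public static void main(String[] args) {
        // where ((R1.x=R2.x) and R1.y<10) or ((R1.x=R2.x) and R1.z=10)
        List<List<String>> dnf = List.of(
            List.of("R1.x=R2.x", "R1.y<10"),
            List.of("R1.x=R2.x", "R1.z=10"));
        System.out.println(extractCommonOperands(dnf)); // prints [R1.x=R2.x]
      }
    }

The real rule additionally keeps a digest set to avoid duplicate pushed predicates and, when non-deterministic conjuncts are present, pushes only the deterministic ones down while leaving the full condition in the top filter.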
http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index b52779c..c04060f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -17,6 +17,10 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor; import org.apache.calcite.rel.RelNode; @@ -31,8 +35,10 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.plan.ColStatistics; public class FilterSelectivityEstimator extends RexVisitorImpl<Double> { private final RelNode childRel; @@ -81,6 +87,21 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> { break; } + case IS_NOT_NULL: { + if (childRel instanceof HiveTableScan) { + double noOfNulls = getMaxNulls(call, (HiveTableScan) childRel); + double totalNoOfTuples = childRel.getRows(); + if (totalNoOfTuples >= noOfNulls) { + selectivity = (totalNoOfTuples - noOfNulls) / Math.max(totalNoOfTuples, 1); + } else { + throw new RuntimeException("Invalid Stats number of null > no of tuples"); + } + } else { + selectivity = computeNotEqualitySelectivity(call); + } + break; + } + case LESS_THAN_OR_EQUAL: case GREATER_THAN_OR_EQUAL: case LESS_THAN: @@ -199,6 +220,33 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> { return selectivity; } + /** + * Given a RexCall & TableScan find max no of nulls. Currently it picks the + * col with max no of nulls. 
+ * + * TODO: improve this + * + * @param call + * @param t + * @return + */ + private long getMaxNulls(RexCall call, HiveTableScan t) { + long tmpNoNulls = 0; + long maxNoNulls = 0; + + Set<Integer> iRefSet = HiveCalciteUtil.getInputRefs(call); + List<ColStatistics> colStats = t.getColStat(new ArrayList<Integer>(iRefSet)); + + for (ColStatistics cs : colStats) { + tmpNoNulls = cs.getNumNulls(); + if (tmpNoNulls > maxNoNulls) { + maxNoNulls = tmpNoNulls; + } + } + + return maxNoNulls; + } + private Double getMaxNDV(RexCall call) { double tmpNDV; double maxNDV = 1.0; http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java new file mode 100644 index 0000000..b7244fd --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -0,0 +1,645 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.linq4j.function.Predicate1; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdPredicates; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexPermuteInputsShuttle; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.util.BitSets; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.mapping.MappingType; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; + +import com.google.common.base.Function; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; + + +//TODO: Move this to calcite +public class HiveRelMdPredicates extends RelMdPredicates { + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.PREDICATES.method, + new HiveRelMdPredicates()); + + private static final List<RexNode> EMPTY_LIST = ImmutableList.of(); + + /** + * Infers predicates for a project. + * + * <ol> + * <li>create a mapping from input to projection. Map only positions that + * directly reference an input column. + * <li>Expressions that only contain above columns are retained in the + * Project's pullExpressions list. + * <li>For e.g. expression 'a + e = 9' below will not be pulled up because 'e' + * is not in the projection list. 
+ * + * <pre> + * childPullUpExprs: {a > 7, b + c < 10, a + e = 9} + * projectionExprs: {a, b, c, e / 2} + * projectionPullupExprs: {a > 7, b + c < 10} + * </pre> + * + * </ol> + */ + @Override + public RelOptPredicateList getPredicates(Project project) { + RelNode child = project.getInput(); + final RexBuilder rexBuilder = project.getCluster().getRexBuilder(); + RelOptPredicateList childInfo = RelMetadataQuery.getPulledUpPredicates(child); + + List<RexNode> projectPullUpPredicates = new ArrayList<RexNode>(); + HashMultimap<Integer, Integer> inpIndxToOutIndxMap = HashMultimap.create(); + + ImmutableBitSet.Builder columnsMappedBuilder = ImmutableBitSet.builder(); + Mapping m = Mappings.create(MappingType.PARTIAL_FUNCTION, child.getRowType().getFieldCount(), + project.getRowType().getFieldCount()); + + for (Ord<RexNode> o : Ord.zip(project.getProjects())) { + if (o.e instanceof RexInputRef) { + int sIdx = ((RexInputRef) o.e).getIndex(); + m.set(sIdx, o.i); + inpIndxToOutIndxMap.put(sIdx, o.i); + columnsMappedBuilder.set(sIdx); + } + } + + // Go over childPullUpPredicates. If a predicate only contains columns in + // 'columnsMapped' construct a new predicate based on mapping. + final ImmutableBitSet columnsMapped = columnsMappedBuilder.build(); + for (RexNode r : childInfo.pulledUpPredicates) { + ImmutableBitSet rCols = RelOptUtil.InputFinder.bits(r); + if (columnsMapped.contains(rCols)) { + r = r.accept(new RexPermuteInputsShuttle(m, child)); + projectPullUpPredicates.add(r); + } + } + + // Project can also generate constants. We need to include them. + for (Ord<RexNode> expr : Ord.zip(project.getProjects())) { + if (RexLiteral.isNullLiteral(expr.e)) { + projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, + rexBuilder.makeInputRef(project, expr.i))); + } else if (expr.e instanceof RexLiteral) { + final RexLiteral literal = (RexLiteral) expr.e; + projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + rexBuilder.makeInputRef(project, expr.i), literal)); + } else if (expr.e instanceof RexCall && HiveCalciteUtil.isDeterministicFuncOnLiterals(expr.e)) { + //TODO: Move this to calcite + projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + rexBuilder.makeInputRef(project, expr.i), expr.e)); + } + } + return RelOptPredicateList.of(projectPullUpPredicates); + } + + /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */ + @Override + public RelOptPredicateList getPredicates(Join join) { + RexBuilder rB = join.getCluster().getRexBuilder(); + RelNode left = join.getInput(0); + RelNode right = join.getInput(1); + + RelOptPredicateList leftInfo = RelMetadataQuery.getPulledUpPredicates(left); + RelOptPredicateList rightInfo = RelMetadataQuery.getPulledUpPredicates(right); + + HiveJoinConditionBasedPredicateInference jI = new HiveJoinConditionBasedPredicateInference(join, + RexUtil.composeConjunction(rB, leftInfo.pulledUpPredicates, false), + RexUtil.composeConjunction(rB, rightInfo.pulledUpPredicates, false)); + + return jI.inferPredicates(false); + } + + /** + * Utility to infer predicates from one side of the join that apply on the + * other side. + * + * <p>Contract is:<ul> + * + * <li>initialize with a {@link org.apache.calcite.rel.core.Join} and + * optional predicates applicable on its left and right subtrees. + * + * <li>you can + * then ask it for equivalentPredicate(s) given a predicate. 
+ * + * </ul> + * + * <p>So for: + * <ol> + * <li>'<code>R1(x) join R2(y) on x = y</code>' a call for + * equivalentPredicates on '<code>x > 7</code>' will return ' + * <code>[y > 7]</code>' + * <li>'<code>R1(x) join R2(y) on x = y join R3(z) on y = z</code>' a call for + * equivalentPredicates on the second join '<code>x > 7</code>' will return ' + * <code>[y > 7, z > 7]</code>' + * </ol> + */ + static class HiveJoinConditionBasedPredicateInference { + final Join joinRel; + final boolean isSemiJoin; + final int nSysFields; + final int nFieldsLeft; + final int nFieldsRight; + final ImmutableBitSet leftFieldsBitSet; + final ImmutableBitSet rightFieldsBitSet; + final ImmutableBitSet allFieldsBitSet; + SortedMap<Integer, BitSet> equivalence; + final Map<String, ImmutableBitSet> exprFields; + final Set<String> allExprsDigests; + final Set<String> equalityPredicates; + final RexNode leftChildPredicates; + final RexNode rightChildPredicates; + + public HiveJoinConditionBasedPredicateInference(Join joinRel, + RexNode lPreds, RexNode rPreds) { + this(joinRel, joinRel instanceof SemiJoin, lPreds, rPreds); + } + + private HiveJoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin, + RexNode lPreds, RexNode rPreds) { + super(); + this.joinRel = joinRel; + this.isSemiJoin = isSemiJoin; + nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size(); + nFieldsRight = joinRel.getRight().getRowType().getFieldList().size(); + nSysFields = joinRel.getSystemFieldList().size(); + leftFieldsBitSet = ImmutableBitSet.range(nSysFields, + nSysFields + nFieldsLeft); + rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft, + nSysFields + nFieldsLeft + nFieldsRight); + allFieldsBitSet = ImmutableBitSet.range(0, + nSysFields + nFieldsLeft + nFieldsRight); + + exprFields = Maps.newHashMap(); + allExprsDigests = new HashSet<String>(); + + if (lPreds == null) { + leftChildPredicates = null; + } else { + Mappings.TargetMapping leftMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft, nSysFields, 0, nFieldsLeft); + leftChildPredicates = lPreds.accept( + new RexPermuteInputsShuttle(leftMapping, joinRel.getInput(0))); + + for (RexNode r : RelOptUtil.conjunctions(leftChildPredicates)) { + exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r)); + allExprsDigests.add(r.toString()); + } + } + if (rPreds == null) { + rightChildPredicates = null; + } else { + Mappings.TargetMapping rightMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft + nFieldsRight, + nSysFields + nFieldsLeft, 0, nFieldsRight); + rightChildPredicates = rPreds.accept( + new RexPermuteInputsShuttle(rightMapping, joinRel.getInput(1))); + + for (RexNode r : RelOptUtil.conjunctions(rightChildPredicates)) { + exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r)); + allExprsDigests.add(r.toString()); + } + } + + equivalence = Maps.newTreeMap(); + equalityPredicates = new HashSet<String>(); + for (int i = 0; i < nSysFields + nFieldsLeft + nFieldsRight; i++) { + equivalence.put(i, BitSets.of(i)); + } + + // Only process equivalences found in the join conditions. Processing + // Equivalences from the left or right side infer predicates that are + // already present in the Tree below the join. 
+ RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); + List<RexNode> exprs = + RelOptUtil.conjunctions( + compose(rexBuilder, ImmutableList.of(joinRel.getCondition()))); + + final EquivalenceFinder eF = new EquivalenceFinder(); + new ArrayList<Void>(Lists.transform(exprs, new Function<RexNode, Void>() { + public Void apply(RexNode input) { + return input.accept(eF); + } + })); + + equivalence = BitSets.closure(equivalence); + } + + /** + * The PullUp Strategy is sound but not complete. + * <ol> + * <li>We only pullUp inferred predicates for now. Pulling up existing + * predicates causes an explosion of duplicates. The existing predicates are + * pushed back down as new predicates. Once we have rules to eliminate + * duplicate Filter conditions, we should pullUp all predicates. + * <li>For Left Outer: we infer new predicates from the left and set them as + * applicable on the Right side. No predicates are pulledUp. + * <li>Right Outer Joins are handled in an analogous manner. + * <li>For Full Outer Joins no predicates are pulledUp or inferred. + * </ol> + */ + public RelOptPredicateList inferPredicates( + boolean includeEqualityInference) { + List<RexNode> inferredPredicates = new ArrayList<RexNode>(); + Set<String> allExprsDigests = new HashSet<String>(this.allExprsDigests); + final JoinRelType joinType = joinRel.getJoinType(); + switch (joinType) { + case INNER: + case LEFT: + infer(leftChildPredicates, allExprsDigests, inferredPredicates, + includeEqualityInference, + joinType == JoinRelType.LEFT ? rightFieldsBitSet + : allFieldsBitSet); + break; + } + switch (joinType) { + case INNER: + case RIGHT: + infer(rightChildPredicates, allExprsDigests, inferredPredicates, + includeEqualityInference, + joinType == JoinRelType.RIGHT ? leftFieldsBitSet + : allFieldsBitSet); + break; + } + + Mappings.TargetMapping rightMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft + nFieldsRight, + 0, nSysFields + nFieldsLeft, nFieldsRight); + final HiveJoinRexPermuteInputsShuttle rightPermute = + new HiveJoinRexPermuteInputsShuttle(rightMapping, joinRel); + Mappings.TargetMapping leftMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft, 0, nSysFields, nFieldsLeft); + final HiveJoinRexPermuteInputsShuttle leftPermute = + new HiveJoinRexPermuteInputsShuttle(leftMapping, joinRel); + + List<RexNode> leftInferredPredicates = new ArrayList<RexNode>(); + List<RexNode> rightInferredPredicates = new ArrayList<RexNode>(); + + for (RexNode iP : inferredPredicates) { + ImmutableBitSet iPBitSet = RelOptUtil.InputFinder.bits(iP); + if (leftFieldsBitSet.contains(iPBitSet)) { + leftInferredPredicates.add(iP.accept(leftPermute)); + } else if (rightFieldsBitSet.contains(iPBitSet)) { + rightInferredPredicates.add(iP.accept(rightPermute)); + } + } + + switch (joinType) { + case INNER: + Iterable<RexNode> pulledUpPredicates; + if (isSemiJoin) { + pulledUpPredicates = Iterables.concat( + RelOptUtil.conjunctions(leftChildPredicates), + leftInferredPredicates); + } else { + pulledUpPredicates = Iterables.concat( + RelOptUtil.conjunctions(leftChildPredicates), + RelOptUtil.conjunctions(rightChildPredicates), + RelOptUtil.conjunctions(joinRel.getCondition()), + inferredPredicates); + } + return RelOptPredicateList.of(pulledUpPredicates, + leftInferredPredicates, rightInferredPredicates); + case LEFT: + return RelOptPredicateList.of( + RelOptUtil.conjunctions(leftChildPredicates), + leftInferredPredicates, rightInferredPredicates); + case RIGHT: + return RelOptPredicateList.of( + 
RelOptUtil.conjunctions(rightChildPredicates), + inferredPredicates, EMPTY_LIST); + default: + assert inferredPredicates.size() == 0; + return RelOptPredicateList.EMPTY; + } + } + + public RexNode left() { + return leftChildPredicates; + } + + public RexNode right() { + return rightChildPredicates; + } + + private void infer(RexNode predicates, Set<String> allExprsDigests, + List<RexNode> inferedPredicates, boolean includeEqualityInference, + ImmutableBitSet inferringFields) { + for (RexNode r : RelOptUtil.conjunctions(predicates)) { + if (!includeEqualityInference + && equalityPredicates.contains(r.toString())) { + continue; + } + for (Mapping m : mappings(r)) { + RexNode tr = r.accept( + new RexPermuteInputsShuttle(m, joinRel.getInput(0), + joinRel.getInput(1))); + if (inferringFields.contains(RelOptUtil.InputFinder.bits(tr)) + && !allExprsDigests.contains(tr.toString()) + && !isAlwaysTrue(tr)) { + inferedPredicates.add(tr); + allExprsDigests.add(tr.toString()); + } + } + } + } + + Iterable<Mapping> mappings(final RexNode predicate) { + return new Iterable<Mapping>() { + public Iterator<Mapping> iterator() { + ImmutableBitSet fields = exprFields.get(predicate.toString()); + if (fields.cardinality() == 0) { + return Iterators.emptyIterator(); + } + return new ExprsItr(fields); + } + }; + } + + private void equivalent(int p1, int p2) { + BitSet b = equivalence.get(p1); + b.set(p2); + + b = equivalence.get(p2); + b.set(p1); + } + + RexNode compose(RexBuilder rexBuilder, Iterable<RexNode> exprs) { + exprs = Linq4j.asEnumerable(exprs).where(new Predicate1<RexNode>() { + public boolean apply(RexNode expr) { + return expr != null; + } + }); + return RexUtil.composeConjunction(rexBuilder, exprs, false); + } + + /** + * Find expressions of the form 'col_x = col_y'. + */ + class EquivalenceFinder extends RexVisitorImpl<Void> { + protected EquivalenceFinder() { + super(true); + } + + @Override public Void visitCall(RexCall call) { + if (call.getOperator().getKind() == SqlKind.EQUALS) { + int lPos = pos(call.getOperands().get(0)); + int rPos = pos(call.getOperands().get(1)); + if (lPos != -1 && rPos != -1) { + HiveJoinConditionBasedPredicateInference.this.equivalent(lPos, rPos); + HiveJoinConditionBasedPredicateInference.this.equalityPredicates + .add(call.toString()); + } + } + return null; + } + } + + /** + * Given an expression returns all the possible substitutions. 
+ * + * <p>For example, for an expression 'a + b + c' and the following + * equivalences: <pre> + * a : {a, b} + * b : {a, b} + * c : {c, e} + * </pre> + * + * <p>The following Mappings will be returned: + * <pre> + * {a->a, b->a, c->c} + * {a->a, b->a, c->e} + * {a->a, b->b, c->c} + * {a->a, b->b, c->e} + * {a->b, b->a, c->c} + * {a->b, b->a, c->e} + * {a->b, b->b, c->c} + * {a->b, b->b, c->e} + * </pre> + * + * <p>which imply the following inferences: + * <pre> + * a + a + c + * a + a + e + * a + b + c + * a + b + e + * b + a + c + * b + a + e + * b + b + c + * b + b + e + * </pre> + */ + class ExprsItr implements Iterator<Mapping> { + final int[] columns; + final BitSet[] columnSets; + final int[] iterationIdx; + Mapping nextMapping; + boolean firstCall; + + ExprsItr(ImmutableBitSet fields) { + nextMapping = null; + columns = new int[fields.cardinality()]; + columnSets = new BitSet[fields.cardinality()]; + iterationIdx = new int[fields.cardinality()]; + for (int j = 0, i = fields.nextSetBit(0); i >= 0; i = fields + .nextSetBit(i + 1), j++) { + columns[j] = i; + columnSets[j] = equivalence.get(i); + iterationIdx[j] = 0; + } + firstCall = true; + } + + public boolean hasNext() { + if (firstCall) { + initializeMapping(); + firstCall = false; + } else { + computeNextMapping(iterationIdx.length - 1); + } + return nextMapping != null; + } + + public Mapping next() { + return nextMapping; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + private void computeNextMapping(int level) { + int t = columnSets[level].nextSetBit(iterationIdx[level]); + if (t < 0) { + if (level == 0) { + nextMapping = null; + } else { + iterationIdx[level] = 0; + computeNextMapping(level - 1); + } + } else { + nextMapping.set(columns[level], t); + iterationIdx[level] = t + 1; + } + } + + private void initializeMapping() { + nextMapping = Mappings.create(MappingType.PARTIAL_FUNCTION, + nSysFields + nFieldsLeft + nFieldsRight, + nSysFields + nFieldsLeft + nFieldsRight); + for (int i = 0; i < columnSets.length; i++) { + BitSet c = columnSets[i]; + int t = c.nextSetBit(iterationIdx[i]); + if (t < 0) { + nextMapping = null; + return; + } + nextMapping.set(columns[i], t); + iterationIdx[i] = t + 1; + } + } + } + + private int pos(RexNode expr) { + if (expr instanceof RexInputRef) { + return ((RexInputRef) expr).getIndex(); + } + return -1; + } + + private boolean isAlwaysTrue(RexNode predicate) { + if (predicate instanceof RexCall) { + RexCall c = (RexCall) predicate; + if (c.getOperator().getKind() == SqlKind.EQUALS) { + int lPos = pos(c.getOperands().get(0)); + int rPos = pos(c.getOperands().get(1)); + return lPos != -1 && lPos == rPos; + } + } + return predicate.isAlwaysTrue(); + } + } + + /** + * Shuttle which applies a permutation to its input fields. 
+ * + * @see RexPermutationShuttle + * @see RexUtil#apply(org.apache.calcite.util.mapping.Mappings.TargetMapping, RexNode) + */ + public static class HiveJoinRexPermuteInputsShuttle extends RexShuttle { + //~ Instance fields -------------------------------------------------------- + + private final Mappings.TargetMapping mapping; + private final ImmutableList<RelDataTypeField> fields; + private final RelOptCluster cluster; + private final RelDataType rType; + + //~ Constructors ----------------------------------------------------------- + + private HiveJoinRexPermuteInputsShuttle( + Mappings.TargetMapping mapping, + RelNode input) { + this.mapping = mapping; + this.cluster = input.getCluster(); + this.rType = input.getRowType(); + this.fields = ImmutableList.copyOf(rType.getFieldList()); + } + + //~ Methods ---------------------------------------------------------------- + + private static ImmutableList<RelDataTypeField> fields(RelNode[] inputs) { + final ImmutableList.Builder<RelDataTypeField> fields = + ImmutableList.builder(); + for (RelNode input : inputs) { + fields.addAll(input.getRowType().getFieldList()); + } + return fields.build(); + } + + @Override public RexNode visitInputRef(RexInputRef local) { + final int index = local.getIndex(); + int target = mapping.getTarget(index); + return new RexInputRef( + target, + fields.get(index).getType()); + } + + @Override public RexNode visitCall(RexCall call) { + if (call.getOperator() == RexBuilder.GET_OPERATOR) { + final String name = + (String) ((RexLiteral) call.getOperands().get(1)).getValue2(); + final int i = lookup(fields, name); + if (i >= 0) { + return RexInputRef.of(i, fields); + } + } + return HiveCalciteUtil.getTypeSafePred(cluster, super.visitCall(call), rType); + } + + private static int lookup(List<RelDataTypeField> fields, String name) { + for (int i = 0; i < fields.size(); i++) { + final RelDataTypeField field = fields.get(i); + if (field.getName().equals(name)) { + return i; + } + } + return -1; + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index bcb9ea7..37249f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -202,6 +202,7 @@ public class SqlFunctionConverter { case IN: case BETWEEN: case ROW: + case IS_NOT_NULL: node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text)); break; @@ -314,7 +315,7 @@ public class SqlFunctionConverter { registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in")); registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between")); registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); - + registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.TOK_ISNOTNULL, "TOK_ISNOTNULL")); } private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) { 
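The IS_NOT_NULL support registered above in SqlFunctionConverter is consumed by the estimators in the surrounding files: both FilterSelectivityEstimator and StatsRulesProcFactory reduce the incoming row count by the largest null count among the columns referenced by the predicate. A minimal sketch of that arithmetic, with invented names and assuming the null count has already been looked up from column statistics:

    public class NotNullEstimateSketch {
      // Selectivity of "col IS NOT NULL": surviving rows divided by total rows,
      // guarded the same way the patch guards against inconsistent column statistics.
      static double notNullSelectivity(double totalRows, long maxNulls) {
        if (totalRows < maxNulls) {
          throw new IllegalStateException("Invalid stats: number of nulls > number of tuples");
        }
        return (totalRows - maxNulls) / Math.max(totalRows, 1);
      }

      public static void main(String[] args) {
        // 500 input rows, at most 120 nulls in the referenced column -> 0.76
        System.out.println(notNullSelectivity(500, 120));
      }
    }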
http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index c1e314f..1d3a90a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -52,8 +52,12 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; @@ -337,8 +341,9 @@ public class StatsRulesProcFactory { } } else if (udf instanceof GenericUDFOPNot) { newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop); + } else if (udf instanceof GenericUDFOPNotNull) { + return evaluateNotNullExpr(stats, genFunc); } else { - // single predicate condition newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop, evaluatedRowCount); } @@ -443,6 +448,58 @@ public class StatsRulesProcFactory { return numRows / 2; } + private long evaluateNotNullExpr(Statistics parentStats, ExprNodeGenericFuncDesc pred) { + long noOfNulls = getMaxNulls(parentStats, pred); + long parentCardinality = parentStats.getNumRows(); + long newPredCardinality = parentCardinality; + + if (parentCardinality > noOfNulls) { + newPredCardinality = parentCardinality - noOfNulls; + } else { + LOG.error("Invalid column stats: No of nulls > cardinality"); + } + + return newPredCardinality; + } + + private long getMaxNulls(Statistics stats, ExprNodeDesc pred) { + long tmpNoNulls = 0; + long maxNoNulls = 0; + + if (pred instanceof ExprNodeColumnDesc) { + ColStatistics cs = stats.getColumnStatisticsFromColName(((ExprNodeColumnDesc) pred) + .getColumn()); + if (cs != null) { + tmpNoNulls = cs.getNumNulls(); + } + } else if (pred instanceof ExprNodeGenericFuncDesc || pred instanceof ExprNodeColumnListDesc) { + long noNullsOfChild = 0; + for (ExprNodeDesc childExpr : pred.getChildren()) { + noNullsOfChild = getMaxNulls(stats, childExpr); + if (noNullsOfChild > tmpNoNulls) { + tmpNoNulls = noNullsOfChild; + } + } + } else if (pred instanceof ExprNodeConstantDesc) { + if (ExprNodeDescUtils.isNullConstant(pred)) { + tmpNoNulls = stats.getNumRows(); + } else { + tmpNoNulls = 0; + } + } else if (pred instanceof ExprNodeDynamicListDesc) { + tmpNoNulls = 0; + } else if (pred instanceof ExprNodeFieldDesc) { + // TODO Confirm this is safe + tmpNoNulls = getMaxNulls(stats, ((ExprNodeFieldDesc) pred).getDesc()); + } + + if (tmpNoNulls > maxNoNulls) { + maxNoNulls = tmpNoNulls; + } + + return maxNoNulls; + } + private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx 
aspCtx, List<String> neededCols, FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException { @@ -525,8 +582,7 @@ public class StatsRulesProcFactory { || udf instanceof GenericUDFOPLessThan) { return numRows / 3; } else if (udf instanceof GenericUDFOPNotNull) { - long newNumRows = evaluateColEqualsNullExpr(stats, genFunc); - return stats.getNumRows() - newNumRows; + return evaluateNotNullExpr(stats, genFunc); } else if (udf instanceof GenericUDFOPNull) { return evaluateColEqualsNullExpr(stats, genFunc); } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index c005b1a..ac50673 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -62,13 +62,11 @@ import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; -import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; -import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; import org.apache.calcite.rel.rules.ProjectMergeRule; import org.apache.calcite.rel.rules.ProjectRemoveRule; -import org.apache.calcite.rel.rules.ReduceExpressionsRule; import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; @@ -137,7 +135,9 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; @@ -818,7 +818,7 @@ public class CalcitePlanner extends SemanticAnalyzer { private class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> { private RelOptCluster cluster; private RelOptSchema relOptSchema; - private final Map<String, PrunedPartitionList> partitionCache; + private final Map<String, PrunedPartitionList> partitionCache; // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. @@ -918,11 +918,10 @@ public class CalcitePlanner extends SemanticAnalyzer { // 4. 
Run other optimizations that do not need stats calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), - HepMatchOrder.BOTTOM_UP, ReduceExpressionsRule.JOIN_INSTANCE, - ReduceExpressionsRule.FILTER_INSTANCE, ReduceExpressionsRule.PROJECT_INSTANCE, + HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, UnionMergeRule.INSTANCE, new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY), - HiveAggregateProjectMergeRule.INSTANCE); + HiveAggregateProjectMergeRule.INSTANCE, HiveJoinCommuteRule.INSTANCE); // 5. Run aggregate-join transpose (cost based) // If it failed because of missing stats, we continue with @@ -955,10 +954,6 @@ public class CalcitePlanner extends SemanticAnalyzer { } } - // 6. Run rule to try to remove projects on top of join operators - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), - HepMatchOrder.BOTTOM_UP, HiveJoinCommuteRule.INSTANCE); - // 7. Run rule to fix windowing issue when it is done over // aggregation columns (HIVE-10627) if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) { @@ -1014,12 +1009,11 @@ public class CalcitePlanner extends SemanticAnalyzer { * @return */ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { - // TODO: Decorelation of subquery should be done before attempting // Partition Pruning; otherwise Expression evaluation may try to execute // corelated sub query. - //0. Distinct aggregate rewrite + //1. Distinct aggregate rewrite // Run this optimization early, since it is expanding the operator pipeline. if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") && conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) { @@ -1029,7 +1023,40 @@ public class CalcitePlanner extends SemanticAnalyzer { basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE); } - // 1. Push down limit through outer join + // 2. Try factoring out common filter elements & separating deterministic + // vs non-deterministic UDF. This needs to run before PPD so that PPD can + // add on-clauses for old style Join Syntax + // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or + // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 + basePlan = hepPlan(basePlan, false, mdProvider, HepMatchOrder.ARBITRARY, + HivePreFilteringRule.INSTANCE); + + // 3. PPD for old Join Syntax + // NOTE: PPD needs to run before adding not null filters in order to + // support old style join syntax (so that on-clauses will get filled up). + // TODO: Add in ReduceExpressionrules (Constant folding) to below once + // HIVE-11927 is fixed. + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( + HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( + HiveFilter.DEFAULT_FILTER_FACTORY)); + + // TODO: Transitive inference, constant prop & Predicate push down has to + // do multiple passes till no more inference is left + // Currently doing so would result in a spin. Just checking for if inferred + // pred is present below may not be sufficient as inferred & pushed pred + // could have been mutated by constant folding/prop + // 4. 
Transitive inference for join on clauses + basePlan = hepPlan(basePlan, true, mdProvider, new HiveJoinPushTransitivePredicatesRule( + Join.class, HiveFilter.DEFAULT_FILTER_FACTORY)); + + // 5. Push down limit through outer join + // NOTE: We run this after PPD to support old style join syntax. + // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or + // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10 if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) { // This should be a cost based decision, but till we enable the extended cost // model, we will use the given value for the variable @@ -1044,35 +1071,28 @@ public class CalcitePlanner extends SemanticAnalyzer { HiveProjectSortTransposeRule.INSTANCE); } - // 2. Push Down Semi Joins + // 6. Add not null filters + basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); + + // 7. Rerun Constant propagation and PPD now that we have added Not NULL filters & did transitive inference + // TODO: Add in ReduceExpressionrules (Constant folding) to below once + // HIVE-11927 is fixed. + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( + HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( + HiveFilter.DEFAULT_FILTER_FACTORY)); + + // 8. Push Down Semi Joins basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - // 3. Add not null filters - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { - basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); - } - - // 4. Constant propagation, common filter extraction, and PPD - basePlan = hepPlan(basePlan, true, mdProvider, - ReduceExpressionsRule.PROJECT_INSTANCE, - ReduceExpressionsRule.FILTER_INSTANCE, - ReduceExpressionsRule.JOIN_INSTANCE, - HivePreFilteringRule.INSTANCE, - new HiveFilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, - HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY), - new HiveFilterSetOpTransposeRule(HiveFilter.DEFAULT_FILTER_FACTORY), - HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, - new FilterAggregateTransposeRule(Filter.class, - HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - - // 5. Transitive inference & Partition Pruning - basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule( - Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), - new HivePartitionPruneRule(conf)); - - // 6. Projection Pruning + // 9. Apply Partition Pruning + basePlan = hepPlan(basePlan, false, mdProvider, new HivePartitionPruneRule(conf)); + + // 10. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, cluster, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, @@ -1080,11 +1100,18 @@ public class CalcitePlanner extends SemanticAnalyzer { HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 7. 
Rerun PPD through Project as column pruning would have introduced DT - // above scans - basePlan = hepPlan(basePlan, true, mdProvider, - new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, - HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY)); + + // 11. Merge Project-Project if possible + basePlan = hepPlan(basePlan, false, mdProvider, new ProjectMergeRule(true, + HiveProject.DEFAULT_PROJECT_FACTORY)); + + // 12. Rerun PPD through Project as column pruning would have introduced + // DT above scans; By pushing filter just above TS, Hive can push it into + // storage (incase there are filters on non partition cols). This only + // matches FIL-PROJ-TS + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTSTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY, HiveTableScan.class)); return basePlan; } http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index e291a48..0223038 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -487,6 +487,14 @@ public class ExprNodeDescUtils { return true; } + public static boolean isNullConstant(ExprNodeDesc value) { + if ((value instanceof ExprNodeConstantDesc) + && ((ExprNodeConstantDesc) value).getValue() == null) { + return true; + } + return false; + } + public static PrimitiveTypeInfo deriveMinArgumentCast( ExprNodeDesc childExpr, TypeInfo targetType) { assert targetType instanceof PrimitiveTypeInfo : "Not a primitive type" + targetType; http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q b/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q index 799a66b..c017172 100644 --- a/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q +++ b/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q @@ -94,7 +94,7 @@ select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -152,7 +152,7 @@ select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value fro -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -280,7 +280,7 @@ select * from (select q, b, `//cbo_t2`.p, `c/b/o_t1`.c, `cbo_/t3////`.c_int from -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -310,7 +310,7 @@ select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from `c/b/o_t1` where (`c/b/o_t1`.c_int + 1 >= 0) and (`c/b/o_t1`.c_int > 0 or `c/b/o_t1`.c_float >= 0) group by c_float, `c/b/o_t1`.c_int, key having `c/b/o_t1`.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc limit 5) `c/b/o_t1` left outer join (select key as p, c_int+1 as q, 
sum(c_int) as r from `//cbo_t2` where (`//cbo_t2`.c_int + 1 >= 0) and (`//cbo_t2`.c_int > 0 or `//cbo_t2`.c_float >= 0) group by c_float, `//cbo_t2`.c_int, key having `//cbo_t2`.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 limit 5) `//cbo_t2` on `c/b/o_t1`.a=p left outer join `cbo_/t3////` on `c/b/o_t1`.a=key where (b + `//cbo_t2`.q >= 0) and (b > 0 or c_int >= 0) group by `cbo_/t3////`.c_int, c having `cbo_/t3////`.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by `cbo_/t3////`.c_int % c asc, `cbo_/t3////`.c_int, c desc li mit 5; -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -342,7 +342,7 @@ select a, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -452,7 +452,7 @@ select key from `c/b/o_t1` where c_int = -6 or c_int = +6; select count(`c/b/o_t1`.dt) from `c/b/o_t1` join `//cbo_t2` on `c/b/o_t1`.dt = `//cbo_t2`.dt where `c/b/o_t1`.dt = '2014' ; -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -470,7 +470,7 @@ select `c/b/o_t1`.value from `c/b/o_t1` join `//cbo_t2` on `c/b/o_t1`.key = `//c -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -602,7 +602,7 @@ from (select b.key, count(*) -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -712,7 +712,7 @@ having p_name in -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -872,7 +872,7 @@ having b.p_mfgr not in -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -910,7 +910,7 @@ select count(distinct c_int) as a, avg(c_float) from `c/b/o_t1` group by c_int o select count(distinct c_int) as a, avg(c_float) from `c/b/o_t1` group by c_float, c_int order by a; -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -936,7 +936,7 @@ select r2.key from (select key, c_int from (select key, c_int from `c/b/o_t1` un -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -1026,7 +1026,7 @@ drop view v3; drop view v4; -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out index cdecad1..e2e589c 100644 --- a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out +++ b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out @@ -107,32 +107,32 @@ STAGE PLANS: Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE 
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             alias: b
             Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
       Reduce Operator Tree:
         Join Operator
@@ -142,14 +142,14 @@ STAGE PLANS:
             0 _col0 (type: int)
             1 _col0 (type: int)
           outputColumnNames: _col0, _col1, _col4
-          Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string)
            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -190,7 +190,7 @@ STAGE PLANS:
             Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
                   0 key (type: int)
@@ -204,7 +204,7 @@ STAGE PLANS:
             Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
                 condition map:
                      Inner Join 0 to 1
@@ -212,14 +212,14 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0, _col1, _col7
-                Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out b/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
index b2a7d89..f860011 100644
--- a/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
+++ b/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
@@ -77,7 +77,7 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
                   0 key (type: int)
@@ -91,7 +91,7 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
                 condition map:
                      Inner Join 0 to 1
@@ -99,14 +99,14 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0, _col1, _col5, _col6
-                Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
index 216b037..c6a3567 100644
--- a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
+++ b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
@@ -86,24 +86,24 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: ((key + 1) is not null and (key < 100)) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: (key + 1) (type: double)
                 sort order: +
                 Map-reduce partition columns: (key + 1) (type: double)
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 value expressions: key (type: string), value (type: string)
           TableScan
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: UDFToDouble(key) is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: UDFToDouble(key) (type: double)
                 sort order: +
                 Map-reduce partition columns: UDFToDouble(key) (type: double)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 value expressions: key (type: string), value (type: string)
       Reduce Operator Tree:
         Join Operator
@@ -113,13 +113,13 @@ STAGE PLANS:
             0 (key + 1) (type: double)
             1 UDFToDouble(key) (type: double)
           outputColumnNames: _col0, _col1, _col5, _col6
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
           Select Operator
            expressions: 2 (type: int), concat(_col0, _col1, _col5, _col6) (type: string), concat(_col0, _col1) (type: string), concat(_col5, _col6) (type: string), concat(_col0, _col1, _col5) (type: string), concat(_col0, _col5, _col6) (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            UDTF Operator
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              function name: stack
              Limit
                Number of rows: 10

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientpositive/ambiguous_col.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ambiguous_col.q.out b/ql/src/test/results/clientpositive/ambiguous_col.q.out
index 7f04e89..1d1d6af 100644
--- a/ql/src/test/results/clientpositive/ambiguous_col.q.out
+++ b/ql/src/test/results/clientpositive/ambiguous_col.q.out
@@ -17,32 +17,32 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             alias: src1
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -51,14 +51,14 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
           Select Operator
            expressions: _col0 (type: string), _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -136,31 +136,31 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           TableScan
             alias: src1
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -169,14 +169,14 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
           Select Operator
            expressions: _col0 (type: string), _col0 (type: string)
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -254,31 +254,31 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          TableScan
            alias: src1
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -287,14 +287,14 @@ STAGE PLANS:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col0 (type: string)
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
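
Editor's note on step 12 of the CalcitePlanner change above: the hepPlan(...) helper itself is not part of this mail, so the sketch below is only a rough illustration of how a heuristic (HEP) planner pass could apply the new FIL-PROJ-TS rule with Calcite's standard HepPlanner API. The rule constructor arguments are copied from the patch; the hepPlan signature, match order, and the package names of the Hive rule/operator classes are assumptions, not code from this commit.

import org.apache.calcite.plan.hep.HepMatchOrder;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule;

public class FilterProjectTSSketch {

  // Illustrative only: push a Filter that sits on a Project over a TableScan
  // underneath the Project, so the predicate ends up directly above the scan
  // and can be handed to the storage layer for non-partition columns.
  public static RelNode pushFilterAboveTS(RelNode basePlan) {
    HepProgram program = new HepProgramBuilder()
        .addMatchOrder(HepMatchOrder.TOP_DOWN)
        // Rule construction mirrors the hepPlan call in the patch above.
        .addRuleInstance(new HiveFilterProjectTSTransposeRule(
            Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class,
            HiveProject.DEFAULT_PROJECT_FACTORY, HiveTableScan.class))
        .build();

    HepPlanner planner = new HepPlanner(program);
    planner.setRoot(basePlan);
    // Returns the plan after the rule has been applied wherever it matched.
    return planner.findBestExp();
  }
}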
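Editor's note on the ExprNodeDescUtils change above: the new isNullConstant helper simply reports whether an expression is a constant whose value is the literal NULL. A minimal usage sketch follows; the ExprNodeConstantDesc constructors used here are assumptions about the existing API rather than part of this patch, and the call sites that will use the helper are not shown in this mail.

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class IsNullConstantSketch {
  public static void main(String[] args) {
    // A constant expression carrying a null value, e.g. the literal NULL in "WHERE c = NULL".
    ExprNodeDesc nullLiteral = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null);
    // A regular, non-null constant for comparison.
    ExprNodeDesc oneLiteral = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 1);

    System.out.println(ExprNodeDescUtils.isNullConstant(nullLiteral)); // true
    System.out.println(ExprNodeDescUtils.isNullConstant(oneLiteral));  // false
  }
}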
