[39/50] [abbrv] hive git commit: HIVE-11424 : Rule to transform OR clauses into IN clauses in CBO (Jesus Camacho Rodriguez via Ashutosh Chauhan)

jdere Mon, 04 Apr 2016 13:38:32 -0700

HIVE-11424 : Rule to transform OR clauses into IN clauses in CBO (Jesus Camacho 
Rodriguez via Ashutosh Chauhan)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8c8ff3f1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8c8ff3f1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8c8ff3f1

Branch: refs/heads/llap
Commit: 8c8ff3f144921e9b985abe51eb82ebad94195b4a
Parents: 09b00fc
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Tue Mar 22 23:41:00 2016 -0800
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Tue Mar 29 11:18:58 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |   7 +
 .../hadoop/hive/ql/optimizer/Optimizer.java     |   4 +-
 .../rules/HivePointLookupOptimizerRule.java     | 381 +++++++++++++++++++
 .../ql/optimizer/pcr/PcrExprProcFactory.java    | 103 ++---
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  40 +-
 .../clientpositive/auto_join19_inclause.q       |  18 +
 .../queries/clientpositive/filter_in_or_dup.q   |  19 +
 .../clientpositive/auto_join19_inclause.q.out   | 130 +++++++
 .../clientpositive/constprog_semijoin.q.out     |   4 +-
 .../dynpart_sort_optimization_acid.q.out        |   4 +-
 .../clientpositive/filter_in_or_dup.q.out       |  96 +++++
 .../results/clientpositive/perf/query13.q.out   |  14 +-
 .../results/clientpositive/perf/query27.q.out   |   2 +-
 .../results/clientpositive/perf/query34.q.out   |   2 +-
 .../results/clientpositive/perf/query48.q.out   |  14 +-
 .../results/clientpositive/perf/query68.q.out   |   2 +-
 .../results/clientpositive/perf/query73.q.out   |   2 +-
 .../results/clientpositive/perf/query79.q.out   |   2 +-
 .../results/clientpositive/perf/query82.q.out   |   2 +-
 .../results/clientpositive/perf/query85.q.out   |  26 +-
 .../results/clientpositive/pointlookup2.q.out   |  38 +-
 .../results/clientpositive/pointlookup3.q.out   |  50 ++-
 .../results/clientpositive/pointlookup4.q.out   |   2 +-
 .../spark/constprog_semijoin.q.out              |   4 +-
 .../clientpositive/tez/bucketpruning1.q.out     |   8 +-
 .../clientpositive/tez/constprog_semijoin.q.out |   4 +-
 .../tez/vector_mr_diff_schema_alias.q.out       |   2 +-
 .../vector_mr_diff_schema_alias.q.out           |   2 +-
 28 files changed, 824 insertions(+), 158 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index b516925..56b96b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -1398,6 +1398,13 @@ public final class FunctionRegistry {
   }
 
   /**
+   * Tells whether the given expression descriptor is an "in" node, i.e. whether
+   * the GenericUDF class obtained from the descriptor is exactly GenericUDFIn.
+   */
+  public static boolean isIn(ExprNodeDesc desc) {
+    return getGenericUDFClassFromExprDesc(desc) == GenericUDFIn.class;
+  }
+
+  /**
    * Returns whether the exprNodeDesc is a node of "not".
    */
   public static boolean isOpNot(ExprNodeDesc desc) {

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index f56cd96..55c71dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -23,7 +23,6 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.hadoop.hive.conf.HiveConf;
-import 
org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
 import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
@@ -83,7 +82,8 @@ public class Optimizer {
     }
 
     // Try to transform OR predicates in Filter into simpler IN clauses first
-    if (HiveConf.getBoolVar(hiveConf, 
HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+    if (HiveConf.getBoolVar(hiveConf, 
HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER) &&
+            !pctx.getContext().isCboSucceeded()) {
       final int min = HiveConf.getIntVar(hiveConf,
           HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
       transformations.add(new PointLookupOptimizer(min));

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
new file mode 100644
index 0000000..9609a1e
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
@@ -0,0 +1,381 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.LinkedHashMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+
+/**
+ * This optimization will take a Filter expression, and if its predicate contains
+ * an OR operator whose children are constant equality expressions, it will try
+ * to generate an IN clause (which is more efficient). If the OR operator contains
+ * AND operator children, the optimization might generate an IN clause that uses
+ * structs.
+ */
+public class HivePointLookupOptimizerRule extends RelOptRule {
+
+  protected static final Log LOG = 
LogFactory.getLog(HivePointLookupOptimizerRule.class);
+
+
+  // Minimum number of OR clauses needed to transform into IN clauses
+  private final int min;
+
+  public HivePointLookupOptimizerRule(int min) {
+    super(operand(Filter.class, any()));
+    this.min = min;
+  }
+
+  /**
+   * Attempts to replace the matched Filter with an equivalent one whose OR
+   * predicates have been rewritten as IN clauses, with the resulting IN clauses
+   * merged. Nothing is registered when the rewritten condition prints the same
+   * as the condition obtained from RexUtil.pullFactors.
+   *
+   * @param call rule call whose operand 0 is the matched Filter
+   */
+  public void onMatch(RelOptRuleCall call) {
+    final Filter matchedFilter = call.rel(0);
+    final RexBuilder builder = matchedFilter.getCluster().getRexBuilder();
+
+    // Starting point: the filter condition after RexUtil.pullFactors
+    final RexNode factored = RexUtil.pullFactors(builder, matchedFilter.getCondition());
+
+    // Rewrite candidate ORs as IN clauses first, then merge the IN clauses
+    final RexNode withInClauses =
+        new RexTransformIntoInClause(builder, matchedFilter, min).apply(factored);
+    final RexNode rewritten = new RexMergeInClause(builder).apply(withInClauses);
+
+    // Only fire the transformation when the string form of the condition changed
+    if (!rewritten.toString().equals(factored.toString())) {
+      call.transformTo(matchedFilter.copy(
+          matchedFilter.getTraitSet(), matchedFilter.getInput(), rewritten));
+    }
+  }
+
+
+  /**
+   * Transforms OR clauses into IN clauses, when possible.
+   */
+  protected static class RexTransformIntoInClause extends RexShuttle {
+    private final RexBuilder rexBuilder;
+    private final Filter filterOp;
+    private final int min;
+
+    RexTransformIntoInClause(RexBuilder rexBuilder, Filter filterOp, int min) {
+      this.filterOp = filterOp;
+      this.rexBuilder = rexBuilder;
+      this.min = min;
+    }
+
+    /**
+     * Rewrites disjunctions of constant equalities into IN clauses.
+     *
+     * For an AND call, each OR operand of the flattened conjunction is handed to
+     * transformIntoInClauseCondition. An OR operand that cannot be rewritten is
+     * kept as it is, so that it does not prevent sibling OR operands from being
+     * rewritten. For an OR call, the call itself is handed to
+     * transformIntoInClauseCondition. Any other call is delegated to RexShuttle.
+     *
+     * @param call the call being visited
+     * @return the rewritten expression, or the original call if nothing could be
+     *         rewritten or a SemanticException was raised (and logged)
+     */
+    @Override public RexNode visitCall(RexCall call) {
+      RexNode node;
+      switch (call.getKind()) {
+        case AND:
+          ImmutableList<RexNode> operands = RexUtil.flattenAnd(call.getOperands());
+          List<RexNode> newOperands = new ArrayList<RexNode>(operands.size());
+          boolean transformed = false;
+          for (RexNode operand: operands) {
+            RexNode newOperand = operand;
+            if (operand.getKind() == SqlKind.OR) {
+              try {
+                RexNode inClause = transformIntoInClauseCondition(rexBuilder,
+                        filterOp.getRowType(), operand, min);
+                if (inClause != null) {
+                  newOperand = inClause;
+                  transformed = true;
+                }
+                // Otherwise this disjunction cannot be rewritten: we keep it
+                // untouched instead of giving up on the whole conjunction
+              } catch (SemanticException e) {
+                LOG.error("Exception in HivePointLookupOptimizerRule", e);
+                return call;
+              }
+            }
+            newOperands.add(newOperand);
+          }
+          if (!transformed) {
+            // No operand was rewritten, return the original call
+            return call;
+          }
+          node = RexUtil.composeConjunction(rexBuilder, newOperands, false);
+          break;
+        case OR:
+          try {
+            node = transformIntoInClauseCondition(rexBuilder,
+                    filterOp.getRowType(), call, min);
+            if (node == null) {
+              return call;
+            }
+          } catch (SemanticException e) {
+            LOG.error("Exception in HivePointLookupOptimizerRule", e);
+            return call;
+          }
+          break;
+        default:
+          return super.visitCall(call);
+      }
+      return node;
+    }
+
+    /**
+     * Tries to turn an OR condition into a single IN clause.
+     *
+     * Every operand of the flattened OR is converted to CNF and must consist only
+     * of equalities between an input reference and a literal (in either order).
+     * When a single column is involved the result has the form
+     * IN(col, v1, ..., vn); when several columns are involved, the column list and
+     * each value list are wrapped in a ROW call.
+     *
+     * @param rexBuilder builder used to create the new expressions
+     * @param inputSchema row type of the filter; used to look up field names
+     * @param condition the condition to rewrite; asserted to be an OR
+     * @param min minimum number of OR operands required to attempt the rewrite
+     * @return the IN clause, or null if the condition cannot be rewritten
+     * @throws SemanticException declared by TypeConverter.convert, which is called
+     *         on the type of every referenced column
+     */
+    private static RexNode transformIntoInClauseCondition(RexBuilder rexBuilder, RelDataType inputSchema,
+            RexNode condition, int min) throws SemanticException {
+      assert condition.getKind() == SqlKind.OR;
+
+      // 1. We extract the information necessary to create the predicate for the new
+      //    filter: for each column, the list of constants it is compared to, with
+      //    position i in the list corresponding to the i-th OR operand
+      ListMultimap<RexInputRef,RexLiteral> columnConstantsMap = ArrayListMultimap.create();
+      ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
+      if (operands.size() < min) {
+        // Fewer OR operands than the configured minimum, we bail out
+        return null;
+      }
+      for (int i = 0; i < operands.size(); i++) {
+        RexNode operand = operands.get(i);
+
+        final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand);
+        final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);
+
+        for (RexNode conjunction: conjunctions) {
+          // 1.1. If it is not a RexCall, we bail out
+          if (!(conjunction instanceof RexCall)) {
+            return null;
+          }
+          // 1.2. We extract the information that we need
+          RexCall conjCall = (RexCall) conjunction;
+          if(conjCall.getOperator().getKind() == SqlKind.EQUALS) {
+            if (conjCall.operands.get(0) instanceof RexInputRef &&
+                    conjCall.operands.get(1) instanceof RexLiteral) {
+              // column = literal
+              RexInputRef ref = (RexInputRef) conjCall.operands.get(0);
+              RexLiteral literal = (RexLiteral) conjCall.operands.get(1);
+              columnConstantsMap.put(ref, literal);
+              if (columnConstantsMap.get(ref).size() != i+1) {
+                // The column must have contributed exactly one constant per OR
+                // operand seen so far (i+1 in total); otherwise we bail out
+                return null;
+              }
+            } else if (conjCall.operands.get(1) instanceof RexInputRef &&
+                    conjCall.operands.get(0) instanceof RexLiteral) {
+              // literal = column
+              RexInputRef ref = (RexInputRef) conjCall.operands.get(1);
+              RexLiteral literal = (RexLiteral) conjCall.operands.get(0);
+              columnConstantsMap.put(ref, literal);
+              if (columnConstantsMap.get(ref).size() != i+1) {
+                // The column must have contributed exactly one constant per OR
+                // operand seen so far (i+1 in total); otherwise we bail out
+                return null;
+              }
+            } else {
+              // Not a comparison between a column and a literal, we bail out
+              return null;
+            }
+          } else {
+            // Not an equality, we bail out
+            return null;
+          }
+        }
+      }
+
+      // 2. We build the new predicate and return it
+      List<RexNode> newOperands = new ArrayList<RexNode>(operands.size());
+      // 2.1 Create structs
+      // NOTE(review): names, paramsTypes, structReturnType and newOperandsTypes are
+      // filled below but never read in this method. TypeConverter.convert is the
+      // only call in this method that can raise SemanticException - confirm
+      // whether that is relied upon before removing any of them.
+      List<RexInputRef> columns = new ArrayList<RexInputRef>();
+      List<String> names = new ArrayList<String>();
+      ImmutableList.Builder<RelDataType> paramsTypes = ImmutableList.builder();
+      List<TypeInfo> structReturnType = new ArrayList<TypeInfo>();
+      ImmutableList.Builder<RelDataType> newOperandsTypes = ImmutableList.builder();
+      for (int i = 0; i < operands.size(); i++) {
+        List<RexLiteral> constantFields = new ArrayList<RexLiteral>(operands.size());
+
+        for (RexInputRef ref : columnConstantsMap.keySet()) {
+          // If any of the elements was not referenced by every operand, we bail out
+          if (columnConstantsMap.get(ref).size() <= i) {
+            return null;
+          }
+          RexLiteral columnConstant = columnConstantsMap.get(ref).get(i);
+          if (i == 0) {
+            // The column side of the IN clause is collected only once
+            columns.add(ref);
+            names.add(inputSchema.getFieldNames().get(ref.getIndex()));
+            paramsTypes.add(ref.getType());
+            structReturnType.add(TypeConverter.convert(ref.getType()));
+          }
+          constantFields.add(columnConstant);
+        }
+
+        if (i == 0) {
+          // First operand of the IN clause: the column, or a ROW of columns
+          RexNode columnsRefs;
+          if (columns.size() == 1) {
+            columnsRefs = columns.get(0);
+          } else {
+            // Create STRUCT clause
+            columnsRefs = rexBuilder.makeCall(SqlStdOperatorTable.ROW, columns);
+          }
+          newOperands.add(columnsRefs);
+          newOperandsTypes.add(columnsRefs.getType());
+        }
+        // One value operand per OR operand: the constant, or a ROW of constants
+        RexNode values;
+        if (constantFields.size() == 1) {
+          values = constantFields.get(0);
+        } else {
+          // Create STRUCT clause
+          values = rexBuilder.makeCall(SqlStdOperatorTable.ROW, constantFields);
+        }
+        newOperands.add(values);
+        newOperandsTypes.add(values.getType());
+      }
+
+      // 3. Create and return IN clause
+      return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands);
+    }
+
+  }
+
+  /**
+   * Merge IN clauses, when possible.
+   */
+  protected static class RexMergeInClause extends RexShuttle {
+    private final RexBuilder rexBuilder;
+
+    RexMergeInClause(RexBuilder rexBuilder) {
+      this.rexBuilder = rexBuilder;
+    }
+
+    /**
+     * Merges the IN clauses found among the operands of an AND or OR call.
+     *
+     * IN clauses are grouped by the string form of their first operand; IN
+     * clauses whose first operand is not deterministic are left untouched.
+     * Under AND only the values common to all IN clauses of a group are kept;
+     * if no common value is left the conjunction is replaced by FALSE. Under OR
+     * all values of a group are kept. Any other call is delegated to RexShuttle.
+     *
+     * @param call the call being visited
+     * @return the expression with merged IN clauses
+     */
+    @Override public RexNode visitCall(RexCall call) {
+      RexNode node;
+      final List<RexNode> operands;
+      final List<RexNode> newOperands;
+      // Maps the string form of an expression back to the expression itself
+      Map<String,RexNode> stringToExpr = Maps.newHashMap();
+      // Maps the first operand of the IN clauses to the values to keep for it
+      Multimap<String,String> inLHSExprToRHSExprs = LinkedHashMultimap.create();
+      switch (call.getKind()) {
+        case AND:
+          // IN clauses need to be combined by keeping only common elements
+          operands = Lists.newArrayList(RexUtil.flattenAnd(call.getOperands()));
+          for (int i = 0; i < operands.size(); i++) {
+            RexNode operand = operands.get(i);
+            if (operand.getKind() == SqlKind.IN) {
+              RexCall inCall = (RexCall) operand;
+              if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+                continue;
+              }
+              String ref = inCall.getOperands().get(0).toString();
+              stringToExpr.put(ref, inCall.getOperands().get(0));
+              if (inLHSExprToRHSExprs.containsKey(ref)) {
+                Set<String> expressions = Sets.newHashSet();
+                for (int j = 1; j < inCall.getOperands().size(); j++) {
+                  String expr = inCall.getOperands().get(j).toString();
+                  expressions.add(expr);
+                  stringToExpr.put(expr, inCall.getOperands().get(j));
+                }
+                inLHSExprToRHSExprs.get(ref).retainAll(expressions);
+                if (!inLHSExprToRHSExprs.containsKey(ref)) {
+                  // A Multimap does not keep a key once all of its values have
+                  // been removed, so the empty intersection would otherwise go
+                  // unnoticed and the predicate would be silently dropped.
+                  // The IN clauses have no value in common and we are within
+                  // an AND, hence the whole conjunction is FALSE.
+                  // NOTE(review): this also maps a NULL left-hand side to
+                  // FALSE; equivalent for a filter condition, confirm if the
+                  // shuttle is ever applied below a NOT.
+                  return rexBuilder.makeLiteral(false);
+                }
+              } else {
+                for (int j = 1; j < inCall.getOperands().size(); j++) {
+                  String expr = inCall.getOperands().get(j).toString();
+                  inLHSExprToRHSExprs.put(ref, expr);
+                  stringToExpr.put(expr, inCall.getOperands().get(j));
+                }
+              }
+              // The IN clause is re-created below from the collected values
+              operands.remove(i);
+              --i;
+            }
+          }
+          // Create IN clauses
+          newOperands = createInClauses(rexBuilder, stringToExpr, inLHSExprToRHSExprs);
+          newOperands.addAll(operands);
+          // Return node
+          node = RexUtil.composeConjunction(rexBuilder, newOperands, false);
+          break;
+        case OR:
+          // IN clauses need to be combined by keeping all elements
+          operands = Lists.newArrayList(RexUtil.flattenOr(call.getOperands()));
+          for (int i = 0; i < operands.size(); i++) {
+            RexNode operand = operands.get(i);
+            if (operand.getKind() == SqlKind.IN) {
+              RexCall inCall = (RexCall) operand;
+              if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+                continue;
+              }
+              String ref = inCall.getOperands().get(0).toString();
+              stringToExpr.put(ref, inCall.getOperands().get(0));
+              for (int j = 1; j < inCall.getOperands().size(); j++) {
+                String expr = inCall.getOperands().get(j).toString();
+                inLHSExprToRHSExprs.put(ref, expr);
+                stringToExpr.put(expr, inCall.getOperands().get(j));
+              }
+              // The IN clause is re-created below from the collected values
+              operands.remove(i);
+              --i;
+            }
+          }
+          // Create IN clauses
+          newOperands = createInClauses(rexBuilder, stringToExpr, inLHSExprToRHSExprs);
+          newOperands.addAll(operands);
+          // Return node
+          node = RexUtil.composeDisjunction(rexBuilder, newOperands, false);
+          break;
+        default:
+          return super.visitCall(call);
+      }
+      return node;
+    }
+
+    /**
+     * Builds one expression per key of the given multimap: an IN call whose first
+     * operand is the expression registered for the key and whose remaining
+     * operands are the expressions registered for its values, or a FALSE literal
+     * if the key has no values.
+     *
+     * @param rexBuilder builder used to create the new expressions
+     * @param stringToExpr lookup from string form to expression
+     * @param inLHSExprToRHSExprs first operand of each IN clause mapped to its values
+     * @return the newly created expressions
+     */
+    private static List<RexNode> createInClauses(RexBuilder rexBuilder, Map<String, RexNode> stringToExpr,
+            Multimap<String, String> inLHSExprToRHSExprs) {
+      final List<RexNode> result = Lists.newArrayList();
+      for (Entry<String,Collection<String>> lhsToValues : inLHSExprToRHSExprs.asMap().entrySet()) {
+        final Collection<String> valueKeys = lhsToValues.getValue();
+        if (valueKeys.isEmpty()) {
+          // No candidate value for this left-hand side
+          result.add(rexBuilder.makeLiteral(false));
+          continue;
+        }
+        // First operand is the left-hand side, the remaining ones are the values
+        final List<RexNode> inOperands = new ArrayList<RexNode>(valueKeys.size() + 1);
+        inOperands.add(stringToExpr.get(lhsToValues.getKey()));
+        for (String valueKey : valueKeys) {
+          inOperands.add(stringToExpr.get(valueKey));
+        }
+        result.add(rexBuilder.makeCall(HiveIn.INSTANCE, inOperands));
+      }
+      return result;
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index 9cc9ea9..9911179 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -25,8 +25,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -49,13 +47,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Expression processor factory for partition condition removing. Each 
processor tries to
@@ -368,50 +365,66 @@ public final class PcrExprProcFactory {
           return getResultWrapFromResults(results, fd, newNodeOutputs);
         }
         return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, 
newNodeOutputs));
-      } else if (fd.getGenericUDF() instanceof GenericUDFIn) {
-          List<ExprNodeDesc> children = fd.getChildren();
-          boolean removePredElem = false;
-          ExprNodeDesc lhs = children.get(0);
-
-          if (lhs instanceof ExprNodeGenericFuncDesc) {
-              // Make sure that the generic udf is deterministic
-              if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) 
lhs)
-                  .getGenericUDF())) {
-                boolean hasOnlyPartCols = true;
-                boolean hasDynamicListDesc = false;
-
-                for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) 
lhs).getChildren()) {
-                  // Check if the current field expression contains only
-                  // partition column or a virtual column or constants.
-                  // If yes, this filter predicate is a candidate for this 
optimization.
-                  if (!(ed instanceof ExprNodeColumnDesc &&
-                       
((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
-                    hasOnlyPartCols = false;
-                    break;
-                  }
-                }
+      } else if (FunctionRegistry.isIn(fd)) {
+        List<ExprNodeDesc> children = fd.getChildren();
+        boolean removePredElem = false;
+        ExprNodeDesc lhs = children.get(0);
+
+        if (lhs instanceof ExprNodeColumnDesc) {
+          // It is an IN clause on a column
+          if (((ExprNodeColumnDesc)lhs).getIsPartitionColOrVirtualCol()) {
+            // It is a partition column, we can proceed
+            removePredElem = true;
+          }
+          if (removePredElem) {
+            // We should not remove the dynamic partition pruner generated 
synthetic predicates.
+            for (int i = 1; i < children.size(); i++) {
+              if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+                removePredElem = false;
+                break;
+              }
+            }
+          }
+        } else if (lhs instanceof ExprNodeGenericFuncDesc) {
+          // It is an IN clause on a struct
+          // Make sure that the generic udf is deterministic
+          if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
+              .getGenericUDF())) {
+            boolean hasOnlyPartCols = true;
+            boolean hasDynamicListDesc = false;
+
+            for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) 
lhs).getChildren()) {
+              // Check if the current field expression contains only
+              // partition column or a virtual column or constants.
+              // If yes, this filter predicate is a candidate for this 
optimization.
+              if (!(ed instanceof ExprNodeColumnDesc &&
+                   ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
+                hasOnlyPartCols = false;
+                break;
+              }
+            }
 
-                // If we have non-partition columns, we cannot remove the 
predicate.
-                if (hasOnlyPartCols) {
-                  // We should not remove the dynamic partition pruner 
generated synthetic predicates.
-                  for (int i = 1; i < children.size(); i++) {
-                    if (children.get(i) instanceof ExprNodeDynamicListDesc) {
-                      hasDynamicListDesc = true;
-                      break;
-                    }
-                  }
+            // If we have non-partition columns, we cannot remove the 
predicate.
+            if (hasOnlyPartCols) {
+              // We should not remove the dynamic partition pruner generated 
synthetic predicates.
+              for (int i = 1; i < children.size(); i++) {
+                if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+                  hasDynamicListDesc = true;
+                  break;
                 }
-
-                removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
               }
+            }
+
+            removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
           }
+        }
 
-          // If removePredElem is set to true, return true as this is a 
potential candidate
-          //  for partition condition remover. Else, set the WalkState for 
this node to unknown.
-          return removePredElem ?
-            new NodeInfoWrapper(WalkState.TRUE, null,
-            new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
-            new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, 
nodeOutputs)) ;
+        // If removePredElem is set to true, return true as this is a 
potential candidate
+        // for partition condition remover. Else, set the WalkState for this 
node to unknown.
+        return removePredElem ?
+          new NodeInfoWrapper(WalkState.TRUE, null,
+          new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
+          new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, 
nodeOutputs)) ;
       } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
         // If it's a non-deterministic UDF, set unknown to true
         return new NodeInfoWrapper(WalkState.UNKNOWN, null,

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index fd2246b..b59347d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -153,6 +153,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTranspos
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
@@ -1138,23 +1139,32 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // 3. Run exhaustive PPD, add not null filters, transitive inference,
       // constant propagation, constant folding
+      List<RelOptRule> rules = Lists.newArrayList();
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING)) {
+        
rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING);
+      } else {
+        rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC);
+      }
+      rules.add(HiveFilterSetOpTransposeRule.INSTANCE);
+      rules.add(HiveFilterSortTransposeRule.INSTANCE);
+      rules.add(HiveFilterJoinRule.JOIN);
+      rules.add(HiveFilterJoinRule.FILTER_ON_JOIN);
+      rules.add(new HiveFilterAggregateTransposeRule(Filter.class, 
HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class));
+      rules.add(new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY));
+      rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE);
+      rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE);
+      rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE);
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+        final int min = 
conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+        rules.add(new HivePointLookupOptimizerRule(min));
+      }
+      rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN);
+      rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
+      rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
+      rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
       basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, 
HepMatchOrder.BOTTOM_UP,
-          conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING) ? 
HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING
-              : HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC,
-          HiveFilterSetOpTransposeRule.INSTANCE,
-          HiveFilterSortTransposeRule.INSTANCE,
-          HiveFilterJoinRule.JOIN,
-          HiveFilterJoinRule.FILTER_ON_JOIN,
-          new HiveFilterAggregateTransposeRule(Filter.class, 
HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class),
-          new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY),
-          HiveReduceExpressionsRule.PROJECT_INSTANCE,
-          HiveReduceExpressionsRule.FILTER_INSTANCE,
-          HiveReduceExpressionsRule.JOIN_INSTANCE,
-          HiveJoinAddNotNullRule.INSTANCE_JOIN,
-          HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN,
-          HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN,
-          HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
+              rules.toArray(new RelOptRule[rules.size()]));
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
         "Calcite: Prejoin ordering transformation, PPD, not null predicates, 
transitive inference, constant folding");
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/queries/clientpositive/auto_join19_inclause.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_join19_inclause.q 
b/ql/src/test/queries/clientpositive/auto_join19_inclause.q
new file mode 100644
index 0000000..7773289
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/auto_join19_inclause.q
@@ -0,0 +1,18 @@
+set hive.mapred.mode=nonstrict;
+set hive.auto.convert.join = true;
+set hive.optimize.point.lookup.min=2;
+
+CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE;
+
+explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value 
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11');
+
+
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value 
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11');
+
+
+SELECT sum(hash(dest1.key,dest1.value)) FROM dest1;

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/queries/clientpositive/filter_in_or_dup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/filter_in_or_dup.q 
b/ql/src/test/queries/clientpositive/filter_in_or_dup.q
new file mode 100644
index 0000000..34a5139
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/filter_in_or_dup.q
@@ -0,0 +1,19 @@
+set hive.optimize.point.lookup.min=2;
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2');
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3');
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2');

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out 
b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
new file mode 100644
index 0000000..3f70055
--- /dev/null
+++ b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
@@ -0,0 +1,130 @@
+PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value 
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value 
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-4 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:src2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:src2 
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col0, _col4
+                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: UDFToInteger(_col0) (type: int), _col4 (type: 
string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2200 Data size: 23372 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2200 Data size: 23372 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.dest1
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value 
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value 
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+407444119660

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/constprog_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_semijoin.q.out 
b/ql/src/test/results/clientpositive/constprog_semijoin.q.out
index 0e0e883..940a148 100644
--- a/ql/src/test/results/clientpositive/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/constprog_semijoin.q.out
@@ -502,7 +502,7 @@ STAGE PLANS:
             alias: table1
             Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: ((((dimid = 100) = true) and (dimid) IN (100, 200)) 
and (dimid = 100) is not null) (type: boolean)
+              predicate: (((dimid) IN (100, 200) and ((dimid = 100) = true)) 
and (dimid = 100) is not null) (type: boolean)
               Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: id (type: int), val (type: string), val1 (type: 
string), dimid (type: int)
@@ -518,7 +518,7 @@ STAGE PLANS:
             alias: table3
             Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column 
stats: NONE
             Filter Operator
-              predicate: ((((id = 100) = true) and (id) IN (100, 200)) and (id 
= 100) is not null) (type: boolean)
+              predicate: (((id) IN (100, 200) and ((id = 100) = true)) and (id 
= 100) is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: id (type: int), (id = 100) (type: boolean)

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out 
b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
index ddb05e2..eca29df 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
@@ -153,7 +153,7 @@ STAGE PLANS:
           TableScan
             alias: acid
             Filter Operator
-              predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: 
boolean)
+              predicate: (key = 'foo') (type: boolean)
               Select Operator
                 expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
                 outputColumnNames: _col0, _col3
@@ -390,7 +390,7 @@ STAGE PLANS:
           TableScan
             alias: acid
             Filter Operator
-              predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: 
boolean)
+              predicate: (key = 'foo') (type: boolean)
               Select Operator
                 expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
                 outputColumnNames: _col0, _col3

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out 
b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
new file mode 100644
index 0000000..f863ac3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
@@ -0,0 +1,96 @@
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: f
+          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column 
stats: NONE
+          Filter Operator
+            predicate: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: f
+          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column 
stats: NONE
+          Filter Operator
+            predicate: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: f
+          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column 
stats: NONE
+          Filter Operator
+            predicate: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query13.q.out 
b/ql/src/test/results/clientpositive/perf/query13.q.out
index cc40e79..ad50576 100644
--- a/ql/src/test/results/clientpositive/perf/query13.q.out
+++ b/ql/src/test/results/clientpositive/perf/query13.q.out
@@ -128,7 +128,7 @@ Stage-0
           SHUFFLE [RS_39]
             Group By Operator [GBY_38] (rows=1 width=112)
               
Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col5)","avg(_col7)","avg(_col8)","sum(_col8)"]
-              Merge Join Operator [MERGEJOIN_73] (rows=18150000 width=1014)
+              Merge Join Operator [MERGEJOIN_73] (rows=9075000 width=1014)
                 
Conds:RS_34._col0=RS_35._col0(Inner),Output:["_col5","_col7","_col8"]
               <-Map 12 [SIMPLE_EDGE]
                 SHUFFLE [RS_35]
@@ -142,19 +142,19 @@ Stage-0
               <-Reducer 5 [SIMPLE_EDGE]
                 SHUFFLE [RS_34]
                   PartitionCols:_col0
-                  Select Operator [SEL_30] (rows=16500000 width=1014)
+                  Select Operator [SEL_30] (rows=8250000 width=1014)
                     Output:["_col0","_col5","_col7","_col8"]
-                    Filter Operator [FIL_29] (rows=16500000 width=1014)
+                    Filter Operator [FIL_29] (rows=8250000 width=1014)
                       predicate:(((_col17) IN ('KY', 'GA', 'NM') and _col9 
BETWEEN 100 AND 200) or ((_col17) IN ('MT', 'OR', 'IN') and _col9 BETWEEN 150 
AND 300) or ((_col17) IN ('WI', 'MO', 'WV') and _col9 BETWEEN 50 AND 250))
-                      Merge Join Operator [MERGEJOIN_72] (rows=22000000 
width=1014)
+                      Merge Join Operator [MERGEJOIN_72] (rows=11000000 
width=1014)
                         
Conds:RS_26._col3=RS_27._col0(Inner),Output:["_col0","_col5","_col7","_col8","_col9","_col17"]
                       <-Map 11 [SIMPLE_EDGE]
                         SHUFFLE [RS_27]
                           PartitionCols:_col0
-                          Select Operator [SEL_25] (rows=20000000 width=1014)
+                          Select Operator [SEL_25] (rows=10000000 width=1014)
                             Output:["_col0","_col1"]
-                            Filter Operator [FIL_67] (rows=20000000 width=1014)
-                              predicate:((((ca_state) IN ('KY', 'GA', 'NM') or 
(ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and 
(ca_country = 'United States')) and ca_address_sk is not null)
+                            Filter Operator [FIL_67] (rows=10000000 width=1014)
+                              predicate:(((ca_state) IN ('KY', 'GA', 'NM', 
'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) and 
ca_address_sk is not null)
                               TableScan [TS_23] (rows=40000000 width=1014)
                                 
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
                       <-Reducer 4 [SIMPLE_EDGE]

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query27.q.out 
b/ql/src/test/results/clientpositive/perf/query27.q.out
index 635c402..3a32d7b 100644
--- a/ql/src/test/results/clientpositive/perf/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/query27.q.out
@@ -57,7 +57,7 @@ Stage-0
                                 Select Operator [SEL_11] (rows=852 width=1910)
                                   Output:["_col0","_col1"]
                                   Filter Operator [FIL_53] (rows=852 
width=1910)
-                                    predicate:((s_state) IN ('KS', 'AL', 'MN', 
'AL', 'SC', 'VT') and s_store_sk is not null)
+                                    predicate:((s_state) IN ('KS', 'AL', 'MN', 
'SC', 'VT') and s_store_sk is not null)
                                     TableScan [TS_9] (rows=1704 width=1910)
                                       
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
                             <-Reducer 3 [SIMPLE_EDGE]

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query34.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query34.q.out 
b/ql/src/test/results/clientpositive/perf/query34.q.out
index 6fa6985..a08c3ff 100644
--- a/ql/src/test/results/clientpositive/perf/query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/query34.q.out
@@ -94,7 +94,7 @@ Stage-0
                                         Select Operator [SEL_5] (rows=36524 
width=1119)
                                           Output:["_col0"]
                                           Filter Operator [FIL_53] (rows=36524 
width=1119)
-                                            predicate:(((d_dom BETWEEN 1 AND 3 
or d_dom BETWEEN 25 AND 28) and (d_year) IN (1998, 1999, 2000)) and d_date_sk 
is not null)
+                                            predicate:(((d_year) IN (1998, 
1999, 2000) and (d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28)) and 
d_date_sk is not null)
                                             TableScan [TS_3] (rows=73049 
width=1119)
                                               
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query48.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query48.q.out 
b/ql/src/test/results/clientpositive/perf/query48.q.out
index 691f5ad..d536bb5 100644
--- a/ql/src/test/results/clientpositive/perf/query48.q.out
+++ b/ql/src/test/results/clientpositive/perf/query48.q.out
@@ -23,7 +23,7 @@ Stage-0
           SHUFFLE [RS_31]
             Group By Operator [GBY_30] (rows=1 width=8)
               Output:["_col0"],aggregations:["sum(_col4)"]
-              Merge Join Operator [MERGEJOIN_57] (rows=18150000 width=1014)
+              Merge Join Operator [MERGEJOIN_57] (rows=9075000 width=1014)
                 Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col4"]
               <-Map 10 [SIMPLE_EDGE]
                 SHUFFLE [RS_27]
@@ -37,19 +37,19 @@ Stage-0
               <-Reducer 4 [SIMPLE_EDGE]
                 SHUFFLE [RS_26]
                   PartitionCols:_col0
-                  Select Operator [SEL_22] (rows=16500000 width=1014)
+                  Select Operator [SEL_22] (rows=8250000 width=1014)
                     Output:["_col0","_col4"]
-                    Filter Operator [FIL_21] (rows=16500000 width=1014)
+                    Filter Operator [FIL_21] (rows=8250000 width=1014)
                       predicate:(((_col12) IN ('KY', 'GA', 'NM') and _col6 
BETWEEN 0 AND 2000) or ((_col12) IN ('MT', 'OR', 'IN') and _col6 BETWEEN 150 
AND 3000) or ((_col12) IN ('WI', 'MO', 'WV') and _col6 BETWEEN 50 AND 25000))
-                      Merge Join Operator [MERGEJOIN_56] (rows=22000000 
width=1014)
+                      Merge Join Operator [MERGEJOIN_56] (rows=11000000 
width=1014)
                         
Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col0","_col4","_col6","_col12"]
                       <-Map 9 [SIMPLE_EDGE]
                         SHUFFLE [RS_19]
                           PartitionCols:_col0
-                          Select Operator [SEL_11] (rows=20000000 width=1014)
+                          Select Operator [SEL_11] (rows=10000000 width=1014)
                             Output:["_col0","_col1"]
-                            Filter Operator [FIL_52] (rows=20000000 width=1014)
-                              predicate:((((ca_state) IN ('KY', 'GA', 'NM') or 
(ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and 
(ca_country = 'United States')) and ca_address_sk is not null)
+                            Filter Operator [FIL_52] (rows=10000000 width=1014)
+                              predicate:(((ca_state) IN ('KY', 'GA', 'NM', 
'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) and 
ca_address_sk is not null)
                               TableScan [TS_9] (rows=40000000 width=1014)
                                 
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
                       <-Reducer 3 [SIMPLE_EDGE]

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query68.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query68.q.out 
b/ql/src/test/results/clientpositive/perf/query68.q.out
index 7828cfc..38e4644 100644
--- a/ql/src/test/results/clientpositive/perf/query68.q.out
+++ b/ql/src/test/results/clientpositive/perf/query68.q.out
@@ -128,7 +128,7 @@ Stage-0
                                                     Select Operator [SEL_5] 
(rows=18262 width=1119)
                                                       Output:["_col0"]
                                                       Filter Operator [FIL_79] 
(rows=18262 width=1119)
-                                                        predicate:((d_dom 
BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null)
+                                                        predicate:(((d_year) 
IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2) and d_date_sk is not null)
                                                         TableScan [TS_3] 
(rows=73049 width=1119)
                                                           
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query73.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query73.q.out 
b/ql/src/test/results/clientpositive/perf/query73.q.out
index e367f51..cf3a75e 100644
--- a/ql/src/test/results/clientpositive/perf/query73.q.out
+++ b/ql/src/test/results/clientpositive/perf/query73.q.out
@@ -94,7 +94,7 @@ Stage-0
                                         Select Operator [SEL_5] (rows=18262 
width=1119)
                                           Output:["_col0"]
                                           Filter Operator [FIL_53] (rows=18262 
width=1119)
-                                            predicate:((d_dom BETWEEN 1 AND 2 
and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null)
+                                            predicate:(((d_year) IN (1998, 
1999, 2000) and d_dom BETWEEN 1 AND 2) and d_date_sk is not null)
                                             TableScan [TS_3] (rows=73049 
width=1119)
                                               
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query79.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query79.q.out 
b/ql/src/test/results/clientpositive/perf/query79.q.out
index fdc5773..bf537b9 100644
--- a/ql/src/test/results/clientpositive/perf/query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/query79.q.out
@@ -96,7 +96,7 @@ Stage-0
                                           Select Operator [SEL_5] (rows=18262 
width=1119)
                                             Output:["_col0"]
                                             Filter Operator [FIL_53] 
(rows=18262 width=1119)
-                                              predicate:(((d_dow = 1) and 
(d_year) IN (1998, 1999, 2000)) and d_date_sk is not null)
+                                              predicate:(((d_year) IN (1998, 
1999, 2000) and (d_dow = 1)) and d_date_sk is not null)
                                               TableScan [TS_3] (rows=73049 
width=1119)
                                                 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query82.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query82.q.out 
b/ql/src/test/results/clientpositive/perf/query82.q.out
index 2461644..57a50c7 100644
--- a/ql/src/test/results/clientpositive/perf/query82.q.out
+++ b/ql/src/test/results/clientpositive/perf/query82.q.out
@@ -51,7 +51,7 @@ Stage-0
                             Select Operator [SEL_2] (rows=115500 width=1436)
                               Output:["_col0","_col1","_col2","_col3"]
                               Filter Operator [FIL_38] (rows=115500 width=1436)
-                                predicate:((i_current_price BETWEEN 30 AND 60 
and (i_manufact_id) IN (437, 129, 727, 663)) and i_item_sk is not null)
+                                predicate:(((i_manufact_id) IN (437, 129, 727, 
663) and i_current_price BETWEEN 30 AND 60) and i_item_sk is not null)
                                 TableScan [TS_0] (rows=462000 width=1436)
                                   
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"]
                         <-Map 6 [SIMPLE_EDGE]

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query85.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query85.q.out 
b/ql/src/test/results/clientpositive/perf/query85.q.out
index 72ac500..93b5f4e 100644
--- a/ql/src/test/results/clientpositive/perf/query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/query85.q.out
@@ -23,22 +23,22 @@ Stage-0
       File Output Operator [FS_57]
         Limit [LIM_56] (rows=100 width=1014)
           Number of rows:100
-          Select Operator [SEL_55] (rows=9982500 width=1014)
+          Select Operator [SEL_55] (rows=4991250 width=1014)
             Output:["_col0","_col1","_col2","_col3"]
           <-Reducer 9 [SIMPLE_EDGE]
             SHUFFLE [RS_54]
-              Select Operator [SEL_53] (rows=9982500 width=1014)
+              Select Operator [SEL_53] (rows=4991250 width=1014)
                 Output:["_col0","_col1","_col2","_col3"]
-                Group By Operator [GBY_52] (rows=9982500 width=1014)
+                Group By Operator [GBY_52] (rows=4991250 width=1014)
                   
Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)"],keys:KEY._col0
                 <-Reducer 8 [SIMPLE_EDGE]
                   SHUFFLE [RS_51]
                     PartitionCols:_col0
-                    Group By Operator [GBY_50] (rows=19965000 width=1014)
+                    Group By Operator [GBY_50] (rows=9982500 width=1014)
                       
Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col4)","avg(_col14)","avg(_col13)"],keys:_col28
-                      Select Operator [SEL_49] (rows=19965000 width=1014)
+                      Select Operator [SEL_49] (rows=9982500 width=1014)
                         Output:["_col28","_col4","_col14","_col13"]
-                        Merge Join Operator [MERGEJOIN_107] (rows=19965000 
width=1014)
+                        Merge Join Operator [MERGEJOIN_107] (rows=9982500 
width=1014)
                           
Conds:RS_46._col11=RS_47._col0(Inner),Output:["_col4","_col13","_col14","_col28"]
                         <-Map 17 [SIMPLE_EDGE]
                           SHUFFLE [RS_47]
@@ -52,7 +52,7 @@ Stage-0
                         <-Reducer 7 [SIMPLE_EDGE]
                           SHUFFLE [RS_46]
                             PartitionCols:_col11
-                            Merge Join Operator [MERGEJOIN_106] (rows=18150000 
width=1014)
+                            Merge Join Operator [MERGEJOIN_106] (rows=9075000 
width=1014)
                               
Conds:RS_43._col0=RS_44._col0(Inner),Output:["_col4","_col11","_col13","_col14"]
                             <-Map 16 [SIMPLE_EDGE]
                               SHUFFLE [RS_44]
@@ -66,19 +66,19 @@ Stage-0
                             <-Reducer 6 [SIMPLE_EDGE]
                               SHUFFLE [RS_43]
                                 PartitionCols:_col0
-                                Select Operator [SEL_36] (rows=16500000 
width=1014)
+                                Select Operator [SEL_36] (rows=8250000 
width=1014)
                                   
Output:["_col0","_col11","_col13","_col14","_col4"]
-                                  Filter Operator [FIL_35] (rows=16500000 
width=1014)
+                                  Filter Operator [FIL_35] (rows=8250000 
width=1014)
                                     predicate:(((_col23) IN ('KY', 'GA', 'NM') 
and _col6 BETWEEN 100 AND 200) or ((_col23) IN ('MT', 'OR', 'IN') and _col6 
BETWEEN 150 AND 300) or ((_col23) IN ('WI', 'MO', 'WV') and _col6 BETWEEN 50 
AND 250))
-                                    Merge Join Operator [MERGEJOIN_105] 
(rows=22000000 width=1014)
+                                    Merge Join Operator [MERGEJOIN_105] 
(rows=11000000 width=1014)
                                       
Conds:RS_32._col9=RS_33._col0(Inner),Output:["_col0","_col4","_col6","_col11","_col13","_col14","_col23"]
                                     <-Map 15 [SIMPLE_EDGE]
                                       SHUFFLE [RS_33]
                                         PartitionCols:_col0
-                                        Select Operator [SEL_28] 
(rows=20000000 width=1014)
+                                        Select Operator [SEL_28] 
(rows=10000000 width=1014)
                                           Output:["_col0","_col1"]
-                                          Filter Operator [FIL_98] 
(rows=20000000 width=1014)
-                                            predicate:((((ca_state) IN ('KY', 
'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 
'WV')) and (ca_country = 'United States')) and ca_address_sk is not null)
+                                          Filter Operator [FIL_98] 
(rows=10000000 width=1014)
+                                            predicate:(((ca_state) IN ('KY', 
'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United 
States')) and ca_address_sk is not null)
                                             TableScan [TS_26] (rows=40000000 
width=1014)
                                               
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
                                     <-Reducer 5 [SIMPLE_EDGE]

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out 
b/ql/src/test/results/clientpositive/pointlookup2.q.out
index fb17e72..869e4cd 100644
--- a/ql/src/test/results/clientpositive/pointlookup2.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -985,21 +985,17 @@ STAGE PLANS:
             alias: t1
             Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE 
Column stats: NONE
             GatherStats: false
-            Filter Operator
-              isSamplingPred: false
-              predicate: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean)
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string), ds (type: 
string)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  null sort order: 
-                  sort order: 
-                  Statistics: Num rows: 20 Data size: 160 Basic stats: 
COMPLETE Column stats: NONE
-                  tag: 0
-                  value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string)
-                  auto parallelism: false
+            Select Operator
+              expressions: key (type: int), value (type: string), ds (type: 
string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE 
Column stats: NONE
+                tag: 0
+                value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string)
+                auto parallelism: false
           TableScan
             alias: t2
             Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column 
stats: NONE
@@ -1169,11 +1165,11 @@ STAGE PLANS:
             0 
             1 
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column 
stats: NONE
           Filter Operator
             isSamplingPred: false
-            predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), 
const struct(2,'2000-04-09')) (type: boolean)
-            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
+            predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), 
const struct('2000-04-09',2)) (type: boolean)
+            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -1201,7 +1197,7 @@ STAGE PLANS:
               key expressions: _col4 (type: int), _col5 (type: string), _col2 
(type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
               tag: -1
               value expressions: _col0 (type: int), _col1 (type: string), 
_col3 (type: string)
               auto parallelism: false
@@ -1235,13 +1231,13 @@ STAGE PLANS:
         Select Operator
           expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), 
KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), 
KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out 
b/ql/src/test/results/clientpositive/pointlookup3.q.out
index d5c4157..e98ba76 100644
--- a/ql/src/test/results/clientpositive/pointlookup3.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -129,7 +129,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), 
const struct('2000-04-09',2)) (type: boolean)
+              predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), 
const struct(2,'2000-04-09')) (type: boolean)
               Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string), ds1 (type: 
string), ds2 (type: string)
@@ -374,14 +374,14 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (key = 1) (type: boolean)
+              predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), 
const struct(2,'2000-04-09')) (type: boolean)
               Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: value (type: string), ds1 (type: string)
-                outputColumnNames: _col1, _col2
+                expressions: key (type: int), value (type: string), ds1 (type: 
string)
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: 1 (type: int), _col1 (type: string), _col2 
(type: string), '2001-04-08' (type: string)
+                  key expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), '2001-04-08' (type: string)
                   null sort order: aaaa
                   sort order: ++++
                   Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE 
Column stats: NONE
@@ -441,7 +441,7 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
         Select Operator
-          expressions: 1 (type: int), KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string)
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 
(type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
@@ -1149,21 +1149,17 @@ STAGE PLANS:
             alias: t1
             Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE 
Column stats: NONE
             GatherStats: false
-            Filter Operator
-              isSamplingPred: false
-              predicate: (ds1) IN ('2000-04-08', '2000-04-09') (type: boolean)
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string), ds1 (type: 
string), ds2 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  null sort order: 
-                  sort order: 
-                  Statistics: Num rows: 20 Data size: 160 Basic stats: 
COMPLETE Column stats: NONE
-                  tag: 0
-                  value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string)
-                  auto parallelism: false
+            Select Operator
+              expressions: key (type: int), value (type: string), ds1 (type: 
string), ds2 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE 
Column stats: NONE
+                tag: 0
+                value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string)
+                auto parallelism: false
           TableScan
             alias: t1
             Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE 
Column stats: NONE
@@ -1337,11 +1333,11 @@ STAGE PLANS:
             0 
             1 
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7
-          Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column 
stats: NONE
           Filter Operator
             isSamplingPred: false
-            predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), 
const struct(2,'2000-04-09')) (type: boolean)
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
+            predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), 
const struct('2000-04-09',2)) (type: boolean)
+            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -1369,7 +1365,7 @@ STAGE PLANS:
               key expressions: _col4 (type: int), _col5 (type: string), _col2 
(type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
               tag: -1
               value expressions: _col0 (type: int), _col1 (type: string), 
_col3 (type: string), _col6 (type: string), _col7 (type: string)
               auto parallelism: false
@@ -1403,13 +1399,13 @@ STAGE PLANS:
         Select Operator
           expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), 
KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), 
KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 
(type: string), VALUE._col4 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7
-          Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup4.q.out 
b/ql/src/test/results/clientpositive/pointlookup4.q.out
index 0a9bd3e..6236272 100644
--- a/ql/src/test/results/clientpositive/pointlookup4.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup4.q.out
@@ -384,7 +384,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (struct(ds1,key,ds2)) IN (const 
struct('2000-04-08',1,'2001-04-08'), const struct('2000-04-09',2,'2001-04-09')) 
(type: boolean)
+              predicate: (struct(key,ds1,ds2)) IN (const 
struct(1,'2000-04-08','2001-04-08'), const struct(2,'2000-04-09','2001-04-09')) 
(type: boolean)
               Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string), ds1 (type: 
string), ds2 (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out 
b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
index 2547405..0ab1365 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
@@ -523,7 +523,7 @@ STAGE PLANS:
                   alias: table1
                   Statistics: Num rows: 10 Data size: 200 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((dimid = 100) = true) and (dimid) IN (100, 
200)) and (dimid = 100) is not null) (type: boolean)
+                    predicate: (((dimid) IN (100, 200) and ((dimid = 100) = 
true)) and (dimid = 100) is not null) (type: boolean)
                     Statistics: Num rows: 2 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: id (type: int), val (type: string), val1 
(type: string), dimid (type: int)
@@ -541,7 +541,7 @@ STAGE PLANS:
                   alias: table3
                   Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
-                    predicate: ((((id = 100) = true) and (id) IN (100, 200)) 
and (id = 100) is not null) (type: boolean)
+                    predicate: (((id) IN (100, 200) and ((id = 100) = true)) 
and (id = 100) is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
                     Select Operator
                       expressions: id (type: int), (id = 100) (type: boolean)

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out 
b/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out
index 5315f2c..3557a3b 100644
--- a/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out
+++ b/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out
@@ -1011,13 +1011,13 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcbucket_pruned
-                  filterExpr: (((value = 'One') and (key) IN (2, 3)) and (ds = 
'2008-04-08')) (type: boolean)
+                  filterExpr: (((key) IN (2, 3) and (value = 'One')) and (ds = 
'2008-04-08')) (type: boolean)
                   buckets included: [2,3,] of 16
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (((value = 'One') and (key) IN (2, 3)) and (ds 
= '2008-04-08')) (type: boolean)
+                    predicate: (((key) IN (2, 3) and (value = 'One')) and (ds 
= '2008-04-08')) (type: boolean)
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                     Select Operator
                       expressions: key (type: int), 'One' (type: string), 
'2008-04-08' (type: string)
@@ -1700,12 +1700,12 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcbucket_pruned
-                  filterExpr: (((value = 'One') and (key) IN (1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) and (ds = '2008-04-08')) (type: 
boolean)
+                  filterExpr: (((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17) and (value = 'One')) and (ds = '2008-04-08')) (type: 
boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (((value = 'One') and (key) IN (1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) and (ds = '2008-04-08')) (type: 
boolean)
+                    predicate: (((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17) and (value = 'One')) and (ds = '2008-04-08')) (type: 
boolean)
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                     Select Operator
                       expressions: key (type: int), 'One' (type: string), 
'2008-04-08' (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out 
b/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out
index 7a9932a..8fecbd7 100644
--- a/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out
@@ -317,7 +317,7 @@ Stage-0
             Select Operator [SEL_2] (rows=2 width=20)
               Output:["_col0","_col1","_col2","_col3"]
               Filter Operator [FIL_15] (rows=2 width=20)
-                predicate:((((dimid = 100) = true) and (dimid) IN (100, 200)) 
and (dimid = 100) is not null)
+                predicate:(((dimid) IN (100, 200) and ((dimid = 100) = true)) 
and (dimid = 100) is not null)
                 TableScan [TS_0] (rows=10 width=20)
                   
default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
         <-Map 3 [SIMPLE_EDGE]
@@ -328,7 +328,7 @@ Stage-0
               Select Operator [SEL_5] (rows=1 width=3)
                 Output:["_col0","_col1"]
                 Filter Operator [FIL_17] (rows=1 width=3)
-                  predicate:((((id = 100) = true) and (id) IN (100, 200)) and 
(id = 100) is not null)
+                  predicate:(((id) IN (100, 200) and ((id = 100) = true)) and 
(id = 100) is not null)
                   TableScan [TS_3] (rows=5 width=3)
                     default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out 
b/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out
index 0d6ad69..5a2ab91 100644
--- a/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out
@@ -278,7 +278,7 @@ STAGE PLANS:
                   alias: store
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                   Filter Operator
-                    predicate: ((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 
'VT') and s_store_sk is not null) (type: boolean)
+                    predicate: ((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') 
and s_store_sk is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                     Select Operator
                       expressions: s_store_sk (type: int), s_state (type: 
string)

http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out 
b/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
index 9fce991..a9e25e1 100644
--- a/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
@@ -269,7 +269,7 @@ STAGE PLANS:
             alias: store
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
             Filter Operator
-              predicate: ((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 'VT') 
and s_store_sk is not null) (type: boolean)
+              predicate: ((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and 
s_store_sk is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
               Select Operator
                 expressions: s_store_sk (type: int), s_state (type: string)

[39/50] [abbrv] hive git commit: HIVE-11424 : Rule to transform OR clauses into IN clauses in CBO (Jesus Camacho Rodriguez via Ashutosh Chauhan)

Reply via email to