HIVE-11424 : Rule to transform OR clauses into IN clauses in CBO (Jesus Camacho Rodriguez via Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8c8ff3f1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8c8ff3f1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8c8ff3f1 Branch: refs/heads/llap Commit: 8c8ff3f144921e9b985abe51eb82ebad94195b4a Parents: 09b00fc Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Tue Mar 22 23:41:00 2016 -0800 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Tue Mar 29 11:18:58 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/FunctionRegistry.java | 7 + .../hadoop/hive/ql/optimizer/Optimizer.java | 4 +- .../rules/HivePointLookupOptimizerRule.java | 381 +++++++++++++++++++ .../ql/optimizer/pcr/PcrExprProcFactory.java | 103 ++--- .../hadoop/hive/ql/parse/CalcitePlanner.java | 40 +- .../clientpositive/auto_join19_inclause.q | 18 + .../queries/clientpositive/filter_in_or_dup.q | 19 + .../clientpositive/auto_join19_inclause.q.out | 130 +++++++ .../clientpositive/constprog_semijoin.q.out | 4 +- .../dynpart_sort_optimization_acid.q.out | 4 +- .../clientpositive/filter_in_or_dup.q.out | 96 +++++ .../results/clientpositive/perf/query13.q.out | 14 +- .../results/clientpositive/perf/query27.q.out | 2 +- .../results/clientpositive/perf/query34.q.out | 2 +- .../results/clientpositive/perf/query48.q.out | 14 +- .../results/clientpositive/perf/query68.q.out | 2 +- .../results/clientpositive/perf/query73.q.out | 2 +- .../results/clientpositive/perf/query79.q.out | 2 +- .../results/clientpositive/perf/query82.q.out | 2 +- .../results/clientpositive/perf/query85.q.out | 26 +- .../results/clientpositive/pointlookup2.q.out | 38 +- .../results/clientpositive/pointlookup3.q.out | 50 ++- .../results/clientpositive/pointlookup4.q.out | 2 +- .../spark/constprog_semijoin.q.out | 4 +- .../clientpositive/tez/bucketpruning1.q.out | 8 +- .../clientpositive/tez/constprog_semijoin.q.out | 4 +- .../tez/vector_mr_diff_schema_alias.q.out | 2 +- .../vector_mr_diff_schema_alias.q.out | 2 +- 28 files changed, 824 insertions(+), 158 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index b516925..56b96b4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -1398,6 +1398,13 @@ public final class FunctionRegistry { } /** + * Returns whether the exprNodeDesc is a node of "in". + */ + public static boolean isIn(ExprNodeDesc desc) { + return GenericUDFIn.class == getGenericUDFClassFromExprDesc(desc); + } + + /** * Returns whether the exprNodeDesc is a node of "not". */ public static boolean isOpNot(ExprNodeDesc desc) { http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index f56cd96..55c71dd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -23,7 +23,6 @@ import java.util.List; import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc; import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer; import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication; @@ -83,7 +82,8 @@ public class Optimizer { } // Try to transform OR predicates in Filter into simpler IN clauses first - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER) && + !pctx.getContext().isCboSucceeded()) { final int min = HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); transformations.add(new PointLookupOptimizer(min)); http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java new file mode 100644 index 0000000..9609a1e --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java @@ -0,0 +1,381 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.LinkedHashMultimap; +import com.google.common.collect.ListMultimap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +/** + * This optimization will take a Filter expression, and if its predicate contains + * an OR operator whose children are constant equality expressions, it will try + * to generate an IN clause (which is more efficient). If the OR operator contains + * AND operator children, the optimization might generate an IN clause that uses + * structs. + */ +public class HivePointLookupOptimizerRule extends RelOptRule { + + protected static final Log LOG = LogFactory.getLog(HivePointLookupOptimizerRule.class); + + + // Minimum number of OR clauses needed to transform into IN clauses + private final int min; + + public HivePointLookupOptimizerRule(int min) { + super(operand(Filter.class, any())); + this.min = min; + } + + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + + final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + + final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + + // 1. We try to transform possible candidates + RexTransformIntoInClause transformIntoInClause = new RexTransformIntoInClause(rexBuilder, filter, min); + RexNode newCondition = transformIntoInClause.apply(condition); + + // 2. We merge IN expressions + RexMergeInClause mergeInClause = new RexMergeInClause(rexBuilder); + newCondition = mergeInClause.apply(newCondition); + + // 3. If we could not transform anything, we bail out + if (newCondition.toString().equals(condition.toString())) { + return; + } + + // 4. We create the filter with the new condition + RelNode newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newCondition); + + call.transformTo(newFilter); + } + + + /** + * Transforms OR clauses into IN clauses, when possible. + */ + protected static class RexTransformIntoInClause extends RexShuttle { + private final RexBuilder rexBuilder; + private final Filter filterOp; + private final int min; + + RexTransformIntoInClause(RexBuilder rexBuilder, Filter filterOp, int min) { + this.filterOp = filterOp; + this.rexBuilder = rexBuilder; + this.min = min; + } + + @Override public RexNode visitCall(RexCall call) { + RexNode node; + switch (call.getKind()) { + case AND: + ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) call).getOperands()); + List<RexNode> newOperands = new ArrayList<RexNode>(); + for (RexNode operand: operands) { + RexNode newOperand; + if (operand.getKind() == SqlKind.OR) { + try { + newOperand = transformIntoInClauseCondition(rexBuilder, + filterOp.getRowType(), operand, min); + if (newOperand == null) { + return call; + } + } catch (SemanticException e) { + LOG.error("Exception in HivePointLookupOptimizerRule", e); + return call; + } + } else { + newOperand = operand; + } + newOperands.add(newOperand); + } + node = RexUtil.composeConjunction(rexBuilder, newOperands, false); + break; + case OR: + try { + node = transformIntoInClauseCondition(rexBuilder, + filterOp.getRowType(), call, min); + if (node == null) { + return call; + } + } catch (SemanticException e) { + LOG.error("Exception in HivePointLookupOptimizerRule", e); + return call; + } + break; + default: + return super.visitCall(call); + } + return node; + } + + private static RexNode transformIntoInClauseCondition(RexBuilder rexBuilder, RelDataType inputSchema, + RexNode condition, int min) throws SemanticException { + assert condition.getKind() == SqlKind.OR; + + // 1. We extract the information necessary to create the predicate for the new + // filter + ListMultimap<RexInputRef,RexLiteral> columnConstantsMap = ArrayListMultimap.create(); + ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands()); + if (operands.size() < min) { + // We bail out + return null; + } + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + + final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand); + final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF); + + for (RexNode conjunction: conjunctions) { + // 1.1. If it is not a RexCall, we bail out + if (!(conjunction instanceof RexCall)) { + return null; + } + // 1.2. We extract the information that we need + RexCall conjCall = (RexCall) conjunction; + if(conjCall.getOperator().getKind() == SqlKind.EQUALS) { + if (conjCall.operands.get(0) instanceof RexInputRef && + conjCall.operands.get(1) instanceof RexLiteral) { + RexInputRef ref = (RexInputRef) conjCall.operands.get(0); + RexLiteral literal = (RexLiteral) conjCall.operands.get(1); + columnConstantsMap.put(ref, literal); + if (columnConstantsMap.get(ref).size() != i+1) { + // If we have not added to this column before, we bail out + return null; + } + } else if (conjCall.operands.get(1) instanceof RexInputRef && + conjCall.operands.get(0) instanceof RexLiteral) { + RexInputRef ref = (RexInputRef) conjCall.operands.get(1); + RexLiteral literal = (RexLiteral) conjCall.operands.get(0); + columnConstantsMap.put(ref, literal); + if (columnConstantsMap.get(ref).size() != i+1) { + // If we have not added to this column before, we bail out + return null; + } + } else { + // Bail out + return null; + } + } else { + return null; + } + } + } + + // 3. We build the new predicate and return it + List<RexNode> newOperands = new ArrayList<RexNode>(operands.size()); + // 3.1 Create structs + List<RexInputRef> columns = new ArrayList<RexInputRef>(); + List<String> names = new ArrayList<String>(); + ImmutableList.Builder<RelDataType> paramsTypes = ImmutableList.builder(); + List<TypeInfo> structReturnType = new ArrayList<TypeInfo>(); + ImmutableList.Builder<RelDataType> newOperandsTypes = ImmutableList.builder(); + for (int i = 0; i < operands.size(); i++) { + List<RexLiteral> constantFields = new ArrayList<RexLiteral>(operands.size()); + + for (RexInputRef ref : columnConstantsMap.keySet()) { + // If any of the elements was not referenced by every operand, we bail out + if (columnConstantsMap.get(ref).size() <= i) { + return null; + } + RexLiteral columnConstant = columnConstantsMap.get(ref).get(i); + if (i == 0) { + columns.add(ref); + names.add(inputSchema.getFieldNames().get(ref.getIndex())); + paramsTypes.add(ref.getType()); + structReturnType.add(TypeConverter.convert(ref.getType())); + } + constantFields.add(columnConstant); + } + + if (i == 0) { + RexNode columnsRefs; + if (columns.size() == 1) { + columnsRefs = columns.get(0); + } else { + // Create STRUCT clause + columnsRefs = rexBuilder.makeCall(SqlStdOperatorTable.ROW, columns); + } + newOperands.add(columnsRefs); + newOperandsTypes.add(columnsRefs.getType()); + } + RexNode values; + if (constantFields.size() == 1) { + values = constantFields.get(0); + } else { + // Create STRUCT clause + values = rexBuilder.makeCall(SqlStdOperatorTable.ROW, constantFields); + } + newOperands.add(values); + newOperandsTypes.add(values.getType()); + } + + // 4. Create and return IN clause + return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands); + } + + } + + /** + * Merge IN clauses, when possible. + */ + protected static class RexMergeInClause extends RexShuttle { + private final RexBuilder rexBuilder; + + RexMergeInClause(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + } + + @Override public RexNode visitCall(RexCall call) { + RexNode node; + final List<RexNode> operands; + final List<RexNode> newOperands; + Map<String,RexNode> stringToExpr = Maps.newHashMap(); + Multimap<String,String> inLHSExprToRHSExprs = LinkedHashMultimap.create(); + switch (call.getKind()) { + case AND: + // IN clauses need to be combined by keeping only common elements + operands = Lists.newArrayList(RexUtil.flattenAnd(((RexCall) call).getOperands())); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (operand.getKind() == SqlKind.IN) { + RexCall inCall = (RexCall) operand; + if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { + continue; + } + String ref = inCall.getOperands().get(0).toString(); + stringToExpr.put(ref, inCall.getOperands().get(0)); + if (inLHSExprToRHSExprs.containsKey(ref)) { + Set<String> expressions = Sets.newHashSet(); + for (int j = 1; j < inCall.getOperands().size(); j++) { + String expr = inCall.getOperands().get(j).toString(); + expressions.add(expr); + stringToExpr.put(expr, inCall.getOperands().get(j)); + } + inLHSExprToRHSExprs.get(ref).retainAll(expressions); + } else { + for (int j = 1; j < inCall.getOperands().size(); j++) { + String expr = inCall.getOperands().get(j).toString(); + inLHSExprToRHSExprs.put(ref, expr); + stringToExpr.put(expr, inCall.getOperands().get(j)); + } + } + operands.remove(i); + --i; + } + } + // Create IN clauses + newOperands = createInClauses(rexBuilder, stringToExpr, inLHSExprToRHSExprs); + newOperands.addAll(operands); + // Return node + node = RexUtil.composeConjunction(rexBuilder, newOperands, false); + break; + case OR: + // IN clauses need to be combined by keeping all elements + operands = Lists.newArrayList(RexUtil.flattenOr(((RexCall) call).getOperands())); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (operand.getKind() == SqlKind.IN) { + RexCall inCall = (RexCall) operand; + if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { + continue; + } + String ref = inCall.getOperands().get(0).toString(); + stringToExpr.put(ref, inCall.getOperands().get(0)); + for (int j = 1; j < inCall.getOperands().size(); j++) { + String expr = inCall.getOperands().get(j).toString(); + inLHSExprToRHSExprs.put(ref, expr); + stringToExpr.put(expr, inCall.getOperands().get(j)); + } + operands.remove(i); + --i; + } + } + // Create IN clauses + newOperands = createInClauses(rexBuilder, stringToExpr, inLHSExprToRHSExprs); + newOperands.addAll(operands); + // Return node + node = RexUtil.composeDisjunction(rexBuilder, newOperands, false); + break; + default: + return super.visitCall(call); + } + return node; + } + + private static List<RexNode> createInClauses(RexBuilder rexBuilder, Map<String, RexNode> stringToExpr, + Multimap<String, String> inLHSExprToRHSExprs) { + List<RexNode> newExpressions = Lists.newArrayList(); + for (Entry<String,Collection<String>> entry : inLHSExprToRHSExprs.asMap().entrySet()) { + String ref = entry.getKey(); + Collection<String> exprs = entry.getValue(); + if (exprs.isEmpty()) { + newExpressions.add(rexBuilder.makeLiteral(false)); + } else { + List<RexNode> newOperands = new ArrayList<RexNode>(exprs.size() + 1); + newOperands.add(stringToExpr.get(ref)); + for (String expr : exprs) { + newOperands.add(stringToExpr.get(expr)); + } + newExpressions.add(rexBuilder.makeCall(HiveIn.INSTANCE, newOperands)); + } + } + return newExpressions; + } + + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index 9cc9ea9..9911179 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -25,8 +25,6 @@ import java.util.List; import java.util.Map; import java.util.Stack; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; @@ -49,13 +47,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Expression processor factory for partition condition removing. Each processor tries to @@ -368,50 +365,66 @@ public final class PcrExprProcFactory { return getResultWrapFromResults(results, fd, newNodeOutputs); } return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs)); - } else if (fd.getGenericUDF() instanceof GenericUDFIn) { - List<ExprNodeDesc> children = fd.getChildren(); - boolean removePredElem = false; - ExprNodeDesc lhs = children.get(0); - - if (lhs instanceof ExprNodeGenericFuncDesc) { - // Make sure that the generic udf is deterministic - if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs) - .getGenericUDF())) { - boolean hasOnlyPartCols = true; - boolean hasDynamicListDesc = false; - - for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) { - // Check if the current field expression contains only - // partition column or a virtual column or constants. - // If yes, this filter predicate is a candidate for this optimization. - if (!(ed instanceof ExprNodeColumnDesc && - ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) { - hasOnlyPartCols = false; - break; - } - } + } else if (FunctionRegistry.isIn(fd)) { + List<ExprNodeDesc> children = fd.getChildren(); + boolean removePredElem = false; + ExprNodeDesc lhs = children.get(0); + + if (lhs instanceof ExprNodeColumnDesc) { + // It is an IN clause on a column + if (((ExprNodeColumnDesc)lhs).getIsPartitionColOrVirtualCol()) { + // It is a partition column, we can proceed + removePredElem = true; + } + if (removePredElem) { + // We should not remove the dynamic partition pruner generated synthetic predicates. + for (int i = 1; i < children.size(); i++) { + if (children.get(i) instanceof ExprNodeDynamicListDesc) { + removePredElem = false; + break; + } + } + } + } else if (lhs instanceof ExprNodeGenericFuncDesc) { + // It is an IN clause on a struct + // Make sure that the generic udf is deterministic + if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs) + .getGenericUDF())) { + boolean hasOnlyPartCols = true; + boolean hasDynamicListDesc = false; + + for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) { + // Check if the current field expression contains only + // partition column or a virtual column or constants. + // If yes, this filter predicate is a candidate for this optimization. + if (!(ed instanceof ExprNodeColumnDesc && + ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) { + hasOnlyPartCols = false; + break; + } + } - // If we have non-partition columns, we cannot remove the predicate. - if (hasOnlyPartCols) { - // We should not remove the dynamic partition pruner generated synthetic predicates. - for (int i = 1; i < children.size(); i++) { - if (children.get(i) instanceof ExprNodeDynamicListDesc) { - hasDynamicListDesc = true; - break; - } - } + // If we have non-partition columns, we cannot remove the predicate. + if (hasOnlyPartCols) { + // We should not remove the dynamic partition pruner generated synthetic predicates. + for (int i = 1; i < children.size(); i++) { + if (children.get(i) instanceof ExprNodeDynamicListDesc) { + hasDynamicListDesc = true; + break; } - - removePredElem = hasOnlyPartCols && !hasDynamicListDesc; } + } + + removePredElem = hasOnlyPartCols && !hasDynamicListDesc; } + } - // If removePredElem is set to true, return true as this is a potential candidate - // for partition condition remover. Else, set the WalkState for this node to unknown. - return removePredElem ? - new NodeInfoWrapper(WalkState.TRUE, null, - new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) : - new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ; + // If removePredElem is set to true, return true as this is a potential candidate + // for partition condition remover. Else, set the WalkState for this node to unknown. + return removePredElem ? + new NodeInfoWrapper(WalkState.TRUE, null, + new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) : + new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ; } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) { // If it's a non-deterministic UDF, set unknown to true return new NodeInfoWrapper(WalkState.UNKNOWN, null, http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index fd2246b..b59347d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -153,6 +153,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTranspos import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; @@ -1138,23 +1139,32 @@ public class CalcitePlanner extends SemanticAnalyzer { // 3. Run exhaustive PPD, add not null filters, transitive inference, // constant propagation, constant folding + List<RelOptRule> rules = Lists.newArrayList(); + if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING)) { + rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING); + } else { + rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC); + } + rules.add(HiveFilterSetOpTransposeRule.INSTANCE); + rules.add(HiveFilterSortTransposeRule.INSTANCE); + rules.add(HiveFilterJoinRule.JOIN); + rules.add(HiveFilterJoinRule.FILTER_ON_JOIN); + rules.add(new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class)); + rules.add(new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY)); + rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE); + rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE); + rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE); + if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { + final int min = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); + rules.add(new HivePointLookupOptimizerRule(min)); + } + rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN); + rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN); + rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN); + rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, - conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING) ? HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING - : HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, - HiveFilterSetOpTransposeRule.INSTANCE, - HiveFilterSortTransposeRule.INSTANCE, - HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, - new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), - new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY), - HiveReduceExpressionsRule.PROJECT_INSTANCE, - HiveReduceExpressionsRule.FILTER_INSTANCE, - HiveReduceExpressionsRule.JOIN_INSTANCE, - HiveJoinAddNotNullRule.INSTANCE_JOIN, - HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN, - HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN, - HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN); + rules.toArray(new RelOptRule[rules.size()])); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding"); http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/queries/clientpositive/auto_join19_inclause.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/auto_join19_inclause.q b/ql/src/test/queries/clientpositive/auto_join19_inclause.q new file mode 100644 index 0000000..7773289 --- /dev/null +++ b/ql/src/test/queries/clientpositive/auto_join19_inclause.q @@ -0,0 +1,18 @@ +set hive.mapred.mode=nonstrict; +set hive.auto.convert.join = true; +set hive.optimize.point.lookup.min=2; + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; + +explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11'); + + +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11'); + + +SELECT sum(hash(dest1.key,dest1.value)) FROM dest1; http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/queries/clientpositive/filter_in_or_dup.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/filter_in_or_dup.q b/ql/src/test/queries/clientpositive/filter_in_or_dup.q new file mode 100644 index 0000000..34a5139 --- /dev/null +++ b/ql/src/test/queries/clientpositive/filter_in_or_dup.q @@ -0,0 +1,19 @@ +set hive.optimize.point.lookup.min=2; + +EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2') +AND f.key IN ('1', '2'); + +EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key = '2') +AND f.key IN ('1', '2', '3'); + +EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2' OR f.key='3') +AND f.key IN ('1', '2'); http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/auto_join19_inclause.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out new file mode 100644 index 0000000..3f70055 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src2 + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +407444119660 http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/constprog_semijoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/constprog_semijoin.q.out index 0e0e883..940a148 100644 --- a/ql/src/test/results/clientpositive/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/constprog_semijoin.q.out @@ -502,7 +502,7 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((dimid = 100) = true) and (dimid) IN (100, 200)) and (dimid = 100) is not null) (type: boolean) + predicate: (((dimid) IN (100, 200) and ((dimid = 100) = true)) and (dimid = 100) is not null) (type: boolean) Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), val (type: string), val1 (type: string), dimid (type: int) @@ -518,7 +518,7 @@ STAGE PLANS: alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((id = 100) = true) and (id) IN (100, 200)) and (id = 100) is not null) (type: boolean) + predicate: (((id) IN (100, 200) and ((id = 100) = true)) and (id = 100) is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), (id = 100) (type: boolean) http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out index ddb05e2..eca29df 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out @@ -153,7 +153,7 @@ STAGE PLANS: TableScan alias: acid Filter Operator - predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string) outputColumnNames: _col0, _col3 @@ -390,7 +390,7 @@ STAGE PLANS: TableScan alias: acid Filter Operator - predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string) outputColumnNames: _col0, _col3 http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/filter_in_or_dup.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out new file mode 100644 index 0000000..f863ac3 --- /dev/null +++ b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out @@ -0,0 +1,96 @@ +PREHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2') +AND f.key IN ('1', '2') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2') +AND f.key IN ('1', '2') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: f + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key = '2') +AND f.key IN ('1', '2', '3') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key = '2') +AND f.key IN ('1', '2', '3') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: f + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2' OR f.key='3') +AND f.key IN ('1', '2') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2' OR f.key='3') +AND f.key IN ('1', '2') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: f + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query13.q.out b/ql/src/test/results/clientpositive/perf/query13.q.out index cc40e79..ad50576 100644 --- a/ql/src/test/results/clientpositive/perf/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/query13.q.out @@ -128,7 +128,7 @@ Stage-0 SHUFFLE [RS_39] Group By Operator [GBY_38] (rows=1 width=112) Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col5)","avg(_col7)","avg(_col8)","sum(_col8)"] - Merge Join Operator [MERGEJOIN_73] (rows=18150000 width=1014) + Merge Join Operator [MERGEJOIN_73] (rows=9075000 width=1014) Conds:RS_34._col0=RS_35._col0(Inner),Output:["_col5","_col7","_col8"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_35] @@ -142,19 +142,19 @@ Stage-0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Select Operator [SEL_30] (rows=16500000 width=1014) + Select Operator [SEL_30] (rows=8250000 width=1014) Output:["_col0","_col5","_col7","_col8"] - Filter Operator [FIL_29] (rows=16500000 width=1014) + Filter Operator [FIL_29] (rows=8250000 width=1014) predicate:(((_col17) IN ('KY', 'GA', 'NM') and _col9 BETWEEN 100 AND 200) or ((_col17) IN ('MT', 'OR', 'IN') and _col9 BETWEEN 150 AND 300) or ((_col17) IN ('WI', 'MO', 'WV') and _col9 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_72] (rows=22000000 width=1014) + Merge Join Operator [MERGEJOIN_72] (rows=11000000 width=1014) Conds:RS_26._col3=RS_27._col0(Inner),Output:["_col0","_col5","_col7","_col8","_col9","_col17"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0 - Select Operator [SEL_25] (rows=20000000 width=1014) + Select Operator [SEL_25] (rows=10000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_67] (rows=20000000 width=1014) - predicate:((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null) + Filter Operator [FIL_67] (rows=10000000 width=1014) + predicate:(((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) and ca_address_sk is not null) TableScan [TS_23] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 4 [SIMPLE_EDGE] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query27.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query27.q.out b/ql/src/test/results/clientpositive/perf/query27.q.out index 635c402..3a32d7b 100644 --- a/ql/src/test/results/clientpositive/perf/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/query27.q.out @@ -57,7 +57,7 @@ Stage-0 Select Operator [SEL_11] (rows=852 width=1910) Output:["_col0","_col1"] Filter Operator [FIL_53] (rows=852 width=1910) - predicate:((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 'VT') and s_store_sk is not null) + predicate:((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null) TableScan [TS_9] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query34.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query34.q.out b/ql/src/test/results/clientpositive/perf/query34.q.out index 6fa6985..a08c3ff 100644 --- a/ql/src/test/results/clientpositive/perf/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/query34.q.out @@ -94,7 +94,7 @@ Stage-0 Select Operator [SEL_5] (rows=36524 width=1119) Output:["_col0"] Filter Operator [FIL_53] (rows=36524 width=1119) - predicate:(((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) + predicate:(((d_year) IN (1998, 1999, 2000) and (d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28)) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query48.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query48.q.out b/ql/src/test/results/clientpositive/perf/query48.q.out index 691f5ad..d536bb5 100644 --- a/ql/src/test/results/clientpositive/perf/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/query48.q.out @@ -23,7 +23,7 @@ Stage-0 SHUFFLE [RS_31] Group By Operator [GBY_30] (rows=1 width=8) Output:["_col0"],aggregations:["sum(_col4)"] - Merge Join Operator [MERGEJOIN_57] (rows=18150000 width=1014) + Merge Join Operator [MERGEJOIN_57] (rows=9075000 width=1014) Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col4"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_27] @@ -37,19 +37,19 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 - Select Operator [SEL_22] (rows=16500000 width=1014) + Select Operator [SEL_22] (rows=8250000 width=1014) Output:["_col0","_col4"] - Filter Operator [FIL_21] (rows=16500000 width=1014) + Filter Operator [FIL_21] (rows=8250000 width=1014) predicate:(((_col12) IN ('KY', 'GA', 'NM') and _col6 BETWEEN 0 AND 2000) or ((_col12) IN ('MT', 'OR', 'IN') and _col6 BETWEEN 150 AND 3000) or ((_col12) IN ('WI', 'MO', 'WV') and _col6 BETWEEN 50 AND 25000)) - Merge Join Operator [MERGEJOIN_56] (rows=22000000 width=1014) + Merge Join Operator [MERGEJOIN_56] (rows=11000000 width=1014) Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col0","_col4","_col6","_col12"] <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_11] (rows=20000000 width=1014) + Select Operator [SEL_11] (rows=10000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_52] (rows=20000000 width=1014) - predicate:((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null) + Filter Operator [FIL_52] (rows=10000000 width=1014) + predicate:(((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) and ca_address_sk is not null) TableScan [TS_9] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 3 [SIMPLE_EDGE] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query68.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query68.q.out b/ql/src/test/results/clientpositive/perf/query68.q.out index 7828cfc..38e4644 100644 --- a/ql/src/test/results/clientpositive/perf/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/query68.q.out @@ -128,7 +128,7 @@ Stage-0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] Filter Operator [FIL_79] (rows=18262 width=1119) - predicate:((d_dom BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) + predicate:(((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query73.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query73.q.out b/ql/src/test/results/clientpositive/perf/query73.q.out index e367f51..cf3a75e 100644 --- a/ql/src/test/results/clientpositive/perf/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/query73.q.out @@ -94,7 +94,7 @@ Stage-0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] Filter Operator [FIL_53] (rows=18262 width=1119) - predicate:((d_dom BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) + predicate:(((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query79.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query79.q.out b/ql/src/test/results/clientpositive/perf/query79.q.out index fdc5773..bf537b9 100644 --- a/ql/src/test/results/clientpositive/perf/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/query79.q.out @@ -96,7 +96,7 @@ Stage-0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] Filter Operator [FIL_53] (rows=18262 width=1119) - predicate:(((d_dow = 1) and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) + predicate:(((d_year) IN (1998, 1999, 2000) and (d_dow = 1)) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query82.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query82.q.out b/ql/src/test/results/clientpositive/perf/query82.q.out index 2461644..57a50c7 100644 --- a/ql/src/test/results/clientpositive/perf/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/query82.q.out @@ -51,7 +51,7 @@ Stage-0 Select Operator [SEL_2] (rows=115500 width=1436) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_38] (rows=115500 width=1436) - predicate:((i_current_price BETWEEN 30 AND 60 and (i_manufact_id) IN (437, 129, 727, 663)) and i_item_sk is not null) + predicate:(((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60) and i_item_sk is not null) TableScan [TS_0] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] <-Map 6 [SIMPLE_EDGE] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query85.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query85.q.out b/ql/src/test/results/clientpositive/perf/query85.q.out index 72ac500..93b5f4e 100644 --- a/ql/src/test/results/clientpositive/perf/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/query85.q.out @@ -23,22 +23,22 @@ Stage-0 File Output Operator [FS_57] Limit [LIM_56] (rows=100 width=1014) Number of rows:100 - Select Operator [SEL_55] (rows=9982500 width=1014) + Select Operator [SEL_55] (rows=4991250 width=1014) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_54] - Select Operator [SEL_53] (rows=9982500 width=1014) + Select Operator [SEL_53] (rows=4991250 width=1014) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_52] (rows=9982500 width=1014) + Group By Operator [GBY_52] (rows=4991250 width=1014) Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_51] PartitionCols:_col0 - Group By Operator [GBY_50] (rows=19965000 width=1014) + Group By Operator [GBY_50] (rows=9982500 width=1014) Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col4)","avg(_col14)","avg(_col13)"],keys:_col28 - Select Operator [SEL_49] (rows=19965000 width=1014) + Select Operator [SEL_49] (rows=9982500 width=1014) Output:["_col28","_col4","_col14","_col13"] - Merge Join Operator [MERGEJOIN_107] (rows=19965000 width=1014) + Merge Join Operator [MERGEJOIN_107] (rows=9982500 width=1014) Conds:RS_46._col11=RS_47._col0(Inner),Output:["_col4","_col13","_col14","_col28"] <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_47] @@ -52,7 +52,7 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col11 - Merge Join Operator [MERGEJOIN_106] (rows=18150000 width=1014) + Merge Join Operator [MERGEJOIN_106] (rows=9075000 width=1014) Conds:RS_43._col0=RS_44._col0(Inner),Output:["_col4","_col11","_col13","_col14"] <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_44] @@ -66,19 +66,19 @@ Stage-0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Select Operator [SEL_36] (rows=16500000 width=1014) + Select Operator [SEL_36] (rows=8250000 width=1014) Output:["_col0","_col11","_col13","_col14","_col4"] - Filter Operator [FIL_35] (rows=16500000 width=1014) + Filter Operator [FIL_35] (rows=8250000 width=1014) predicate:(((_col23) IN ('KY', 'GA', 'NM') and _col6 BETWEEN 100 AND 200) or ((_col23) IN ('MT', 'OR', 'IN') and _col6 BETWEEN 150 AND 300) or ((_col23) IN ('WI', 'MO', 'WV') and _col6 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_105] (rows=22000000 width=1014) + Merge Join Operator [MERGEJOIN_105] (rows=11000000 width=1014) Conds:RS_32._col9=RS_33._col0(Inner),Output:["_col0","_col4","_col6","_col11","_col13","_col14","_col23"] <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col0 - Select Operator [SEL_28] (rows=20000000 width=1014) + Select Operator [SEL_28] (rows=10000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_98] (rows=20000000 width=1014) - predicate:((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null) + Filter Operator [FIL_98] (rows=10000000 width=1014) + predicate:(((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) and ca_address_sk is not null) TableScan [TS_26] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 5 [SIMPLE_EDGE] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index fb17e72..869e4cd 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -985,21 +985,17 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) - auto parallelism: false + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + auto parallelism: false TableScan alias: t2 Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE @@ -1169,11 +1165,11 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1201,7 +1197,7 @@ STAGE PLANS: key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) auto parallelism: false @@ -1235,13 +1231,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out index d5c4157..e98ba76 100644 --- a/ql/src/test/results/clientpositive/pointlookup3.q.out +++ b/ql/src/test/results/clientpositive/pointlookup3.q.out @@ -129,7 +129,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) @@ -374,14 +374,14 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key = 1) (type: boolean) + predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), ds1 (type: string) - outputColumnNames: _col1, _col2 + expressions: key (type: int), value (type: string), ds1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 1 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string) + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string) null sort order: aaaa sort order: ++++ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE @@ -441,7 +441,7 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: 1 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string) + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1149,21 +1149,17 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (ds1) IN ('2000-04-08', '2000-04-09') (type: boolean) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) - auto parallelism: false + Select Operator + expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE @@ -1337,11 +1333,11 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1369,7 +1365,7 @@ STAGE PLANS: key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string) auto parallelism: false @@ -1403,13 +1399,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/pointlookup4.q.out b/ql/src/test/results/clientpositive/pointlookup4.q.out index 0a9bd3e..6236272 100644 --- a/ql/src/test/results/clientpositive/pointlookup4.q.out +++ b/ql/src/test/results/clientpositive/pointlookup4.q.out @@ -384,7 +384,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(ds1,key,ds2)) IN (const struct('2000-04-08',1,'2001-04-08'), const struct('2000-04-09',2,'2001-04-09')) (type: boolean) + predicate: (struct(key,ds1,ds2)) IN (const struct(1,'2000-04-08','2001-04-08'), const struct(2,'2000-04-09','2001-04-09')) (type: boolean) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out index 2547405..0ab1365 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out @@ -523,7 +523,7 @@ STAGE PLANS: alias: table1 Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((dimid = 100) = true) and (dimid) IN (100, 200)) and (dimid = 100) is not null) (type: boolean) + predicate: (((dimid) IN (100, 200) and ((dimid = 100) = true)) and (dimid = 100) is not null) (type: boolean) Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), val (type: string), val1 (type: string), dimid (type: int) @@ -541,7 +541,7 @@ STAGE PLANS: alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((id = 100) = true) and (id) IN (100, 200)) and (id = 100) is not null) (type: boolean) + predicate: (((id) IN (100, 200) and ((id = 100) = true)) and (id = 100) is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), (id = 100) (type: boolean) http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out b/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out index 5315f2c..3557a3b 100644 --- a/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out +++ b/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out @@ -1011,13 +1011,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: (((value = 'One') and (key) IN (2, 3)) and (ds = '2008-04-08')) (type: boolean) + filterExpr: (((key) IN (2, 3) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean) buckets included: [2,3,] of 16 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((value = 'One') and (key) IN (2, 3)) and (ds = '2008-04-08')) (type: boolean) + predicate: (((key) IN (2, 3) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string) @@ -1700,12 +1700,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: (((value = 'One') and (key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) and (ds = '2008-04-08')) (type: boolean) + filterExpr: (((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((value = 'One') and (key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) and (ds = '2008-04-08')) (type: boolean) + predicate: (((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out index 7a9932a..8fecbd7 100644 --- a/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out @@ -317,7 +317,7 @@ Stage-0 Select Operator [SEL_2] (rows=2 width=20) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_15] (rows=2 width=20) - predicate:((((dimid = 100) = true) and (dimid) IN (100, 200)) and (dimid = 100) is not null) + predicate:(((dimid) IN (100, 200) and ((dimid = 100) = true)) and (dimid = 100) is not null) TableScan [TS_0] (rows=10 width=20) default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] @@ -328,7 +328,7 @@ Stage-0 Select Operator [SEL_5] (rows=1 width=3) Output:["_col0","_col1"] Filter Operator [FIL_17] (rows=1 width=3) - predicate:((((id = 100) = true) and (id) IN (100, 200)) and (id = 100) is not null) + predicate:(((id) IN (100, 200) and ((id = 100) = true)) and (id = 100) is not null) TableScan [TS_3] (rows=5 width=3) default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out index 0d6ad69..5a2ab91 100644 --- a/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out @@ -278,7 +278,7 @@ STAGE PLANS: alias: store Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 'VT') and s_store_sk is not null) (type: boolean) + predicate: ((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_state (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out index 9fce991..a9e25e1 100644 --- a/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out +++ b/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out @@ -269,7 +269,7 @@ STAGE PLANS: alias: store Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 'VT') and s_store_sk is not null) (type: boolean) + predicate: ((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_state (type: string)