http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index 82d9600..79a627c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -19,10 +19,10 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; import java.util.EnumSet; +import java.util.HashSet; import java.util.List; import java.util.Set; -import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; @@ -30,7 +30,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.RelFactories.FilterFactory; import org.apache.calcite.rel.core.TableScan; -import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; @@ -40,6 +39,7 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import com.google.common.collect.ImmutableList; @@ -47,31 +47,24 @@ import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; - public class HivePreFilteringRule extends RelOptRule { - protected static final Logger LOG = LoggerFactory - .getLogger(HivePreFilteringRule.class.getName()); - - - public static final HivePreFilteringRule INSTANCE = - new HivePreFilteringRule(); + protected static final Logger LOG = LoggerFactory + .getLogger(HivePreFilteringRule.class + .getName()); - private final FilterFactory filterFactory; + public static final HivePreFilteringRule INSTANCE = new HivePreFilteringRule(); + private final FilterFactory filterFactory; - private static final Set<SqlKind> COMPARISON = EnumSet.of( - SqlKind.EQUALS, - SqlKind.GREATER_THAN_OR_EQUAL, - SqlKind.LESS_THAN_OR_EQUAL, - SqlKind.GREATER_THAN, - SqlKind.LESS_THAN, - SqlKind.NOT_EQUALS); - + private static final Set<SqlKind> COMPARISON = EnumSet.of(SqlKind.EQUALS, + SqlKind.GREATER_THAN_OR_EQUAL, + SqlKind.LESS_THAN_OR_EQUAL, + SqlKind.GREATER_THAN, SqlKind.LESS_THAN, + SqlKind.NOT_EQUALS); private HivePreFilteringRule() { - super(operand(Filter.class, - operand(RelNode.class, any()))); + super(operand(Filter.class, operand(RelNode.class, any()))); this.filterFactory = HiveFilter.DEFAULT_FILTER_FACTORY; } @@ -86,8 +79,7 @@ public class HivePreFilteringRule extends RelOptRule { return false; } - HiveRulesRegistry registry = call.getPlanner(). 
- getContext().unwrap(HiveRulesRegistry.class); + HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); // If this operator has been visited already by the rule, // we do not need to apply the optimization @@ -103,69 +95,95 @@ public class HivePreFilteringRule extends RelOptRule { final Filter filter = call.rel(0); // 0. Register that we have visited this operator in this rule - HiveRulesRegistry registry = call.getPlanner(). - getContext().unwrap(HiveRulesRegistry.class); + HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); if (registry != null) { registry.registerVisited(this, filter); } final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); - final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + // 1. Recompose filter possibly by pulling out common elements from DNF + // expressions + RexNode topFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + + // 2. We extract possible candidates to be pushed down + List<RexNode> operandsToPushDown = new ArrayList<>(); + List<RexNode> deterministicExprs = new ArrayList<>(); + List<RexNode> nonDeterministicExprs = new ArrayList<>(); + + switch (topFilterCondition.getKind()) { + case AND: + ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) topFilterCondition) + .getOperands()); + Set<String> operandsToPushDownDigest = new HashSet<String>(); + List<RexNode> extractedCommonOperands = null; + + for (RexNode operand : operands) { + if (operand.getKind() == SqlKind.OR) { + extractedCommonOperands = extractCommonOperands(rexBuilder, operand); + for (RexNode extractedExpr : extractedCommonOperands) { + if (operandsToPushDownDigest.add(extractedExpr.toString())) { + operandsToPushDown.add(extractedExpr); + } + } + } - // 1. We extract possible candidates to be pushed down - List<RexNode> commonOperands = new ArrayList<>(); - switch (condition.getKind()) { - case AND: - ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) condition).getOperands()); - for (RexNode operand: operands) { - if (operand.getKind() == SqlKind.OR) { - commonOperands.addAll(extractCommonOperands(rexBuilder,operand)); + // TODO: Make expr traversal recursive. Extend to traverse inside + // elements of DNF/CNF & extract more deterministic pieces out. + if (HiveCalciteUtil.isDeterministic(operand)) { + deterministicExprs.add(operand); + } else { + nonDeterministicExprs.add(operand); + } + } + + // Pull out Deterministic exprs from non-deterministic and push down + // deterministic expressions as a separate filter + // NOTE: Hive by convention doesn't pushdown non deterministic expressions + if (nonDeterministicExprs.size() > 0) { + for (RexNode expr : deterministicExprs) { + if (!operandsToPushDownDigest.contains(expr.toString())) { + operandsToPushDown.add(expr); + operandsToPushDownDigest.add(expr.toString()); } } - break; - case OR: - commonOperands = extractCommonOperands(rexBuilder,condition); - break; - default: - return; + + topFilterCondition = RexUtil.pullFactors(rexBuilder, + RexUtil.composeConjunction(rexBuilder, nonDeterministicExprs, false)); + } + + break; + + case OR: + operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition); + break; + default: + return; } // 2. If we did not generate anything for the new predicate, we bail out - if (commonOperands.isEmpty()) { + if (operandsToPushDown.isEmpty()) { return; } // 3. 
If the new conjuncts are already present in the plan, we bail out - final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter.getInput()); - final List<RexNode> newConjuncts = new ArrayList<>(); - for (RexNode commonOperand : commonOperands) { - boolean found = false; - for (RexNode conjunct : predicates.pulledUpPredicates) { - if (commonOperand.toString().equals(conjunct.toString())) { - found = true; - break; - } - } - if (!found) { - newConjuncts.add(commonOperand); - } - } + final List<RexNode> newConjuncts = HiveCalciteUtil.getPredsNotPushedAlready(filter.getInput(), + operandsToPushDown); if (newConjuncts.isEmpty()) { return; } // 4. Otherwise, we create a new condition - final RexNode newCondition = RexUtil.pullFactors(rexBuilder, - RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); + final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder, + RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); // 5. We create the new filter that might be pushed down - RelNode newFilter = filterFactory.createFilter(filter.getInput(), newCondition); - RelNode newTopFilter = filterFactory.createFilter(newFilter, condition); + RelNode newChildFilter = filterFactory.createFilter(filter.getInput(), newChildFilterCondition); + RelNode newTopFilter = filterFactory.createFilter(newChildFilter, topFilterCondition); // 6. We register both so we do not fire the rule on them again if (registry != null) { - registry.registerVisited(this, newFilter); + registry.registerVisited(this, newChildFilter); registry.registerVisited(this, newTopFilter); } @@ -175,13 +193,15 @@ public class HivePreFilteringRule extends RelOptRule { private static List<RexNode> extractCommonOperands(RexBuilder rexBuilder, RexNode condition) { assert condition.getKind() == SqlKind.OR; - Multimap<String,RexNode> reductionCondition = LinkedHashMultimap.create(); + Multimap<String, RexNode> reductionCondition = LinkedHashMultimap.create(); - // Data structure to control whether a certain reference is present in every operand + // Data structure to control whether a certain reference is present in every + // operand Set<String> refsInAllOperands = null; - // 1. We extract the information necessary to create the predicate for the new - // filter; currently we support comparison functions, in and between + // 1. 
We extract the information necessary to create the predicate for the + // new + // filter; currently we support comparison functions, in and between ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands()); for (int i = 0; i < operands.size(); i++) { final RexNode operand = operands.get(i); @@ -190,27 +210,27 @@ public class HivePreFilteringRule extends RelOptRule { final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF); Set<String> refsInCurrentOperand = Sets.newHashSet(); - for (RexNode conjunction: conjunctions) { + for (RexNode conjunction : conjunctions) { // We do not know what it is, we bail out for safety - if (!(conjunction instanceof RexCall)) { + if (!(conjunction instanceof RexCall) || !HiveCalciteUtil.isDeterministic(conjunction)) { return new ArrayList<>(); } RexCall conjCall = (RexCall) conjunction; RexNode ref = null; - if(COMPARISON.contains(conjCall.getOperator().getKind())) { - if (conjCall.operands.get(0) instanceof RexInputRef && - conjCall.operands.get(1) instanceof RexLiteral) { + if (COMPARISON.contains(conjCall.getOperator().getKind())) { + if (conjCall.operands.get(0) instanceof RexInputRef + && conjCall.operands.get(1) instanceof RexLiteral) { ref = conjCall.operands.get(0); - } else if (conjCall.operands.get(1) instanceof RexInputRef && - conjCall.operands.get(0) instanceof RexLiteral) { + } else if (conjCall.operands.get(1) instanceof RexInputRef + && conjCall.operands.get(0) instanceof RexLiteral) { ref = conjCall.operands.get(1); } else { // We do not know what it is, we bail out for safety return new ArrayList<>(); } - } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) { + } else if (conjCall.getOperator().getKind().equals(SqlKind.IN)) { ref = conjCall.operands.get(0); - } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { + } else if (conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { ref = conjCall.operands.get(1); } else { // We do not know what it is, we bail out for safety @@ -228,7 +248,8 @@ public class HivePreFilteringRule extends RelOptRule { } else { refsInAllOperands = Sets.intersection(refsInAllOperands, refsInCurrentOperand); } - // If we did not add any factor or there are no common factors, we can bail out + // If we did not add any factor or there are no common factors, we can + // bail out if (refsInAllOperands.isEmpty()) { return new ArrayList<>(); } @@ -237,7 +258,8 @@ public class HivePreFilteringRule extends RelOptRule { // 2. We gather the common factors and return them List<RexNode> commonOperands = new ArrayList<>(); for (String ref : refsInAllOperands) { - commonOperands.add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false)); + commonOperands + .add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false)); } return commonOperands; }
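For readers skimming the patch, the transformation HivePreFilteringRule aims for can be shown on a toy model: factor out the conjuncts that occur in every branch of a disjunction so they can be applied as a separate child filter below the original condition (e.g. "((R1.x=R2.x) and R1.y<10) or ((R1.x=R2.x) and R1.z=10)" yields the pushable factor "R1.x=R2.x", the example also cited in the CalcitePlanner comments below). The sketch that follows only illustrates that idea on plain strings, not the rule itself; the class and method names are invented.

    import java.util.ArrayList;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Set;

    public class PreFilteringSketch {
      // Each inner list holds the conjuncts of one OR branch; the result is the set of
      // conjuncts shared by every branch, i.e. the candidates for a new child filter.
      static List<String> extractCommonOperands(List<List<String>> orBranches) {
        if (orBranches.isEmpty()) {
          return new ArrayList<>();
        }
        Set<String> common = new LinkedHashSet<>(orBranches.get(0));
        for (int i = 1; i < orBranches.size(); i++) {
          common.retainAll(orBranches.get(i));
          if (common.isEmpty()) {
            break; // nothing is shared by all branches, so nothing can be pre-filtered
          }
        }
        return new ArrayList<>(common);
      }

      public static void main(String[] args) {
        // where ((R1.x=R2.x) and R1.y<10) or ((R1.x=R2.x) and R1.z=10)
        List<List<String>> dnf = List.of(
            List.of("R1.x=R2.x", "R1.y<10"),
            List.of("R1.x=R2.x", "R1.z=10"));
        System.out.println(extractCommonOperands(dnf)); // prints [R1.x=R2.x]
      }
    }

The real rule additionally keeps a digest set to avoid duplicate pushed predicates and, when non-deterministic conjuncts are present, pushes only the deterministic ones down while leaving the full condition in the top filter.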
http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index b52779c..c04060f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -17,6 +17,10 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor; import org.apache.calcite.rel.RelNode; @@ -31,8 +35,10 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.plan.ColStatistics; public class FilterSelectivityEstimator extends RexVisitorImpl<Double> { private final RelNode childRel; @@ -81,6 +87,21 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> { break; } + case IS_NOT_NULL: { + if (childRel instanceof HiveTableScan) { + double noOfNulls = getMaxNulls(call, (HiveTableScan) childRel); + double totalNoOfTuples = childRel.getRows(); + if (totalNoOfTuples >= noOfNulls) { + selectivity = (totalNoOfTuples - noOfNulls) / Math.max(totalNoOfTuples, 1); + } else { + throw new RuntimeException("Invalid Stats number of null > no of tuples"); + } + } else { + selectivity = computeNotEqualitySelectivity(call); + } + break; + } + case LESS_THAN_OR_EQUAL: case GREATER_THAN_OR_EQUAL: case LESS_THAN: @@ -199,6 +220,33 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> { return selectivity; } + /** + * Given a RexCall & TableScan find max no of nulls. Currently it picks the + * col with max no of nulls. 
+ * + * TODO: improve this + * + * @param call + * @param t + * @return + */ + private long getMaxNulls(RexCall call, HiveTableScan t) { + long tmpNoNulls = 0; + long maxNoNulls = 0; + + Set<Integer> iRefSet = HiveCalciteUtil.getInputRefs(call); + List<ColStatistics> colStats = t.getColStat(new ArrayList<Integer>(iRefSet)); + + for (ColStatistics cs : colStats) { + tmpNoNulls = cs.getNumNulls(); + if (tmpNoNulls > maxNoNulls) { + maxNoNulls = tmpNoNulls; + } + } + + return maxNoNulls; + } + private Double getMaxNDV(RexCall call) { double tmpNDV; double maxNDV = 1.0; http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java new file mode 100644 index 0000000..b7244fd --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -0,0 +1,645 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.linq4j.function.Predicate1; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdPredicates; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexPermuteInputsShuttle; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.util.BitSets; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.mapping.MappingType; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; + +import com.google.common.base.Function; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; + + +//TODO: Move this to calcite +public class HiveRelMdPredicates extends RelMdPredicates { + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.PREDICATES.method, + new HiveRelMdPredicates()); + + private static final List<RexNode> EMPTY_LIST = ImmutableList.of(); + + /** + * Infers predicates for a project. + * + * <ol> + * <li>create a mapping from input to projection. Map only positions that + * directly reference an input column. + * <li>Expressions that only contain above columns are retained in the + * Project's pullExpressions list. + * <li>For e.g. expression 'a + e = 9' below will not be pulled up because 'e' + * is not in the projection list. 
+ * + * <pre> + * childPullUpExprs: {a > 7, b + c < 10, a + e = 9} + * projectionExprs: {a, b, c, e / 2} + * projectionPullupExprs: {a > 7, b + c < 10} + * </pre> + * + * </ol> + */ + @Override + public RelOptPredicateList getPredicates(Project project) { + RelNode child = project.getInput(); + final RexBuilder rexBuilder = project.getCluster().getRexBuilder(); + RelOptPredicateList childInfo = RelMetadataQuery.getPulledUpPredicates(child); + + List<RexNode> projectPullUpPredicates = new ArrayList<RexNode>(); + HashMultimap<Integer, Integer> inpIndxToOutIndxMap = HashMultimap.create(); + + ImmutableBitSet.Builder columnsMappedBuilder = ImmutableBitSet.builder(); + Mapping m = Mappings.create(MappingType.PARTIAL_FUNCTION, child.getRowType().getFieldCount(), + project.getRowType().getFieldCount()); + + for (Ord<RexNode> o : Ord.zip(project.getProjects())) { + if (o.e instanceof RexInputRef) { + int sIdx = ((RexInputRef) o.e).getIndex(); + m.set(sIdx, o.i); + inpIndxToOutIndxMap.put(sIdx, o.i); + columnsMappedBuilder.set(sIdx); + } + } + + // Go over childPullUpPredicates. If a predicate only contains columns in + // 'columnsMapped' construct a new predicate based on mapping. + final ImmutableBitSet columnsMapped = columnsMappedBuilder.build(); + for (RexNode r : childInfo.pulledUpPredicates) { + ImmutableBitSet rCols = RelOptUtil.InputFinder.bits(r); + if (columnsMapped.contains(rCols)) { + r = r.accept(new RexPermuteInputsShuttle(m, child)); + projectPullUpPredicates.add(r); + } + } + + // Project can also generate constants. We need to include them. + for (Ord<RexNode> expr : Ord.zip(project.getProjects())) { + if (RexLiteral.isNullLiteral(expr.e)) { + projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, + rexBuilder.makeInputRef(project, expr.i))); + } else if (expr.e instanceof RexLiteral) { + final RexLiteral literal = (RexLiteral) expr.e; + projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + rexBuilder.makeInputRef(project, expr.i), literal)); + } else if (expr.e instanceof RexCall && HiveCalciteUtil.isDeterministicFuncOnLiterals(expr.e)) { + //TODO: Move this to calcite + projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + rexBuilder.makeInputRef(project, expr.i), expr.e)); + } + } + return RelOptPredicateList.of(projectPullUpPredicates); + } + + /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */ + @Override + public RelOptPredicateList getPredicates(Join join) { + RexBuilder rB = join.getCluster().getRexBuilder(); + RelNode left = join.getInput(0); + RelNode right = join.getInput(1); + + RelOptPredicateList leftInfo = RelMetadataQuery.getPulledUpPredicates(left); + RelOptPredicateList rightInfo = RelMetadataQuery.getPulledUpPredicates(right); + + HiveJoinConditionBasedPredicateInference jI = new HiveJoinConditionBasedPredicateInference(join, + RexUtil.composeConjunction(rB, leftInfo.pulledUpPredicates, false), + RexUtil.composeConjunction(rB, rightInfo.pulledUpPredicates, false)); + + return jI.inferPredicates(false); + } + + /** + * Utility to infer predicates from one side of the join that apply on the + * other side. + * + * <p>Contract is:<ul> + * + * <li>initialize with a {@link org.apache.calcite.rel.core.Join} and + * optional predicates applicable on its left and right subtrees. + * + * <li>you can + * then ask it for equivalentPredicate(s) given a predicate. 
+ * + * </ul> + * + * <p>So for: + * <ol> + * <li>'<code>R1(x) join R2(y) on x = y</code>' a call for + * equivalentPredicates on '<code>x > 7</code>' will return ' + * <code>[y > 7]</code>' + * <li>'<code>R1(x) join R2(y) on x = y join R3(z) on y = z</code>' a call for + * equivalentPredicates on the second join '<code>x > 7</code>' will return ' + * <code>[y > 7, z > 7]</code>' + * </ol> + */ + static class HiveJoinConditionBasedPredicateInference { + final Join joinRel; + final boolean isSemiJoin; + final int nSysFields; + final int nFieldsLeft; + final int nFieldsRight; + final ImmutableBitSet leftFieldsBitSet; + final ImmutableBitSet rightFieldsBitSet; + final ImmutableBitSet allFieldsBitSet; + SortedMap<Integer, BitSet> equivalence; + final Map<String, ImmutableBitSet> exprFields; + final Set<String> allExprsDigests; + final Set<String> equalityPredicates; + final RexNode leftChildPredicates; + final RexNode rightChildPredicates; + + public HiveJoinConditionBasedPredicateInference(Join joinRel, + RexNode lPreds, RexNode rPreds) { + this(joinRel, joinRel instanceof SemiJoin, lPreds, rPreds); + } + + private HiveJoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin, + RexNode lPreds, RexNode rPreds) { + super(); + this.joinRel = joinRel; + this.isSemiJoin = isSemiJoin; + nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size(); + nFieldsRight = joinRel.getRight().getRowType().getFieldList().size(); + nSysFields = joinRel.getSystemFieldList().size(); + leftFieldsBitSet = ImmutableBitSet.range(nSysFields, + nSysFields + nFieldsLeft); + rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft, + nSysFields + nFieldsLeft + nFieldsRight); + allFieldsBitSet = ImmutableBitSet.range(0, + nSysFields + nFieldsLeft + nFieldsRight); + + exprFields = Maps.newHashMap(); + allExprsDigests = new HashSet<String>(); + + if (lPreds == null) { + leftChildPredicates = null; + } else { + Mappings.TargetMapping leftMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft, nSysFields, 0, nFieldsLeft); + leftChildPredicates = lPreds.accept( + new RexPermuteInputsShuttle(leftMapping, joinRel.getInput(0))); + + for (RexNode r : RelOptUtil.conjunctions(leftChildPredicates)) { + exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r)); + allExprsDigests.add(r.toString()); + } + } + if (rPreds == null) { + rightChildPredicates = null; + } else { + Mappings.TargetMapping rightMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft + nFieldsRight, + nSysFields + nFieldsLeft, 0, nFieldsRight); + rightChildPredicates = rPreds.accept( + new RexPermuteInputsShuttle(rightMapping, joinRel.getInput(1))); + + for (RexNode r : RelOptUtil.conjunctions(rightChildPredicates)) { + exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r)); + allExprsDigests.add(r.toString()); + } + } + + equivalence = Maps.newTreeMap(); + equalityPredicates = new HashSet<String>(); + for (int i = 0; i < nSysFields + nFieldsLeft + nFieldsRight; i++) { + equivalence.put(i, BitSets.of(i)); + } + + // Only process equivalences found in the join conditions. Processing + // Equivalences from the left or right side infer predicates that are + // already present in the Tree below the join. 
+ RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); + List<RexNode> exprs = + RelOptUtil.conjunctions( + compose(rexBuilder, ImmutableList.of(joinRel.getCondition()))); + + final EquivalenceFinder eF = new EquivalenceFinder(); + new ArrayList<Void>(Lists.transform(exprs, new Function<RexNode, Void>() { + public Void apply(RexNode input) { + return input.accept(eF); + } + })); + + equivalence = BitSets.closure(equivalence); + } + + /** + * The PullUp Strategy is sound but not complete. + * <ol> + * <li>We only pullUp inferred predicates for now. Pulling up existing + * predicates causes an explosion of duplicates. The existing predicates are + * pushed back down as new predicates. Once we have rules to eliminate + * duplicate Filter conditions, we should pullUp all predicates. + * <li>For Left Outer: we infer new predicates from the left and set them as + * applicable on the Right side. No predicates are pulledUp. + * <li>Right Outer Joins are handled in an analogous manner. + * <li>For Full Outer Joins no predicates are pulledUp or inferred. + * </ol> + */ + public RelOptPredicateList inferPredicates( + boolean includeEqualityInference) { + List<RexNode> inferredPredicates = new ArrayList<RexNode>(); + Set<String> allExprsDigests = new HashSet<String>(this.allExprsDigests); + final JoinRelType joinType = joinRel.getJoinType(); + switch (joinType) { + case INNER: + case LEFT: + infer(leftChildPredicates, allExprsDigests, inferredPredicates, + includeEqualityInference, + joinType == JoinRelType.LEFT ? rightFieldsBitSet + : allFieldsBitSet); + break; + } + switch (joinType) { + case INNER: + case RIGHT: + infer(rightChildPredicates, allExprsDigests, inferredPredicates, + includeEqualityInference, + joinType == JoinRelType.RIGHT ? leftFieldsBitSet + : allFieldsBitSet); + break; + } + + Mappings.TargetMapping rightMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft + nFieldsRight, + 0, nSysFields + nFieldsLeft, nFieldsRight); + final HiveJoinRexPermuteInputsShuttle rightPermute = + new HiveJoinRexPermuteInputsShuttle(rightMapping, joinRel); + Mappings.TargetMapping leftMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft, 0, nSysFields, nFieldsLeft); + final HiveJoinRexPermuteInputsShuttle leftPermute = + new HiveJoinRexPermuteInputsShuttle(leftMapping, joinRel); + + List<RexNode> leftInferredPredicates = new ArrayList<RexNode>(); + List<RexNode> rightInferredPredicates = new ArrayList<RexNode>(); + + for (RexNode iP : inferredPredicates) { + ImmutableBitSet iPBitSet = RelOptUtil.InputFinder.bits(iP); + if (leftFieldsBitSet.contains(iPBitSet)) { + leftInferredPredicates.add(iP.accept(leftPermute)); + } else if (rightFieldsBitSet.contains(iPBitSet)) { + rightInferredPredicates.add(iP.accept(rightPermute)); + } + } + + switch (joinType) { + case INNER: + Iterable<RexNode> pulledUpPredicates; + if (isSemiJoin) { + pulledUpPredicates = Iterables.concat( + RelOptUtil.conjunctions(leftChildPredicates), + leftInferredPredicates); + } else { + pulledUpPredicates = Iterables.concat( + RelOptUtil.conjunctions(leftChildPredicates), + RelOptUtil.conjunctions(rightChildPredicates), + RelOptUtil.conjunctions(joinRel.getCondition()), + inferredPredicates); + } + return RelOptPredicateList.of(pulledUpPredicates, + leftInferredPredicates, rightInferredPredicates); + case LEFT: + return RelOptPredicateList.of( + RelOptUtil.conjunctions(leftChildPredicates), + leftInferredPredicates, rightInferredPredicates); + case RIGHT: + return RelOptPredicateList.of( + 
RelOptUtil.conjunctions(rightChildPredicates), + inferredPredicates, EMPTY_LIST); + default: + assert inferredPredicates.size() == 0; + return RelOptPredicateList.EMPTY; + } + } + + public RexNode left() { + return leftChildPredicates; + } + + public RexNode right() { + return rightChildPredicates; + } + + private void infer(RexNode predicates, Set<String> allExprsDigests, + List<RexNode> inferedPredicates, boolean includeEqualityInference, + ImmutableBitSet inferringFields) { + for (RexNode r : RelOptUtil.conjunctions(predicates)) { + if (!includeEqualityInference + && equalityPredicates.contains(r.toString())) { + continue; + } + for (Mapping m : mappings(r)) { + RexNode tr = r.accept( + new RexPermuteInputsShuttle(m, joinRel.getInput(0), + joinRel.getInput(1))); + if (inferringFields.contains(RelOptUtil.InputFinder.bits(tr)) + && !allExprsDigests.contains(tr.toString()) + && !isAlwaysTrue(tr)) { + inferedPredicates.add(tr); + allExprsDigests.add(tr.toString()); + } + } + } + } + + Iterable<Mapping> mappings(final RexNode predicate) { + return new Iterable<Mapping>() { + public Iterator<Mapping> iterator() { + ImmutableBitSet fields = exprFields.get(predicate.toString()); + if (fields.cardinality() == 0) { + return Iterators.emptyIterator(); + } + return new ExprsItr(fields); + } + }; + } + + private void equivalent(int p1, int p2) { + BitSet b = equivalence.get(p1); + b.set(p2); + + b = equivalence.get(p2); + b.set(p1); + } + + RexNode compose(RexBuilder rexBuilder, Iterable<RexNode> exprs) { + exprs = Linq4j.asEnumerable(exprs).where(new Predicate1<RexNode>() { + public boolean apply(RexNode expr) { + return expr != null; + } + }); + return RexUtil.composeConjunction(rexBuilder, exprs, false); + } + + /** + * Find expressions of the form 'col_x = col_y'. + */ + class EquivalenceFinder extends RexVisitorImpl<Void> { + protected EquivalenceFinder() { + super(true); + } + + @Override public Void visitCall(RexCall call) { + if (call.getOperator().getKind() == SqlKind.EQUALS) { + int lPos = pos(call.getOperands().get(0)); + int rPos = pos(call.getOperands().get(1)); + if (lPos != -1 && rPos != -1) { + HiveJoinConditionBasedPredicateInference.this.equivalent(lPos, rPos); + HiveJoinConditionBasedPredicateInference.this.equalityPredicates + .add(call.toString()); + } + } + return null; + } + } + + /** + * Given an expression returns all the possible substitutions. 
+ * + * <p>For example, for an expression 'a + b + c' and the following + * equivalences: <pre> + * a : {a, b} + * b : {a, b} + * c : {c, e} + * </pre> + * + * <p>The following Mappings will be returned: + * <pre> + * {a->a, b->a, c->c} + * {a->a, b->a, c->e} + * {a->a, b->b, c->c} + * {a->a, b->b, c->e} + * {a->b, b->a, c->c} + * {a->b, b->a, c->e} + * {a->b, b->b, c->c} + * {a->b, b->b, c->e} + * </pre> + * + * <p>which imply the following inferences: + * <pre> + * a + a + c + * a + a + e + * a + b + c + * a + b + e + * b + a + c + * b + a + e + * b + b + c + * b + b + e + * </pre> + */ + class ExprsItr implements Iterator<Mapping> { + final int[] columns; + final BitSet[] columnSets; + final int[] iterationIdx; + Mapping nextMapping; + boolean firstCall; + + ExprsItr(ImmutableBitSet fields) { + nextMapping = null; + columns = new int[fields.cardinality()]; + columnSets = new BitSet[fields.cardinality()]; + iterationIdx = new int[fields.cardinality()]; + for (int j = 0, i = fields.nextSetBit(0); i >= 0; i = fields + .nextSetBit(i + 1), j++) { + columns[j] = i; + columnSets[j] = equivalence.get(i); + iterationIdx[j] = 0; + } + firstCall = true; + } + + public boolean hasNext() { + if (firstCall) { + initializeMapping(); + firstCall = false; + } else { + computeNextMapping(iterationIdx.length - 1); + } + return nextMapping != null; + } + + public Mapping next() { + return nextMapping; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + private void computeNextMapping(int level) { + int t = columnSets[level].nextSetBit(iterationIdx[level]); + if (t < 0) { + if (level == 0) { + nextMapping = null; + } else { + iterationIdx[level] = 0; + computeNextMapping(level - 1); + } + } else { + nextMapping.set(columns[level], t); + iterationIdx[level] = t + 1; + } + } + + private void initializeMapping() { + nextMapping = Mappings.create(MappingType.PARTIAL_FUNCTION, + nSysFields + nFieldsLeft + nFieldsRight, + nSysFields + nFieldsLeft + nFieldsRight); + for (int i = 0; i < columnSets.length; i++) { + BitSet c = columnSets[i]; + int t = c.nextSetBit(iterationIdx[i]); + if (t < 0) { + nextMapping = null; + return; + } + nextMapping.set(columns[i], t); + iterationIdx[i] = t + 1; + } + } + } + + private int pos(RexNode expr) { + if (expr instanceof RexInputRef) { + return ((RexInputRef) expr).getIndex(); + } + return -1; + } + + private boolean isAlwaysTrue(RexNode predicate) { + if (predicate instanceof RexCall) { + RexCall c = (RexCall) predicate; + if (c.getOperator().getKind() == SqlKind.EQUALS) { + int lPos = pos(c.getOperands().get(0)); + int rPos = pos(c.getOperands().get(1)); + return lPos != -1 && lPos == rPos; + } + } + return predicate.isAlwaysTrue(); + } + } + + /** + * Shuttle which applies a permutation to its input fields. 
+ * + * @see RexPermutationShuttle + * @see RexUtil#apply(org.apache.calcite.util.mapping.Mappings.TargetMapping, RexNode) + */ + public static class HiveJoinRexPermuteInputsShuttle extends RexShuttle { + //~ Instance fields -------------------------------------------------------- + + private final Mappings.TargetMapping mapping; + private final ImmutableList<RelDataTypeField> fields; + private final RelOptCluster cluster; + private final RelDataType rType; + + //~ Constructors ----------------------------------------------------------- + + private HiveJoinRexPermuteInputsShuttle( + Mappings.TargetMapping mapping, + RelNode input) { + this.mapping = mapping; + this.cluster = input.getCluster(); + this.rType = input.getRowType(); + this.fields = ImmutableList.copyOf(rType.getFieldList()); + } + + //~ Methods ---------------------------------------------------------------- + + private static ImmutableList<RelDataTypeField> fields(RelNode[] inputs) { + final ImmutableList.Builder<RelDataTypeField> fields = + ImmutableList.builder(); + for (RelNode input : inputs) { + fields.addAll(input.getRowType().getFieldList()); + } + return fields.build(); + } + + @Override public RexNode visitInputRef(RexInputRef local) { + final int index = local.getIndex(); + int target = mapping.getTarget(index); + return new RexInputRef( + target, + fields.get(index).getType()); + } + + @Override public RexNode visitCall(RexCall call) { + if (call.getOperator() == RexBuilder.GET_OPERATOR) { + final String name = + (String) ((RexLiteral) call.getOperands().get(1)).getValue2(); + final int i = lookup(fields, name); + if (i >= 0) { + return RexInputRef.of(i, fields); + } + } + return HiveCalciteUtil.getTypeSafePred(cluster, super.visitCall(call), rType); + } + + private static int lookup(List<RelDataTypeField> fields, String name) { + for (int i = 0; i < fields.size(); i++) { + final RelDataTypeField field = fields.get(i); + if (field.getName().equals(name)) { + return i; + } + } + return -1; + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index bcb9ea7..37249f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -202,6 +202,7 @@ public class SqlFunctionConverter { case IN: case BETWEEN: case ROW: + case IS_NOT_NULL: node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text)); break; @@ -314,7 +315,7 @@ public class SqlFunctionConverter { registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in")); registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between")); registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); - + registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.TOK_ISNOTNULL, "TOK_ISNOTNULL")); } private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) { 
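The IS_NOT_NULL support registered above in SqlFunctionConverter is consumed by the estimators in the surrounding files: both FilterSelectivityEstimator and StatsRulesProcFactory reduce the incoming row count by the largest null count among the columns referenced by the predicate. A minimal sketch of that arithmetic, with invented names and assuming the null count has already been looked up from column statistics:

    public class NotNullEstimateSketch {
      // Selectivity of "col IS NOT NULL": surviving rows divided by total rows,
      // guarded the same way the patch guards against inconsistent column statistics.
      static double notNullSelectivity(double totalRows, long maxNulls) {
        if (totalRows < maxNulls) {
          throw new IllegalStateException("Invalid stats: number of nulls > number of tuples");
        }
        return (totalRows - maxNulls) / Math.max(totalRows, 1);
      }

      public static void main(String[] args) {
        // 500 input rows, at most 120 nulls in the referenced column -> 0.76
        System.out.println(notNullSelectivity(500, 120));
      }
    }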
http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index c1e314f..1d3a90a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -52,8 +52,12 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; @@ -337,8 +341,9 @@ public class StatsRulesProcFactory { } } else if (udf instanceof GenericUDFOPNot) { newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop); + } else if (udf instanceof GenericUDFOPNotNull) { + return evaluateNotNullExpr(stats, genFunc); } else { - // single predicate condition newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop, evaluatedRowCount); } @@ -443,6 +448,58 @@ public class StatsRulesProcFactory { return numRows / 2; } + private long evaluateNotNullExpr(Statistics parentStats, ExprNodeGenericFuncDesc pred) { + long noOfNulls = getMaxNulls(parentStats, pred); + long parentCardinality = parentStats.getNumRows(); + long newPredCardinality = parentCardinality; + + if (parentCardinality > noOfNulls) { + newPredCardinality = parentCardinality - noOfNulls; + } else { + LOG.error("Invalid column stats: No of nulls > cardinality"); + } + + return newPredCardinality; + } + + private long getMaxNulls(Statistics stats, ExprNodeDesc pred) { + long tmpNoNulls = 0; + long maxNoNulls = 0; + + if (pred instanceof ExprNodeColumnDesc) { + ColStatistics cs = stats.getColumnStatisticsFromColName(((ExprNodeColumnDesc) pred) + .getColumn()); + if (cs != null) { + tmpNoNulls = cs.getNumNulls(); + } + } else if (pred instanceof ExprNodeGenericFuncDesc || pred instanceof ExprNodeColumnListDesc) { + long noNullsOfChild = 0; + for (ExprNodeDesc childExpr : pred.getChildren()) { + noNullsOfChild = getMaxNulls(stats, childExpr); + if (noNullsOfChild > tmpNoNulls) { + tmpNoNulls = noNullsOfChild; + } + } + } else if (pred instanceof ExprNodeConstantDesc) { + if (ExprNodeDescUtils.isNullConstant(pred)) { + tmpNoNulls = stats.getNumRows(); + } else { + tmpNoNulls = 0; + } + } else if (pred instanceof ExprNodeDynamicListDesc) { + tmpNoNulls = 0; + } else if (pred instanceof ExprNodeFieldDesc) { + // TODO Confirm this is safe + tmpNoNulls = getMaxNulls(stats, ((ExprNodeFieldDesc) pred).getDesc()); + } + + if (tmpNoNulls > maxNoNulls) { + maxNoNulls = tmpNoNulls; + } + + return maxNoNulls; + } + private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx 
aspCtx, List<String> neededCols, FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException { @@ -525,8 +582,7 @@ public class StatsRulesProcFactory { || udf instanceof GenericUDFOPLessThan) { return numRows / 3; } else if (udf instanceof GenericUDFOPNotNull) { - long newNumRows = evaluateColEqualsNullExpr(stats, genFunc); - return stats.getNumRows() - newNumRows; + return evaluateNotNullExpr(stats, genFunc); } else if (udf instanceof GenericUDFOPNull) { return evaluateColEqualsNullExpr(stats, genFunc); } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index c005b1a..ac50673 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -62,13 +62,11 @@ import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; -import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; -import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; import org.apache.calcite.rel.rules.ProjectMergeRule; import org.apache.calcite.rel.rules.ProjectRemoveRule; -import org.apache.calcite.rel.rules.ReduceExpressionsRule; import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; @@ -137,7 +135,9 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; @@ -818,7 +818,7 @@ public class CalcitePlanner extends SemanticAnalyzer { private class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> { private RelOptCluster cluster; private RelOptSchema relOptSchema; - private final Map<String, PrunedPartitionList> partitionCache; + private final Map<String, PrunedPartitionList> partitionCache; // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. @@ -918,11 +918,10 @@ public class CalcitePlanner extends SemanticAnalyzer { // 4. 
Run other optimizations that do not need stats calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), - HepMatchOrder.BOTTOM_UP, ReduceExpressionsRule.JOIN_INSTANCE, - ReduceExpressionsRule.FILTER_INSTANCE, ReduceExpressionsRule.PROJECT_INSTANCE, + HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, UnionMergeRule.INSTANCE, new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY), - HiveAggregateProjectMergeRule.INSTANCE); + HiveAggregateProjectMergeRule.INSTANCE, HiveJoinCommuteRule.INSTANCE); // 5. Run aggregate-join transpose (cost based) // If it failed because of missing stats, we continue with @@ -955,10 +954,6 @@ public class CalcitePlanner extends SemanticAnalyzer { } } - // 6. Run rule to try to remove projects on top of join operators - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), - HepMatchOrder.BOTTOM_UP, HiveJoinCommuteRule.INSTANCE); - // 7. Run rule to fix windowing issue when it is done over // aggregation columns (HIVE-10627) if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) { @@ -1014,12 +1009,11 @@ public class CalcitePlanner extends SemanticAnalyzer { * @return */ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { - // TODO: Decorelation of subquery should be done before attempting // Partition Pruning; otherwise Expression evaluation may try to execute // corelated sub query. - //0. Distinct aggregate rewrite + //1. Distinct aggregate rewrite // Run this optimization early, since it is expanding the operator pipeline. if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") && conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) { @@ -1029,7 +1023,40 @@ public class CalcitePlanner extends SemanticAnalyzer { basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE); } - // 1. Push down limit through outer join + // 2. Try factoring out common filter elements & separating deterministic + // vs non-deterministic UDF. This needs to run before PPD so that PPD can + // add on-clauses for old style Join Syntax + // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or + // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 + basePlan = hepPlan(basePlan, false, mdProvider, HepMatchOrder.ARBITRARY, + HivePreFilteringRule.INSTANCE); + + // 3. PPD for old Join Syntax + // NOTE: PPD needs to run before adding not null filters in order to + // support old style join syntax (so that on-clauses will get filled up). + // TODO: Add in ReduceExpressionrules (Constant folding) to below once + // HIVE-11927 is fixed. + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( + HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( + HiveFilter.DEFAULT_FILTER_FACTORY)); + + // TODO: Transitive inference, constant prop & Predicate push down has to + // do multiple passes till no more inference is left + // Currently doing so would result in a spin. Just checking for if inferred + // pred is present below may not be sufficient as inferred & pushed pred + // could have been mutated by constant folding/prop + // 4. 
Transitive inference for join on clauses + basePlan = hepPlan(basePlan, true, mdProvider, new HiveJoinPushTransitivePredicatesRule( + Join.class, HiveFilter.DEFAULT_FILTER_FACTORY)); + + // 5. Push down limit through outer join + // NOTE: We run this after PPD to support old style join syntax. + // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or + // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10 if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) { // This should be a cost based decision, but till we enable the extended cost // model, we will use the given value for the variable @@ -1044,35 +1071,28 @@ public class CalcitePlanner extends SemanticAnalyzer { HiveProjectSortTransposeRule.INSTANCE); } - // 2. Push Down Semi Joins + // 6. Add not null filters + basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); + + // 7. Rerun Constant propagation and PPD now that we have added Not NULL filters & did transitive inference + // TODO: Add in ReduceExpressionrules (Constant folding) to below once + // HIVE-11927 is fixed. + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( + HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( + HiveFilter.DEFAULT_FILTER_FACTORY)); + + // 8. Push Down Semi Joins basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - // 3. Add not null filters - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { - basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); - } - - // 4. Constant propagation, common filter extraction, and PPD - basePlan = hepPlan(basePlan, true, mdProvider, - ReduceExpressionsRule.PROJECT_INSTANCE, - ReduceExpressionsRule.FILTER_INSTANCE, - ReduceExpressionsRule.JOIN_INSTANCE, - HivePreFilteringRule.INSTANCE, - new HiveFilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, - HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY), - new HiveFilterSetOpTransposeRule(HiveFilter.DEFAULT_FILTER_FACTORY), - HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, - new FilterAggregateTransposeRule(Filter.class, - HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - - // 5. Transitive inference & Partition Pruning - basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule( - Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), - new HivePartitionPruneRule(conf)); - - // 6. Projection Pruning + // 9. Apply Partition Pruning + basePlan = hepPlan(basePlan, false, mdProvider, new HivePartitionPruneRule(conf)); + + // 10. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, cluster, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, @@ -1080,11 +1100,18 @@ public class CalcitePlanner extends SemanticAnalyzer { HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 7. 
Rerun PPD through Project as column pruning would have introduced DT - // above scans - basePlan = hepPlan(basePlan, true, mdProvider, - new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, - HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY)); + + // 11. Merge Project-Project if possible + basePlan = hepPlan(basePlan, false, mdProvider, new ProjectMergeRule(true, + HiveProject.DEFAULT_PROJECT_FACTORY)); + + // 12. Rerun PPD through Project as column pruning would have introduced + // DT above scans; By pushing filter just above TS, Hive can push it into + // storage (incase there are filters on non partition cols). This only + // matches FIL-PROJ-TS + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTSTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY, HiveTableScan.class)); return basePlan; } http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index e291a48..0223038 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -487,6 +487,14 @@ public class ExprNodeDescUtils { return true; } + public static boolean isNullConstant(ExprNodeDesc value) { + if ((value instanceof ExprNodeConstantDesc) + && ((ExprNodeConstantDesc) value).getValue() == null) { + return true; + } + return false; + } + public static PrimitiveTypeInfo deriveMinArgumentCast( ExprNodeDesc childExpr, TypeInfo targetType) { assert targetType instanceof PrimitiveTypeInfo : "Not a primitive type" + targetType; http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q b/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q index 799a66b..c017172 100644 --- a/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q +++ b/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q @@ -94,7 +94,7 @@ select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -152,7 +152,7 @@ select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value fro -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -280,7 +280,7 @@ select * from (select q, b, `//cbo_t2`.p, `c/b/o_t1`.c, `cbo_/t3////`.c_int from -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -310,7 +310,7 @@ select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from `c/b/o_t1` where (`c/b/o_t1`.c_int + 1 >= 0) and (`c/b/o_t1`.c_int > 0 or `c/b/o_t1`.c_float >= 0) group by c_float, `c/b/o_t1`.c_int, key having `c/b/o_t1`.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc limit 5) `c/b/o_t1` left outer join (select key as p, c_int+1 as q, 
sum(c_int) as r from `//cbo_t2` where (`//cbo_t2`.c_int + 1 >= 0) and (`//cbo_t2`.c_int > 0 or `//cbo_t2`.c_float >= 0) group by c_float, `//cbo_t2`.c_int, key having `//cbo_t2`.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 limit 5) `//cbo_t2` on `c/b/o_t1`.a=p left outer join `cbo_/t3////` on `c/b/o_t1`.a=key where (b + `//cbo_t2`.q >= 0) and (b > 0 or c_int >= 0) group by `cbo_/t3////`.c_int, c having `cbo_/t3////`.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by `cbo_/t3////`.c_int % c asc, `cbo_/t3////`.c_int, c desc li mit 5; -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -342,7 +342,7 @@ select a, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -452,7 +452,7 @@ select key from `c/b/o_t1` where c_int = -6 or c_int = +6; select count(`c/b/o_t1`.dt) from `c/b/o_t1` join `//cbo_t2` on `c/b/o_t1`.dt = `//cbo_t2`.dt where `c/b/o_t1`.dt = '2014' ; -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -470,7 +470,7 @@ select `c/b/o_t1`.value from `c/b/o_t1` join `//cbo_t2` on `c/b/o_t1`.key = `//c -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -602,7 +602,7 @@ from (select b.key, count(*) -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -712,7 +712,7 @@ having p_name in -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -872,7 +872,7 @@ having b.p_mfgr not in -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -910,7 +910,7 @@ select count(distinct c_int) as a, avg(c_float) from `c/b/o_t1` group by c_int o select count(distinct c_int) as a, avg(c_float) from `c/b/o_t1` group by c_float, c_int order by a; -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -936,7 +936,7 @@ select r2.key from (select key, c_int from (select key, c_int from `c/b/o_t1` un -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; @@ -1026,7 +1026,7 @@ drop view v3; drop view v4; -set hive.cbo.enable=true; +set hive.cbo.enable=false; set hive.exec.check.crossproducts=false; http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out index cdecad1..e2e589c 100644 --- a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out +++ b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out @@ -107,32 +107,32 @@ STAGE PLANS: Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE 
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             alias: b
             Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
       Reduce Operator Tree:
         Join Operator
@@ -142,14 +142,14 @@ STAGE PLANS:
             0 _col0 (type: int)
             1 _col0 (type: int)
           outputColumnNames: _col0, _col1, _col4
-          Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string)
            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -190,7 +190,7 @@ STAGE PLANS:
             Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
                   0 key (type: int)
@@ -204,7 +204,7 @@ STAGE PLANS:
             Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
                 condition map:
                      Inner Join 0 to 1
@@ -212,14 +212,14 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0, _col1, _col7
-                Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out b/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
index b2a7d89..f860011 100644
--- a/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
+++ b/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
@@ -77,7 +77,7 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
                   0 key (type: int)
@@ -91,7 +91,7 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
                 condition map:
                      Inner Join 0 to 1
@@ -99,14 +99,14 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0, _col1, _col5, _col6
-                Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
index 216b037..c6a3567 100644
--- a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
+++ b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
@@ -86,24 +86,24 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: ((key + 1) is not null and (key < 100)) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: (key + 1) (type: double)
                 sort order: +
                 Map-reduce partition columns: (key + 1) (type: double)
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 value expressions: key (type: string), value (type: string)
           TableScan
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: UDFToDouble(key) is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: UDFToDouble(key) (type: double)
                 sort order: +
                 Map-reduce partition columns: UDFToDouble(key) (type: double)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 value expressions: key (type: string), value (type: string)
       Reduce Operator Tree:
         Join Operator
@@ -113,13 +113,13 @@ STAGE PLANS:
             0 (key + 1) (type: double)
             1 UDFToDouble(key) (type: double)
           outputColumnNames: _col0, _col1, _col5, _col6
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
           Select Operator
            expressions: 2 (type: int), concat(_col0, _col1, _col5, _col6) (type: string), concat(_col0, _col1) (type: string), concat(_col5, _col6) (type: string), concat(_col0, _col1, _col5) (type: string), concat(_col0, _col5, _col6) (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            UDTF Operator
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              function name: stack
              Limit
                Number of rows: 10

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientpositive/ambiguous_col.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ambiguous_col.q.out b/ql/src/test/results/clientpositive/ambiguous_col.q.out
index 7f04e89..1d1d6af 100644
--- a/ql/src/test/results/clientpositive/ambiguous_col.q.out
+++ b/ql/src/test/results/clientpositive/ambiguous_col.q.out
@@ -17,32 +17,32 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
           TableScan
             alias: src1
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -51,14 +51,14 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
           Select Operator
            expressions: _col0 (type: string), _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -136,31 +136,31 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           TableScan
             alias: src1
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -169,14 +169,14 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
           Select Operator
            expressions: _col0 (type: string), _col0 (type: string)
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -254,31 +254,31 @@ STAGE PLANS:
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          TableScan
            alias: src1
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -287,14 +287,14 @@ STAGE PLANS:
            0 _col0 (type: string)
            1 _col0 (type: string)
          outputColumnNames: _col0
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col0 (type: string)
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
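
Editor's note on step 12 of the CalcitePlanner change above: the hepPlan(...) helper itself is not part of this mail, so the sketch below is only a rough illustration of how a heuristic (HEP) planner pass could apply the new FIL-PROJ-TS rule with Calcite's standard HepPlanner API. The rule constructor arguments are copied from the patch; the hepPlan signature, match order, and the package names of the Hive rule/operator classes are assumptions, not code from this commit.

import org.apache.calcite.plan.hep.HepMatchOrder;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule;

public class FilterProjectTSSketch {

  // Illustrative only: push a Filter that sits on a Project over a TableScan
  // underneath the Project, so the predicate ends up directly above the scan
  // and can be handed to the storage layer for non-partition columns.
  public static RelNode pushFilterAboveTS(RelNode basePlan) {
    HepProgram program = new HepProgramBuilder()
        .addMatchOrder(HepMatchOrder.TOP_DOWN)
        // Rule construction mirrors the hepPlan call in the patch above.
        .addRuleInstance(new HiveFilterProjectTSTransposeRule(
            Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class,
            HiveProject.DEFAULT_PROJECT_FACTORY, HiveTableScan.class))
        .build();

    HepPlanner planner = new HepPlanner(program);
    planner.setRoot(basePlan);
    // Returns the plan after the rule has been applied wherever it matched.
    return planner.findBestExp();
  }
}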
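Editor's note on the ExprNodeDescUtils change above: the new isNullConstant helper simply reports whether an expression is a constant whose value is the literal NULL. A minimal usage sketch follows; the ExprNodeConstantDesc constructors used here are assumptions about the existing API rather than part of this patch, and the call sites that will use the helper are not shown in this mail.

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class IsNullConstantSketch {
  public static void main(String[] args) {
    // A constant expression carrying a null value, e.g. the literal NULL in "WHERE c = NULL".
    ExprNodeDesc nullLiteral = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null);
    // A regular, non-null constant for comparison.
    ExprNodeDesc oneLiteral = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 1);

    System.out.println(ExprNodeDescUtils.isNullConstant(nullLiteral)); // true
    System.out.println(ExprNodeDescUtils.isNullConstant(oneLiteral));  // false
  }
}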
