http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index e810747..85e66d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -18,44 +18,24 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashSet; -import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import org.apache.calcite.linq4j.Linq4j; import org.apache.calcite.linq4j.Ord; -import org.apache.calcite.linq4j.function.Predicate1; -import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdPredicates; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexPermutationShuttle; import org.apache.calcite.rex.RexPermuteInputsShuttle; -import org.apache.calcite.rex.RexShuttle; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.rex.RexVisitorImpl; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.util.BitSets; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.mapping.Mapping; @@ -63,13 +43,8 @@ import org.apache.calcite.util.mapping.MappingType; import org.apache.calcite.util.mapping.Mappings; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; -import com.google.common.base.Function; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; -import com.google.common.collect.Iterators; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; //TODO: Move this to calcite @@ -151,511 +126,4 @@ public class HiveRelMdPredicates extends RelMdPredicates { return RelOptPredicateList.of(projectPullUpPredicates); } - /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */ - @Override - public RelOptPredicateList getPredicates(Join join, RelMetadataQuery mq) { - RexBuilder rB = join.getCluster().getRexBuilder(); - RelNode left = join.getInput(0); - RelNode right = join.getInput(1); - - RelOptPredicateList leftInfo = mq.getPulledUpPredicates(left); - RelOptPredicateList rightInfo = mq.getPulledUpPredicates(right); - - HiveJoinConditionBasedPredicateInference jI = new HiveJoinConditionBasedPredicateInference(join, - RexUtil.composeConjunction(rB, leftInfo.pulledUpPredicates, false), - RexUtil.composeConjunction(rB, rightInfo.pulledUpPredicates, false)); - - return jI.inferPredicates(false); - } - - /** - * Utility to infer predicates from one side of the join that apply on the - * other side. - * - * <p>Contract is:<ul> - * - * <li>initialize with a {@link org.apache.calcite.rel.core.Join} and - * optional predicates applicable on its left and right subtrees. - * - * <li>you can - * then ask it for equivalentPredicate(s) given a predicate. - * - * </ul> - * - * <p>So for: - * <ol> - * <li>'<code>R1(x) join R2(y) on x = y</code>' a call for - * equivalentPredicates on '<code>x > 7</code>' will return ' - * <code>[y > 7]</code>' - * <li>'<code>R1(x) join R2(y) on x = y join R3(z) on y = z</code>' a call for - * equivalentPredicates on the second join '<code>x > 7</code>' will return ' - * <code>[y > 7, z > 7]</code>' - * </ol> - */ - static class HiveJoinConditionBasedPredicateInference { - final Join joinRel; - final boolean isSemiJoin; - final int nSysFields; - final int nFieldsLeft; - final int nFieldsRight; - final ImmutableBitSet leftFieldsBitSet; - final ImmutableBitSet rightFieldsBitSet; - final ImmutableBitSet allFieldsBitSet; - SortedMap<Integer, BitSet> equivalence; - final Map<String, ImmutableBitSet> exprFields; - final Set<String> allExprsDigests; - final Set<String> equalityPredicates; - final RexNode leftChildPredicates; - final RexNode rightChildPredicates; - - public HiveJoinConditionBasedPredicateInference(Join joinRel, - RexNode lPreds, RexNode rPreds) { - this(joinRel, joinRel instanceof SemiJoin, lPreds, rPreds); - } - - private HiveJoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin, - RexNode lPreds, RexNode rPreds) { - super(); - this.joinRel = joinRel; - this.isSemiJoin = isSemiJoin; - nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size(); - nFieldsRight = joinRel.getRight().getRowType().getFieldList().size(); - nSysFields = joinRel.getSystemFieldList().size(); - leftFieldsBitSet = ImmutableBitSet.range(nSysFields, - nSysFields + nFieldsLeft); - rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft, - nSysFields + nFieldsLeft + nFieldsRight); - allFieldsBitSet = ImmutableBitSet.range(0, - nSysFields + nFieldsLeft + nFieldsRight); - - exprFields = Maps.newHashMap(); - allExprsDigests = new HashSet<String>(); - - if (lPreds == null) { - leftChildPredicates = null; - } else { - Mappings.TargetMapping leftMapping = Mappings.createShiftMapping( - nSysFields + nFieldsLeft, nSysFields, 0, nFieldsLeft); - leftChildPredicates = lPreds.accept( - new RexPermuteInputsShuttle(leftMapping, joinRel.getInput(0))); - - for (RexNode r : RelOptUtil.conjunctions(leftChildPredicates)) { - exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r)); - allExprsDigests.add(r.toString()); - } - } - if (rPreds == null) { - rightChildPredicates = null; - } else { - Mappings.TargetMapping rightMapping = Mappings.createShiftMapping( - nSysFields + nFieldsLeft + nFieldsRight, - nSysFields + nFieldsLeft, 0, nFieldsRight); - rightChildPredicates = rPreds.accept( - new RexPermuteInputsShuttle(rightMapping, joinRel.getInput(1))); - - for (RexNode r : RelOptUtil.conjunctions(rightChildPredicates)) { - exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r)); - allExprsDigests.add(r.toString()); - } - } - - equivalence = Maps.newTreeMap(); - equalityPredicates = new HashSet<String>(); - for (int i = 0; i < nSysFields + nFieldsLeft + nFieldsRight; i++) { - equivalence.put(i, BitSets.of(i)); - } - - // Only process equivalences found in the join conditions. Processing - // Equivalences from the left or right side infer predicates that are - // already present in the Tree below the join. - RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); - List<RexNode> exprs = - RelOptUtil.conjunctions( - compose(rexBuilder, ImmutableList.of(joinRel.getCondition()))); - - final EquivalenceFinder eF = new EquivalenceFinder(); - new ArrayList<Void>(Lists.transform(exprs, new Function<RexNode, Void>() { - public Void apply(RexNode input) { - return input.accept(eF); - } - })); - - equivalence = BitSets.closure(equivalence); - } - - /** - * The PullUp Strategy is sound but not complete. - * <ol> - * <li>We only pullUp inferred predicates for now. Pulling up existing - * predicates causes an explosion of duplicates. The existing predicates are - * pushed back down as new predicates. Once we have rules to eliminate - * duplicate Filter conditions, we should pullUp all predicates. - * <li>For Left Outer: we infer new predicates from the left and set them as - * applicable on the Right side. No predicates are pulledUp. - * <li>Right Outer Joins are handled in an analogous manner. - * <li>For Full Outer Joins no predicates are pulledUp or inferred. - * </ol> - */ - public RelOptPredicateList inferPredicates( - boolean includeEqualityInference) { - List<RexNode> inferredPredicates = new ArrayList<RexNode>(); - Set<String> allExprsDigests = new HashSet<String>(this.allExprsDigests); - final JoinRelType joinType = joinRel.getJoinType(); - switch (joinType) { - case INNER: - case LEFT: - infer(leftChildPredicates, allExprsDigests, inferredPredicates, - includeEqualityInference, - joinType == JoinRelType.LEFT ? rightFieldsBitSet - : allFieldsBitSet); - break; - } - switch (joinType) { - case INNER: - case RIGHT: - infer(rightChildPredicates, allExprsDigests, inferredPredicates, - includeEqualityInference, - joinType == JoinRelType.RIGHT ? leftFieldsBitSet - : allFieldsBitSet); - break; - } - - Mappings.TargetMapping rightMapping = Mappings.createShiftMapping( - nSysFields + nFieldsLeft + nFieldsRight, - 0, nSysFields + nFieldsLeft, nFieldsRight); - final HiveJoinRexPermuteInputsShuttle rightPermute = - new HiveJoinRexPermuteInputsShuttle(rightMapping, joinRel); - Mappings.TargetMapping leftMapping = Mappings.createShiftMapping( - nSysFields + nFieldsLeft, 0, nSysFields, nFieldsLeft); - final HiveJoinRexPermuteInputsShuttle leftPermute = - new HiveJoinRexPermuteInputsShuttle(leftMapping, joinRel); - - List<RexNode> leftInferredPredicates = new ArrayList<RexNode>(); - List<RexNode> rightInferredPredicates = new ArrayList<RexNode>(); - - for (RexNode iP : inferredPredicates) { - ImmutableBitSet iPBitSet = RelOptUtil.InputFinder.bits(iP); - if (iPBitSet.isEmpty() && joinType == JoinRelType.INNER) { - leftInferredPredicates.add(iP); - rightInferredPredicates.add(iP); - } else if (iPBitSet.isEmpty() && joinType == JoinRelType.LEFT) { - rightInferredPredicates.add(iP); - } else if (iPBitSet.isEmpty() && joinType == JoinRelType.RIGHT) { - leftInferredPredicates.add(iP); - } else if (leftFieldsBitSet.contains(iPBitSet)) { - leftInferredPredicates.add(iP.accept(leftPermute)); - } else if (rightFieldsBitSet.contains(iPBitSet)) { - rightInferredPredicates.add(iP.accept(rightPermute)); - } - } - - switch (joinType) { - case INNER: - Iterable<RexNode> pulledUpPredicates; - if (isSemiJoin) { - pulledUpPredicates = Iterables.concat( - RelOptUtil.conjunctions(leftChildPredicates), - leftInferredPredicates); - } else { - pulledUpPredicates = Iterables.concat( - RelOptUtil.conjunctions(leftChildPredicates), - RelOptUtil.conjunctions(rightChildPredicates), - RelOptUtil.conjunctions(joinRel.getCondition()), - inferredPredicates); - } - return RelOptPredicateList.of(pulledUpPredicates, - leftInferredPredicates, rightInferredPredicates); - case LEFT: - return RelOptPredicateList.of( - RelOptUtil.conjunctions(leftChildPredicates), - EMPTY_LIST, rightInferredPredicates); - case RIGHT: - return RelOptPredicateList.of( - RelOptUtil.conjunctions(rightChildPredicates), - leftInferredPredicates, EMPTY_LIST); - default: - assert inferredPredicates.size() == 0; - return RelOptPredicateList.EMPTY; - } - } - - public RexNode left() { - return leftChildPredicates; - } - - public RexNode right() { - return rightChildPredicates; - } - - private void infer(RexNode predicates, Set<String> allExprsDigests, - List<RexNode> inferedPredicates, boolean includeEqualityInference, - ImmutableBitSet inferringFields) { - for (RexNode r : RelOptUtil.conjunctions(predicates)) { - if (r.isAlwaysFalse()) { - RexLiteral falseVal = - joinRel.getCluster().getRexBuilder().makeLiteral(false); - inferedPredicates.add(falseVal); - allExprsDigests.add(falseVal.toString()); - continue; - } - if (!includeEqualityInference - && equalityPredicates.contains(r.toString())) { - continue; - } - for (Mapping m : mappings(r)) { - RexNode tr = r.accept( - new RexPermuteInputsShuttle(m, joinRel.getInput(0), - joinRel.getInput(1))); - if (inferringFields.contains(RelOptUtil.InputFinder.bits(tr)) - && !allExprsDigests.contains(tr.toString()) - && !isAlwaysTrue(tr)) { - inferedPredicates.add(tr); - allExprsDigests.add(tr.toString()); - } - } - } - } - - Iterable<Mapping> mappings(final RexNode predicate) { - return new Iterable<Mapping>() { - public Iterator<Mapping> iterator() { - ImmutableBitSet fields = exprFields.get(predicate.toString()); - if (fields.cardinality() == 0) { - return Iterators.emptyIterator(); - } - return new ExprsItr(fields); - } - }; - } - - private void equivalent(int p1, int p2) { - BitSet b = equivalence.get(p1); - b.set(p2); - - b = equivalence.get(p2); - b.set(p1); - } - - RexNode compose(RexBuilder rexBuilder, Iterable<RexNode> exprs) { - exprs = Linq4j.asEnumerable(exprs).where(new Predicate1<RexNode>() { - public boolean apply(RexNode expr) { - return expr != null; - } - }); - return RexUtil.composeConjunction(rexBuilder, exprs, false); - } - - /** - * Find expressions of the form 'col_x = col_y'. - */ - class EquivalenceFinder extends RexVisitorImpl<Void> { - protected EquivalenceFinder() { - super(true); - } - - @Override public Void visitCall(RexCall call) { - if (call.getOperator().getKind() == SqlKind.EQUALS) { - int lPos = pos(call.getOperands().get(0)); - int rPos = pos(call.getOperands().get(1)); - if (lPos != -1 && rPos != -1) { - HiveJoinConditionBasedPredicateInference.this.equivalent(lPos, rPos); - HiveJoinConditionBasedPredicateInference.this.equalityPredicates - .add(call.toString()); - } - } - return null; - } - } - - /** - * Given an expression returns all the possible substitutions. - * - * <p>For example, for an expression 'a + b + c' and the following - * equivalences: <pre> - * a : {a, b} - * b : {a, b} - * c : {c, e} - * </pre> - * - * <p>The following Mappings will be returned: - * <pre> - * {a->a, b->a, c->c} - * {a->a, b->a, c->e} - * {a->a, b->b, c->c} - * {a->a, b->b, c->e} - * {a->b, b->a, c->c} - * {a->b, b->a, c->e} - * {a->b, b->b, c->c} - * {a->b, b->b, c->e} - * </pre> - * - * <p>which imply the following inferences: - * <pre> - * a + a + c - * a + a + e - * a + b + c - * a + b + e - * b + a + c - * b + a + e - * b + b + c - * b + b + e - * </pre> - */ - class ExprsItr implements Iterator<Mapping> { - final int[] columns; - final BitSet[] columnSets; - final int[] iterationIdx; - Mapping nextMapping; - boolean firstCall; - - ExprsItr(ImmutableBitSet fields) { - nextMapping = null; - columns = new int[fields.cardinality()]; - columnSets = new BitSet[fields.cardinality()]; - iterationIdx = new int[fields.cardinality()]; - for (int j = 0, i = fields.nextSetBit(0); i >= 0; i = fields - .nextSetBit(i + 1), j++) { - columns[j] = i; - columnSets[j] = equivalence.get(i); - iterationIdx[j] = 0; - } - firstCall = true; - } - - public boolean hasNext() { - if (firstCall) { - initializeMapping(); - firstCall = false; - } else { - computeNextMapping(iterationIdx.length - 1); - } - return nextMapping != null; - } - - public Mapping next() { - return nextMapping; - } - - public void remove() { - throw new UnsupportedOperationException(); - } - - private void computeNextMapping(int level) { - int t = columnSets[level].nextSetBit(iterationIdx[level]); - if (t < 0) { - if (level == 0) { - nextMapping = null; - } else { - iterationIdx[level] = 0; - computeNextMapping(level - 1); - } - } else { - nextMapping.set(columns[level], t); - iterationIdx[level] = t + 1; - } - } - - private void initializeMapping() { - nextMapping = Mappings.create(MappingType.PARTIAL_FUNCTION, - nSysFields + nFieldsLeft + nFieldsRight, - nSysFields + nFieldsLeft + nFieldsRight); - for (int i = 0; i < columnSets.length; i++) { - BitSet c = columnSets[i]; - int t = c.nextSetBit(iterationIdx[i]); - if (t < 0) { - nextMapping = null; - return; - } - nextMapping.set(columns[i], t); - iterationIdx[i] = t + 1; - } - } - } - - private int pos(RexNode expr) { - if (expr instanceof RexInputRef) { - return ((RexInputRef) expr).getIndex(); - } - return -1; - } - - private boolean isAlwaysTrue(RexNode predicate) { - if (predicate instanceof RexCall) { - RexCall c = (RexCall) predicate; - if (c.getOperator().getKind() == SqlKind.EQUALS) { - int lPos = pos(c.getOperands().get(0)); - int rPos = pos(c.getOperands().get(1)); - return lPos != -1 && lPos == rPos; - } - } - return predicate.isAlwaysTrue(); - } - } - - /** - * Shuttle which applies a permutation to its input fields. - * - * @see RexPermutationShuttle - * @see RexUtil#apply(org.apache.calcite.util.mapping.Mappings.TargetMapping, RexNode) - */ - public static class HiveJoinRexPermuteInputsShuttle extends RexShuttle { - //~ Instance fields -------------------------------------------------------- - - private final Mappings.TargetMapping mapping; - private final ImmutableList<RelDataTypeField> fields; - private final RelOptCluster cluster; - private final RelDataType rType; - - //~ Constructors ----------------------------------------------------------- - - private HiveJoinRexPermuteInputsShuttle( - Mappings.TargetMapping mapping, - RelNode input) { - this.mapping = mapping; - this.cluster = input.getCluster(); - this.rType = input.getRowType(); - this.fields = ImmutableList.copyOf(rType.getFieldList()); - } - - //~ Methods ---------------------------------------------------------------- - - private static ImmutableList<RelDataTypeField> fields(RelNode[] inputs) { - final ImmutableList.Builder<RelDataTypeField> fields = - ImmutableList.builder(); - for (RelNode input : inputs) { - fields.addAll(input.getRowType().getFieldList()); - } - return fields.build(); - } - - @Override public RexNode visitInputRef(RexInputRef local) { - final int index = local.getIndex(); - int target = mapping.getTarget(index); - return new RexInputRef( - target, - fields.get(index).getType()); - } - - @Override public RexNode visitCall(RexCall call) { - if (call.getOperator() == RexBuilder.GET_OPERATOR) { - final String name = - (String) ((RexLiteral) call.getOperands().get(1)).getValue2(); - final int i = lookup(fields, name); - if (i >= 0) { - return RexInputRef.of(i, fields); - } - } - return HiveCalciteUtil.getTypeSafePred(cluster, super.visitCall(call), rType); - } - - private static int lookup(List<RelDataTypeField> fields, String name) { - for (int i = 0; i < fields.size(); i++) { - final RelDataTypeField field = fields.get(i); - if (field.getName().equals(name)) { - return i; - } - } - return -1; - } - } }
http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index de7e2f8..353d8db 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -45,6 +45,7 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.rex.RexWindow; import org.apache.calcite.rex.RexWindowBound; @@ -189,6 +190,10 @@ public class ASTConverter { int i = 0; for (RexNode r : select.getChildExps()) { + if (RexUtil.isNull(r) && r.getType().getSqlTypeName() != SqlTypeName.NULL) { + // It is NULL value with different type, we need to introduce a CAST to keep it + r = select.getCluster().getRexBuilder().makeAbstractCast(r.getType(), r); + } ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral)); String alias = select.getRowType().getFieldNames().get(i++); ASTNode selectExpr = ASTBuilder.selectExpr(expr, alias); http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index e51b6c4..2e498cc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -36,6 +36,7 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.rex.RexWindow; import org.apache.calcite.rex.RexWindowBound; @@ -45,6 +46,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.RexVisitor; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.Schema; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -79,6 +81,7 @@ import com.google.common.collect.ImmutableSet; */ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> { + private final boolean foldExpr; private final String tabAlias; private final RelDataType inputRowType; private final ImmutableSet<Integer> inputVCols; @@ -89,16 +92,28 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> { public ExprNodeConverter(String tabAlias, RelDataType inputRowType, Set<Integer> vCols, RelDataTypeFactory dTFactory) { - this(tabAlias, null, inputRowType, null, vCols, dTFactory); + this(tabAlias, null, inputRowType, null, vCols, dTFactory, false); + } + + public ExprNodeConverter(String tabAlias, RelDataType inputRowType, + Set<Integer> vCols, RelDataTypeFactory dTFactory, boolean foldExpr) { + this(tabAlias, null, inputRowType, null, vCols, dTFactory, foldExpr); } public ExprNodeConverter(String tabAlias, String columnAlias, RelDataType inputRowType, RelDataType outputRowType, Set<Integer> inputVCols, RelDataTypeFactory dTFactory) { + this(tabAlias, columnAlias, inputRowType, outputRowType, inputVCols, dTFactory, false); + } + + public ExprNodeConverter(String tabAlias, String columnAlias, RelDataType inputRowType, + RelDataType outputRowType, Set<Integer> inputVCols, RelDataTypeFactory dTFactory, + boolean foldExpr) { super(true); this.tabAlias = tabAlias; this.inputRowType = inputRowType; this.inputVCols = ImmutableSet.copyOf(inputVCols); this.dTFactory = dTFactory; + this.foldExpr = foldExpr; } public List<WindowFunctionSpec> getWindowFunctionSpec() { @@ -117,7 +132,7 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> { */ @Override public ExprNodeDesc visitCall(RexCall call) { - ExprNodeGenericFuncDesc gfDesc = null; + ExprNodeDesc gfDesc = null; if (!deep) { return null; @@ -149,6 +164,15 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> { throw new RuntimeException(e); } } + + // Try to fold if it is a constant expression + if (foldExpr && RexUtil.isConstant(call)) { + ExprNodeDesc constantExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)gfDesc); + if (constantExpr != null) { + gfDesc = constantExpr; + } + } + return gfDesc; } http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java index 7fbf8cd..3ecbbb1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java @@ -159,7 +159,8 @@ public class HiveGBOpConvUtil { // 1. Collect GB Keys RelNode aggInputRel = aggRel.getInput(); ExprNodeConverter exprConv = new ExprNodeConverter(inputOpAf.tabAlias, - aggInputRel.getRowType(), new HashSet<Integer>(), aggRel.getCluster().getTypeFactory()); + aggInputRel.getRowType(), new HashSet<Integer>(), aggRel.getCluster().getTypeFactory(), + true); ExprNodeDesc tmpExprNodeDesc; for (int i : aggRel.getGroupSet()) { http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index 1307808..aef5baa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -278,7 +278,8 @@ public class HiveOpConverter { for (int pos = 0; pos < projectRel.getChildExps().size(); pos++) { ExprNodeConverter converter = new ExprNodeConverter(inputOpAf.tabAlias, projectRel .getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(), - projectRel.getRowType(), inputOpAf.vcolsInCalcite, projectRel.getCluster().getTypeFactory()); + projectRel.getRowType(), inputOpAf.vcolsInCalcite, projectRel.getCluster().getTypeFactory(), + true); ExprNodeDesc exprCol = projectRel.getChildExps().get(pos).accept(converter); colExprMap.put(exprNames.get(pos), exprCol); exprCols.add(exprCol); @@ -520,7 +521,7 @@ public class HiveOpConverter { ExprNodeDesc filCondExpr = filterRel.getCondition().accept( new ExprNodeConverter(inputOpAf.tabAlias, filterRel.getInput().getRowType(), inputOpAf.vcolsInCalcite, - filterRel.getCluster().getTypeFactory())); + filterRel.getCluster().getTypeFactory(), true)); FilterDesc filDesc = new FilterDesc(filCondExpr, false); ArrayList<ColumnInfo> cinfoLst = createColInfos(inputOpAf.inputs.get(0)); FilterOperator filOp = (FilterOperator) OperatorFactory.getAndMakeChild(filDesc, @@ -1164,7 +1165,7 @@ public class HiveOpConverter { private static ExprNodeDesc convertToExprNode(RexNode rn, RelNode inputRel, String tabAlias, Set<Integer> vcolsInCalcite) { return rn.accept(new ExprNodeConverter(tabAlias, inputRel.getRowType(), vcolsInCalcite, - inputRel.getCluster().getTypeFactory())); + inputRel.getCluster().getTypeFactory(), true)); } private static ArrayList<ColumnInfo> createColInfos(Operator<?> input) { http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java index 368264c..b5f4ca3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java @@ -115,7 +115,7 @@ public class HiveOpConverterPostProc extends Transform { "In return path join annotate rule, we find " + aliases == null ? null : aliases .size() + " aliases for " + joinOp.toString()); } - final String joinOpAlias = aliases.iterator().next();; + final String joinOpAlias = aliases.iterator().next(); aliasToOpInfo.put(joinOpAlias, joinOp); // 3. Populate other data structures http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index ee4f4ea..479070b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -74,7 +74,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -157,7 +159,7 @@ public class RexNodeConverter { } } - private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException { + private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { ExprNodeDesc tmpExprNode; RexNode tmpRN; @@ -174,6 +176,8 @@ public class RexNodeConverter { ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory()))); boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare; boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase; + boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && + func.getChildren().size() != 0; if (isNumeric) { tgtDT = func.getTypeInfo(); @@ -189,6 +193,9 @@ public class RexNodeConverter { if (checkForStatefulFunctions(func.getChildren())) { throw new SemanticException("Stateful expressions cannot be used inside of CASE"); } + } else if (isTransformableTimeStamp) { + // unix_timestamp(args) -> to_unix_timestamp(args) + func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren()); } for (ExprNodeDesc childExpr : func.getChildren()) { @@ -460,14 +467,16 @@ public class RexNodeConverter { } break; case FLOAT: - calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal((Float) value), calciteDataType); + calciteLiteral = rexBuilder.makeApproxLiteral( + new BigDecimal(Float.toString((Float)value)), calciteDataType); break; case DOUBLE: // TODO: The best solution is to support NaN in expression reduction. if (Double.isNaN((Double) value)) { throw new CalciteSemanticException("NaN", UnsupportedFeature.Invalid_decimal); } - calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal((Double) value), calciteDataType); + calciteLiteral = rexBuilder.makeApproxLiteral( + new BigDecimal(Double.toString((Double)value)), calciteDataType); break; case CHAR: if (value instanceof HiveChar) { http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 0b76bff..8b08ae7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -37,8 +37,6 @@ import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.util.Util; import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -65,6 +63,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -204,6 +204,7 @@ public class SqlFunctionConverter { case BETWEEN: case ROW: case IS_NOT_NULL: + case IS_NULL: case CASE: node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text)); @@ -322,13 +323,15 @@ public class SqlFunctionConverter { registerFunction(">", SqlStdOperatorTable.GREATER_THAN, hToken(HiveParser.GREATERTHAN, ">")); registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, hToken(HiveParser.GREATERTHANOREQUALTO, ">=")); - registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not")); + registerFunction("not", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not")); + registerDuplicateFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not")); registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>")); registerDuplicateFunction("!=", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>")); registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in")); registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between")); registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.TOK_ISNOTNULL, "TOK_ISNOTNULL")); + registerFunction("isnull", SqlStdOperatorTable.IS_NULL, hToken(HiveParser.TOK_ISNULL, "TOK_ISNULL")); registerFunction("when", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); registerDuplicateFunction("case", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); } http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java index 2825f77..ba41518 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.translator; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -28,9 +29,11 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.ConversionUtil; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -57,6 +60,7 @@ import com.google.common.collect.ImmutableMap.Builder; import com.google.common.collect.Lists; public class TypeConverter { + private static final Map<String, HiveToken> calciteToHiveTypeNameMap; // TODO: Handling of char[], varchar[], string... @@ -162,7 +166,9 @@ public class TypeConverter { convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE); break; case STRING: - convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE); + convertedType = dtFactory.createTypeWithCharsetAndCollation( + dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE), + Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT); break; case DATE: convertedType = dtFactory.createSqlType(SqlTypeName.DATE); @@ -187,12 +193,14 @@ public class TypeConverter { .createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale()); break; case VARCHAR: - convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, - ((BaseCharTypeInfo) type).getLength()); + convertedType = dtFactory.createTypeWithCharsetAndCollation( + dtFactory.createSqlType(SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()), + Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT); break; case CHAR: - convertedType = dtFactory.createSqlType(SqlTypeName.CHAR, - ((BaseCharTypeInfo) type).getLength()); + convertedType = dtFactory.createTypeWithCharsetAndCollation( + dtFactory.createSqlType(SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()), + Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT); break; case UNKNOWN: convertedType = dtFactory.createSqlType(SqlTypeName.OTHER); http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index 9911179..f9388e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.ppr.PartExprEvalUtils; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -474,7 +475,20 @@ public final class PcrExprProcFactory { new ExprNodeConstantDesc(fd.getTypeInfo(), result)); } - return new NodeInfoWrapper(WalkState.CONSTANT, null, getOutExpr(fd, nodeOutputs)); + // Try to fold, otherwise return the expression itself + final ExprNodeGenericFuncDesc desc = getOutExpr(fd, nodeOutputs); + final ExprNodeDesc foldedDesc = ConstantPropagateProcFactory.foldExpr(desc); + if (foldedDesc != null && foldedDesc instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc constant = (ExprNodeConstantDesc) foldedDesc; + if (Boolean.TRUE.equals(constant.getValue())) { + return new NodeInfoWrapper(WalkState.TRUE, null, constant); + } else if (Boolean.FALSE.equals(constant.getValue())) { + return new NodeInfoWrapper(WalkState.FALSE, null, constant); + } else { + return new NodeInfoWrapper(WalkState.CONSTANT, null, constant); + } + } + return new NodeInfoWrapper(WalkState.CONSTANT, null, desc); } } }; http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index de6a053..49e65e7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -140,6 +140,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; @@ -153,21 +154,22 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectFilterPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; @@ -1154,6 +1156,7 @@ public class CalcitePlanner extends SemanticAnalyzer { rules.add(HiveFilterJoinRule.FILTER_ON_JOIN); rules.add(new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class)); rules.add(new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY)); + rules.add(HiveProjectFilterPullUpConstantsRule.INSTANCE); rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE); rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE); rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE); @@ -1168,6 +1171,7 @@ public class CalcitePlanner extends SemanticAnalyzer { rules.add(HiveSortMergeRule.INSTANCE); rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE); rules.add(HiveUnionPullUpConstantsRule.INSTANCE); + rules.add(HiveAggregatePullUpConstantsRule.INSTANCE); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, rules.toArray(new RelOptRule[rules.size()])); @@ -1217,10 +1221,10 @@ public class CalcitePlanner extends SemanticAnalyzer { perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Projection Pruning"); - // 8. Merge Project-Project if possible + // 8. Merge, remove and reduce Project if possible perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, null, new ProjectMergeRule(true, - HiveRelFactories.HIVE_PROJECT_FACTORY)); + basePlan = hepPlan(basePlan, false, mdProvider, null, + HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Merge Project-Project"); @@ -1229,9 +1233,11 @@ public class CalcitePlanner extends SemanticAnalyzer { // storage (incase there are filters on non partition cols). This only // matches FIL-PROJ-TS perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, new HiveFilterProjectTSTransposeRule( - Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, - HiveRelFactories.HIVE_PROJECT_FACTORY, HiveTableScan.class)); + basePlan = hepPlan(basePlan, true, mdProvider, null, + new HiveFilterProjectTSTransposeRule( + Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, + HiveRelFactories.HIVE_PROJECT_FACTORY, HiveTableScan.class), + HiveProjectFilterPullUpConstantsRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Rerun PPD"); http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6937308..8c93018 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -35,8 +35,8 @@ import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Queue; import java.util.Map.Entry; +import java.util.Queue; import java.util.Set; import java.util.TreeSet; import java.util.UUID; @@ -71,7 +71,6 @@ import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SQLForeignKey; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.QueryState; @@ -197,7 +196,6 @@ import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef; import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; @@ -3155,9 +3153,23 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); + + ExprNodeDesc filterCond = genExprNodeDesc(condn, inputRR, useCaching, isCBOExecuted()); + if (filterCond instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc c = (ExprNodeConstantDesc) filterCond; + if (Boolean.TRUE.equals(c.getValue())) { + // If filter condition is TRUE, we ignore it + return input; + } + if (ExprNodeDescUtils.isNullConstant(c)) { + // If filter condition is NULL, transform to FALSE + filterCond = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false); + } + } + Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new FilterDesc(genExprNodeDesc(condn, inputRR, useCaching, isCBOExecuted()), false), - new RowSchema(inputRR.getColumnInfos()), input), inputRR); + new FilterDesc(filterCond, false), new RowSchema( + inputRR.getColumnInfos()), input), inputRR); if (LOG.isDebugEnabled()) { LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: " http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 2eaed56..239cc61 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -52,10 +52,12 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.SettableUDF; @@ -1070,6 +1072,17 @@ public class TypeCheckProcFactory { desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, children); } + + // If the function is deterministic and the children are constants, + // we try to fold the expression to remove e.g. cast on constant + if (ctx.isFoldExpr() && desc instanceof ExprNodeGenericFuncDesc && + FunctionRegistry.isDeterministic(genericUDF) && + ExprNodeDescUtils.isAllConstants(children)) { + ExprNodeDesc constantExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)desc); + if (constantExpr != null) { + desc = constantExpr; + } + } } // UDFOPPositive is a no-op. // However, we still create it, and then remove it here, to make sure we http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index c6f8907..2b7b0c3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; @@ -210,15 +211,25 @@ public class ExprNodeDescUtils { */ public static ArrayList<ExprNodeDesc> backtrack(List<ExprNodeDesc> sources, Operator<?> current, Operator<?> terminal) throws SemanticException { + return backtrack(sources, current, terminal, false); + } + + public static ArrayList<ExprNodeDesc> backtrack(List<ExprNodeDesc> sources, + Operator<?> current, Operator<?> terminal, boolean foldExpr) throws SemanticException { ArrayList<ExprNodeDesc> result = new ArrayList<ExprNodeDesc>(); for (ExprNodeDesc expr : sources) { - result.add(backtrack(expr, current, terminal)); + result.add(backtrack(expr, current, terminal, foldExpr)); } return result; } public static ExprNodeDesc backtrack(ExprNodeDesc source, Operator<?> current, Operator<?> terminal) throws SemanticException { + return backtrack(source, current, terminal, false); + } + + public static ExprNodeDesc backtrack(ExprNodeDesc source, Operator<?> current, + Operator<?> terminal, boolean foldExpr) throws SemanticException { Operator<?> parent = getSingleParent(current, terminal); if (parent == null) { return source; @@ -226,7 +237,7 @@ public class ExprNodeDescUtils { if (source instanceof ExprNodeGenericFuncDesc) { // all children expression should be resolved ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) source.clone(); - List<ExprNodeDesc> children = backtrack(function.getChildren(), current, terminal); + List<ExprNodeDesc> children = backtrack(function.getChildren(), current, terminal, foldExpr); for (ExprNodeDesc child : children) { if (child == null) { // Could not resolve all of the function children, fail @@ -234,6 +245,13 @@ public class ExprNodeDescUtils { } } function.setChildren(children); + if (foldExpr) { + // fold after replacing, if possible + ExprNodeDesc foldedFunction = ConstantPropagateProcFactory.foldExpr(function); + if (foldedFunction != null) { + return foldedFunction; + } + } return function; } if (source instanceof ExprNodeColumnDesc) { @@ -243,7 +261,7 @@ public class ExprNodeDescUtils { if (source instanceof ExprNodeFieldDesc) { // field expression should be resolved ExprNodeFieldDesc field = (ExprNodeFieldDesc) source.clone(); - ExprNodeDesc fieldDesc = backtrack(field.getDesc(), current, terminal); + ExprNodeDesc fieldDesc = backtrack(field.getDesc(), current, terminal, foldExpr); if (fieldDesc == null) { return null; } @@ -485,6 +503,25 @@ public class ExprNodeDescUtils { } } + public static boolean isConstant(ExprNodeDesc value) { + if (value instanceof ExprNodeConstantDesc) { + return true; + } + if (value instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) value; + if (!FunctionRegistry.isDeterministic(func.getGenericUDF())) { + return false; + } + for (ExprNodeDesc child : func.getChildren()) { + if (!isConstant(child)) { + return false; + } + } + return true; + } + return false; + } + public static boolean isAllConstants(List<ExprNodeDesc> value) { for (ExprNodeDesc expr : value) { if (!(expr instanceof ExprNodeConstantDesc)) { @@ -641,4 +678,35 @@ public class ExprNodeDescUtils { } return (expr instanceof ExprNodeColumnDesc) ? (ExprNodeColumnDesc)expr : null; } + + // Find the constant origin of a certain column if it is originated from a constant + // Otherwise, it returns the expression that originated the column + public static ExprNodeDesc findConstantExprOrigin(String dpCol, Operator<? extends OperatorDesc> op) { + ExprNodeDesc expr = op.getColumnExprMap().get(dpCol); + ExprNodeDesc foldedExpr; + // If it is a function, we try to fold it + if (expr instanceof ExprNodeGenericFuncDesc) { + foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)expr); + if (foldedExpr == null) { + foldedExpr = expr; + } + } else { + foldedExpr = expr; + } + // If it is a column reference, we will try to resolve it + if (foldedExpr instanceof ExprNodeColumnDesc) { + Operator<? extends OperatorDesc> originOp = null; + for(Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) { + if (parentOp.getColumnExprMap() != null) { + originOp = parentOp; + break; + } + } + if (originOp != null) { + return findConstantExprOrigin(((ExprNodeColumnDesc)foldedExpr).getColumn(), originOp); + } + } + // Otherwise, we return the expression + return foldedExpr; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/queries/clientpositive/join_view.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/join_view.q b/ql/src/test/queries/clientpositive/join_view.q index 16b6816..69c96be 100644 --- a/ql/src/test/queries/clientpositive/join_view.q +++ b/ql/src/test/queries/clientpositive/join_view.q @@ -3,8 +3,6 @@ drop table invites2; create table invites (foo int, bar string) partitioned by (ds string); create table invites2 (foo int, bar string) partitioned by (ds string); -set hive.mapred.mode=strict; - -- test join views: see HIVE-1989 create view v as select invites.bar, invites2.foo, invites2.ds from invites join invites2 on invites.ds=invites2.ds; @@ -13,4 +11,4 @@ explain select * from v where ds='2011-09-01'; drop view v; drop table invites; -drop table invites2; \ No newline at end of file +drop table invites2; http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/annotate_stats_filter.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index ba0419e..99183fc 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -718,15 +718,15 @@ STAGE PLANS: alias: loc_orc Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((year = 2001) and year is null) (type: boolean) + predicate: false (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), null (type: int) + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out b/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out index c2b9872..52d17b4 100644 --- a/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out +++ b/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out @@ -137,6 +137,7 @@ POSTHOOK: Input: default@tstsrcpart POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 0 3 +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/archive_multi.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/archive_multi.q.out b/ql/src/test/results/clientpositive/archive_multi.q.out index 0ad29d1..38f3f1a 100644 --- a/ql/src/test/results/clientpositive/archive_multi.q.out +++ b/ql/src/test/results/clientpositive/archive_multi.q.out @@ -141,6 +141,7 @@ POSTHOOK: Input: ac_test@tstsrcpart POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 0 3 +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM ac_test.tstsrcpart a JOIN ac_test.tstsrc b ON a.key=b.key WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out b/ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out index a9ed049..fefb50c 100644 --- a/ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out +++ b/ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT explain authorization select * from src join srcpart @@ -20,7 +20,7 @@ CURRENT_USER: hive_test_user OPERATION: QUERY -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain formatted authorization select * from src join srcpart PREHOOK: type: QUERY POSTHOOK: query: explain formatted authorization select * from src join srcpart http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/auto_join33.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join33.q.out b/ql/src/test/results/clientpositive/auto_join33.q.out index b0b3019..5a8bf8c 100644 --- a/ql/src/test/results/clientpositive/auto_join33.q.out +++ b/ql/src/test/results/clientpositive/auto_join33.q.out @@ -42,8 +42,8 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 (UDFToDouble(_col0) + UDFToDouble(1)) (type: double) - 1 (UDFToDouble(_col0) + UDFToDouble(2)) (type: double) + 0 (UDFToDouble(_col0) + 1.0) (type: double) + 1 (UDFToDouble(_col0) + 2.0) (type: double) Stage: Stage-3 Map Reduce @@ -62,8 +62,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 (UDFToDouble(_col0) + UDFToDouble(1)) (type: double) - 1 (UDFToDouble(_col0) + UDFToDouble(2)) (type: double) + 0 (UDFToDouble(_col0) + 1.0) (type: double) + 1 (UDFToDouble(_col0) + 2.0) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/auto_join8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join8.q.out b/ql/src/test/results/clientpositive/auto_join8.q.out index 324f95d..8daa1c5 100644 --- a/ql/src/test/results/clientpositive/auto_join8.q.out +++ b/ql/src/test/results/clientpositive/auto_join8.q.out @@ -152,7 +152,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [] POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/auto_join_filters.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_filters.q.out b/ql/src/test/results/clientpositive/auto_join_filters.q.out index 2fdf470..2d4a043 100644 --- a/ql/src/test/results/clientpositive/auto_join_filters.q.out +++ b/ql/src/test/results/clientpositive/auto_join_filters.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_input2 -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/auto_join_nulls.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/auto_join_nulls.q.out index 4af5535..44917c5 100644 --- a/ql/src/test/results/clientpositive/auto_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/auto_join_nulls.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -24,7 +24,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 13630578 -Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out index d8eacbe..62c819e 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out @@ -138,7 +138,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -631,7 +631,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/bucket_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucket_groupby.q.out b/ql/src/test/results/clientpositive/bucket_groupby.q.out index ae736f9..1afab38 100644 --- a/ql/src/test/results/clientpositive/bucket_groupby.q.out +++ b/ql/src/test/results/clientpositive/bucket_groupby.q.out @@ -438,33 +438,29 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), 3 (type: int) + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), 3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), 3 (type: int) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), 3 (type: int) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1018,34 +1014,30 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), 3 (type: int) + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), 3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), 3 (type: int) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), 3 (type: int) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out b/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out index 6b40ee8..557e270 100644 --- a/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out +++ b/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out @@ -22,7 +22,7 @@ POSTHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@T2 -Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1) tmp1 http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/cast1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cast1.q.out b/ql/src/test/results/clientpositive/cast1.q.out index 48a0c14..d87c04c 100644 --- a/ql/src/test/results/clientpositive/cast1.q.out +++ b/ql/src/test/results/clientpositive/cast1.q.out @@ -110,8 +110,8 @@ POSTHOOK: Lineage: dest1.c2 SIMPLE [] POSTHOOK: Lineage: dest1.c3 SIMPLE [] POSTHOOK: Lineage: dest1.c4 SIMPLE [] POSTHOOK: Lineage: dest1.c5 SIMPLE [] -POSTHOOK: Lineage: dest1.c6 EXPRESSION [] -POSTHOOK: Lineage: dest1.c7 EXPRESSION [] +POSTHOOK: Lineage: dest1.c6 SIMPLE [] +POSTHOOK: Lineage: dest1.c7 SIMPLE [] PREHOOK: query: select dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 http://git-wip-us.apache.org/repos/asf/hive/blob/10423f51/ql/src/test/results/clientpositive/cbo_const.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_const.q.out b/ql/src/test/results/clientpositive/cbo_const.q.out index c2a5194..ecf0269 100644 --- a/ql/src/test/results/clientpositive/cbo_const.q.out +++ b/ql/src/test/results/clientpositive/cbo_const.q.out @@ -19,21 +19,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 2 01:02:03.000000000 2 01:02:03.000000000 2 01:02:03.000000000 2 01:02:03.000000000 true +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: drop view t1