Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java?rev=1613661&view=auto ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java (added) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java Sat Jul 26 15:39:55 2014 @@ -0,0 +1,944 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.Stack; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; +import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.udf.UDFType; 
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.collect.ImmutableSet; + +/** + * Factory for generating the different node processors used by ConstantPropagate. + */ +public final class ConstantPropagateProcFactory { + protected static final Log LOG = LogFactory.getLog(ConstantPropagateProcFactory.class.getName()); + protected static Set<Class<?>> propagatableUdfs = new HashSet<Class<?>>(); + + static { + propagatableUdfs.add(GenericUDFOPAnd.class); + }; + + private ConstantPropagateProcFactory() { + // prevent instantiation + } + + /** + * Get ColumnInfo from column expression. + * + * @param rr + * @param desc + * @return + */ + public static ColumnInfo resolveColumn(RowResolver rr, + ExprNodeColumnDesc desc) { + try { + ColumnInfo ci = rr.get(desc.getTabAlias(), desc.getColumn()); + if (ci == null) { + String[] tmp = rr.reverseLookup(desc.getColumn()); + if (tmp == null) { + return null; + } + ci = rr.get(tmp[0], tmp[1]); + ci.setTabAlias(tmp[0]); + ci.setAlias(tmp[1]); + } else { + String[] tmp = rr.reverseLookup(ci.getInternalName()); + if (tmp == null) { + return null; + } + ci.setTabAlias(tmp[0]); + ci.setAlias(tmp[1]); + } + return ci; + } catch (SemanticException e) { + throw new RuntimeException(e); + } + } + + private static final Set<PrimitiveCategory> unSupportedTypes = ImmutableSet + .<PrimitiveCategory>builder() + .add(PrimitiveCategory.DECIMAL) + .add(PrimitiveCategory.VARCHAR) + .add(PrimitiveCategory.CHAR).build(); + + /** + * Cast type from expression type to expected type ti. + * + * @param desc constant expression + * @param ti expected type info + * @return cast constant, or null if the type cast failed. 
+ */ + private static ExprNodeConstantDesc typeCast(ExprNodeDesc desc, TypeInfo ti) { + if (desc instanceof ExprNodeNullDesc) { + return null; + } + if (!(ti instanceof PrimitiveTypeInfo) || !(desc.getTypeInfo() instanceof PrimitiveTypeInfo)) { + return null; + } + + PrimitiveTypeInfo priti = (PrimitiveTypeInfo) ti; + PrimitiveTypeInfo descti = (PrimitiveTypeInfo) desc.getTypeInfo(); + + if (unSupportedTypes.contains(priti.getPrimitiveCategory()) + || unSupportedTypes.contains(descti.getPrimitiveCategory())) { + // FIXME: support template types. It currently has conflict with + // ExprNodeConstantDesc + return null; + } + LOG.debug("Casting " + desc + " to type " + ti); + ExprNodeConstantDesc c = (ExprNodeConstantDesc) desc; + ObjectInspector origOI = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(desc.getTypeInfo()); + ObjectInspector oi = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(ti); + Converter converter = ObjectInspectorConverters.getConverter(origOI, oi); + Object convObj = converter.convert(c.getValue()); + + // Convert integer related types because converters are not sufficient + if (convObj instanceof Integer) { + switch (priti.getPrimitiveCategory()) { + case BYTE: + convObj = new Byte((byte) (((Integer) convObj).intValue())); + break; + case SHORT: + convObj = new Short((short) ((Integer) convObj).intValue()); + break; + case LONG: + convObj = new Long(((Integer) convObj).intValue()); + default: + } + } + return new ExprNodeConstantDesc(ti, convObj); + } + + /** + * Fold input expression desc. + * + * If desc is a UDF and all parameters are constants, evaluate it. If desc is a column expression, + * find it from propagated constants, and if there is, replace it with constant. + * + * @param desc folding expression + * @param constants current propagated constant map + * @param cppCtx + * @param op processing operator + * @param propagate if true, assignment expressions will be added to constants. + * @return fold expression + */ + private static ExprNodeDesc foldExpr(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants, + ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag, + boolean propagate) { + if (desc instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc; + + // The function must be deterministic, or we can't fold it. + GenericUDF udf = funcDesc.getGenericUDF(); + if (!isDeterministicUdf(udf)) { + LOG.debug("Function " + udf.getClass() + " undeterministic, quit folding."); + return desc; + } + + boolean propagateNext = propagate && propagatableUdfs.contains(udf.getClass()); + List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>(); + for (ExprNodeDesc childExpr : desc.getChildren()) { + newExprs.add(foldExpr(childExpr, constants, cppCtx, op, tag, propagateNext)); + } + + // If all child expressions are constants, evaluate UDF immediately + ExprNodeDesc constant = evaluateFunction(udf, newExprs, desc.getChildren()); + if (constant != null) { + LOG.debug("Folding expression:" + desc + " -> " + constant); + return constant; + } else { + + // Check if the function can be short cut. 
+ ExprNodeDesc shortcut = shortcutFunction(udf, newExprs); + if (shortcut != null) { + LOG.debug("Folding expression:" + desc + " -> " + shortcut); + return shortcut; + } + ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs); + } + + // If in some selected binary operators (=, is null, etc), one of the + // expressions are + // constant, add them to colToConstatns as half-deterministic columns. + if (propagate) { + propagate(udf, newExprs, cppCtx.getRowResolver(op), constants); + } + + return desc; + } else if (desc instanceof ExprNodeColumnDesc) { + if (op.getParentOperators() == null || op.getParentOperators().isEmpty()) { + return desc; + } + Operator<? extends Serializable> parent = op.getParentOperators().get(tag); + ExprNodeDesc col = evaluateColumn((ExprNodeColumnDesc) desc, cppCtx, parent); + if (col != null) { + LOG.debug("Folding expression:" + desc + " -> " + col); + return col; + } + } + return desc; + } + + private static boolean isDeterministicUdf(GenericUDF udf) { + UDFType udfType = udf.getClass().getAnnotation(UDFType.class); + if (udf instanceof GenericUDFBridge) { + udfType = ((GenericUDFBridge) udf).getUdfClass().getAnnotation(UDFType.class); + } + if (udfType.deterministic() == false) { + return false; + } + + // If udf is requiring additional jars, we can't determine the result in + // compile time. + String[] files; + String[] jars; + if (udf instanceof GenericUDFBridge) { + GenericUDFBridge bridge = (GenericUDFBridge) udf; + String udfClassName = bridge.getUdfClassName(); + try { + UDF udfInternal = + (UDF) Class.forName(bridge.getUdfClassName(), true, JavaUtils.getClassLoader()) + .newInstance(); + files = udfInternal.getRequiredFiles(); + jars = udf.getRequiredJars(); + } catch (Exception e) { + LOG.error("The UDF implementation class '" + udfClassName + + "' is not present in the class path"); + return false; + } + } else { + files = udf.getRequiredFiles(); + jars = udf.getRequiredJars(); + } + if (files != null || jars != null) { + return false; + } + return true; + } + + /** + * Propagate assignment expression, adding an entry into constant map constants. + * + * @param udf expression UDF, currently only 2 UDFs are supported: '=' and 'is null'. + * @param newExprs child expressions (parameters). 
+ * @param cppCtx + * @param op + * @param constants + */ + private static void propagate(GenericUDF udf, List<ExprNodeDesc> newExprs, RowResolver rr, + Map<ColumnInfo, ExprNodeDesc> constants) { + if (udf instanceof GenericUDFOPEqual) { + ExprNodeDesc lOperand = newExprs.get(0); + ExprNodeDesc rOperand = newExprs.get(1); + ExprNodeColumnDesc c; + ExprNodeConstantDesc v; + if (lOperand instanceof ExprNodeColumnDesc && rOperand instanceof ExprNodeConstantDesc) { + c = (ExprNodeColumnDesc) lOperand; + v = (ExprNodeConstantDesc) rOperand; + } else if (rOperand instanceof ExprNodeColumnDesc && lOperand instanceof ExprNodeConstantDesc) { + c = (ExprNodeColumnDesc) rOperand; + v = (ExprNodeConstantDesc) lOperand; + } else { + return; + } + ColumnInfo ci = resolveColumn(rr, c); + if (ci != null) { + LOG.debug("Filter " + udf + " is identified as a value assignment, propagate it."); + if (!v.getTypeInfo().equals(ci.getType())) { + v = typeCast(v, ci.getType()); + } + if (v != null) { + constants.put(ci, v); + } + } + } else if (udf instanceof GenericUDFOPNull) { + ExprNodeDesc operand = newExprs.get(0); + if (operand instanceof ExprNodeColumnDesc) { + LOG.debug("Filter " + udf + " is identified as a value assignment, propagate it."); + ExprNodeColumnDesc c = (ExprNodeColumnDesc) operand; + ColumnInfo ci = resolveColumn(rr, c); + if (ci != null) { + constants.put(ci, new ExprNodeNullDesc()); + } + } + } + } + + private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc> newExprs) { + if (udf instanceof GenericUDFOPAnd) { + for (int i = 0; i < 2; i++) { + ExprNodeDesc childExpr = newExprs.get(i); + if (childExpr instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr; + if (c.getValue() == Boolean.TRUE) { + + // if true, prune it + return newExprs.get(Math.abs(i - 1)); + } else { + + // if false return false + return childExpr; + } + } + } + } + + if (udf instanceof GenericUDFOPOr) { + for (int i = 0; i < 2; i++) { + ExprNodeDesc childExpr = newExprs.get(i); + if (childExpr instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr; + if (c.getValue() == Boolean.FALSE) { + + // if false, prune it + return newExprs.get(Math.abs(i - 1)); + } else { + + // if true return true + return childExpr; + } + } + } + } + + return null; + } + + /** + * Evaluate column, replace the deterministic columns with constants if possible + * + * @param desc + * @param ctx + * @param op + * @param colToConstants + * @return + */ + private static ExprNodeDesc evaluateColumn(ExprNodeColumnDesc desc, + ConstantPropagateProcCtx cppCtx, Operator<? 
extends Serializable> parent) { + try { + ColumnInfo ci = null; + RowResolver rr = cppCtx.getOpToParseCtxMap().get(parent).getRowResolver(); + String[] tmp = rr.reverseLookup(desc.getColumn()); + if (tmp == null) { + LOG.error("Reverse look up of column " + desc + " error!"); + return null; + } + ci = rr.get(tmp[0], tmp[1]); + if (ci != null) { + ExprNodeDesc constant = null; + // Additional work for union operator, see union27.q + if (ci.getAlias() == null) { + for (Entry<ColumnInfo, ExprNodeDesc> e : cppCtx.getOpToConstantExprs().get(parent).entrySet()) { + if (e.getKey().getInternalName().equals(ci.getInternalName())) { + constant = e.getValue(); + break; + } + } + } else { + constant = cppCtx.getOpToConstantExprs().get(parent).get(ci); + } + if (constant != null) { + if (constant instanceof ExprNodeConstantDesc + && !constant.getTypeInfo().equals(desc.getTypeInfo())) { + return typeCast(constant, desc.getTypeInfo()); + } + return constant; + } else { + return null; + } + } + LOG.error("Can't resolve " + desc.getTabAlias() + "." + desc.getColumn()); + throw new RuntimeException("Can't resolve " + desc.getTabAlias() + "." + desc.getColumn()); + } catch (SemanticException e) { + throw new RuntimeException(e); + } + + } + + /** + * Evaluate UDF + * + * @param udf UDF object + * @param exprs + * @param oldExprs + * @return null if expression cannot be evaluated (not all parameters are constants). Or evaluated + * ExprNodeConstantDesc if possible. + * @throws HiveException + */ + private static ExprNodeDesc evaluateFunction(GenericUDF udf, List<ExprNodeDesc> exprs, + List<ExprNodeDesc> oldExprs) { + DeferredJavaObject[] arguments = new DeferredJavaObject[exprs.size()]; + ObjectInspector[] argois = new ObjectInspector[exprs.size()]; + for (int i = 0; i < exprs.size(); i++) { + ExprNodeDesc desc = exprs.get(i); + if (desc instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc constant = (ExprNodeConstantDesc) exprs.get(i); + if (!constant.getTypeInfo().equals(oldExprs.get(i).getTypeInfo())) { + constant = typeCast(constant, oldExprs.get(i).getTypeInfo()); + if (constant == null) { + return null; + } + } + Object value = constant.getValue(); + PrimitiveTypeInfo pti = (PrimitiveTypeInfo) constant.getTypeInfo(); + Object writableValue = + PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti) + .getPrimitiveWritableObject(value); + arguments[i] = new DeferredJavaObject(writableValue); + argois[i] = + ObjectInspectorUtils.getConstantObjectInspector(constant.getWritableObjectInspector(), + writableValue); + } else if (desc instanceof ExprNodeNullDesc) { + + // FIXME: add null support. + return null; + } else { + return null; + } + } + + try { + ObjectInspector oi = udf.initialize(argois); + Object o = udf.evaluate(arguments); + LOG.debug(udf.getClass().getName() + "(" + exprs + ")=" + o); + if (o == null) { + return new ExprNodeNullDesc(); + } + Class<?> clz = o.getClass(); + if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(clz)) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + TypeInfo typeInfo = poi.getTypeInfo(); + + // Handling parameterized types (varchar, decimal, etc). + if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME) + || typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME) + || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) { + + // Do not support parameterized types. 
+ return null; + } + o = poi.getPrimitiveJavaObject(o); + } else if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) { + + } else { + LOG.error("Unable to evaluate " + udf + ". Return value unrecoginizable."); + return null; + } + return new ExprNodeConstantDesc(o); + } catch (HiveException e) { + LOG.error("Evaluation function " + udf.getClass() + + " failed in Constant Propagatation Optimizer."); + throw new RuntimeException(e); + } + } + + /** + * Change operator row schema, replace column with constant if it is. + * + * @param op + * @param constants + * @throws SemanticException + */ + private static void foldOperator(Operator<? extends Serializable> op, + ConstantPropagateProcCtx cppCtx) throws SemanticException { + RowSchema schema = op.getSchema(); + Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getOpToConstantExprs().get(op); + if (schema != null && schema.getSignature() != null) { + for (ColumnInfo col : schema.getSignature()) { + ExprNodeDesc constant = constants.get(col); + if (constant != null) { + LOG.debug("Replacing column " + col + " with constant " + constant + " in " + op); + if (!col.getType().equals(constant.getTypeInfo())) { + constant = typeCast(constant, col.getType()); + } + if (constant != null) { + col.setObjectinspector(constant.getWritableObjectInspector()); + } + } + } + } + + Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap(); + if (colExprMap != null) { + for (Entry<ColumnInfo, ExprNodeDesc> e : constants.entrySet()) { + String internalName = e.getKey().getInternalName(); + if (colExprMap.containsKey(internalName)) { + colExprMap.put(internalName, e.getValue()); + } + } + } + } + + /** + * Node Processor for Constant Propagation on Filter Operators. The processor is to fold + * conditional expressions and extract assignment expressions and propagate them. + */ + public static class ConstantPropagateFilterProc implements NodeProcessor { + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + FilterOperator op = (FilterOperator) nd; + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op); + cppCtx.getOpToConstantExprs().put(op, constants); + + ExprNodeDesc condn = op.getConf().getPredicate(); + LOG.debug("Old filter FIL[" + op.getIdentifier() + "] conditions:" + condn.getExprString()); + ExprNodeDesc newCondn = foldExpr(condn, constants, cppCtx, op, 0, true); + if (newCondn instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc c = (ExprNodeConstantDesc) newCondn; + if (c.getValue() == Boolean.TRUE) { + cppCtx.addOpToDelete(op); + LOG.debug("Filter expression " + condn + " holds true. Will delete it."); + } else if (c.getValue() == Boolean.FALSE) { + LOG.warn("Filter expression " + condn + " holds false!"); + } + } + LOG.debug("New filter FIL[" + op.getIdentifier() + "] conditions:" + newCondn.getExprString()); + + // merge it with the downstream col list + op.getConf().setPredicate(newCondn); + foldOperator(op, cppCtx); + return null; + } + + } + + /** + * Factory method to get the ConstantPropagateFilterProc class. + * + * @return ConstantPropagateFilterProc + */ + public static ConstantPropagateFilterProc getFilterProc() { + return new ConstantPropagateFilterProc(); + } + + /** + * Node Processor for Constant Propagate for Group By Operators. 
+ */ + public static class ConstantPropagateGroupByProc implements NodeProcessor { + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + GroupByOperator op = (GroupByOperator) nd; + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + Map<ColumnInfo, ExprNodeDesc> colToConstants = cppCtx.getPropagatedConstants(op); + cppCtx.getOpToConstantExprs().put(op, colToConstants); + + if (colToConstants.isEmpty()) { + return null; + } + + GroupByDesc conf = op.getConf(); + ArrayList<ExprNodeDesc> keys = conf.getKeys(); + for (int i = 0; i < keys.size(); i++) { + ExprNodeDesc key = keys.get(i); + ExprNodeDesc newkey = foldExpr(key, colToConstants, cppCtx, op, 0, false); + keys.set(i, newkey); + } + foldOperator(op, cppCtx); + return null; + } + } + + /** + * Factory method to get the ConstantPropagateGroupByProc class. + * + * @return ConstantPropagateGroupByProc + */ + public static ConstantPropagateGroupByProc getGroupByProc() { + return new ConstantPropagateGroupByProc(); + } + + /** + * The Default Node Processor for Constant Propagation. + */ + public static class ConstantPropagateDefaultProc implements NodeProcessor { + @SuppressWarnings("unchecked") + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + Operator<? extends Serializable> op = (Operator<? extends Serializable>) nd; + Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op); + cppCtx.getOpToConstantExprs().put(op, constants); + if (constants.isEmpty()) { + return null; + } + foldOperator(op, cppCtx); + return null; + } + } + + /** + * Factory method to get the ConstantPropagateDefaultProc class. + * + * @return ConstantPropagateDefaultProc + */ + public static ConstantPropagateDefaultProc getDefaultProc() { + return new ConstantPropagateDefaultProc(); + } + + /** + * The Node Processor for Constant Propagation for Select Operators. + */ + public static class ConstantPropagateSelectProc implements NodeProcessor { + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + SelectOperator op = (SelectOperator) nd; + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op); + cppCtx.getOpToConstantExprs().put(op, constants); + foldOperator(op, cppCtx); + List<ExprNodeDesc> colList = op.getConf().getColList(); + if (colList != null) { + for (int i = 0; i < colList.size(); i++) { + ExprNodeDesc newCol = foldExpr(colList.get(i), constants, cppCtx, op, 0, false); + colList.set(i, newCol); + } + LOG.debug("New column list:(" + StringUtils.join(colList, " ") + ")"); + } + return null; + } + } + + /** + * The Factory method to get the ConstantPropagateSelectProc class. + * + * @return ConstantPropagateSelectProc + */ + public static ConstantPropagateSelectProc getSelectProc() { + return new ConstantPropagateSelectProc(); + } + + /** + * The Node Processor for constant propagation for FileSink Operators. In addition to constant + * propagation, this processor also prunes dynamic partitions to static partitions if possible. + */ + public static class ConstantPropagateFileSinkProc implements NodeProcessor { + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... 
nodeOutputs) + throws SemanticException { + FileSinkOperator op = (FileSinkOperator) nd; + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op); + cppCtx.getOpToConstantExprs().put(op, constants); + if (constants.isEmpty()) { + return null; + } + FileSinkDesc fsdesc = op.getConf(); + DynamicPartitionCtx dpCtx = fsdesc.getDynPartCtx(); + if (dpCtx != null) { + + // If all dynamic partitions are propagated as constant, remove DP. + Set<String> inputs = dpCtx.getInputToDPCols().keySet(); + + // Assume only 1 parent for FS operator + Operator<? extends Serializable> parent = op.getParentOperators().get(0); + Map<ColumnInfo, ExprNodeDesc> parentConstants = cppCtx.getPropagatedConstants(parent); + RowResolver rr = cppCtx.getOpToParseCtxMap().get(parent).getRowResolver(); + boolean allConstant = true; + for (String input : inputs) { + String tmp[] = rr.reverseLookup(input); + ColumnInfo ci = rr.get(tmp[0], tmp[1]); + if (parentConstants.get(ci) == null) { + allConstant = false; + break; + } + } + if (allConstant) { + pruneDP(fsdesc); + } + } + foldOperator(op, cppCtx); + return null; + } + + private void pruneDP(FileSinkDesc fsdesc) { + // FIXME: Support pruning dynamic partitioning. + LOG.info("DP can be rewritten to SP!"); + } + } + + public static NodeProcessor getFileSinkProc() { + return new ConstantPropagateFileSinkProc(); + } + + /** + * The Node Processor for Constant Propagation for Operators which is designed to stop propagate. + * Currently these kinds of Operators include UnionOperator and ScriptOperator. + */ + public static class ConstantPropagateStopProc implements NodeProcessor { + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + Operator<?> op = (Operator<?>) nd; + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + cppCtx.getOpToConstantExprs().put(op, new HashMap<ColumnInfo, ExprNodeDesc>()); + LOG.debug("Stop propagate constants on op " + op.getOperatorId()); + return null; + } + } + + public static NodeProcessor getStopProc() { + return new ConstantPropagateStopProc(); + } + + /** + * The Node Processor for Constant Propagation for ReduceSink Operators. If the RS Operator is for + * a join, then only those constants from inner join tables, or from the 'inner side' of a outer + * join (left table for left outer join and vice versa) can be propagated. + */ + public static class ConstantPropagateReduceSinkProc implements NodeProcessor { + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... 
nodeOutputs) + throws SemanticException { + ReduceSinkOperator op = (ReduceSinkOperator) nd; + ReduceSinkDesc rsDesc = op.getConf(); + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op); + + cppCtx.getOpToConstantExprs().put(op, constants); + if (constants.isEmpty()) { + return null; + } + + if (op.getChildOperators().size() == 1 + && op.getChildOperators().get(0) instanceof JoinOperator) { + JoinOperator joinOp = (JoinOperator) op.getChildOperators().get(0); + if (skipFolding(joinOp.getConf(), rsDesc.getTag())) { + LOG.debug("Skip folding in outer join " + op); + cppCtx.getOpToConstantExprs().put(op, new HashMap<ColumnInfo, ExprNodeDesc>()); + return null; + } + } + + if (rsDesc.getDistinctColumnIndices() != null + && !rsDesc.getDistinctColumnIndices().isEmpty()) { + LOG.debug("Skip folding in distinct subqueries " + op); + cppCtx.getOpToConstantExprs().put(op, new HashMap<ColumnInfo, ExprNodeDesc>()); + return null; + } + + // key columns + ArrayList<ExprNodeDesc> newKeyEpxrs = new ArrayList<ExprNodeDesc>(); + for (ExprNodeDesc desc : rsDesc.getKeyCols()) { + newKeyEpxrs.add(foldExpr(desc, constants, cppCtx, op, 0, false)); + } + rsDesc.setKeyCols(newKeyEpxrs); + + // partition columns + ArrayList<ExprNodeDesc> newPartExprs = new ArrayList<ExprNodeDesc>(); + for (ExprNodeDesc desc : rsDesc.getPartitionCols()) { + ExprNodeDesc expr = foldExpr(desc, constants, cppCtx, op, 0, false); + if (expr instanceof ExprNodeConstantDesc || expr instanceof ExprNodeNullDesc) { + continue; + } + newPartExprs.add(expr); + } + rsDesc.setPartitionCols(newPartExprs); + + // value columns + ArrayList<ExprNodeDesc> newValExprs = new ArrayList<ExprNodeDesc>(); + for (ExprNodeDesc desc : rsDesc.getValueCols()) { + newValExprs.add(foldExpr(desc, constants, cppCtx, op, 0, false)); + } + rsDesc.setValueCols(newValExprs); + foldOperator(op, cppCtx); + return null; + } + + private boolean skipFolding(JoinDesc joinDesc, int tag) { + JoinCondDesc[] conds = joinDesc.getConds(); + int i; + for (i = conds.length - 1; i >= 0; i--) { + if (conds[i].getType() == JoinDesc.INNER_JOIN) { + if (tag == i + 1) + return false; + } else if (conds[i].getType() == JoinDesc.FULL_OUTER_JOIN) { + return true; + } else if (conds[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) { + if (tag == i + 1) + return false; + return true; + } else if (conds[i].getType() == JoinDesc.LEFT_OUTER_JOIN) { + if (tag == i + 1) + return true; + } + } + if (tag == 0) { + return false; + } + return true; + } + + } + + public static NodeProcessor getReduceSinkProc() { + return new ConstantPropagateReduceSinkProc(); + } + + /** + * The Node Processor for Constant Propagation for Join Operators. + */ + public static class ConstantPropagateJoinProc implements NodeProcessor { + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + JoinOperator op = (JoinOperator) nd; + JoinDesc conf = op.getConf(); + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op); + cppCtx.getOpToConstantExprs().put(op, constants); + if (constants.isEmpty()) { + return null; + } + + // Note: the following code (removing folded constants in exprs) is deeply coupled with + // ColumnPruner optimizer. + // Assuming ColumnPrunner will remove constant columns so we don't deal with output columns. 
+ // Except one case that the join operator is followed by a redistribution (RS operator). + if (op.getChildOperators().size() == 1 + && op.getChildOperators().get(0) instanceof ReduceSinkOperator) { + LOG.debug("Skip JOIN-RS structure."); + return null; + } + LOG.info("Old exprs " + conf.getExprs()); + Iterator<Entry<Byte, List<ExprNodeDesc>>> itr = conf.getExprs().entrySet().iterator(); + while (itr.hasNext()) { + Entry<Byte, List<ExprNodeDesc>> e = itr.next(); + int tag = e.getKey(); + List<ExprNodeDesc> exprs = e.getValue(); + if (exprs == null) { + continue; + } + List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>(); + for (ExprNodeDesc expr : exprs) { + ExprNodeDesc newExpr = foldExpr(expr, constants, cppCtx, op, tag, false); + if (newExpr instanceof ExprNodeConstantDesc || newExpr instanceof ExprNodeNullDesc) { + LOG.info("expr " + newExpr + " fold from " + expr + " is removed."); + continue; + } + newExprs.add(newExpr); + } + e.setValue(newExprs); + } + LOG.info("New exprs " + conf.getExprs()); + + for (List<ExprNodeDesc> v : conf.getFilters().values()) { + for (int i = 0; i < v.size(); i++) { + ExprNodeDesc expr = foldExpr(v.get(i), constants, cppCtx, op, 0, false); + v.set(i, expr); + } + } + foldOperator(op, cppCtx); + return null; + } + + } + + public static NodeProcessor getJoinProc() { + return new ConstantPropagateJoinProc(); + } + + /** + * The Node Processor for Constant Propagation for Table Scan Operators. + */ + public static class ConstantPropagateTableScanProc implements NodeProcessor { + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + TableScanOperator op = (TableScanOperator) nd; + TableScanDesc conf = op.getConf(); + ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx; + Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op); + cppCtx.getOpToConstantExprs().put(op, constants); + ExprNodeGenericFuncDesc pred = conf.getFilterExpr(); + if (pred == null) { + return null; + } + + List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>(); + for (ExprNodeDesc expr : pred.getChildren()) { + ExprNodeDesc constant = foldExpr(expr, constants, cppCtx, op, 0, false); + newChildren.add(constant); + } + pred.setChildren(newChildren); + return null; + } + } + + public static NodeProcessor getTableScanProc() { + return new ConstantPropagateTableScanProc(); + } +}
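The new ConstantPropagateProcFactory folds expression trees bottom-up: foldExpr() folds each child first, evaluateFunction() evaluates a deterministic UDF once every argument is a constant, and shortcutFunction() short-circuits AND/OR when one operand has already folded to a constant, which in turn lets ConstantPropagateFilterProc delete a FilterOperator whose predicate folds to TRUE. The self-contained Java sketch below models only that fold/shortcut loop; Expr, Const, Col, Func and FoldSketch are illustrative stand-ins, not Hive's ExprNodeDesc hierarchy.

// Minimal, self-contained sketch of the bottom-up fold plus AND/OR shortcut.
// These types are illustrative stand-ins, NOT Hive's ExprNodeDesc classes.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

interface Expr {}

final class Const implements Expr {
  final Object value;
  Const(Object value) { this.value = value; }
  @Override public String toString() { return String.valueOf(value); }
}

final class Col implements Expr {
  final String name;
  Col(String name) { this.name = name; }
  @Override public String toString() { return name; }
}

final class Func implements Expr {
  final String op;                  // "and", "or", "=", ... (deterministic only)
  final List<Expr> children;
  Func(String op, List<Expr> children) { this.op = op; this.children = children; }
  @Override public String toString() { return op + children; }
}

public final class FoldSketch {

  // Fold bottom-up: substitute known column constants, then try the shortcut.
  static Expr fold(Expr e, Map<String, Object> knownConstants) {
    if (e instanceof Col) {
      Object v = knownConstants.get(((Col) e).name);
      return v == null ? e : new Const(v);
    }
    if (!(e instanceof Func)) {
      return e;                     // a constant is already folded
    }
    Func f = (Func) e;
    List<Expr> newChildren = new ArrayList<Expr>();
    for (Expr child : f.children) {
      newChildren.add(fold(child, knownConstants));
    }
    Expr shortcut = shortcutAndOr(f.op, newChildren);
    return shortcut != null ? shortcut : new Func(f.op, newChildren);
  }

  // Counterpart of shortcutFunction(): TRUE is the identity of AND, FALSE of OR.
  static Expr shortcutAndOr(String op, List<Expr> children) {
    boolean isAnd = "and".equals(op);
    if ((!isAnd && !"or".equals(op)) || children.size() != 2) {
      return null;                  // only binary AND/OR are shortcut here
    }
    Object identity = isAnd ? Boolean.TRUE : Boolean.FALSE;
    for (int i = 0; i < 2; i++) {
      Expr child = children.get(i);
      if (child instanceof Const) {
        Object v = ((Const) child).value;
        // The identity operand is pruned; any other constant decides the result.
        return identity.equals(v) ? children.get(1 - i) : child;
      }
    }
    return null;                    // neither operand is a constant
  }

  public static void main(String[] args) {
    // "TRUE AND (value = 'x')": the constant conjunct folds away.
    Expr pred = new Func("and", Arrays.<Expr>asList(
        new Const(Boolean.TRUE),
        new Func("=", Arrays.<Expr>asList(new Col("value"), new Const("x")))));
    System.out.println(fold(pred, Collections.<String, Object>emptyMap())); // =[value, x]
  }
}

Running main() prints =[value, x]: the TRUE conjunct is pruned, which is the same kind of rewrite the filter processor applies to its predicate before foldOperator() updates the operator's row schema.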
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Sat Jul 26 15:39:55 2014 @@ -67,6 +67,9 @@ public class Optimizer { HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT)) { transformations.add(new GroupByOptimizer()); } + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { + transformations.add(new ConstantPropagate()); + } transformations.add(new ColumnPruner()); if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME)) { transformations.add(new SkewJoinOptimizer()); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeJoinProc.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeJoinProc.java?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeJoinProc.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeJoinProc.java Sat Jul 26 15:39:55 2014 @@ -18,13 +18,17 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.util.HashMap; +import java.util.Map; import java.util.Stack; import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.SemanticException; public class SortedMergeJoinProc extends AbstractSMBJoinProc implements NodeProcessor { @@ -42,6 +46,11 @@ public class SortedMergeJoinProc extends JoinOperator joinOp = (JoinOperator) nd; SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx; + Map<MapJoinOperator, QBJoinTree> mapJoinMap = pGraphContext.getMapJoinContext(); + if (mapJoinMap == null) { + mapJoinMap = new HashMap<MapJoinOperator, QBJoinTree>(); + pGraphContext.setMapJoinContext(mapJoinMap); + } boolean convert = canConvertJoinToSMBJoin( Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java Sat Jul 26 15:39:55 2014 @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.serde2.typ */ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable { private static final long serialVersionUID = 1L; + final protected transient static char[] hexArray = "0123456789ABCDEF".toCharArray(); private Object value; public ExprNodeConstantDesc() { @@ -83,6 +84,15 @@ public class ExprNodeConstantDesc extend if 
(typeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) { return "'" + value.toString() + "'"; + } else if (typeInfo.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) { + byte[] bytes = (byte[]) value; + char[] hexChars = new char[bytes.length * 2]; + for (int j = 0; j < bytes.length; j++) { + int v = bytes[j] & 0xFF; + hexChars[j * 2] = hexArray[v >>> 4]; + hexChars[j * 2 + 1] = hexArray[v & 0x0F]; + } + return new String(hexChars); } else { return value.toString(); } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java Sat Jul 26 15:39:55 2014 @@ -21,11 +21,14 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.PTFOperator; import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -77,6 +80,7 @@ import org.apache.hadoop.hive.ql.parse.S */ public class PredicatePushDown implements Transform { + private static final Log LOG = LogFactory.getLog(PredicatePushDown.class); private ParseContext pGraphContext; @Override @@ -126,6 +130,7 @@ public class PredicatePushDown implement topNodes.addAll(pGraphContext.getTopOps().values()); ogw.startWalking(topNodes, null); + LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values())); return pGraphContext; } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Sat Jul 26 15:39:55 2014 @@ -255,6 +255,10 @@ public class StatsUtils { int avgRowSize = 0; for (String neededCol : neededColumns) { ColumnInfo ci = getColumnInfoForColumn(neededCol, schema); + if (ci == null) { + // No need to collect statistics of index columns + continue; + } ObjectInspector oi = ci.getObjectInspector(); String colType = ci.getTypeName(); if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDFCurrentDB.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDFCurrentDB.java?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDFCurrentDB.java (original) +++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDFCurrentDB.java Sat Jul 26 15:39:55 2014 @@ -54,7 +54,7 @@ public class UDFCurrentDB extends Generi @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - throw new IllegalStateException("never"); + return SessionState.get().getCurrentDatabase(); } @Override Added: hive/trunk/ql/src/test/queries/clientpositive/constprog1.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/constprog1.q?rev=1613661&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/constprog1.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/constprog1.q Sat Jul 26 15:39:55 2014 @@ -0,0 +1,9 @@ +set hive.fetch.task.conversion=more; +set hive.optimize.constant.propagation=true; + +EXPLAIN +SELECT IF(INSTR(CONCAT('foo', 'bar'), 'foob') > 0, "F1", "B1") + FROM src tablesample (1 rows); + +SELECT IF(INSTR(CONCAT('foo', 'bar'), 'foob') > 0, "F1", "B1") + FROM src tablesample (1 rows); Added: hive/trunk/ql/src/test/queries/clientpositive/constprog2.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/constprog2.q?rev=1613661&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/constprog2.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/constprog2.q Sat Jul 26 15:39:55 2014 @@ -0,0 +1,10 @@ +set hive.fetch.task.conversion=more; +set hive.optimize.constant.propagation=true; + +EXPLAIN +SELECT src1.key, src1.key + 1, src2.value + FROM src src1 join src src2 ON src1.key = src2.key AND src1.key = 86; + +SELECT src1.key, src1.key + 1, src2.value + FROM src src1 join src src2 ON src1.key = src2.key AND src1.key = 86; + Added: hive/trunk/ql/src/test/queries/clientpositive/constprog_dp.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/constprog_dp.q?rev=1613661&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/constprog_dp.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/constprog_dp.q Sat Jul 26 15:39:55 2014 @@ -0,0 +1,11 @@ +set hive.optimize.constant.propagation=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +create table dest(key string, value string) partitioned by (ds string); + +EXPLAIN +from srcpart +insert overwrite table dest partition (ds) select key, value, ds where ds='2008-04-08'; + +from srcpart +insert overwrite table dest partition (ds) select key, value, ds where ds='2008-04-08'; Added: hive/trunk/ql/src/test/queries/clientpositive/constprog_type.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/constprog_type.q?rev=1613661&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/constprog_type.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/constprog_type.q Sat Jul 26 15:39:55 2014 @@ -0,0 +1,14 @@ +set hive.optimize.constant.propagation=true; + +CREATE TABLE dest1(d date, t timestamp); + +EXPLAIN +INSERT OVERWRITE TABLE dest1 +SELECT cast('2013-11-17' as date), cast(cast('1.3041352164485E9' as double) as timestamp) + FROM src tablesample (1 rows); + +INSERT OVERWRITE TABLE dest1 +SELECT cast('2013-11-17' as date), cast(cast('1.3041352164485E9' as double) as timestamp) + FROM src tablesample (1 rows); + +SELECT * 
FROM dest1; Modified: hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_18.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_18.q?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_18.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_18.q Sat Jul 26 15:39:55 2014 @@ -36,8 +36,10 @@ select count(*) from test_table2 where d select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'; select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'; +set hive.optimize.constant.propagation=false; -- Insert data into the bucketed table by selecting from another bucketed table -- This should be a map-only operation, one of the buckets should be empty + EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238; @@ -45,6 +47,7 @@ SELECT a.key, a.value FROM test_table1 a INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238; +set hive.optimize.constant.propagation=true; select count(*) from test_table2 where ds = '2'; select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0; select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1; Modified: hive/trunk/ql/src/test/queries/clientpositive/subquery_views.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/subquery_views.q?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/subquery_views.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/subquery_views.q Sat Jul 26 15:39:55 2014 @@ -26,6 +26,11 @@ where b.key not in ) ; +explain +select * +from cv2 where cv2.key in (select key from cv2 c where c.key < '11'); +; + select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11'); ; Modified: hive/trunk/ql/src/test/queries/clientpositive/union27.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union27.q?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/union27.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/union27.q Sat Jul 26 15:39:55 2014 @@ -1,4 +1,5 @@ create table jackson_sev_same as select * from src; create table dim_pho as select * from src; create table jackson_sev_add as select * from src; +explain select b.* from jackson_sev_same a join (select * from dim_pho union all select * from jackson_sev_add)b on a.key=b.key and b.key=97; select b.* from jackson_sev_same a join (select * from dim_pho union all select * from jackson_sev_add)b on a.key=b.key and b.key=97; Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out Sat Jul 26 15:39:55 2014 differ Modified: 
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out Sat Jul 26 15:39:55 2014 differ Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out Sat Jul 26 15:39:55 2014 differ Modified: hive/trunk/ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out Sat Jul 26 15:39:55 2014 @@ -36,11 +36,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 100) (type: boolean) - Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > 100) and key is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 1002 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: - 0 {key} + 0 1 {value} keys: 0 key (type: string) @@ -53,8 +53,8 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 100) (type: boolean) - Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > 100) and key is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 2004 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -65,14 +65,14 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col5 - Statistics: Num rows: 20 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1102 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1102 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1102 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Modified: hive/trunk/ql/src/test/results/clientpositive/cast1.q.out URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/cast1.q.out?rev=1613661&r1=1613660&r2=1613661&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/cast1.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/cast1.q.out Sat Jul 26 15:39:55 2014 @@ -27,17 +27,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2906 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 86) (type: boolean) - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1453 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (3 + 2) (type: int), (3.0 + 2) (type: double), (3 + 2.0) (type: double), (3.0 + 2.0) (type: double), ((3 + UDFToInteger(2.0)) + UDFToInteger(UDFToShort(0))) (type: int), UDFToBoolean(1) (type: boolean), UDFToInteger(true) (type: int) + expressions: 5 (type: int), 5.0 (type: double), 5.0 (type: double), 5.0 (type: double), 5 (type: int), true (type: boolean), 1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1453 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1453 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
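Separately from the folding logic, the ExprNodeConstantDesc change earlier in this commit renders BINARY constants as hex in getExprString(). The standalone class below reproduces that conversion so it can be exercised in isolation; HexRender is an illustrative wrapper name, not a Hive class.

// Standalone copy of the byte[]-to-hex rendering added for BINARY constants.
public final class HexRender {
  private static final char[] HEX_ARRAY = "0123456789ABCDEF".toCharArray();

  static String toHex(byte[] bytes) {
    char[] hexChars = new char[bytes.length * 2];
    for (int j = 0; j < bytes.length; j++) {
      int v = bytes[j] & 0xFF;                   // read the byte as unsigned
      hexChars[j * 2] = HEX_ARRAY[v >>> 4];      // high nibble
      hexChars[j * 2 + 1] = HEX_ARRAY[v & 0x0F]; // low nibble
    }
    return new String(hexChars);
  }

  public static void main(String[] args) {
    System.out.println(toHex(new byte[] {0x0A, (byte) 0xFF})); // prints 0AFF
  }
}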
