Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java Wed Sep 24 07:03:35 2014 @@ -28,6 +28,8 @@ import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; +import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -38,11 +40,14 @@ import org.apache.hadoop.hive.ql.lib.Nod import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.MergeJoinWork; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.TezWork.VertexType; import org.apache.hadoop.hive.ql.plan.UnionWork; /** @@ -126,6 +131,48 @@ public class GenTezWork implements NodeP context.childToWorkMap.get(operator).add(work); } + // this transformation needs to be first because it changes the work item itself. + // which can affect the working of all downstream transformations. + if (context.currentMergeJoinOperator != null) { + // we are currently walking the big table side of the merge join. we need to create or hook up + // merge join work. + MergeJoinWork mergeJoinWork = null; + if (context.opMergeJoinWorkMap.containsKey(operator)) { + // we have found a merge work corresponding to this closing operator. Hook up this work. + mergeJoinWork = context.opMergeJoinWorkMap.get(operator); + } else { + // we need to create the merge join work + mergeJoinWork = new MergeJoinWork(); + mergeJoinWork.setMergeJoinOperator(context.currentMergeJoinOperator); + tezWork.add(mergeJoinWork); + context.opMergeJoinWorkMap.put(operator, mergeJoinWork); + } + // connect the work correctly. + mergeJoinWork.addMergedWork(work, null); + Operator<? extends OperatorDesc> parentOp = + getParentFromStack(context.currentMergeJoinOperator, stack); + int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp); + work.setTag(pos); + tezWork.setVertexType(work, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES); + for (BaseWork parentWork : tezWork.getParents(work)) { + TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work); + tezWork.disconnect(parentWork, work); + tezWork.connect(parentWork, mergeJoinWork, edgeProp); + } + + for (BaseWork childWork : tezWork.getChildren(work)) { + TezEdgeProperty edgeProp = tezWork.getEdgeProperty(work, childWork); + tezWork.disconnect(work, childWork); + tezWork.connect(mergeJoinWork, childWork, edgeProp); + } + tezWork.remove(work); + context.rootToWorkMap.put(root, mergeJoinWork); + context.childToWorkMap.get(operator).remove(work); + context.childToWorkMap.get(operator).add(mergeJoinWork); + work = mergeJoinWork; + context.currentMergeJoinOperator = null; + } + // remember which mapjoin operator links with which work if (!context.currentMapJoinOperators.isEmpty()) { for (MapJoinOperator mj: context.currentMapJoinOperators) { @@ -169,6 +216,9 @@ public class GenTezWork implements NodeP LOG.debug("connecting "+parentWork.getName()+" with "+work.getName()); TezEdgeProperty edgeProp = parentWorkMap.getValue(); tezWork.connect(parentWork, work, edgeProp); + if (edgeProp.getEdgeType() == EdgeType.CUSTOM_EDGE) { + tezWork.setVertexType(work, VertexType.INITIALIZED_EDGES); + } // need to set up output name for reduce sink now that we know the name // of the downstream work @@ -192,14 +242,6 @@ public class GenTezWork implements NodeP context.currentMapJoinOperators.clear(); } - // This is where we cut the tree as described above. We also remember that - // we might have to connect parent work with this work later. - for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) { - context.leafOperatorToFollowingWork.put(parent, work); - LOG.debug("Removing " + parent + " as parent from " + root); - root.removeParent(parent); - } - if (!context.currentUnionOperators.isEmpty()) { // if there are union all operators we need to add the work to the set // of union operators. @@ -229,6 +271,21 @@ public class GenTezWork implements NodeP work = unionWork; } + + // This is where we cut the tree as described above. We also remember that + // we might have to connect parent work with this work later. + boolean removeParents = false; + for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) { + removeParents = true; + context.leafOperatorToFollowingWork.put(parent, work); + LOG.debug("Removing " + parent + " as parent from " + root); + } + if (removeParents) { + for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) { + root.removeParent(parent); + } + } + // We're scanning a tree from roots to leaf (this is not technically // correct, demux and mux operators might form a diamond shape, but // we will only scan one path and ignore the others, because the @@ -248,31 +305,64 @@ public class GenTezWork implements NodeP LOG.debug("Second pass. Leaf operator: "+operator +" has common downstream work:"+followingWork); - // need to add this branch to the key + value info - assert operator instanceof ReduceSinkOperator - && followingWork instanceof ReduceWork; - ReduceSinkOperator rs = (ReduceSinkOperator) operator; - ReduceWork rWork = (ReduceWork) followingWork; - GenMapRedUtils.setKeyAndValueDesc(rWork, rs); - - // remember which parent belongs to which tag - rWork.getTagToInput().put(rs.getConf().getTag(), work.getName()); - - // remember the output name of the reduce sink - rs.getConf().setOutputName(rWork.getName()); - - if (!context.connectedReduceSinks.contains(rs)) { - // add dependency between the two work items - TezEdgeProperty edgeProp; - if (rWork.isAutoReduceParallelism()) { - edgeProp = - new TezEdgeProperty(context.conf, EdgeType.SIMPLE_EDGE, true, - rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer); + if (operator instanceof DummyStoreOperator) { + // this is the small table side. + assert (followingWork instanceof MergeJoinWork); + MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork; + CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator(); + work.setTag(mergeJoinOp.getTagForOperator(operator)); + mergeJoinWork.addMergedWork(null, work); + tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES); + for (BaseWork parentWork : tezWork.getParents(work)) { + TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work); + tezWork.disconnect(parentWork, work); + tezWork.connect(parentWork, mergeJoinWork, edgeProp); + } + work = mergeJoinWork; + } else { + // need to add this branch to the key + value info + assert operator instanceof ReduceSinkOperator + && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork) + || followingWork instanceof UnionWork); + ReduceSinkOperator rs = (ReduceSinkOperator) operator; + ReduceWork rWork = null; + if (followingWork instanceof MergeJoinWork) { + MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork; + rWork = (ReduceWork) mergeJoinWork.getMainWork(); + } else if (followingWork instanceof UnionWork) { + // this can only be possible if there is merge work followed by the union + UnionWork unionWork = (UnionWork) followingWork; + int index = getMergeIndex(tezWork, unionWork, rs); + // guaranteed to be instance of MergeJoinWork if index is valid + MergeJoinWork mergeJoinWork = (MergeJoinWork) tezWork.getChildren(unionWork).get(index); + // disconnect the connection to union work and connect to merge work + followingWork = mergeJoinWork; + rWork = (ReduceWork) mergeJoinWork.getMainWork(); } else { - edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE); + rWork = (ReduceWork) followingWork; + } + GenMapRedUtils.setKeyAndValueDesc(rWork, rs); + + // remember which parent belongs to which tag + int tag = rs.getConf().getTag(); + rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName()); + + // remember the output name of the reduce sink + rs.getConf().setOutputName(rWork.getName()); + + if (!context.connectedReduceSinks.contains(rs)) { + // add dependency between the two work items + TezEdgeProperty edgeProp; + if (rWork.isAutoReduceParallelism()) { + edgeProp = + new TezEdgeProperty(context.conf, EdgeType.SIMPLE_EDGE, true, + rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer); + } else { + edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE); + } + tezWork.connect(work, followingWork, edgeProp); + context.connectedReduceSinks.add(rs); } - tezWork.connect(work, rWork, edgeProp); - context.connectedReduceSinks.add(rs); } } else { LOG.debug("First pass. Leaf operator: "+operator); @@ -289,4 +379,28 @@ public class GenTezWork implements NodeP return null; } + + private int getMergeIndex(TezWork tezWork, UnionWork unionWork, ReduceSinkOperator rs) { + int index = 0; + for (BaseWork baseWork : tezWork.getChildren(unionWork)) { + if (baseWork instanceof MergeJoinWork) { + MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork; + int tag = mergeJoinWork.getMergeJoinOperator().getTagForOperator(rs); + if (tag != -1) { + return index; + } else { + index++; + } + } + } + + return -1; + } + + @SuppressWarnings("unchecked") + private Operator<? extends OperatorDesc> getParentFromStack(Node currentMergeJoinOperator, + Stack<Node> stack) { + int pos = stack.indexOf(currentMergeJoinOperator); + return (Operator<? extends OperatorDesc>) stack.get(pos - 1); + } }
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Wed Sep 24 07:03:35 2014 @@ -36,7 +36,9 @@ import org.apache.hadoop.hive.conf.HiveC import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; +import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.ConditionalTask; +import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; @@ -62,6 +64,7 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin; import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization; +import org.apache.hadoop.hive.ql.optimizer.MergeJoinProc; import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc; import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize; import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; @@ -330,10 +333,17 @@ public class TezCompiler extends TaskCom opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc()); + opRules.put(new RuleRegExp("Recoginze a Sorted Merge Join operator to setup the right edge and" + + " stop traversing the DummyStore-MapJoin", CommonMergeJoinOperator.getOperatorName() + + "%"), new MergeJoinProc()); + opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork)); + opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName() + + "%"), genTezWork); + opRules.put(new RuleRegExp("Handle Potential Analyze Command", TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(GenTezUtils.getUtils())); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Wed Sep 24 07:03:35 2014 @@ -41,6 +41,7 @@ public abstract class BaseWork extends A // Their function is mainly as root ops to give the mapjoin the correct // schema info. List<HashTableDummyOperator> dummyOps; + int tag; public BaseWork() {} @@ -100,7 +101,7 @@ public abstract class BaseWork extends A // add all children opStack.addAll(opSet); - + while(!opStack.empty()) { Operator<?> op = opStack.pop(); returnSet.add(op); @@ -139,4 +140,12 @@ public abstract class BaseWork extends A } public abstract void configureJobConf(JobConf job); + + public void setTag(int tag) { + this.tag = tag; + } + + public int getTag() { + return tag; + } } Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java?rev=1627235&view=auto ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java (added) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java Wed Sep 24 07:03:35 2014 @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.Operator; + +@Explain(displayName = "Merge Join Operator") +public class CommonMergeJoinDesc extends MapJoinDesc implements Serializable { + private static final long serialVersionUID = 1L; + private int numBuckets; + private boolean isSubQuery; + private int mapJoinConversionPos; + + CommonMergeJoinDesc() { + } + + public CommonMergeJoinDesc(int numBuckets, boolean isSubQuery, int mapJoinConversionPos, + MapJoinDesc joinDesc) { + super(joinDesc); + this.numBuckets = numBuckets; + this.isSubQuery = isSubQuery; + this.mapJoinConversionPos = mapJoinConversionPos; + } + + public boolean getCustomMerge() { + return isSubQuery; + } + + public int getNumBuckets() { + return numBuckets; + } + + public int getBigTablePosition() { + return mapJoinConversionPos; + } +} Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java Wed Sep 24 07:03:35 2014 @@ -69,6 +69,7 @@ public class MapJoinDesc extends JoinDes // Hash table memory usage allowed; used in case of non-staged mapjoin. private float hashtableMemoryUsage; + protected boolean genJoinKeys = true; public MapJoinDesc() { bigTableBucketNumMapping = new LinkedHashMap<String, Integer>(); @@ -332,4 +333,16 @@ public class MapJoinDesc extends JoinDes public boolean getCustomBucketMapJoin() { return this.customBucketMapJoin; } + + public boolean isMapSideJoin() { + return true; + } + + public void setGenJoinKeys(boolean genJoinKeys) { + this.genJoinKeys = genJoinKeys; + } + + public boolean getGenJoinKeys() { + return genJoinKeys; + } } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Wed Sep 24 07:03:35 2014 @@ -96,6 +96,7 @@ public class MapWork extends BaseWork { private Long minSplitSize; private Long minSplitSizePerNode; private Long minSplitSizePerRack; + private final int tag = 0; //use sampled partitioning private int samplingType; @@ -126,6 +127,8 @@ public class MapWork extends BaseWork { private Map<String, List<ExprNodeDesc>> eventSourcePartKeyExprMap = new LinkedHashMap<String, List<ExprNodeDesc>>(); + private boolean doSplitsGrouping = true; + public MapWork() {} public MapWork(String name) { @@ -567,4 +570,12 @@ public class MapWork extends BaseWork { public void setEventSourcePartKeyExprMap(Map<String, List<ExprNodeDesc>> map) { this.eventSourcePartKeyExprMap = map; } + + public void setDoSplitsGrouping(boolean doSplitsGrouping) { + this.doSplitsGrouping = doSplitsGrouping; + } + + public boolean getDoSplitsGrouping() { + return this.doSplitsGrouping; + } } Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java?rev=1627235&view=auto ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java (added) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java Wed Sep 24 07:03:35 2014 @@ -0,0 +1,88 @@ +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; +import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.mapred.JobConf; + +public class MergeJoinWork extends BaseWork { + + private CommonMergeJoinOperator mergeJoinOp = null; + private final List<BaseWork> mergeWorkList = new ArrayList<BaseWork>(); + private BaseWork bigTableWork; + + public MergeJoinWork() { + super(); + } + + @Override + public String getName() { + return super.getName(); + } + + @Override + public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) { + getMainWork().replaceRoots(replacementMap); + } + + @Override + public Set<Operator<?>> getAllRootOperators() { + return getMainWork().getAllRootOperators(); + } + + @Override + public void configureJobConf(JobConf job) { + } + + public CommonMergeJoinOperator getMergeJoinOperator() { + return this.mergeJoinOp; + } + + public void setMergeJoinOperator(CommonMergeJoinOperator mergeJoinOp) { + this.mergeJoinOp = mergeJoinOp; + } + + public void addMergedWork(BaseWork work, BaseWork connectWork) { + if (work != null) { + if ((bigTableWork != null) && (bigTableWork != work)) { + assert false; + } + this.bigTableWork = work; + setName(work.getName()); + } + + if (connectWork != null) { + this.mergeWorkList.add(connectWork); + } + } + + @Explain(skipHeader=true, displayName = "Join") + public List<BaseWork> getBaseWorkList() { + return mergeWorkList; + } + + public String getBigTableAlias() { + return ((MapWork) bigTableWork).getAliasToWork().keySet().iterator().next(); + } + + @Explain(skipHeader=true, displayName = "Main") + public BaseWork getMainWork() { + return bigTableWork; + } + + @Override + public void setDummyOps(List<HashTableDummyOperator> dummyOps) { + getMainWork().setDummyOps(dummyOps); + } + + @Override + public void addDummyOp(HashTableDummyOperator dummyOp) { + getMainWork().addDummyOp(dummyOp); + } +} Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java Wed Sep 24 07:03:35 2014 @@ -20,17 +20,16 @@ package org.apache.hadoop.hive.ql.plan; import java.util.List; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; - public class OpTraits { - + List<List<String>> bucketColNames; + List<List<String>> sortColNames; int numBuckets; - - public OpTraits(List<List<String>> bucketColNames, int numBuckets) { + + public OpTraits(List<List<String>> bucketColNames, int numBuckets, List<List<String>> sortColNames) { this.bucketColNames = bucketColNames; this.numBuckets = numBuckets; + this.sortColNames = sortColNames; } public List<List<String>> getBucketColNames() { @@ -42,10 +41,18 @@ public class OpTraits { } public void setBucketColNames(List<List<String>> bucketColNames) { - this.bucketColNames = bucketColNames; + this.bucketColNames = bucketColNames; } public void setNumBuckets(int numBuckets) { this.numBuckets = numBuckets; } + + public void setSortColNames(List<List<String>> sortColNames) { + this.sortColNames = sortColNames; + } + + public List<List<String>> getSortCols() { + return sortColNames; + } } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java Wed Sep 24 07:03:35 2014 @@ -47,6 +47,22 @@ import org.apache.hadoop.mapred.JobConf; @Explain(displayName = "Tez") public class TezWork extends AbstractOperatorDesc { + public enum VertexType { + AUTO_INITIALIZED_EDGES, // no custom vertex or edge + INITIALIZED_EDGES, // custom vertex and custom edge but single MR Input + MULTI_INPUT_INITIALIZED_EDGES, // custom vertex, custom edge and multi MR Input + MULTI_INPUT_UNINITIALIZED_EDGES // custom vertex, no custom edge, multi MR Input + ; + + public static boolean isCustomInputType(VertexType vertex) { + if ((vertex == null) || (vertex == AUTO_INITIALIZED_EDGES)) { + return false; + } else { + return true; + } + } + } + private static transient final Log LOG = LogFactory.getLog(TezWork.class); private static int counter; @@ -57,6 +73,7 @@ public class TezWork extends AbstractOpe private final Map<BaseWork, List<BaseWork>> invertedWorkGraph = new HashMap<BaseWork, List<BaseWork>>(); private final Map<Pair<BaseWork, BaseWork>, TezEdgeProperty> edgeProperties = new HashMap<Pair<BaseWork, BaseWork>, TezEdgeProperty>(); + private final Map<BaseWork, VertexType> workVertexTypeMap = new HashMap<BaseWork, VertexType>(); public TezWork(String name) { this.name = name + ":" + (++counter); @@ -341,4 +358,40 @@ public class TezWork extends AbstractOpe ImmutablePair workPair = new ImmutablePair(a, b); edgeProperties.put(workPair, edgeProp); } + + public void setVertexType(BaseWork w, VertexType incomingVertexType) { + VertexType vertexType = workVertexTypeMap.get(w); + if (vertexType == null) { + vertexType = VertexType.AUTO_INITIALIZED_EDGES; + } + switch (vertexType) { + case INITIALIZED_EDGES: + if (incomingVertexType == VertexType.MULTI_INPUT_UNINITIALIZED_EDGES) { + vertexType = VertexType.MULTI_INPUT_INITIALIZED_EDGES; + } + break; + + case MULTI_INPUT_INITIALIZED_EDGES: + // nothing to do + break; + + case MULTI_INPUT_UNINITIALIZED_EDGES: + if (incomingVertexType == VertexType.INITIALIZED_EDGES) { + vertexType = VertexType.MULTI_INPUT_INITIALIZED_EDGES; + } + break; + + case AUTO_INITIALIZED_EDGES: + vertexType = incomingVertexType; + break; + + default: + break; + } + workVertexTypeMap.put(w, vertexType); + } + + public VertexType getVertexType(BaseWork w) { + return workVertexTypeMap.get(w); + } } Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (original) +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java Wed Sep 24 07:03:35 2014 @@ -331,7 +331,8 @@ public class TestOperators extends TestC Configuration hconf = new JobConf(TestOperators.class); HiveConf.setVar(hconf, HiveConf.ConfVars.HADOOPMAPFILENAME, "hdfs:///testDir/testFile"); - IOContext.get().setInputPath(new Path("hdfs:///testDir/testFile")); + IOContext.get(hconf.get(Utilities.INPUT_NAME)).setInputPath( + new Path("hdfs:///testDir/testFile")); // initialize pathToAliases ArrayList<String> aliases = new ArrayList<String>(); Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java (original) +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java Wed Sep 24 07:03:35 2014 @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.plan.Re import org.apache.hadoop.hive.ql.plan.TezEdgeProperty; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.TezWork.VertexType; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.mapred.JobConf; @@ -92,8 +93,11 @@ public class TestTezTask { path = mock(Path.class); when(path.getFileSystem(any(Configuration.class))).thenReturn(fs); when(utils.getTezDir(any(Path.class))).thenReturn(path); - when(utils.createVertex(any(JobConf.class), any(BaseWork.class), any(Path.class), any(LocalResource.class), - any(List.class), any(FileSystem.class), any(Context.class), anyBoolean(), any(TezWork.class))).thenAnswer(new Answer<Vertex>() { + when( + utils.createVertex(any(JobConf.class), any(BaseWork.class), any(Path.class), + any(LocalResource.class), any(List.class), any(FileSystem.class), any(Context.class), + anyBoolean(), any(TezWork.class), any(VertexType.class))).thenAnswer( + new Answer<Vertex>() { @Override public Vertex answer(InvocationOnMock invocation) throws Throwable { @@ -103,8 +107,8 @@ public class TestTezTask { } }); - when(utils.createEdge(any(JobConf.class), any(Vertex.class), - any(Vertex.class), any(TezEdgeProperty.class))).thenAnswer(new Answer<Edge>() { + when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class), + any(TezEdgeProperty.class), any(VertexType.class))).thenAnswer(new Answer<Edge>() { @Override public Edge answer(InvocationOnMock invocation) throws Throwable { Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java?rev=1627235&r1=1627234&r2=1627235&view=diff ============================================================================== --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java (original) +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java Wed Sep 24 07:03:35 2014 @@ -115,7 +115,8 @@ public class TestHiveBinarySearchRecordR } private void resetIOContext() { - ioContext = IOContext.get(); + conf.set(Utilities.INPUT_NAME, "TestHiveBinarySearchRecordReader"); + ioContext = IOContext.get(conf.get(Utilities.INPUT_NAME)); ioContext.setUseSorted(false); ioContext.setIsBinarySearching(false); ioContext.setEndBinarySearch(false); @@ -124,6 +125,7 @@ public class TestHiveBinarySearchRecordR } private void init() throws IOException { + conf = new JobConf(); resetIOContext(); rcfReader = mock(RCFileRecordReader.class); when(rcfReader.next((LongWritable)anyObject(), @@ -131,7 +133,6 @@ public class TestHiveBinarySearchRecordR // Since the start is 0, and the length is 100, the first call to sync should be with the value // 50 so return that for getPos() when(rcfReader.getPos()).thenReturn(50L); - conf = new JobConf(); conf.setBoolean("hive.input.format.sorted", true); TableDesc tblDesc = Utilities.defaultTd; Added: hive/trunk/ql/src/test/queries/clientpositive/tez_smb_1.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/tez_smb_1.q?rev=1627235&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/tez_smb_1.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/tez_smb_1.q Wed Sep 24 07:03:35 2014 @@ -0,0 +1,38 @@ +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; + +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + +set hive.enforce.bucketing=true; +set hive.enforce.sorting = true; +set hive.optimize.bucketingsorting=false; +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +set hive.convert.join.bucket.mapjoin.tez = true; +set hive.auto.convert.sortmerge.join = true; + +set hive.auto.convert.join.noconditionaltask.size=500; + +explain +select count(*) from tab s1 join tab s3 on s1.key=s3.key; + +select s1.key, s1.value, s3.value from tab s1 join tab s3 on s1.key=s3.key; +select count(*) from tab s2; + Added: hive/trunk/ql/src/test/queries/clientpositive/tez_smb_main.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/tez_smb_main.q?rev=1627235&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/tez_smb_main.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/tez_smb_main.q Wed Sep 24 07:03:35 2014 @@ -0,0 +1,84 @@ +explain +select * from src a join src1 b on a.key = b.key; + +select * from src a join src1 b on a.key = b.key; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; + +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + +set hive.enforce.bucketing=true; +set hive.enforce.sorting = true; +set hive.optimize.bucketingsorting=false; +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +set hive.convert.join.bucket.mapjoin.tez = true; +set hive.auto.convert.sortmerge.join = true; + +explain +select count(*) +from tab a join tab_part b on a.key = b.key; + +select count(*) +from tab a join tab_part b on a.key = b.key; + +set hive.auto.convert.join.noconditionaltask.size=2000; +explain +select count (*) +from tab a join tab_part b on a.key = b.key; + +select count(*) +from tab a join tab_part b on a.key = b.key; + +set hive.auto.convert.join.noconditionaltask.size=1000; +explain +select count (*) +from tab a join tab_part b on a.key = b.key; + +select count(*) +from tab a join tab_part b on a.key = b.key; + +set hive.auto.convert.join.noconditionaltask.size=500; +explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; +select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; + +explain select count(*) from tab a join tab_part b on a.value = b.value; +select count(*) from tab a join tab_part b on a.value = b.value; + +explain +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key); + +set hive.auto.convert.join.noconditionaltask.size=10000; +explain select count(*) from tab a join tab_part b on a.value = b.value; +select count(*) from tab a join tab_part b on a.value = b.value; + +explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; +select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; + +explain +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key); + Added: hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_1.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_1.q.out?rev=1627235&view=auto ============================================================================== Files hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_1.q.out (added) and hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_1.q.out Wed Sep 24 07:03:35 2014 differ Added: hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out?rev=1627235&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out Wed Sep 24 07:03:35 2014 @@ -0,0 +1,369 @@ +PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +POSTHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +POSTHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- One of the subqueries contains a union, so it should not be converted to a sort-merge join. +explain +select count(*) from + ( + select * from + (select a.key as key, a.value as value from tbl1 a where key < 6 + union all + select a.key as key, a.value as value from tbl1 a where key < 6 + ) usubq1 ) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the subqueries contains a union, so it should not be converted to a sort-merge join. +explain +select count(*) from + ( + select * from + (select a.key as key, a.value as value from tbl1 a where key < 6 + union all + select a.key as key, a.value as value from tbl1 a where key < 6 + ) usubq1 ) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 5 + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 5 + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + (select a.key as key, a.value as value from tbl1 a where key < 6 + union all + select a.key as key, a.value as value from tbl1 a where key < 6 + ) usubq1 ) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + (select a.key as key, a.value as value from tbl1 a where key < 6 + union all + select a.key as key, a.value as value from tbl1 a where key < 6 + ) usubq1 ) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +40 +PREHOOK: query: -- One of the subqueries contains a groupby, so it should not be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the subqueries contains a groupby, so it should not be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 3 <- Reducer 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 0 Reducer 2 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +8 Added: hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_11.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_11.q.out?rev=1627235&view=auto ============================================================================== Files hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_11.q.out (added) and hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_11.q.out Wed Sep 24 07:03:35 2014 differ Added: hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out?rev=1627235&view=auto ============================================================================== Files hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out (added) and hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out Wed Sep 24 07:03:35 2014 differ Added: hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_13.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_13.q.out?rev=1627235&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_13.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_13.q.out Wed Sep 24 07:03:35 2014 @@ -0,0 +1,692 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +POSTHOOK: query: insert overwrite table tbl1 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +POSTHOOK: query: insert overwrite table tbl2 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE dest1(k1 int, k2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(k1 int, k2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(k1 string, k2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(k1 string, k2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.k1 SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.k2 SIMPLE [(tbl2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.k1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.k2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +2 2 +4 4 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +8 8 +9 9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_2 val_2 +val_4 val_4 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_8 val_8 +val_9 val_9 +PREHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.k1 SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.k2 SIMPLE [(tbl2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.k1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.k2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +2 2 +4 4 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +8 8 +9 9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_2 val_2 +val_4 val_4 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_8 val_8 +val_9 val_9 +PREHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.k1 SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.k2 SIMPLE [(tbl2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.k1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.k2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +2 2 +4 4 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +8 8 +9 9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_2 val_2 +val_4 val_4 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_8 val_8 +val_9 val_9
