Author: ehans Date: Fri Jan 17 02:08:46 2014 New Revision: 1558987 URL: http://svn.apache.org/r1558987 Log: HIVE-5595: Implement vectorized SMB JOIN (Remus Rusanu via Eric Hanson)
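A note on the approach taken by this patch: rather than reimplementing the join, the new VectorSMBMapJoinOperator subclasses the row-mode SMBMapJoinOperator, precomputes keys and values for the incoming big-table batch with vector expressions, and then replays the batch one row at a time through the parent's processOp(). Below is a minimal sketch of that batch-to-row delegation pattern; Batch, RowModeJoin and VectorizedJoinSketch are hypothetical stand-ins for VectorizedRowBatch and SMBMapJoinOperator, not the committed classes (which appear in full in the diff that follows).

    // Hypothetical stand-in for VectorizedRowBatch.
    class Batch {
      int size;              // number of live rows in the batch
      boolean selectedInUse; // whether 'selected' holds the live row indexes
      int[] selected;
    }

    // Hypothetical stand-in for the row-mode SMBMapJoinOperator.
    class RowModeJoin {
      protected void processOp(Object row, int tag) {
        // row-mode join logic: consumes one logical row per call
      }
    }

    // Hypothetical vectorized subclass: replays each batch row through the parent.
    class VectorizedJoinSketch extends RowModeJoin {
      // Index of the row currently being replayed; overridden evaluators
      // use it to pick the right element out of the batch.
      private int batchIndex = -1;

      @Override
      protected void processOp(Object row, int tag) {
        Batch inBatch = (Batch) row;
        for (batchIndex = 0; batchIndex < inBatch.size; ++batchIndex) {
          super.processOp(row, tag); // the parent sees one row per call
        }
        batchIndex = -1; // invalidate so any stray use outside the loop fails fast
      }
    }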
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1558987&r1=1558986&r2=1558987&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Fri Jan 17 02:08:46 2014 @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.ve import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.CollectDesc; @@ -121,6 +122,7 @@ public final class OperatorFactory { vectorOpvec.add(new OpTuple<SelectDesc>(SelectDesc.class, VectorSelectOperator.class)); vectorOpvec.add(new OpTuple<GroupByDesc>(GroupByDesc.class, VectorGroupByOperator.class)); vectorOpvec.add(new OpTuple<MapJoinDesc>(MapJoinDesc.class, VectorMapJoinOperator.class)); + vectorOpvec.add(new OpTuple<SMBJoinDesc>(SMBJoinDesc.class, VectorSMBMapJoinOperator.class)); vectorOpvec.add(new OpTuple<ReduceSinkDesc>(ReduceSinkDesc.class, VectorReduceSinkOperator.class)); vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class)); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1558987&r1=1558986&r2=1558987&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Fri Jan 17 02:08:46 2014 @@ -226,6 +226,11 @@ public class SMBMapJoinOperator extends public void cleanUpInputFileChangedOp() throws HiveException { inputFileChanged = true; } + + protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException { + return JoinUtil.computeKeys(row, joinKeys[alias], + joinKeysObjectInspectors[alias]); + } @Override public void processOp(Object row, int tag) throws HiveException { @@ -260,8 +265,8 @@ public class SMBMapJoinOperator extends byte alias = (byte) tag; // compute keys and values as StandardObjects - ArrayList<Object> key = JoinUtil.computeKeys(row, joinKeys[alias], - joinKeysObjectInspectors[alias]); + 
List<Object> key = smbJoinComputeKeys(row, alias); + List<Object> value = getFilteredValue(alias, row); @@ -495,7 +500,7 @@ public class SMBMapJoinOperator extends return smallestOne == null ? null : result; } - private boolean processKey(byte alias, ArrayList<Object> key) + private boolean processKey(byte alias, List<Object> key) throws HiveException { List<Object> keyWritable = keyWritables[alias]; if (keyWritable == null) { Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1558987&r1=1558986&r2=1558987&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Fri Jan 17 02:08:46 2014 @@ -3192,10 +3192,24 @@ public final class Utilities { } } - public static void clearWorkMap() { + /** + * Returns true if a plan is both configured for vectorized execution + * and vectorization is allowed. The plan may be configured for vectorization + * but vectorization disallowed, e.g. for FetchOperator execution. + */ + public static boolean isVectorMode(Configuration conf) { + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && + Utilities.getPlanPath(conf) != null && Utilities + .getMapRedWork(conf).getMapWork().getVectorMode()) { + return true; + } + return false; + } + + public static void clearWorkMap() { gWorkMap.clear(); } - + /** * Create a temp dir in specified baseDir * This can go away once hive moves to support only JDK 7 Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java?rev=1558987&view=auto ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java (added) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java Fri Jan 17 02:08:46 2014 @@ -0,0 +1,313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; + +/** + * VectorSMBMapJoinOperator. + * Implements the vectorized SMB join operator. The implementation relies on the row-mode SMB join operator. + * It accepts a vectorized batch input from the big table and iterates over the batch, calling the parent row-mode + * implementation for each row in the batch. + */ +public class VectorSMBMapJoinOperator extends SMBMapJoinOperator implements VectorizationContextRegion { + + private static final Log LOG = LogFactory.getLog( + VectorSMBMapJoinOperator.class.getName()); + + private static final long serialVersionUID = 1L; + + private int tagLen; + + private transient VectorizedRowBatch outputBatch; + private transient VectorizationContext vOutContext = null; + private transient VectorizedRowBatchCtx vrbCtx = null; + + private String fileKey; + + private VectorExpression[] bigTableValueExpressions; + + private VectorExpression[] bigTableFilterExpressions; + + private VectorExpression[] keyExpressions; + + private VectorExpressionWriter[] keyOutputWriters; + + private transient VectorHashKeyWrapperBatch keyWrapperBatch; + + private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners; + + private transient int batchIndex = -1; + + private transient VectorHashKeyWrapper[] keyValues; + + private transient SMBJoinKeyEvaluator keyEvaluator; + + private transient VectorExpressionWriter[] valueWriters; + + private interface SMBJoinKeyEvaluator { List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException; +} + + public VectorSMBMapJoinOperator() { + super(); + } + + public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf) + throws HiveException { + this(); + SMBJoinDesc desc = (SMBJoinDesc) conf; + this.conf = desc; + + order = desc.getTagOrder(); + numAliases = desc.getExprs().size(); + posBigTable = (byte) desc.getPosBigTable(); + filterMaps = desc.getFilterMap(); + tagLen = desc.getTagLength(); + noOuterJoin = desc.isNoOuterJoin(); + + // Must obtain vectorized equivalents for filter and value expressions + + Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters(); + bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), + VectorExpressionDescriptor.Mode.FILTER); + + List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable); +
keyExpressions = vContext.getVectorExpressions(keyDesc); + keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc); + + Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs(); + bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); + + // Vectorized join operators need to create a new vectorization region for child operators. + + List<String> outColNames = desc.getOutputColumnNames(); + + Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size()); + + int outColIndex = 0; + for(String outCol: outColNames) { + mapOutCols.put(outCol, outColIndex++); + } + + vOutContext = new VectorizationContext(mapOutCols, outColIndex); + vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias()); + this.fileKey = vOutContext.getFileKey(); + } + + @Override + protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException { + if (alias == this.posBigTable) { + // For the big table, 'row' is the whole input batch; return the key + // precomputed in processOp for the row selected by batchIndex. + return keyEvaluator.evaluate(keyValues[batchIndex]); + } else { + return super.smbJoinComputeKeys(row, alias); + } + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + vrbCtx = new VectorizedRowBatchCtx(); + vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) this.outputObjInspector); + + outputBatch = vrbCtx.createVectorizedRowBatch(); + + keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions); + + outputVectorAssigners = new HashMap<ObjectInspector, VectorColumnAssign[]>(); + + // This key evaluator translates from the vectorized VectorHashKeyWrapper format + // into the row-mode SMB join key (a List<Object>) + keyEvaluator = new SMBJoinKeyEvaluator() { + private List<Object> key; + + public SMBJoinKeyEvaluator init() { + key = new ArrayList<Object>(); + for(int i = 0; i < keyExpressions.length; ++i) { + key.add(null); + } + return this; + } + + @Override + public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException { + for(int i = 0; i < keyExpressions.length; ++i) { + key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i])); + } + return key; + } + }.init(); + + Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs(); + List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable); + + // We're hijacking the big table evaluators and replacing them with our own custom ones + // which are going to return values from the input batch vector expressions + List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size()); + + VectorExpressionWriterFactory.processVectorExpressions( + bigTableExpressions, + new VectorExpressionWriterFactory.ListOIDClosure() { + + @Override + public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) { + valueWriters = writers; + joinValuesObjectInspectors[posBigTable] = oids; + } + }); + + for(int i=0; i<bigTableExpressions.size(); ++i) { + ExprNodeDesc desc = bigTableExpressions.get(i); + VectorExpression vectorExpr = bigTableValueExpressions[i]; + + // This is a vectorization-aware evaluator + ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc) { + int columnIndex; + int writerIndex; + + public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) { + this.columnIndex = columnIndex; + this.writerIndex = writerIndex; + return this; + } + + @Override + public ObjectInspector initialize(ObjectInspector rowInspector)
throws HiveException { + throw new HiveException("should never reach here"); + } + + @Override + protected Object _evaluate(Object row, int version) throws HiveException { + VectorizedRowBatch inBatch = (VectorizedRowBatch) row; + int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex; + return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex); + } + }.initVectorExpr(vectorExpr.getOutputColumn(), i); + vectorNodeEvaluators.add(eval); + } + // Now replace the old evaluators with our own + joinValues[posBigTable] = vectorNodeEvaluators; + + } + + @Override + public void processOp(Object row, int tag) throws HiveException { + byte alias = (byte) tag; + + if (alias != this.posBigTable) { + super.processOp(row, tag); + } else { + + VectorizedRowBatch inBatch = (VectorizedRowBatch) row; + + if (null != bigTableFilterExpressions) { + for(VectorExpression ve : bigTableFilterExpressions) { + ve.evaluate(inBatch); + } + } + + if (null != bigTableValueExpressions) { + for(VectorExpression ve : bigTableValueExpressions) { + ve.evaluate(inBatch); + } + } + + keyWrapperBatch.evaluateBatch(inBatch); + keyValues = keyWrapperBatch.getVectorHashKeyWrappers(); + + // This implementation of vectorized JOIN is delegating all the work + // to the row-mode implementation by hijacking the big table node evaluators + // and calling the row-mode join processOp for each row in the input batch. + // Since the JOIN operator is not fully vectorized anyway at the moment + // (due to the use of row-mode small-tables) this is a reasonable trade-off. + // + for(batchIndex=0; batchIndex < inBatch.size; ++batchIndex ) { + super.processOp(row, tag); + } + + // Set these two to invalid values so any attempt to use them + // outside the inner loop results in NPE/OutOfBounds errors + batchIndex = -1; + keyValues = null; + } + } + + @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted && 0 < outputBatch.size) { + flushOutput(); + } + } + + @Override + protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException { + Object[] values = (Object[]) row; + VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI); + if (null == vcas) { + Map<String, Map<String, Integer>> allColumnMaps = Utilities. 
+ getMapRedWork(hconf).getMapWork().getScratchColumnMap(); + Map<String, Integer> columnMap = allColumnMaps.get(fileKey); + vcas = VectorColumnAssignFactory.buildAssigners( + outputBatch, outputOI, columnMap, conf.getOutputColumnNames()); + outputVectorAssigners.put(outputOI, vcas); + } + for (int i = 0; i < values.length; ++i) { + vcas[i].assignObjectValue(values[i], outputBatch.size); + } + ++outputBatch.size; + if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { + flushOutput(); + } + } + + private void flushOutput() throws HiveException { + forward(outputBatch, null); + outputBatch.reset(); + } + + @Override + public VectorizationContext getOuputVectorizationContext() { + return vOutContext; + } +} Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1558987&r1=1558986&r2=1558987&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Fri Jan 17 02:08:46 2014 @@ -299,11 +299,7 @@ public class OrcInputFormat implements } private boolean isVectorMode(Configuration conf) { - if (Utilities.getPlanPath(conf) != null && Utilities - .getMapRedWork(conf).getMapWork().getVectorMode()) { - return true; - } - return false; + return Utilities.isVectorMode(conf); } /** Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1558987&r1=1558986&r2=1558987&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Jan 17 02:08:46 2014 @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.Ma import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; @@ -78,6 +79,7 @@ import org.apache.hadoop.hive.ql.plan.Ma import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.api.OperatorType; @@ -555,6 +557,8 @@ public class Vectorizer implements Physi case MAPJOIN: if (op instanceof MapJoinOperator) { ret = validateMapJoinOperator((MapJoinOperator) op); + } else if (op instanceof SMBMapJoinOperator) { + ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op); } break; case GROUPBY: @@ -583,6 +587,12 @@ public class Vectorizer implements Physi return ret; } + private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) { + SMBJoinDesc desc = op.getConf(); + // Validation is the same as for map join, since the 'small' tables are not vectorized + return validateMapJoinDesc(desc); + } + private 
boolean validateTableScanOperator(TableScanOperator op) { TableScanDesc desc = op.getConf(); return !desc.isGatherStats(); @@ -590,6 +600,10 @@ private boolean validateMapJoinOperator(MapJoinOperator op) { MapJoinDesc desc = op.getConf(); + return validateMapJoinDesc(desc); + } + + private boolean validateMapJoinDesc(MapJoinDesc desc) { byte posBigTable = (byte) desc.getPosBigTable(); List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable); List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable); Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java?rev=1558987&r1=1558986&r2=1558987&view=diff ============================================================================== --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (original) +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Fri Jan 17 02:08:46 2014 @@ -146,39 +146,63 @@ public class TestVectorizer { Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.FILTER)); Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION)); } + + /** + * prepareAbstractMapJoin prepares a join operator descriptor; it is used as a helper by the SMB and map join tests. + */ + private void prepareAbstractMapJoin(AbstractMapJoinOperator<? extends MapJoinDesc> mop, MapJoinDesc mjdesc) { + mjdesc.setPosBigTable(0); + List<ExprNodeDesc> expr = new ArrayList<ExprNodeDesc>(); + expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false)); + Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>(); + keyMap.put((byte)0, expr); + mjdesc.setKeys(keyMap); + mjdesc.setExprs(keyMap); + // Set filter expression + GenericUDFOPEqual udf = new GenericUDFOPEqual(); + ExprNodeGenericFuncDesc equalExprDesc = new ExprNodeGenericFuncDesc(); + equalExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); + equalExprDesc.setGenericUDF(udf); + List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2); + children1.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false)); + children1.add(new ExprNodeColumnDesc(Integer.class, "col3", "T2", false)); + equalExprDesc.setChildren(children1); + List<ExprNodeDesc> filterExpr = new ArrayList<ExprNodeDesc>(); + filterExpr.add(equalExprDesc); + Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>(); + filterMap.put((byte) 0, filterExpr); + mjdesc.setFilters(filterMap); + } + + /** + * testValidateMapJoinOperator validates that the Map join operator can be vectorized.
+ */ @Test public void testValidateMapJoinOperator() { MapJoinOperator mop = new MapJoinOperator(); MapJoinDesc mjdesc = new MapJoinDesc(); - mjdesc.setPosBigTable(0); - List<ExprNodeDesc> expr = new ArrayList<ExprNodeDesc>(); - expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false)); - Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>(); - keyMap.put((byte)0, expr); - mjdesc.setKeys(keyMap); - mjdesc.setExprs(keyMap); - - //Set filter expression - GenericUDFOPEqual udf = new GenericUDFOPEqual(); - ExprNodeGenericFuncDesc equalExprDesc = new ExprNodeGenericFuncDesc(); - equalExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); - equalExprDesc.setGenericUDF(udf); - List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2); - children1.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false)); - children1.add(new ExprNodeColumnDesc(Integer.class, "col3", "T2", false)); - equalExprDesc.setChildren(children1); - List<ExprNodeDesc> filterExpr = new ArrayList<ExprNodeDesc>(); - filterExpr.add(equalExprDesc); - Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>(); - filterMap.put((byte) 0, expr); - mjdesc.setFilters(filterMap); + + prepareAbstractMapJoin(mop, mjdesc); mop.setConf(mjdesc); - + Vectorizer vectorizer = new Vectorizer(); - Assert.assertTrue(vectorizer.validateOperator(mop)); - SMBMapJoinOperator smbmop = new SMBMapJoinOperator(mop); - Assert.assertFalse(vectorizer.validateOperator(smbmop)); + } + + + /** + * testValidateSMBJoinOperator validates that the SMB join operator can be vectorized. + */ + @Test + public void testValidateSMBJoinOperator() { + SMBMapJoinOperator mop = new SMBMapJoinOperator(); + SMBJoinDesc mjdesc = new SMBJoinDesc(); + + prepareAbstractMapJoin(mop, mjdesc); + mop.setConf(mjdesc); + + Vectorizer vectorizer = new Vectorizer(); + Assert.assertTrue(vectorizer.validateOperator(mop)); } } Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q?rev=1558987&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q Fri Jan 17 02:08:46 2014 @@ -0,0 +1,46 @@ +create table vsmb_bucket_1(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS ORC; +create table vsmb_bucket_2(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS ORC; + +create table vsmb_bucket_RC(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS RCFILE; + +create table vsmb_bucket_TXT(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS TEXTFILE; + +insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2; +insert into table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2; +insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2; +insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2; + +set hive.vectorized.execution.enabled=true; +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.auto.convert.sortmerge.join.noconditionaltask = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; + +explain 
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key; +select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key; + +explain +select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key; +select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key; + +-- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box +-- explain +-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; +-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; + +explain +select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key; +select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key; Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out?rev=1558987&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out Fri Jan 17 02:08:46 2014 @@ -0,0 +1,370 @@ +PREHOOK: query: create table vsmb_bucket_1(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS ORC +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table vsmb_bucket_1(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@vsmb_bucket_1 +PREHOOK: query: create table vsmb_bucket_2(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS ORC +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table vsmb_bucket_2(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@vsmb_bucket_2 +PREHOOK: query: create table vsmb_bucket_RC(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table vsmb_bucket_RC(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@vsmb_bucket_RC +PREHOOK: query: create table vsmb_bucket_TXT(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table vsmb_bucket_TXT(key int, value string) + CLUSTERED BY (key) + SORTED BY (key) INTO 1 BUCKETS + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@vsmb_bucket_TXT +PREHOOK: query: insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@vsmb_bucket_1 +POSTHOOK: query: insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@vsmb_bucket_1 +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +PREHOOK: query: insert into 
table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@vsmb_bucket_2 +POSTHOOK: query: insert into table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@vsmb_bucket_2 +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +PREHOOK: query: insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@vsmb_bucket_rc +POSTHOOK: query: insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@vsmb_bucket_rc +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +PREHOOK: query: insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@vsmb_bucket_txt +POSTHOOK: query: insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@vsmb_bucket_txt +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: 
vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +PREHOOK: query: explain +select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col4, _col5 + Position of Big Table: 1 + Vectorized execution: true + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@vsmb_bucket_1 +PREHOOK: Input: default@vsmb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vsmb_bucket_1 +POSTHOOK: Input: default@vsmb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: 
vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +PREHOOK: query: explain +select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_RC) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col4, _col5 + Position of Big Table: 0 + Vectorized execution: true + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@vsmb_bucket_1 +PREHOOK: Input: default@vsmb_bucket_rc +#### A masked pattern was here #### +POSTHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vsmb_bucket_1 +POSTHOOK: Input: default@vsmb_bucket_rc +#### A masked pattern was here #### +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box +-- explain +-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; +-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; + +explain +select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat 
out-of-the-box +-- explain +-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; +-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key; + +explain +select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_TXT) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col4, _col5 + Position of Big Table: 0 + Vectorized execution: true + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@vsmb_bucket_1 +PREHOOK: Input: default@vsmb_bucket_txt +#### A masked pattern was here #### +POSTHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vsmb_bucket_1 +POSTHOOK: Input: default@vsmb_bucket_txt +#### A masked pattern was here #### +POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p +528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
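For reference, the Utilities.isVectorMode(conf) helper introduced above (and now shared by OrcInputFormat) requires two independent conditions before an input format attempts vectorized reading: the session-level switch hive.vectorized.execution.enabled must be on, and the compiled plan itself must have been vectorized. The following is a minimal self-contained sketch of that guard pattern; Conf and Plan are hypothetical stand-ins for Hadoop's Configuration and Hive's MapWork, not the actual APIs.

    import java.util.HashMap;
    import java.util.Map;

    final class VectorModeGuardSketch {
      static final String ENABLED_KEY = "hive.vectorized.execution.enabled";

      // Hypothetical stand-in for Hadoop's Configuration.
      static final class Conf {
        private final Map<String, String> props = new HashMap<String, String>();
        void set(String k, String v) { props.put(k, v); }
        boolean getBool(String k) { return Boolean.parseBoolean(props.get(k)); }
      }

      // Hypothetical stand-in for Hive's MapWork, carrying the plan-level flag.
      static final class Plan {
        private final boolean vectorMode;
        Plan(boolean vectorMode) { this.vectorMode = vectorMode; }
        boolean getVectorMode() { return vectorMode; }
      }

      // Mirrors the commit's logic: check the session switch first, then the
      // plan's own flag. A null plan models a path with no map-reduce plan,
      // such as FetchOperator execution, where vectorization is disallowed.
      static boolean isVectorMode(Conf conf, Plan plan) {
        return conf.getBool(ENABLED_KEY) && plan != null && plan.getVectorMode();
      }

      public static void main(String[] args) {
        Conf conf = new Conf();
        conf.set(ENABLED_KEY, "true");
        System.out.println(isVectorMode(conf, new Plan(true)));  // true
        System.out.println(isVectorMode(conf, null));            // false
      }
    }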