HIVE-18146: Vectorization: VectorMapJoinOperator Decimal64ColumnVector key/value cast bug (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/65cd866e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/65cd866e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/65cd866e Branch: refs/heads/master Commit: 65cd866eb72b14d68e19178b20a5d8d3410c602a Parents: 07fe7e2 Author: Matt McCline <mmccl...@hortonworks.com> Authored: Wed Nov 29 10:47:19 2017 -0600 Committer: Matt McCline <mmccl...@hortonworks.com> Committed: Wed Nov 29 10:47:19 2017 -0600 ---------------------------------------------------------------------- .../exec/vector/VectorHashKeyWrapperBatch.java | 2 + .../ql/exec/vector/VectorMapJoinOperator.java | 7 +- .../ql/exec/vector/VectorizationContext.java | 14 + .../mapjoin/VectorMapJoinCommonOperator.java | 4 +- .../hive/ql/optimizer/physical/Vectorizer.java | 40 +- .../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 19 +- .../hadoop/hive/ql/plan/VectorMapJoinDesc.java | 24 + .../hadoop/hive/ql/plan/VectorMapJoinInfo.java | 25 +- .../exec/vector/mapjoin/MapJoinTestConfig.java | 30 +- .../clientpositive/vector_decimal_mapjoin.q | 46 +- .../llap/vector_decimal_mapjoin.q.out | 1303 +++++++++++++++++- .../clientpositive/llap/vector_join30.q.out | 2 + .../llap/vector_left_outer_join2.q.out | 4 + .../llap/vector_leftsemi_mapjoin.q.out | 46 + .../llap/vector_nullsafe_join.q.out | 20 + .../spark/vector_decimal_mapjoin.q.out | 890 +++++++++++- .../clientpositive/vector_between_columns.q.out | 2 + .../vector_binary_join_groupby.q.out | 4 + .../clientpositive/vector_char_mapjoin1.q.out | 6 + .../clientpositive/vector_complex_join.q.out | 2 + .../clientpositive/vector_decimal_mapjoin.q.out | 732 +++++++++- .../clientpositive/vector_groupby_mapjoin.q.out | 3 + .../clientpositive/vector_include_no_sel.q.out | 1 + .../vector_interval_mapjoin.q.out | 2 + .../vector_left_outer_join2.q.out | 8 + .../clientpositive/vector_outer_join0.q.out | 4 + .../clientpositive/vector_outer_join1.q.out | 8 + .../clientpositive/vector_outer_join2.q.out | 4 + .../clientpositive/vector_outer_join3.q.out | 6 +- .../clientpositive/vector_outer_join4.q.out | 6 +- .../clientpositive/vector_outer_join6.q.out | 4 +- .../vector_outer_reference_windowed.q.out | 8 +- .../clientpositive/vectorized_mapjoin.q.out | 2 + .../clientpositive/vectorized_mapjoin2.q.out | 1 + 34 files changed, 3030 insertions(+), 249 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index 74b9c58..c0b74ab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -1007,6 +1007,8 @@ public class VectorHashKeyWrapperBatch extends VectorColumnSetInfo { case DECIMAL: return keyOutputWriter.writeValue( kw.getDecimal(columnTypeSpecificIndex)); + case DECIMAL_64: + throw new RuntimeException("Getting writable for DECIMAL_64 not supported"); case TIMESTAMP: return keyOutputWriter.writeValue( kw.getTimestamp(columnTypeSpecificIndex)); http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index b8d7150..2584d28 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -98,12 +98,9 @@ public class VectorMapJoinOperator extends VectorMapJoinBaseOperator { bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER); - List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable); - keyExpressions = vContext.getVectorExpressions(keyDesc); + keyExpressions = this.vectorDesc.getAllBigTableKeyExpressions(); - // We're only going to evaluate the big table vectorized expressions, - Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs(); - bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); + bigTableValueExpressions = this.vectorDesc.getAllBigTableValueExpressions(); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 13eff51..d5fccb5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -696,6 +696,20 @@ public class VectorizationContext { return expr; } + public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List<ExprNodeDesc> exprNodes) + throws HiveException { + VectorExpression[] vecExprs = + getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); + final int size = vecExprs.length; + for (int i = 0; i < size; i++) { + VectorExpression vecExpr = vecExprs[i]; + if (vecExpr.getOutputColumnVectorType() == ColumnVector.Type.DECIMAL_64) { + vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr); + } + } + return vecExprs; + } + public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes) throws HiveException { return getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); } http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index 4afbc03..b1fe818 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -253,12 +253,12 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); bigTableKeyTypeInfos = vectorMapJoinInfo.getBigTableKeyTypeInfos(); - bigTableKeyExpressions = vectorMapJoinInfo.getBigTableKeyExpressions(); + bigTableKeyExpressions = vectorMapJoinInfo.getSlimmedBigTableKeyExpressions(); bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); bigTableValueColumnNames = vectorMapJoinInfo.getBigTableValueColumnNames(); bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); - bigTableValueExpressions = vectorMapJoinInfo.getBigTableValueExpressions(); + bigTableValueExpressions = vectorMapJoinInfo.getSlimmedBigTableValueExpressions(); bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 263d2c7..6500682 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -3008,7 +3008,10 @@ public class Vectorizer implements PhysicalPlanResolver { // information first.... List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable); - VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); + + // For now, we don't support joins on or using DECIMAL_64. + VectorExpression[] allBigTableKeyExpressions = + vContext.getVectorExpressionsUpConvertDecimal64(keyDesc); final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length; boolean supportsKeyTypes = true; // Assume. HashSet<String> notSupportedKeyTypes = new HashSet<String>(); @@ -3019,7 +3022,7 @@ public class Vectorizer implements PhysicalPlanResolver { String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength]; TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength]; ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>(); - VectorExpression[] bigTableKeyExpressions; + VectorExpression[] slimmedBigTableKeyExpressions; for (int i = 0; i < allBigTableKeyExpressionsLength; i++) { VectorExpression ve = allBigTableKeyExpressions[i]; if (!IdentityExpression.isColumnOnly(ve)) { @@ -3043,13 +3046,16 @@ public class Vectorizer implements PhysicalPlanResolver { bigTableKeyTypeInfos[i] = typeInfo; } if (bigTableKeyExpressionsList.size() == 0) { - bigTableKeyExpressions = null; + slimmedBigTableKeyExpressions = null; } else { - bigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]); + slimmedBigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]); } List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable); - VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs); + + // For now, we don't support joins on or using DECIMAL_64. + VectorExpression[] allBigTableValueExpressions = + vContext.getVectorExpressionsUpConvertDecimal64(bigTableExprs); boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, @@ -3071,7 +3077,7 @@ public class Vectorizer implements PhysicalPlanResolver { String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length]; ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>(); - VectorExpression[] bigTableValueExpressions; + VectorExpression[] slimmedBigTableValueExpressions; for (int i = 0; i < bigTableValueColumnMap.length; i++) { VectorExpression ve = allBigTableValueExpressions[i]; if (!IdentityExpression.isColumnOnly(ve)) { @@ -3084,20 +3090,25 @@ public class Vectorizer implements PhysicalPlanResolver { bigTableValueTypeInfos[i] = exprNode.getTypeInfo(); } if (bigTableValueExpressionsList.size() == 0) { - bigTableValueExpressions = null; + slimmedBigTableValueExpressions = null; } else { - bigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]); + slimmedBigTableValueExpressions = + bigTableValueExpressionsList.toArray(new VectorExpression[0]); } vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap); vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames); vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos); - vectorMapJoinInfo.setBigTableKeyExpressions(bigTableKeyExpressions); + vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions); + + vectorDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions); vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap); vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames); vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos); - vectorMapJoinInfo.setBigTableValueExpressions(bigTableValueExpressions); + vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions); + + vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions); /* * Small table information. @@ -3948,7 +3959,10 @@ public class Vectorizer implements PhysicalPlanResolver { GroupByDesc groupByDesc = (GroupByDesc) groupByOp.getConf(); List<ExprNodeDesc> keysDesc = groupByDesc.getKeys(); - VectorExpression[] vecKeyExpressions = vContext.getVectorExpressions(keysDesc); + + // For now, we don't support group by on DECIMAL_64 keys. + VectorExpression[] vecKeyExpressions = + vContext.getVectorExpressionsUpConvertDecimal64(keysDesc); ArrayList<AggregationDesc> aggrDesc = groupByDesc.getAggregators(); final int size = aggrDesc.size(); @@ -4425,10 +4439,10 @@ public class Vectorizer implements PhysicalPlanResolver { if (vectorTaskColumnInfo != null) { VectorMapJoinInfo vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo(); - if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) { + if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableKeyExpressions())) { vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); } - if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) { + if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableValueExpressions())) { vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); } } http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index ef8dd05..a1941db 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -404,7 +404,8 @@ public class MapJoinDesc extends JoinDesc implements Serializable { super(vectorMapJoinDesc, vectorMapJoinDesc.getHashTableImplementationType() != HashTableImplementationType.NONE); this.mapJoinDesc = mapJoinDesc; this.vectorMapJoinDesc = vectorMapJoinDesc; - vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo(); + vectorMapJoinInfo = + (vectorMapJoinDesc != null ? vectorMapJoinDesc.getVectorMapJoinInfo() : null); } private VectorizationCondition[] createNativeConditions() { @@ -480,10 +481,10 @@ public class MapJoinDesc extends JoinDesc implements Serializable { @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List<String> getBigTableKeyExpressions() { - if (!isNative) { - return null; - } - return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableKeyExpressions()); + return vectorExpressionsToStringList( + isNative ? + vectorMapJoinInfo.getSlimmedBigTableKeyExpressions() : + vectorMapJoinDesc.getAllBigTableKeyExpressions()); } @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) @@ -500,10 +501,10 @@ public class MapJoinDesc extends JoinDesc implements Serializable { @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List<String> getBigTableValueExpressions() { - if (!isNative) { - return null; - } - return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableValueExpressions()); + return vectorExpressionsToStringList( + isNative ? + vectorMapJoinInfo.getSlimmedBigTableValueExpressions() : + vectorMapJoinDesc.getAllBigTableValueExpressions()); } @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 99a4958..99602a8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.List; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -97,6 +98,9 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { private VectorMapJoinVariation vectorMapJoinVariation; private boolean minMaxEnabled; + private VectorExpression[] allBigTableKeyExpressions; + private VectorExpression[] allBigTableValueExpressions; + private VectorMapJoinInfo vectorMapJoinInfo; public VectorMapJoinDesc() { @@ -105,6 +109,10 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { hashTableKeyType = HashTableKeyType.NONE; vectorMapJoinVariation = VectorMapJoinVariation.NONE; minMaxEnabled = false; + + allBigTableKeyExpressions = null; + allBigTableValueExpressions = null; + vectorMapJoinInfo = null; } @@ -162,6 +170,22 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { this.minMaxEnabled = minMaxEnabled; } + public VectorExpression[] getAllBigTableKeyExpressions() { + return allBigTableKeyExpressions; + } + + public void setAllBigTableKeyExpressions(VectorExpression[] allBigTableKeyExpressions) { + this.allBigTableKeyExpressions = allBigTableKeyExpressions; + } + + public VectorExpression[] getAllBigTableValueExpressions() { + return allBigTableValueExpressions; + } + + public void setAllBigTableValueExpressions(VectorExpression[] allBigTableValueExpressions) { + this.allBigTableValueExpressions = allBigTableValueExpressions; + } + public void setVectorMapJoinInfo(VectorMapJoinInfo vectorMapJoinInfo) { Preconditions.checkState(vectorMapJoinInfo != null); this.vectorMapJoinInfo = vectorMapJoinInfo; http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java index 7432efa..c1c9acc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java @@ -41,12 +41,12 @@ public class VectorMapJoinInfo { private int[] bigTableKeyColumnMap; private String[] bigTableKeyColumnNames; private TypeInfo[] bigTableKeyTypeInfos; - private VectorExpression[] bigTableKeyExpressions; + private VectorExpression[] slimmedBigTableKeyExpressions; private int[] bigTableValueColumnMap; private String[] bigTableValueColumnNames; private TypeInfo[] bigTableValueTypeInfos; - private VectorExpression[] bigTableValueExpressions; + private VectorExpression[] slimmedBigTableValueExpressions; private VectorColumnOutputMapping bigTableRetainedMapping; private VectorColumnOutputMapping bigTableOuterKeyMapping; @@ -58,12 +58,12 @@ public class VectorMapJoinInfo { bigTableKeyColumnMap = null; bigTableKeyColumnNames = null; bigTableKeyTypeInfos = null; - bigTableKeyExpressions = null; + slimmedBigTableKeyExpressions = null; bigTableValueColumnMap = null; bigTableValueColumnNames = null; bigTableValueTypeInfos = null; - bigTableValueExpressions = null; + slimmedBigTableValueExpressions = null; bigTableRetainedMapping = null; bigTableOuterKeyMapping = null; @@ -96,12 +96,12 @@ public class VectorMapJoinInfo { this.bigTableKeyTypeInfos = bigTableKeyTypeInfos; } - public VectorExpression[] getBigTableKeyExpressions() { - return bigTableKeyExpressions; + public VectorExpression[] getSlimmedBigTableKeyExpressions() { + return slimmedBigTableKeyExpressions; } - public void setBigTableKeyExpressions(VectorExpression[] bigTableKeyExpressions) { - this.bigTableKeyExpressions = bigTableKeyExpressions; + public void setSlimmedBigTableKeyExpressions(VectorExpression[] slimmedBigTableKeyExpressions) { + this.slimmedBigTableKeyExpressions = slimmedBigTableKeyExpressions; } @@ -129,12 +129,13 @@ public class VectorMapJoinInfo { this.bigTableValueTypeInfos = bigTableValueTypeInfos; } - public VectorExpression[] getBigTableValueExpressions() { - return bigTableValueExpressions; + public VectorExpression[] getSlimmedBigTableValueExpressions() { + return slimmedBigTableValueExpressions; } - public void setBigTableValueExpressions(VectorExpression[] bigTableValueExpressions) { - this.bigTableValueExpressions = bigTableValueExpressions; + public void setSlimmedBigTableValueExpressions( + VectorExpression[] slimmedBigTableValueExpressions) { + this.slimmedBigTableValueExpressions = slimmedBigTableValueExpressions; } public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) { http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java index eec1f65..b77efcd 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -211,12 +212,16 @@ public class MapJoinTestConfig { vectorMapJoinInfo.setBigTableKeyColumnMap(testDesc.bigTableKeyColumnNums); vectorMapJoinInfo.setBigTableKeyColumnNames(testDesc.bigTableKeyColumnNames); vectorMapJoinInfo.setBigTableKeyTypeInfos(testDesc.bigTableKeyTypeInfos); - vectorMapJoinInfo.setBigTableKeyExpressions(null); + vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(null); + + vectorDesc.setAllBigTableKeyExpressions(null); vectorMapJoinInfo.setBigTableValueColumnMap(new int[0]); vectorMapJoinInfo.setBigTableValueColumnNames(new String[0]); vectorMapJoinInfo.setBigTableValueTypeInfos(new TypeInfo[0]); - vectorMapJoinInfo.setBigTableValueExpressions(null); + vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null); + + vectorDesc.setAllBigTableValueExpressions(null); VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); @@ -542,6 +547,17 @@ public class MapJoinTestConfig { // This is what the Vectorizer class does. VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc(); + + byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); + VectorExpression[] allBigTableKeyExpressions = + vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable)); + vectorMapJoinDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions); + + Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs(); + VectorExpression[] allBigTableValueExpressions = + vContext.getVectorExpressions(exprs.get(posBigTable)); + vectorMapJoinDesc.setAllBigTableValueExpressions(allBigTableValueExpressions); + List<ExprNodeDesc> bigTableFilters = mapJoinDesc.getFilters().get(bigTablePos); boolean isOuterAndFiltered = (!mapJoinDesc.isNoOuterJoin() && bigTableFilters.size() > 0); if (!isOuterAndFiltered) { @@ -603,6 +619,16 @@ public class MapJoinTestConfig { VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc); + byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); + VectorExpression[] slimmedBigTableKeyExpressions = + vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable)); + vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions); + + Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs(); + VectorExpression[] slimmedBigTableValueExpressions = + vContext.getVectorExpressions(exprs.get(posBigTable)); + vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions); + VectorMapJoinCommonOperator operator = MapJoinTestConfig.createNativeVectorMapJoinOperator( testDesc.vectorMapJoinVariation, http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q b/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q index 495be4d..940cc12 100644 --- a/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q +++ b/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q @@ -22,10 +22,10 @@ STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k; -CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC; -INSERT INTO TABLE t1 select `dec` from over1k; -CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC; -INSERT INTO TABLE t2 select `dec` from over1k; +CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC; +INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k; +CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC; +INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k; explain vectorization detail select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`); @@ -34,6 +34,13 @@ select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`); select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`); +explain vectorization detail +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`); + +select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`); + + + -- DECIMAL_64 CREATE TABLE over1k_small(t tinyint, @@ -45,17 +52,17 @@ CREATE TABLE over1k_small(t tinyint, bo boolean, s string, ts timestamp, - `dec` decimal(4,2), + `dec` decimal(14,2), bin binary) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_small; -CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC; -INSERT INTO TABLE t1 select `dec` from over1k_small; -CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC; -INSERT INTO TABLE t2 select `dec` from over1k_small; +CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE; +INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small; +CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE; +INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small; explain vectorization detail select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); @@ -63,3 +70,24 @@ select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.` -- SORT_QUERY_RESULTS select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + + +set hive.vectorized.input.format.supports.enabled=none; + +explain vectorization detail +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +-- SORT_QUERY_RESULTS + +select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +explain vectorization detail +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); + +select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); +