http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
new file mode 100644
index 0000000..e0a6198
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Why a node did not vectorize.
+ *
+ * <p>Holds the category of the problem, the issue text and, for operator and expression
+ * issues, the operator (and expression title) involved. Instances are created only through
+ * the static {@code create*Issue} factory methods.
+ */
+public class VectorizerReason {
+
+  // Declared final so it is a true constant rather than mutable static state.
+  private static final long serialVersionUID = 1L;
+
+  /** Category of the problem that kept a node from vectorizing. */
+  public static enum VectorizerNodeIssue {
+    NONE,
+    NODE_ISSUE,
+    OPERATOR_ISSUE,
+    EXPRESSION_ISSUE
+  }
+
+  private final VectorizerNodeIssue vectorizerNodeIssue;
+
+  private final Operator<? extends OperatorDesc> operator;
+
+  private final String expressionTitle;
+
+  private final String issue;
+
+  private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue,
+      Operator<? extends OperatorDesc> operator, String expressionTitle, String issue) {
+    this.vectorizerNodeIssue = vectorizerNodeIssue;
+    this.operator = operator;
+    this.expressionTitle = expressionTitle;
+    this.issue = issue;
+  }
+
+  /** Creates a node-level reason; no operator or expression title is recorded. */
+  public static VectorizerReason createNodeIssue(String issue) {
+    return new VectorizerReason(
+        VectorizerNodeIssue.NODE_ISSUE,
+        null,
+        null,
+        issue);
+  }
+
+  /** Creates an operator-level reason; no expression title is recorded. */
+  public static VectorizerReason createOperatorIssue(Operator<? extends OperatorDesc> operator,
+      String issue) {
+    return new VectorizerReason(
+        VectorizerNodeIssue.OPERATOR_ISSUE,
+        operator,
+        null,
+        issue);
+  }
+
+  /** Creates an expression-level reason for the given operator and expression title. */
+  public static VectorizerReason createExpressionIssue(Operator<? extends OperatorDesc> operator,
+      String expressionTitle, String issue) {
+    return new VectorizerReason(
+        VectorizerNodeIssue.EXPRESSION_ISSUE,
+        operator,
+        expressionTitle,
+        issue);
+  }
+
+  /** Returns a new reason holding the same category, operator, expression title and issue. */
+  @Override
+  public VectorizerReason clone() {
+    return new VectorizerReason(vectorizerNodeIssue, operator, expressionTitle, issue);
+  }
+
+  public VectorizerNodeIssue getVectorizerNodeIssue() {
+    return vectorizerNodeIssue;
+  }
+
+  public Operator<? extends OperatorDesc> getOperator() {
+    return operator;
+  }
+
+  public String getExpressionTitle() {
+    return expressionTitle;
+  }
+
+  public String getIssue() {
+    return issue;
+  }
+
+  /**
+   * Renders the reason as text. Missing parts are shown as a placeholder instead of the
+   * literal "null": "unknown" for the issue, "Unknown" for the operator and expression title.
+   */
+  @Override
+  public String toString() {
+    String reason;
+    switch (vectorizerNodeIssue) {
+    case NODE_ISSUE:
+      reason = (issue == null ? "unknown" : issue);
+      break;
+    case OPERATOR_ISSUE:
+      reason = (operator == null ? "Unknown" : operator.getType()) + " operator: " +
+          (issue == null ? "unknown" : issue);
+      break;
+    case EXPRESSION_ISSUE:
+      // Guard the title like the operator and issue so a missing title never prints "null".
+      reason = (expressionTitle == null ? "Unknown" : expressionTitle) + " expression for " +
+          (operator == null ? "Unknown" : operator.getType()) + " operator: " +
+          (issue == null ? "unknown" : issue);
+      break;
+    default:
+      // NONE (or any value added later) has no dedicated format.
+      reason = "Unknown " + vectorizerNodeIssue;
+    }
+    return reason;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 4a8ff15..1f118dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -27,12 +27,27 @@ import org.apache.hadoop.fs.Path; */ public class ExplainConfiguration { + + public enum VectorizationDetailLevel { + + SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1); + + public final int rank; + VectorizationDetailLevel(int rank) { + this.rank = rank; + } + }; + private boolean extended = false; private boolean formatted = false; private boolean dependency = false; private boolean logical = false; private boolean authorize = false; private boolean userLevelExplain = false; + private boolean vectorization = false; + private boolean vectorizationOnly = false; + private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY; + private Path explainRootPath; private Map<String, Long> opIdToRuntimeNumRows; @@ -98,6 +113,30 @@ public class ExplainConfiguration { this.userLevelExplain = userLevelExplain; } + public boolean isVectorization() { + return vectorization; + } + + public void setVectorization(boolean vectorization) { + this.vectorization = vectorization; + } + + public boolean isVectorizationOnly() { + return vectorizationOnly; + } + + public void setVectorizationOnly(boolean vectorizationOnly) { + this.vectorizationOnly = vectorizationOnly; + } + + public VectorizationDetailLevel getVectorizationDetailLevel() { + return vectorizationDetailLevel; + } + + public void setVectorizationDetailLevel(VectorizationDetailLevel vectorizationDetailLevel) { + this.vectorizationDetailLevel = 
vectorizationDetailLevel; + } + public Path getExplainRootPath() { return explainRootPath; } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 300542e..f62cf9a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; @@ -70,7 +71,9 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { @SuppressWarnings("unchecked") @Override public void analyzeInternal(ASTNode ast) throws SemanticException { - for (int i = 1; i < ast.getChildCount(); i++) { + final int childCount = ast.getChildCount(); + int i = 1; // Skip TOK_QUERY. 
+ while (i < childCount) { int explainOptions = ast.getChild(i).getType(); if (explainOptions == HiveParser.KW_FORMATTED) { config.setFormatted(true); @@ -85,7 +88,40 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { } else if (explainOptions == HiveParser.KW_ANALYZE) { config.setAnalyze(AnalyzeState.RUNNING); config.setExplainRootPath(ctx.getMRTmpPath()); + } else if (explainOptions == HiveParser.KW_VECTORIZATION) { + config.setVectorization(true); + if (i + 1 < childCount) { + int vectorizationOption = ast.getChild(i + 1).getType(); + + // [ONLY] + if (vectorizationOption == HiveParser.TOK_ONLY) { + config.setVectorizationOnly(true); + i++; + if (i + 1 >= childCount) { + break; + } + vectorizationOption = ast.getChild(i + 1).getType(); + } + + // [SUMMARY|OPERATOR|EXPRESSION|DETAIL] + if (vectorizationOption == HiveParser.TOK_SUMMARY) { + config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY); + i++; + } else if (vectorizationOption == HiveParser.TOK_OPERATOR) { + config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR); + i++; + } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) { + config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION); + i++; + } else if (vectorizationOption == HiveParser.TOK_DETAIL) { + config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL); + i++; + } + } + } else { + // UNDONE: UNKNOWN OPTION? 
} + i++; } ctx.setExplainConfig(config); http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 5d3fa6a..025ea10 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -335,6 +335,11 @@ KW_KEY: 'KEY'; KW_ABORT: 'ABORT'; KW_EXTRACT: 'EXTRACT'; KW_FLOOR: 'FLOOR'; +KW_VECTORIZATION: 'VECTORIZATION'; +KW_SUMMARY: 'SUMMARY'; +KW_OPERATOR: 'OPERATOR'; +KW_EXPRESSION: 'EXPRESSION'; +KW_DETAIL: 'DETAIL'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 5c16c55..eebd875 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -380,6 +380,11 @@ TOK_ROLLBACK; TOK_SET_AUTOCOMMIT; TOK_CACHE_METADATA; TOK_ABORT_TRANSACTIONS; +TOK_ONLY; +TOK_SUMMARY; +TOK_OPERATOR; +TOK_EXPRESSION; +TOK_DETAIL; } @@ -717,7 +722,28 @@ explainStatement explainOption @init { msgs.push("explain option"); } @after { msgs.pop(); } - : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE + : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE| + (KW_VECTORIZATION vectorizationOnly? vectorizatonDetail?) 
+ ; + +vectorizationOnly +@init { pushMsg("vectorization's only clause", state); } +@after { popMsg(state); } + : KW_ONLY + -> ^(TOK_ONLY) + ; + +vectorizatonDetail +@init { pushMsg("vectorization's detail level clause", state); } +@after { popMsg(state); } + : KW_SUMMARY + -> ^(TOK_SUMMARY) + | KW_OPERATOR + -> ^(TOK_OPERATOR) + | KW_EXPRESSION + -> ^(TOK_EXPRESSION) + | KW_DETAIL + -> ^(TOK_DETAIL) ; execStatement http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 50987c3..13f6879 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -719,6 +719,12 @@ nonReserved | KW_VALIDATE | KW_NOVALIDATE | KW_KEY + | KW_VECTORIZATION + | KW_SUMMARY + | KW_OPERATOR + | KW_EXPRESSION + | KW_DETAIL + ; //The following SQL2011 reserved keywords are used as function name only, but not as identifiers. http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index e217bdf..2c14203 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -29,6 +29,10 @@ import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; public class AbstractOperatorDesc implements OperatorDesc { protected boolean vectorMode = false; + + // Extra parameters only for vectorization. 
+ protected VectorDesc vectorDesc; + protected Statistics statistics; protected transient OpTraits opTraits; protected transient Map<String, String> opProps; @@ -64,6 +68,14 @@ public class AbstractOperatorDesc implements OperatorDesc { this.vectorMode = vm; } + public void setVectorDesc(VectorDesc vectorDesc) { + this.vectorDesc = vectorDesc; + } + + public VectorDesc getVectorDesc() { + return vectorDesc; + } + @Override public OpTraits getTraits() { return opTraits; http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java index 5157ebd..4304b11 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java @@ -18,10 +18,24 @@ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.exec.Operator; + public class AbstractVectorDesc implements VectorDesc { + private static long serialVersionUID = 1L; + + private Class<?> vectorOpClass; + @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException("clone not supported"); } + + public void setVectorOp(Class<?> vectorOpClass) { + this.vectorOpClass = vectorOpClass; + } + + public Class<?> getVectorOpClass() { + return vectorOpClass; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java index 264f959..c5294f0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java @@ -19,7 +19,10 @@ package org.apache.hadoop.hive.ql.plan; import java.io.IOException; +import java.util.List; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.io.DataOutputBuffer; @@ -60,4 +63,25 @@ public class AppMasterEventDesc extends AbstractOperatorDesc { public void writeEventHeader(DataOutputBuffer buffer) throws IOException { // nothing to add } + + public class AppMasterEventOperatorExplainVectorization extends OperatorExplainVectorization { + + private final AppMasterEventDesc appMasterEventDesc; + private final VectorAppMasterEventDesc vectorAppMasterEventDesc; + + public AppMasterEventOperatorExplainVectorization(AppMasterEventDesc appMasterEventDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.appMasterEventDesc = appMasterEventDesc; + vectorAppMasterEventDesc = (VectorAppMasterEventDesc) vectorDesc; + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "App Master Event Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public AppMasterEventOperatorExplainVectorization getAppMasterEventVectorization() { + if (vectorDesc == null) { + return null; + } + return new AppMasterEventOperatorExplainVectorization(this, vectorDesc); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 13a0811..b061d5e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; 
import java.util.LinkedList; import java.util.LinkedHashSet; import java.util.List; @@ -33,7 +34,9 @@ import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -65,12 +68,25 @@ public abstract class BaseWork extends AbstractOperatorDesc { private String name; - // Vectorization. + /* + * Vectorization. + */ + + // This will be true if a node was examined by the Vectorizer class. + protected boolean vectorizationExamined; + + protected boolean vectorizationEnabled; protected VectorizedRowBatchCtx vectorizedRowBatchCtx; protected boolean useVectorizedInputFileFormat; + private VectorizerReason notVectorizedReason; + + private boolean groupByVectorOutput; + private boolean allNative; + private boolean usesVectorUDFAdaptor; + protected boolean llapMode = false; protected boolean uberMode = false; @@ -163,6 +179,22 @@ public abstract class BaseWork extends AbstractOperatorDesc { // ----------------------------------------------------------------------------------------------- + public void setVectorizationExamined(boolean vectorizationExamined) { + this.vectorizationExamined = vectorizationExamined; + } + + public boolean getVectorizationExamined() { + return vectorizationExamined; + } + + public void setVectorizationEnabled(boolean vectorizationEnabled) { + this.vectorizationEnabled = vectorizationEnabled; + } + + public boolean getVectorizationEnabled() { + return vectorizationEnabled; + } + /* * The vectorization context for creating the VectorizedRowBatch for the node. 
*/ @@ -174,23 +206,160 @@ public abstract class BaseWork extends AbstractOperatorDesc { this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; } - /* - * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable - * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated - * vectorizing this node. - * - * When Vectorized Input File Format looks at this flag, it can determine whether it should - * operate vectorized or not. In some modes, the node can be vectorized but use row - * serialization. - */ - public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { - this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + public void setNotVectorizedReason(VectorizerReason notVectorizedReason) { + this.notVectorizedReason = notVectorizedReason; + } + + public VectorizerReason getNotVectorizedReason() { + return notVectorizedReason; + } + + public void setGroupByVectorOutput(boolean groupByVectorOutput) { + this.groupByVectorOutput = groupByVectorOutput; + } + + public boolean getGroupByVectorOutput() { + return groupByVectorOutput; + } + + public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) { + this.usesVectorUDFAdaptor = usesVectorUDFAdaptor; } - public boolean getUseVectorizedInputFileFormat() { - return useVectorizedInputFileFormat; + public boolean getUsesVectorUDFAdaptor() { + return usesVectorUDFAdaptor; } + public void setAllNative(boolean allNative) { + this.allNative = allNative; + } + + public boolean getAllNative() { + return allNative; + } + + public static class BaseExplainVectorization { + + private final BaseWork baseWork; + + public BaseExplainVectorization(BaseWork baseWork) { + this.baseWork = baseWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean enabled() { + return baseWork.getVectorizationEnabled(); + } + + @Explain(vectorization = 
Vectorization.SUMMARY, displayName = "vectorized", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean vectorized() { + if (!baseWork.getVectorizationEnabled()) { + return null; + } + return baseWork.getVectorMode(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String notVectorizedReason() { + if (!baseWork.getVectorizationEnabled() || baseWork.getVectorMode()) { + return null; + } + VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason(); + if (notVectorizedReason == null) { + return "Unknown"; + } + return notVectorizedReason.toString(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "groupByVectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean groupByRowOutputCascade() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getGroupByVectorOutput(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean nativeVectorized() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getAllNative(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public Boolean usesVectorUDFAdaptor() { + if (!baseWork.getVectorMode()) { + return null; + } + return baseWork.getUsesVectorUDFAdaptor(); + } + + public static class RowBatchContextExplainVectorization { + + private final VectorizedRowBatchCtx vectorizedRowBatchCtx; + + public RowBatchContextExplainVectorization(VectorizedRowBatchCtx vectorizedRowBatchCtx) { + this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; + } + + private List<String> getColumns(int startIndex, int count) { + String[] rowColumnNames = vectorizedRowBatchCtx.getRowColumnNames(); + TypeInfo[] rowColumnTypeInfos = 
vectorizedRowBatchCtx.getRowColumnTypeInfos(); + List<String> result = new ArrayList<String>(count); + final int end = startIndex + count; + for (int i = startIndex; i < end; i++) { + result.add(rowColumnNames[i] + ":" + rowColumnTypeInfos[i]); + } + return result; + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getDataColumns() { + return getColumns(0, vectorizedRowBatchCtx.getDataColumnCount()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getPartitionColumns() { + return getColumns(vectorizedRowBatchCtx.getDataColumnCount(), vectorizedRowBatchCtx.getPartitionColumnCount()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "includeColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getDataColumnNums() { + int[] dataColumnNums = vectorizedRowBatchCtx.getDataColumnNums(); + if (dataColumnNums == null) { + return null; + } + return Arrays.toString(vectorizedRowBatchCtx.getDataColumnNums()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public int getDataColumnCount() { + return vectorizedRowBatchCtx.getDataColumnCount(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public int getPartitionColumnCount() { + return vectorizedRowBatchCtx.getPartitionColumnCount(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "scratchColumnTypeNames", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getScratchColumnTypeNames() { + return Arrays.asList(vectorizedRowBatchCtx.getScratchColumnTypeNames()); + } + + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = 
"rowBatchContext", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public RowBatchContextExplainVectorization vectorizedRowBatchContext() { + if (!baseWork.getVectorMode()) { + return null; + } + return new RowBatchContextExplainVectorization(baseWork.getVectorizedRowBatchCtx()); + } + } + + // ----------------------------------------------------------------------------------------------- /** http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index b0b6c3a..7b16ad7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -45,4 +45,33 @@ public @interface Explain { boolean displayOnlyOnTrue() default false; boolean skipHeader() default false; + + // By default, many existing @Explain classes/methods are NON_VECTORIZED. + // + // Vectorized methods/classes have detail levels: + // SUMMARY, OPERATOR, EXPRESSION, or DETAIL. + // As you go to the right you get more detail and the information for the previous level(s) is + // included. The default is SUMMARY. + // + // The "path" enumerations are used to mark methods/classes that lead to vectorization specific + // ones so we can avoid displaying headers for things that have no vectorization information + // below. + // + // For example, the TezWork class is marked SUMMARY_PATH because it leads to both + // SUMMARY and OPERATOR methods/classes. And, MapWork.getAllRootOperators is marked OPERATOR_PATH + // because we only display operator information for OPERATOR. + // + // EXPRESSION and DETAIL typically live inside SUMMARY or OPERATOR classes. 
+ // + public enum Vectorization { + SUMMARY_PATH(4), OPERATOR_PATH(3), + SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1), + NON_VECTORIZED(Integer.MAX_VALUE); + + public final int rank; + Vectorization(int rank) { + this.rank = rank; + } + }; + Vectorization vectorization() default Vectorization.NON_VECTORIZED; } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 9f4767c..805357c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; import org.apache.hadoop.hive.ql.parse.ParseContext; /** @@ -117,6 +118,18 @@ public class ExplainWork implements Serializable { return config.isFormatted(); } + public boolean isVectorization() { + return config.isVectorization(); + } + + public boolean isVectorizationOnly() { + return config.isVectorizationOnly(); + } + + public VectorizationDetailLevel isVectorizationDetailLevel() { + return config.getVectorizationDetailLevel(); + } + public ParseContext getParseContext() { return pCtx; } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 8ea6440..3c69f69 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -22,6 +22,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.fs.Path; @@ -30,14 +31,17 @@ import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * FetchWork. * */ -@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class FetchWork implements Serializable { private static final long serialVersionUID = 1L; @@ -307,4 +311,43 @@ public class FetchWork implements Serializable { return ret; } + + // ----------------------------------------------------------------------------------------------- + + private boolean vectorizationExamined; + + public void setVectorizationExamined(boolean vectorizationExamined) { + this.vectorizationExamined = vectorizationExamined; + } + + public boolean getVectorizationExamined() { + return vectorizationExamined; + } + + public class FetchExplainVectorization { + + private final FetchWork fetchWork; + + public FetchExplainVectorization(FetchWork fetchWork) { + this.fetchWork = fetchWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean enabled() { + return false; 
+ } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> enabledConditionsNotMet() { + return VectorizationCondition.getConditionsSupported(false); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Fetch Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FetchExplainVectorization getMapExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new FetchExplainVectorization(this); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index 07ed4fd..bbc5f10 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -25,7 +25,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * FileSinkDesc. @@ -474,4 +474,19 @@ public class FileSinkDesc extends AbstractOperatorDesc { this.statsTmpDir = statsCollectionTempDir; } + public class FileSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + public FileSinkOperatorExplainVectorization(VectorDesc vectorDesc) { + // Native vectorization not supported. 
+ super(vectorDesc, false); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "File Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FileSinkOperatorExplainVectorization getFileSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new FileSinkOperatorExplainVectorization(vectorDesc); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index fa20798..ff69775 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; @@ -177,6 +178,7 @@ public class FilterDesc extends AbstractOperatorDesc { this.syntheticJoinPredicate = syntheticJoinPredicate; } + @Override public Object clone() { FilterDesc filterDesc = new FilterDesc(getPredicate().clone(), getIsSamplingPred()); @@ -186,4 +188,30 @@ public class FilterDesc extends AbstractOperatorDesc { filterDesc.setSortedFilter(isSortedFilter()); return filterDesc; } + + public class FilterOperatorExplainVectorization extends OperatorExplainVectorization { + + private final FilterDesc filterDesc; + private final VectorFilterDesc vectorFilterDesc; + + public FilterOperatorExplainVectorization(FilterDesc filterDesc, VectorDesc vectorDesc) { + // Native vectorization supported. 
+ super(vectorDesc, true); + this.filterDesc = filterDesc; + vectorFilterDesc = (VectorFilterDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "predicateExpression", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getPredicateExpression() { + return vectorFilterDesc.getPredicateExpression().toString(); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Filter Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public FilterOperatorExplainVectorization getFilterVectorization() { + if (vectorDesc == null) { + return null; + } + return new FilterOperatorExplainVectorization(this, vectorDesc); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 99791e5..204277e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -19,13 +19,18 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hive.common.util.AnnotationUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; /** @@ -71,11 +76,8 @@ 
public class GroupByDesc extends AbstractOperatorDesc { transient private boolean isDistinct; private boolean dontResetAggrsDistinct; - // Extra parameters only for vectorization. - private VectorGroupByDesc vectorDesc; - public GroupByDesc() { - vectorDesc = new VectorGroupByDesc(); + vectorDesc = null; } public GroupByDesc( @@ -106,7 +108,7 @@ public class GroupByDesc extends AbstractOperatorDesc { final boolean groupingSetsPresent, final int groupingSetsPosition, final boolean isDistinct) { - vectorDesc = new VectorGroupByDesc(); + vectorDesc = null; this.mode = mode; this.outputColumnNames = outputColumnNames; this.keys = keys; @@ -120,14 +122,6 @@ public class GroupByDesc extends AbstractOperatorDesc { this.isDistinct = isDistinct; } - public void setVectorDesc(VectorGroupByDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorGroupByDesc getVectorDesc() { - return vectorDesc; - } - public Mode getMode() { return mode; } @@ -311,4 +305,66 @@ public class GroupByDesc extends AbstractOperatorDesc { this.isDistinct = isDistinct; } + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { + + private final GroupByDesc groupByDesc; + private final VectorGroupByDesc vectorGroupByDesc; + + public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorDesc vectorDesc) { + // Native vectorization not supported. 
+ super(vectorDesc, false); + this.groupByDesc = groupByDesc; + vectorGroupByDesc = (VectorGroupByDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getKeysExpression() { + return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getAggregators() { + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); + List<String> vecAggrList = new ArrayList<String>(vecAggregators.length); + for (VectorAggregateExpression vecAggr : vecAggregators) { + vecAggrList.add(vecAggr.toString()); + } + return vecAggrList; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean getGroupByRowOutputCascade() { + return vectorGroupByDesc.isVectorOutput(); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getVectorOutputConditionsNotMet() { + List<String> results = new ArrayList<String>(); + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); + for (VectorAggregateExpression vecAggr : vecAggregators) { + Category category = Vectorizer.aggregationOutputCategory(vecAggr); + if (category != ObjectInspector.Category.PRIMITIVE) { + results.add( + "Vector output of " + vecAggr.toString() + " output type " + category + " requires PRIMITIVE IS false"); + } + } + if (results.size() == 0) { + return null; + } + return results; + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { 
+ return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns()); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public GroupByOperatorExplainVectorization getGroupByVectorization() { + if (vectorDesc == null) { + return null; + } + return new GroupByOperatorExplainVectorization(this, vectorDesc); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java index 94ac41e..a338319 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java @@ -26,6 +26,7 @@ import java.util.Map; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java index 8448a41..45ec431 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java @@ -17,7 +17,10 @@ */ package org.apache.hadoop.hive.ql.plan; +import java.util.List; + import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -73,4 +76,19 @@ public class LimitDesc extends AbstractOperatorDesc { this.leastRows = leastRows; } + public class LimitOperatorExplainVectorization extends OperatorExplainVectorization 
{ + + public LimitOperatorExplainVectorization(LimitDesc limitDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Limit Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public LimitOperatorExplainVectorization getLimitVectorization() { + if (vectorDesc == null) { + return null; + } + return new LimitOperatorExplainVectorization(this, vectorDesc); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index ec35860..3633fde 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -20,14 +20,24 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; /** * Map Join operator Descriptor implementation. 
@@ -73,17 +83,16 @@ public class MapJoinDesc extends JoinDesc implements Serializable { private boolean isHybridHashJoin; private boolean isDynamicPartitionHashJoin = false; - // Extra parameters only for vectorization. - private VectorMapJoinDesc vectorDesc; - public MapJoinDesc() { - vectorDesc = new VectorMapJoinDesc(); + vectorDesc = null; bigTableBucketNumMapping = new LinkedHashMap<String, Integer>(); } public MapJoinDesc(MapJoinDesc clone) { super(clone); - vectorDesc = new VectorMapJoinDesc(clone.vectorDesc); + if (clone.vectorDesc != null) { + throw new RuntimeException("Clone with vectorization desc not supported"); + } this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; @@ -108,7 +117,7 @@ public class MapJoinDesc extends JoinDesc implements Serializable { final int posBigTable, final JoinCondDesc[] conds, final Map<Byte, List<ExprNodeDesc>> filters, boolean noOuterJoin, String dumpFilePrefix) { super(values, outputColumnNames, noOuterJoin, conds, filters, null); - vectorDesc = new VectorMapJoinDesc(); + vectorDesc = null; this.keys = keys; this.keyTblDesc = keyTblDesc; this.valueTblDescs = valueTblDescs; @@ -119,14 +128,6 @@ public class MapJoinDesc extends JoinDesc implements Serializable { initRetainExprList(); } - public void setVectorDesc(VectorMapJoinDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorMapJoinDesc getVectorDesc() { - return vectorDesc; - } - private void initRetainExprList() { retainList = new HashMap<Byte, List<Integer>>(); Set<Entry<Byte, List<ExprNodeDesc>>> set = super.getExprs().entrySet(); @@ -388,4 +389,193 @@ public class MapJoinDesc extends JoinDesc implements Serializable { public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } + + // Use LinkedHashSet to give predictable display order. 
+ private static Set<String> vectorizableMapJoinNativeEngines = + new LinkedHashSet<String>(Arrays.asList("tez", "spark")); + + public class MapJoinOperatorExplainVectorization extends OperatorExplainVectorization { + + private final MapJoinDesc mapJoinDesc; + private final VectorMapJoinDesc vectorMapJoinDesc; + private final VectorMapJoinInfo vectorMapJoinInfo; + + private VectorizationCondition[] nativeConditions; + + public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorDesc vectorDesc) { + // VectorMapJoinOperator is not native vectorized. + super(vectorDesc, ((VectorMapJoinDesc) vectorDesc).hashTableImplementationType() != HashTableImplementationType.NONE); + this.mapJoinDesc = mapJoinDesc; + vectorMapJoinDesc = (VectorMapJoinDesc) vectorDesc; + vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo(); + } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorMapJoinDesc.getIsVectorizationMapJoinNativeEnabled(); + + String engine = vectorMapJoinDesc.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableMapJoinNativeEngines; + boolean engineInSupported = vectorizableMapJoinNativeEngines.contains(engine); + + boolean isFastHashTableEnabled = vectorMapJoinDesc.getIsFastHashTableEnabled(); + boolean isHybridHashJoin = vectorMapJoinDesc.getIsHybridHashJoin(); + + boolean whenFastHashTableThenNoHybrid = + (!isFastHashTableEnabled ? 
true : !isHybridHashJoin); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName), + new VectorizationCondition( + vectorMapJoinDesc.getOneMapJoinCondition(), + "One MapJoin Condition"), + new VectorizationCondition( + !vectorMapJoinDesc.getHasNullSafes(), + "No nullsafe"), + new VectorizationCondition( + vectorMapJoinDesc.getSupportsKeyTypes(), + "Supports Key Types"), + new VectorizationCondition( + !vectorMapJoinDesc.getIsEmptyKey(), + "Not empty key"), + new VectorizationCondition( + whenFastHashTableThenNoHybrid, + "When Fast Hash Table, then requires no Hybrid Hash Join"), + new VectorizationCondition( + vectorMapJoinDesc.getSmallTableExprVectorizes(), + "Small table vectorizes"), + }; + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getNativeConditionsMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getNativeConditionsNotMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getBigTableKeyExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableKeyExpressions()); + } + + @Explain(vectorization = 
Vectorization.DETAIL, displayName = "bigTableKeyColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableKeyColumns() { + if (!isNative) { + return null; + } + int[] bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); + if (bigTableKeyColumnMap.length == 0) { + return null; + } + return Arrays.toString(bigTableKeyColumnMap); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getBigTableValueExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableValueExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableValueColumns() { + if (!isNative) { + return null; + } + int[] bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); + if (bigTableValueColumnMap.length == 0) { + return null; + } + return Arrays.toString(bigTableValueColumnMap); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getSmallTableColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getSmallTableMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getProjectionMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getBigTableOuterKey() { + if (!isNative || vectorMapJoinDesc.operatorVariation() != 
OperatorVariation.OUTER) { + return null; + } + return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainedColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableRetainedColumns() { + if (!isNative) { + return null; + } + return outputColumnsToStringList(vectorMapJoinInfo.getBigTableRetainedMapping()); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getNativeNotSupportedKeyTypes() { + return vectorMapJoinDesc.getNotSupportedKeyTypes(); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MapJoinOperatorExplainVectorization getMapJoinVectorization() { + if (vectorDesc == null || this instanceof SMBJoinDesc) { + return null; + } + return new MapJoinOperatorExplainVectorization(this, vectorDesc); + } + + public class SMBJoinOperatorExplainVectorization extends OperatorExplainVectorization { + + private final SMBJoinDesc smbJoinDesc; + private final VectorSMBJoinDesc vectorSMBJoinDesc; + + public SMBJoinOperatorExplainVectorization(SMBJoinDesc smbJoinDesc, VectorDesc vectorDesc) { + // Native vectorization NOT supported. + super(vectorDesc, false); + this.smbJoinDesc = smbJoinDesc; + vectorSMBJoinDesc = (VectorSMBJoinDesc) vectorDesc; + } + } + + // Handle dual nature. 
+ @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SMBJoinOperatorExplainVectorization getSMBJoinVectorization() { + if (vectorDesc == null || !(this instanceof SMBJoinDesc)) { + return null; + } + return new SMBJoinOperatorExplainVectorization((SMBJoinDesc) this, vectorDesc); + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 5cc3663..081c511 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -26,6 +26,7 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -47,8 +48,10 @@ import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -140,6 +143,12 @@ public class MapWork extends BaseWork { private VectorizedRowBatch vectorizedRowBatch; + private VectorizerReason notEnabledInputFileFormatReason; + + private Set<String> 
vectorizationInputFileFormatClassNameSet; + private List<String> vectorizationEnabledConditionsMet; + private List<String> vectorizationEnabledConditionsNotMet; + // bitsets can't be correctly serialized by Kryo's default serializer // BitSet::wordsInUse is transient, so force dumping into a lower form private byte[] includedBuckets; @@ -357,7 +366,7 @@ public class MapWork extends BaseWork { return nameToSplitSample; } - @Explain(displayName = "LLAP IO") + @Explain(displayName = "LLAP IO", vectorization = Vectorization.SUMMARY_PATH) public String getLlapIoDesc() { return llapIoDesc; } @@ -429,7 +438,8 @@ public class MapWork extends BaseWork { } } - @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public String getExecutionMode() { if (vectorMode) { if (llapMode) { @@ -459,7 +469,8 @@ public class MapWork extends BaseWork { } @Override - @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.OPERATOR_PATH) public Set<Operator<? extends OperatorDesc>> getAllRootOperators() { Set<Operator<?>> opSet = new LinkedHashSet<Operator<?>>(); @@ -716,4 +727,86 @@ public class MapWork extends BaseWork { public VectorizedRowBatch getVectorizedRowBatch() { return vectorizedRowBatch; } + + /* + * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable + * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated + * vectorizing this node. + * + * When Vectorized Input File Format looks at this flag, it can determine whether it should + * operate vectorized or not. 
In some modes, the node can be vectorized but use row + * serialization. + */ + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } + + public boolean getUseVectorizedInputFileFormat() { + return useVectorizedInputFileFormat; + } + + public void setNotEnabledInputFileFormatReason(VectorizerReason notEnabledInputFileFormatReason) { + this.notEnabledInputFileFormatReason = notEnabledInputFileFormatReason; + } + + public VectorizerReason getNotEnabledInputFileFormatReason() { + return notEnabledInputFileFormatReason; + } + + public void setVectorizationInputFileFormatClassNameSet(Set<String> vectorizationInputFileFormatClassNameSet) { + this.vectorizationInputFileFormatClassNameSet = vectorizationInputFileFormatClassNameSet; + } + + public Set<String> getVectorizationInputFileFormatClassNameSet() { + return vectorizationInputFileFormatClassNameSet; + } + + public void setVectorizationEnabledConditionsMet(ArrayList<String> vectorizationEnabledConditionsMet) { + this.vectorizationEnabledConditionsMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsMet, true); + } + + public List<String> getVectorizationEnabledConditionsMet() { + return vectorizationEnabledConditionsMet; + } + + public void setVectorizationEnabledConditionsNotMet(List<String> vectorizationEnabledConditionsNotMet) { + this.vectorizationEnabledConditionsNotMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsNotMet, false); + } + + public List<String> getVectorizationEnabledConditionsNotMet() { + return vectorizationEnabledConditionsNotMet; + } + + public class MapExplainVectorization extends BaseExplainVectorization { + + private final MapWork mapWork; + + public MapExplainVectorization(MapWork mapWork) { + super(mapWork); + this.mapWork = mapWork; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "inputFileFormats", explainLevels = { 
Level.DEFAULT, Level.EXTENDED }) + public Set<String> inputFileFormats() { + return mapWork.getVectorizationInputFileFormatClassNameSet(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> enabledConditionsMet() { + return mapWork.getVectorizationEnabledConditionsMet(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> enabledConditionsNotMet() { + return mapWork.getVectorizationEnabledConditionsNotMet(); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Map Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MapExplainVectorization getMapExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new MapExplainVectorization(this); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java index 82143a6..76b5138 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java @@ -32,13 +32,15 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * MapredLocalWork. 
* */ -@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class MapredLocalWork implements Serializable { private static final long serialVersionUID = 1L; http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index aa7f6ed..af9adc2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -24,14 +24,15 @@ import java.util.List; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; - +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * MapredWork. 
* */ -@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public class MapredWork extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -40,7 +41,8 @@ public class MapredWork extends AbstractOperatorDesc { private boolean finalMapRed; - @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public MapWork getMapWork() { return mapWork; } @@ -49,7 +51,8 @@ public class MapredWork extends AbstractOperatorDesc { this.mapWork = mapWork; } - @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public ReduceWork getReduceWork() { return reduceWork; } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java new file mode 100644 index 0000000..bdf9859 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnMapping; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +public class OperatorExplainVectorization { + + protected final VectorDesc vectorDesc; + + protected final boolean isNative; + + public OperatorExplainVectorization(VectorDesc vectorDesc, boolean isNative) { + this.vectorDesc = vectorDesc; + this.isNative = isNative; + } + + public List<String> vectorExpressionsToStringList(VectorExpression[] vectorExpressions) { + if (vectorExpressions == null) { + return null; + } + List<String> vecExprList = new ArrayList<String>(vectorExpressions.length); + for (VectorExpression vecExpr : vectorExpressions) { + vecExprList.add(vecExpr.toString()); + } + return vecExprList; + } + + public String outputColumnsToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + return Arrays.toString(outputColumns); + } + + public List<String> 
columnMappingToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] inputColumns = vectorColumnMapping.getInputColumns(); + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + ArrayList<String> result = new ArrayList<String>(size); + for (int i = 0; i < size; i++) { + result.add(inputColumns[i] + " -> " + outputColumns[i]); + } + return result; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "className", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getClassName() { + return vectorDesc.getVectorOpClass().getSimpleName(); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "native", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public boolean getNative() { + return isNative; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index d7e404c..b8c2d42 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -19,11 +19,18 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.EnumSet; +import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,9 
+128,6 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { private static transient Logger LOG = LoggerFactory.getLogger(ReduceSinkDesc.class); - // Extra parameters only for vectorization. - private VectorReduceSinkDesc vectorDesc; - public ReduceSinkDesc() { } @@ -187,14 +191,6 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { return desc; } - public void setVectorDesc(VectorReduceSinkDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorReduceSinkDesc getVectorDesc() { - return vectorDesc; - } - public java.util.ArrayList<java.lang.String> getOutputKeyColumnNames() { return outputKeyColumnNames; } @@ -490,4 +486,105 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { this.hasOrderBy = hasOrderBy; } + // Use LinkedHashSet to give predictable display order. + private static Set<String> vectorizableReduceSinkNativeEngines = + new LinkedHashSet<String>(Arrays.asList("tez", "spark")); + + public class ReduceSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + private final ReduceSinkDesc reduceSinkDesc; + private final VectorReduceSinkDesc vectorReduceSinkDesc; + private final VectorReduceSinkInfo vectorReduceSinkInfo; + + private VectorizationCondition[] nativeConditions; + + public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc, VectorDesc vectorDesc) { + // VectorReduceSinkOperator is not native vectorized. 
+ super(vectorDesc, ((VectorReduceSinkDesc) vectorDesc).reduceSinkKeyType()!= ReduceSinkKeyType.NONE); + this.reduceSinkDesc = reduceSinkDesc; + vectorReduceSinkDesc = (VectorReduceSinkDesc) vectorDesc; + vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo(); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getKeyExpression() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkKeyExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "valueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getValueExpression() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions()); + } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorReduceSinkDesc.getIsVectorizationReduceSinkNativeEnabled(); + + String engine = vectorReduceSinkDesc.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableReduceSinkNativeEngines; + boolean engineInSupported = vectorizableReduceSinkNativeEngines.contains(engine); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName), + new VectorizationCondition( + !vectorReduceSinkDesc.getAcidChange(), + "Not ACID UPDATE or DELETE"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasBuckets(), + "No buckets"), + new VectorizationCondition( + !vectorReduceSinkDesc.getHasTopN(), + "No TopN"), + new VectorizationCondition( + vectorReduceSinkDesc.getUseUniformHash(), + "Uniform Hash"), + new 
VectorizationCondition( + !vectorReduceSinkDesc.getHasDistinctColumns(), + "No DISTINCT columns"), + new VectorizationCondition( + vectorReduceSinkDesc.getIsKeyBinarySortable(), + "BinarySortableSerDe for keys"), + new VectorizationCondition( + vectorReduceSinkDesc.getIsValueLazyBinary(), + "LazyBinarySerDe for values") + }; + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getNativeConditionsMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getNativeConditionsNotMet() { + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Reduce Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public ReduceSinkOperatorExplainVectorization getReduceSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new ReduceSinkOperatorExplainVectorization(this, vectorDesc); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index 72fc4ca..f4ab2a0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -19,17 +19,23 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import 
java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -89,6 +95,9 @@ public class ReduceWork extends BaseWork { private ObjectInspector keyObjectInspector = null; private ObjectInspector valueObjectInspector = null; + private boolean reduceVectorizationEnabled; + private String vectorReduceEngine; + /** * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing * to keySerializeInfo of the ReduceSink @@ -142,7 +151,8 @@ public class ReduceWork extends BaseWork { this.tagToValueDesc = tagToValueDesc; } - @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = Vectorization.SUMMARY_PATH) public String getExecutionMode() { if (vectorMode) { if (llapMode) { @@ -160,7 +170,8 @@ public class ReduceWork extends BaseWork { return null; } - @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, + vectorization = 
Vectorization.OPERATOR_PATH) public Operator<?> getReducer() { return reducer; } @@ -252,4 +263,81 @@ public class ReduceWork extends BaseWork { public void setMaxReduceTasks(int maxReduceTasks) { this.maxReduceTasks = maxReduceTasks; } + + public void setReduceVectorizationEnabled(boolean reduceVectorizationEnabled) { + this.reduceVectorizationEnabled = reduceVectorizationEnabled; + } + + public boolean getReduceVectorizationEnabled() { + return reduceVectorizationEnabled; + } + + public void setVectorReduceEngine(String vectorReduceEngine) { + this.vectorReduceEngine = vectorReduceEngine; + } + + public String getVectorReduceEngine() { + return vectorReduceEngine; + } + + // Use LinkedHashSet to give predictable display order. + private static Set<String> reduceVectorizableEngines = + new LinkedHashSet<String>(Arrays.asList("tez", "spark")); + + public class ReduceExplainVectorization extends BaseExplainVectorization { + + private final ReduceWork reduceWork; + + private VectorizationCondition[] reduceVectorizationConditions; + + public ReduceExplainVectorization(ReduceWork reduceWork) { + super(reduceWork); + this.reduceWork = reduceWork; + } + + private VectorizationCondition[] createReduceExplainVectorizationConditions() { + + boolean enabled = reduceWork.getReduceVectorizationEnabled(); + + String engine = reduceWork.getVectorReduceEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + reduceVectorizableEngines; + + boolean engineInSupported = reduceVectorizableEngines.contains(engine); + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED.varname), + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName) + }; + return conditions; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", explainLevels = { Level.DEFAULT, 
Level.EXTENDED }) + public List<String> getEnableConditionsMet() { + if (reduceVectorizationConditions == null) { + reduceVectorizationConditions = createReduceExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsMet(reduceVectorizationConditions); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getEnableConditionsNotMet() { + if (reduceVectorizationConditions == null) { + reduceVectorizationConditions = createReduceExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsNotMet(reduceVectorizationConditions); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "Reduce Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public ReduceExplainVectorization getReduceExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new ReduceExplainVectorization(this); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java index 67a8327..0601ce0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java @@ -19,8 +19,11 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; + import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -135,4 +138,36 @@ public class SelectDesc extends AbstractOperatorDesc { public void setSelStarNoCompute(boolean selStarNoCompute) { this.selStarNoCompute = selStarNoCompute; } + + + public class SelectOperatorExplainVectorization 
extends OperatorExplainVectorization { + + private final SelectDesc selectDesc; + private final VectorSelectDesc vectorSelectDesc; + + public SelectOperatorExplainVectorization(SelectDesc selectDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.selectDesc = selectDesc; + vectorSelectDesc = (VectorSelectDesc) vectorDesc; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "selectExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getSelectExpressions() { + return vectorExpressionsToStringList(vectorSelectDesc.getSelectExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProjectedOutputColumns() { + return Arrays.toString(vectorSelectDesc.getProjectedOutputColumns()); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Select Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SelectOperatorExplainVectorization getSelectVectorization() { + if (vectorDesc == null) { + return null; + } + return new SelectOperatorExplainVectorization(this, vectorDesc); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java index 8833ae3..260bc07 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; + /** * Map Join operator 
Descriptor implementation. * @@ -43,4 +46,26 @@ public class SparkHashTableSinkDesc extends HashTableSinkDesc { public void setTag(byte tag) { this.tag = tag; } + + public class SparkHashTableSinkOperatorExplainVectorization extends OperatorExplainVectorization { + + private final HashTableSinkDesc filterDesc; + private final VectorSparkHashTableSinkDesc vectorHashTableSinkDesc; + + public SparkHashTableSinkOperatorExplainVectorization(HashTableSinkDesc filterDesc, VectorDesc vectorDesc) { + // Native vectorization supported. + super(vectorDesc, true); + this.filterDesc = filterDesc; + vectorHashTableSinkDesc = (VectorSparkHashTableSinkDesc) vectorDesc; + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Spark Hash Table Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public SparkHashTableSinkOperatorExplainVectorization getHashTableSinkVectorization() { + if (vectorDesc == null) { + return null; + } + return new SparkHashTableSinkOperatorExplainVectorization(this, vectorDesc); + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java index bb5dd79..066e32d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; - import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -34,6 +33,7 @@ import java.util.Set; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import com.google.common.base.Preconditions; @@ -43,7 +43,7 @@ import 
com.google.common.base.Preconditions; * roots and and ReduceWork at all other nodes. */ @SuppressWarnings("serial") -@Explain(displayName = "Spark") +@Explain(displayName = "Spark", vectorization = Vectorization.SUMMARY_PATH) public class SparkWork extends AbstractOperatorDesc { private static int counter; private final String name; @@ -76,7 +76,7 @@ public class SparkWork extends AbstractOperatorDesc { /** * @return a map of "vertex name" to BaseWork */ - @Explain(displayName = "Vertices") + @Explain(displayName = "Vertices", vectorization = Vectorization.SUMMARY_PATH) public Map<String, BaseWork> getWorkMap() { Map<String, BaseWork> result = new LinkedHashMap<String, BaseWork>(); for (BaseWork w: getAllWork()) {