http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java deleted file mode 100644 index e0a6198..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java +++ /dev/null @@ -1,123 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer.physical; - -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; - -/** - * Why a node did not vectorize. - * - */ -public class VectorizerReason { - - private static long serialVersionUID = 1L; - - public static enum VectorizerNodeIssue { - NONE, - NODE_ISSUE, - OPERATOR_ISSUE, - EXPRESSION_ISSUE - } - - private final VectorizerNodeIssue vectorizerNodeIssue; - - private final Operator<? extends OperatorDesc> operator; - - private final String expressionTitle; - - private final String issue; - - private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue, - Operator<? extends OperatorDesc> operator, String expressionTitle, String issue) { - this.vectorizerNodeIssue = vectorizerNodeIssue; - this.operator = operator; - this.expressionTitle = expressionTitle; - this.issue = issue; - } - - public static VectorizerReason createNodeIssue(String issue) { - return new VectorizerReason( - VectorizerNodeIssue.NODE_ISSUE, - null, - null, - issue); - } - - public static VectorizerReason createOperatorIssue(Operator<? extends OperatorDesc> operator, - String issue) { - return new VectorizerReason( - VectorizerNodeIssue.OPERATOR_ISSUE, - operator, - null, - issue); - } - - public static VectorizerReason createExpressionIssue(Operator<? extends OperatorDesc> operator, - String expressionTitle, String issue) { - return new VectorizerReason( - VectorizerNodeIssue.EXPRESSION_ISSUE, - operator, - expressionTitle, - issue); - } - - @Override - public VectorizerReason clone() { - return new VectorizerReason(vectorizerNodeIssue, operator, expressionTitle, issue); - } - - public VectorizerNodeIssue getVectorizerNodeIssue() { - return vectorizerNodeIssue; - } - - public Operator<? extends OperatorDesc> getOperator() { - return operator; - } - - public String getExpressionTitle() { - return expressionTitle; - } - - public String getIssue() { - return issue; - } - - @Override - public String toString() { - String reason; - switch (vectorizerNodeIssue) { - case NODE_ISSUE: - reason = (issue == null ? "unknown" : issue); - break; - case OPERATOR_ISSUE: - reason = (operator == null ? "Unknown" : operator.getType()) + " operator: " + - (issue == null ? "unknown" : issue); - break; - case EXPRESSION_ISSUE: - reason = expressionTitle + " expression for " + - (operator == null ? "Unknown" : operator.getType()) + " operator: " + - (issue == null ? "unknown" : issue); - break; - default: - reason = "Unknown " + vectorizerNodeIssue; - } - return reason; - } -}
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 1f118dc..4a8ff15 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -27,27 +27,12 @@ import org.apache.hadoop.fs.Path; */ public class ExplainConfiguration { - - public enum VectorizationDetailLevel { - - SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1); - - public final int rank; - VectorizationDetailLevel(int rank) { - this.rank = rank; - } - }; - private boolean extended = false; private boolean formatted = false; private boolean dependency = false; private boolean logical = false; private boolean authorize = false; private boolean userLevelExplain = false; - private boolean vectorization = false; - private boolean vectorizationOnly = false; - private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY; - private Path explainRootPath; private Map<String, Long> opIdToRuntimeNumRows; @@ -113,30 +98,6 @@ public class ExplainConfiguration { this.userLevelExplain = userLevelExplain; } - public boolean isVectorization() { - return vectorization; - } - - public void setVectorization(boolean vectorization) { - this.vectorization = vectorization; - } - - public boolean isVectorizationOnly() { - return vectorizationOnly; - } - - public void setVectorizationOnly(boolean vectorizationOnly) { - this.vectorizationOnly = vectorizationOnly; - } - - public VectorizationDetailLevel getVectorizationDetailLevel() { - return vectorizationDetailLevel; - } - - public void setVectorizationDetailLevel(VectorizationDetailLevel vectorizationDetailLevel) { - this.vectorizationDetailLevel = vectorizationDetailLevel; - } - public Path getExplainRootPath() { return explainRootPath; } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index f62cf9a..300542e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -45,7 +45,6 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; -import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; @@ -71,9 +70,7 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { @SuppressWarnings("unchecked") @Override public void analyzeInternal(ASTNode ast) throws SemanticException { - final int childCount = ast.getChildCount(); - int i = 1; // Skip TOK_QUERY. - while (i < childCount) { + for (int i = 1; i < ast.getChildCount(); i++) { int explainOptions = ast.getChild(i).getType(); if (explainOptions == HiveParser.KW_FORMATTED) { config.setFormatted(true); @@ -88,40 +85,7 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { } else if (explainOptions == HiveParser.KW_ANALYZE) { config.setAnalyze(AnalyzeState.RUNNING); config.setExplainRootPath(ctx.getMRTmpPath()); - } else if (explainOptions == HiveParser.KW_VECTORIZATION) { - config.setVectorization(true); - if (i + 1 < childCount) { - int vectorizationOption = ast.getChild(i + 1).getType(); - - // [ONLY] - if (vectorizationOption == HiveParser.TOK_ONLY) { - config.setVectorizationOnly(true); - i++; - if (i + 1 >= childCount) { - break; - } - vectorizationOption = ast.getChild(i + 1).getType(); - } - - // [SUMMARY|OPERATOR|EXPRESSION|DETAIL] - if (vectorizationOption == HiveParser.TOK_SUMMARY) { - config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY); - i++; - } else if (vectorizationOption == HiveParser.TOK_OPERATOR) { - config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR); - i++; - } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) { - config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION); - i++; - } else if (vectorizationOption == HiveParser.TOK_DETAIL) { - config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL); - i++; - } - } - } else { - // UNDONE: UNKNOWN OPTION? } - i++; } ctx.setExplainConfig(config); http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 025ea10..5d3fa6a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -335,11 +335,6 @@ KW_KEY: 'KEY'; KW_ABORT: 'ABORT'; KW_EXTRACT: 'EXTRACT'; KW_FLOOR: 'FLOOR'; -KW_VECTORIZATION: 'VECTORIZATION'; -KW_SUMMARY: 'SUMMARY'; -KW_OPERATOR: 'OPERATOR'; -KW_EXPRESSION: 'EXPRESSION'; -KW_DETAIL: 'DETAIL'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index eebd875..5c16c55 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -380,11 +380,6 @@ TOK_ROLLBACK; TOK_SET_AUTOCOMMIT; TOK_CACHE_METADATA; TOK_ABORT_TRANSACTIONS; -TOK_ONLY; -TOK_SUMMARY; -TOK_OPERATOR; -TOK_EXPRESSION; -TOK_DETAIL; } @@ -722,28 +717,7 @@ explainStatement explainOption @init { msgs.push("explain option"); } @after { msgs.pop(); } - : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE| - (KW_VECTORIZATION vectorizationOnly? vectorizatonDetail?) - ; - -vectorizationOnly -@init { pushMsg("vectorization's only clause", state); } -@after { popMsg(state); } - : KW_ONLY - -> ^(TOK_ONLY) - ; - -vectorizatonDetail -@init { pushMsg("vectorization's detail level clause", state); } -@after { popMsg(state); } - : KW_SUMMARY - -> ^(TOK_SUMMARY) - | KW_OPERATOR - -> ^(TOK_OPERATOR) - | KW_EXPRESSION - -> ^(TOK_EXPRESSION) - | KW_DETAIL - -> ^(TOK_DETAIL) + : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION|KW_ANALYZE ; execStatement http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 13f6879..50987c3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -719,12 +719,6 @@ nonReserved | KW_VALIDATE | KW_NOVALIDATE | KW_KEY - | KW_VECTORIZATION - | KW_SUMMARY - | KW_OPERATOR - | KW_EXPRESSION - | KW_DETAIL - ; //The following SQL2011 reserved keywords are used as function name only, but not as identifiers. http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index 2c14203..e217bdf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -29,10 +29,6 @@ import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; public class AbstractOperatorDesc implements OperatorDesc { protected boolean vectorMode = false; - - // Extra parameters only for vectorization. - protected VectorDesc vectorDesc; - protected Statistics statistics; protected transient OpTraits opTraits; protected transient Map<String, String> opProps; @@ -68,14 +64,6 @@ public class AbstractOperatorDesc implements OperatorDesc { this.vectorMode = vm; } - public void setVectorDesc(VectorDesc vectorDesc) { - this.vectorDesc = vectorDesc; - } - - public VectorDesc getVectorDesc() { - return vectorDesc; - } - @Override public OpTraits getTraits() { return opTraits; http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java index 4304b11..5157ebd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java @@ -18,24 +18,10 @@ package org.apache.hadoop.hive.ql.plan; -import org.apache.hadoop.hive.ql.exec.Operator; - public class AbstractVectorDesc implements VectorDesc { - private static long serialVersionUID = 1L; - - private Class<?> vectorOpClass; - @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException("clone not supported"); } - - public void setVectorOp(Class<?> vectorOpClass) { - this.vectorOpClass = vectorOpClass; - } - - public Class<?> getVectorOpClass() { - return vectorOpClass; - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java index c5294f0..264f959 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AppMasterEventDesc.java @@ -19,10 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.io.IOException; -import java.util.List; -import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.io.DataOutputBuffer; @@ -63,25 +60,4 @@ public class AppMasterEventDesc extends AbstractOperatorDesc { public void writeEventHeader(DataOutputBuffer buffer) throws IOException { // nothing to add } - - public class AppMasterEventOperatorExplainVectorization extends OperatorExplainVectorization { - - private final AppMasterEventDesc appMasterEventDesc; - private final VectorAppMasterEventDesc vectorAppMasterEventDesc; - - public AppMasterEventOperatorExplainVectorization(AppMasterEventDesc appMasterEventDesc, VectorDesc vectorDesc) { - // Native vectorization supported. - super(vectorDesc, true); - this.appMasterEventDesc = appMasterEventDesc; - vectorAppMasterEventDesc = (VectorAppMasterEventDesc) vectorDesc; - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "App Master Event Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public AppMasterEventOperatorExplainVectorization getAppMasterEventVectorization() { - if (vectorDesc == null) { - return null; - } - return new AppMasterEventOperatorExplainVectorization(this, vectorDesc); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index b061d5e..13a0811 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; -import java.util.Arrays; import java.util.LinkedList; import java.util.LinkedHashSet; import java.util.List; @@ -34,9 +33,7 @@ import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -68,25 +65,12 @@ public abstract class BaseWork extends AbstractOperatorDesc { private String name; - /* - * Vectorization. - */ - - // This will be true if a node was examined by the Vectorizer class. - protected boolean vectorizationExamined; - - protected boolean vectorizationEnabled; + // Vectorization. protected VectorizedRowBatchCtx vectorizedRowBatchCtx; protected boolean useVectorizedInputFileFormat; - private VectorizerReason notVectorizedReason; - - private boolean groupByVectorOutput; - private boolean allNative; - private boolean usesVectorUDFAdaptor; - protected boolean llapMode = false; protected boolean uberMode = false; @@ -179,22 +163,6 @@ public abstract class BaseWork extends AbstractOperatorDesc { // ----------------------------------------------------------------------------------------------- - public void setVectorizationExamined(boolean vectorizationExamined) { - this.vectorizationExamined = vectorizationExamined; - } - - public boolean getVectorizationExamined() { - return vectorizationExamined; - } - - public void setVectorizationEnabled(boolean vectorizationEnabled) { - this.vectorizationEnabled = vectorizationEnabled; - } - - public boolean getVectorizationEnabled() { - return vectorizationEnabled; - } - /* * The vectorization context for creating the VectorizedRowBatch for the node. */ @@ -206,160 +174,23 @@ public abstract class BaseWork extends AbstractOperatorDesc { this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; } - public void setNotVectorizedReason(VectorizerReason notVectorizedReason) { - this.notVectorizedReason = notVectorizedReason; - } - - public VectorizerReason getNotVectorizedReason() { - return notVectorizedReason; - } - - public void setGroupByVectorOutput(boolean groupByVectorOutput) { - this.groupByVectorOutput = groupByVectorOutput; - } - - public boolean getGroupByVectorOutput() { - return groupByVectorOutput; - } - - public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) { - this.usesVectorUDFAdaptor = usesVectorUDFAdaptor; - } - - public boolean getUsesVectorUDFAdaptor() { - return usesVectorUDFAdaptor; - } - - public void setAllNative(boolean allNative) { - this.allNative = allNative; + /* + * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable + * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated + * vectorizing this node. + * + * When Vectorized Input File Format looks at this flag, it can determine whether it should + * operate vectorized or not. In some modes, the node can be vectorized but use row + * serialization. + */ + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; } - public boolean getAllNative() { - return allNative; + public boolean getUseVectorizedInputFileFormat() { + return useVectorizedInputFileFormat; } - public static class BaseExplainVectorization { - - private final BaseWork baseWork; - - public BaseExplainVectorization(BaseWork baseWork) { - this.baseWork = baseWork; - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public boolean enabled() { - return baseWork.getVectorizationEnabled(); - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "vectorized", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public Boolean vectorized() { - if (!baseWork.getVectorizationEnabled()) { - return null; - } - return baseWork.getVectorMode(); - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String notVectorizedReason() { - if (!baseWork.getVectorizationEnabled() || baseWork.getVectorMode()) { - return null; - } - VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason(); - if (notVectorizedReason == null) { - return "Unknown"; - } - return notVectorizedReason.toString(); - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "groupByVectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public Boolean groupByRowOutputCascade() { - if (!baseWork.getVectorMode()) { - return null; - } - return baseWork.getGroupByVectorOutput(); - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public Boolean nativeVectorized() { - if (!baseWork.getVectorMode()) { - return null; - } - return baseWork.getAllNative(); - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public Boolean usesVectorUDFAdaptor() { - if (!baseWork.getVectorMode()) { - return null; - } - return baseWork.getUsesVectorUDFAdaptor(); - } - - public static class RowBatchContextExplainVectorization { - - private final VectorizedRowBatchCtx vectorizedRowBatchCtx; - - public RowBatchContextExplainVectorization(VectorizedRowBatchCtx vectorizedRowBatchCtx) { - this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; - } - - private List<String> getColumns(int startIndex, int count) { - String[] rowColumnNames = vectorizedRowBatchCtx.getRowColumnNames(); - TypeInfo[] rowColumnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); - List<String> result = new ArrayList<String>(count); - final int end = startIndex + count; - for (int i = startIndex; i < end; i++) { - result.add(rowColumnNames[i] + ":" + rowColumnTypeInfos[i]); - } - return result; - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getDataColumns() { - return getColumns(0, vectorizedRowBatchCtx.getDataColumnCount()); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getPartitionColumns() { - return getColumns(vectorizedRowBatchCtx.getDataColumnCount(), vectorizedRowBatchCtx.getPartitionColumnCount()); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "includeColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getDataColumnNums() { - int[] dataColumnNums = vectorizedRowBatchCtx.getDataColumnNums(); - if (dataColumnNums == null) { - return null; - } - return Arrays.toString(vectorizedRowBatchCtx.getDataColumnNums()); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public int getDataColumnCount() { - return vectorizedRowBatchCtx.getDataColumnCount(); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public int getPartitionColumnCount() { - return vectorizedRowBatchCtx.getPartitionColumnCount(); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "scratchColumnTypeNames", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getScratchColumnTypeNames() { - return Arrays.asList(vectorizedRowBatchCtx.getScratchColumnTypeNames()); - } - - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "rowBatchContext", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public RowBatchContextExplainVectorization vectorizedRowBatchContext() { - if (!baseWork.getVectorMode()) { - return null; - } - return new RowBatchContextExplainVectorization(baseWork.getVectorizedRowBatchCtx()); - } - } - - // ----------------------------------------------------------------------------------------------- /** http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index 7b16ad7..b0b6c3a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -45,33 +45,4 @@ public @interface Explain { boolean displayOnlyOnTrue() default false; boolean skipHeader() default false; - - // By default, many existing @Explain classes/methods are NON_VECTORIZED. - // - // Vectorized methods/classes have detail levels: - // SUMMARY, OPERATOR, EXPRESSION, or DETAIL. - // As you go to the right you get more detail and the information for the previous level(s) is - // included. The default is SUMMARY. - // - // The "path" enumerations are used to mark methods/classes that lead to vectorization specific - // ones so we can avoid displaying headers for things that have no vectorization information - // below. - // - // For example, the TezWork class is marked SUMMARY_PATH because it leads to both - // SUMMARY and OPERATOR methods/classes. And, MapWork.getAllRootOperators is marked OPERATOR_PATH - // because we only display operator information for OPERATOR. - // - // EXPRESSION and DETAIL typically live inside SUMMARY or OPERATOR classes. - // - public enum Vectorization { - SUMMARY_PATH(4), OPERATOR_PATH(3), - SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1), - NON_VECTORIZED(Integer.MAX_VALUE); - - public final int rank; - Vectorization(int rank) { - this.rank = rank; - } - }; - Vectorization vectorization() default Vectorization.NON_VECTORIZED; } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 805357c..9f4767c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -28,7 +28,6 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; -import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; import org.apache.hadoop.hive.ql.parse.ParseContext; /** @@ -118,18 +117,6 @@ public class ExplainWork implements Serializable { return config.isFormatted(); } - public boolean isVectorization() { - return config.isVectorization(); - } - - public boolean isVectorizationOnly() { - return config.isVectorizationOnly(); - } - - public VectorizationDetailLevel isVectorizationDetailLevel() { - return config.getVectorizationDetailLevel(); - } - public ParseContext getParseContext() { return pCtx; } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 3c69f69..8ea6440 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -22,7 +22,6 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.fs.Path; @@ -31,17 +30,14 @@ import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; -import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * FetchWork. * */ -@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.SUMMARY_PATH) +@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class FetchWork implements Serializable { private static final long serialVersionUID = 1L; @@ -311,43 +307,4 @@ public class FetchWork implements Serializable { return ret; } - - // ----------------------------------------------------------------------------------------------- - - private boolean vectorizationExamined; - - public void setVectorizationExamined(boolean vectorizationExamined) { - this.vectorizationExamined = vectorizationExamined; - } - - public boolean getVectorizationExamined() { - return vectorizationExamined; - } - - public class FetchExplainVectorization { - - private final FetchWork fetchWork; - - public FetchExplainVectorization(FetchWork fetchWork) { - this.fetchWork = fetchWork; - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public boolean enabled() { - return false; - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> enabledConditionsNotMet() { - return VectorizationCondition.getConditionsSupported(false); - } - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "Fetch Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public FetchExplainVectorization getMapExplainVectorization() { - if (!getVectorizationExamined()) { - return null; - } - return new FetchExplainVectorization(this); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index bbc5f10..07ed4fd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -25,7 +25,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; + /** * FileSinkDesc. @@ -474,19 +474,4 @@ public class FileSinkDesc extends AbstractOperatorDesc { this.statsTmpDir = statsCollectionTempDir; } - public class FileSinkOperatorExplainVectorization extends OperatorExplainVectorization { - - public FileSinkOperatorExplainVectorization(VectorDesc vectorDesc) { - // Native vectorization not supported. - super(vectorDesc, false); - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "File Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public FileSinkOperatorExplainVectorization getFileSinkVectorization() { - if (vectorDesc == null) { - return null; - } - return new FileSinkOperatorExplainVectorization(vectorDesc); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index ff69775..fa20798 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -22,7 +22,6 @@ import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; @@ -178,7 +177,6 @@ public class FilterDesc extends AbstractOperatorDesc { this.syntheticJoinPredicate = syntheticJoinPredicate; } - @Override public Object clone() { FilterDesc filterDesc = new FilterDesc(getPredicate().clone(), getIsSamplingPred()); @@ -188,30 +186,4 @@ public class FilterDesc extends AbstractOperatorDesc { filterDesc.setSortedFilter(isSortedFilter()); return filterDesc; } - - public class FilterOperatorExplainVectorization extends OperatorExplainVectorization { - - private final FilterDesc filterDesc; - private final VectorFilterDesc vectorFilterDesc; - - public FilterOperatorExplainVectorization(FilterDesc filterDesc, VectorDesc vectorDesc) { - // Native vectorization supported. - super(vectorDesc, true); - this.filterDesc = filterDesc; - vectorFilterDesc = (VectorFilterDesc) vectorDesc; - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "predicateExpression", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getPredicateExpression() { - return vectorFilterDesc.getPredicateExpression().toString(); - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Filter Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public FilterOperatorExplainVectorization getFilterVectorization() { - if (vectorDesc == null) { - return null; - } - return new FilterOperatorExplainVectorization(this, vectorDesc); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 204277e..99791e5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -19,18 +19,13 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hive.common.util.AnnotationUtils; -import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; /** @@ -76,8 +71,11 @@ public class GroupByDesc extends AbstractOperatorDesc { transient private boolean isDistinct; private boolean dontResetAggrsDistinct; + // Extra parameters only for vectorization. + private VectorGroupByDesc vectorDesc; + public GroupByDesc() { - vectorDesc = null; + vectorDesc = new VectorGroupByDesc(); } public GroupByDesc( @@ -108,7 +106,7 @@ public class GroupByDesc extends AbstractOperatorDesc { final boolean groupingSetsPresent, final int groupingSetsPosition, final boolean isDistinct) { - vectorDesc = null; + vectorDesc = new VectorGroupByDesc(); this.mode = mode; this.outputColumnNames = outputColumnNames; this.keys = keys; @@ -122,6 +120,14 @@ public class GroupByDesc extends AbstractOperatorDesc { this.isDistinct = isDistinct; } + public void setVectorDesc(VectorGroupByDesc vectorDesc) { + this.vectorDesc = vectorDesc; + } + + public VectorGroupByDesc getVectorDesc() { + return vectorDesc; + } + public Mode getMode() { return mode; } @@ -305,66 +311,4 @@ public class GroupByDesc extends AbstractOperatorDesc { this.isDistinct = isDistinct; } - public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { - - private final GroupByDesc groupByDesc; - private final VectorGroupByDesc vectorGroupByDesc; - - public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorDesc vectorDesc) { - // Native vectorization not supported. - super(vectorDesc, false); - this.groupByDesc = groupByDesc; - vectorGroupByDesc = (VectorGroupByDesc) vectorDesc; - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getKeysExpression() { - return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getAggregators() { - VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); - List<String> vecAggrList = new ArrayList<String>(vecAggregators.length); - for (VectorAggregateExpression vecAggr : vecAggregators) { - vecAggrList.add(vecAggr.toString()); - } - return vecAggrList; - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public boolean getGroupByRowOutputCascade() { - return vectorGroupByDesc.isVectorOutput(); - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getVectorOutputConditionsNotMet() { - List<String> results = new ArrayList<String>(); - VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); - for (VectorAggregateExpression vecAggr : vecAggregators) { - Category category = Vectorizer.aggregationOutputCategory(vecAggr); - if (category != ObjectInspector.Category.PRIMITIVE) { - results.add( - "Vector output of " + vecAggr.toString() + " output type " + category + " requires PRIMITIVE IS false"); - } - } - if (results.size() == 0) { - return null; - } - return results; - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getProjectedOutputColumns() { - return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns()); - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public GroupByOperatorExplainVectorization getGroupByVectorization() { - if (vectorDesc == null) { - return null; - } - return new GroupByOperatorExplainVectorization(this, vectorDesc); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java index a338319..94ac41e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java @@ -26,7 +26,6 @@ import java.util.Map; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java index 45ec431..8448a41 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java @@ -17,10 +17,7 @@ */ package org.apache.hadoop.hive.ql.plan; -import java.util.List; - import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -76,19 +73,4 @@ public class LimitDesc extends AbstractOperatorDesc { this.leastRows = leastRows; } - public class LimitOperatorExplainVectorization extends OperatorExplainVectorization { - - public LimitOperatorExplainVectorization(LimitDesc limitDesc, VectorDesc vectorDesc) { - // Native vectorization supported. - super(vectorDesc, true); - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Limit Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public LimitOperatorExplainVectorization getLimitVectorization() { - if (vectorDesc == null) { - return null; - } - return new LimitOperatorExplainVectorization(this, vectorDesc); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 3633fde..ec35860 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -20,24 +20,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; /** * Map Join operator Descriptor implementation. @@ -83,16 +73,17 @@ public class MapJoinDesc extends JoinDesc implements Serializable { private boolean isHybridHashJoin; private boolean isDynamicPartitionHashJoin = false; + // Extra parameters only for vectorization. + private VectorMapJoinDesc vectorDesc; + public MapJoinDesc() { - vectorDesc = null; + vectorDesc = new VectorMapJoinDesc(); bigTableBucketNumMapping = new LinkedHashMap<String, Integer>(); } public MapJoinDesc(MapJoinDesc clone) { super(clone); - if (clone.vectorDesc != null) { - throw new RuntimeException("Clone with vectorization desc not supported"); - } + vectorDesc = new VectorMapJoinDesc(clone.vectorDesc); this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; @@ -117,7 +108,7 @@ public class MapJoinDesc extends JoinDesc implements Serializable { final int posBigTable, final JoinCondDesc[] conds, final Map<Byte, List<ExprNodeDesc>> filters, boolean noOuterJoin, String dumpFilePrefix) { super(values, outputColumnNames, noOuterJoin, conds, filters, null); - vectorDesc = null; + vectorDesc = new VectorMapJoinDesc(); this.keys = keys; this.keyTblDesc = keyTblDesc; this.valueTblDescs = valueTblDescs; @@ -128,6 +119,14 @@ public class MapJoinDesc extends JoinDesc implements Serializable { initRetainExprList(); } + public void setVectorDesc(VectorMapJoinDesc vectorDesc) { + this.vectorDesc = vectorDesc; + } + + public VectorMapJoinDesc getVectorDesc() { + return vectorDesc; + } + private void initRetainExprList() { retainList = new HashMap<Byte, List<Integer>>(); Set<Entry<Byte, List<ExprNodeDesc>>> set = super.getExprs().entrySet(); @@ -389,193 +388,4 @@ public class MapJoinDesc extends JoinDesc implements Serializable { public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } - - // Use LinkedHashSet to give predictable display order. - private static Set<String> vectorizableMapJoinNativeEngines = - new LinkedHashSet<String>(Arrays.asList("tez", "spark")); - - public class MapJoinOperatorExplainVectorization extends OperatorExplainVectorization { - - private final MapJoinDesc mapJoinDesc; - private final VectorMapJoinDesc vectorMapJoinDesc; - private final VectorMapJoinInfo vectorMapJoinInfo; - - private VectorizationCondition[] nativeConditions; - - public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorDesc vectorDesc) { - // VectorMapJoinOperator is not native vectorized. - super(vectorDesc, ((VectorMapJoinDesc) vectorDesc).hashTableImplementationType() != HashTableImplementationType.NONE); - this.mapJoinDesc = mapJoinDesc; - vectorMapJoinDesc = (VectorMapJoinDesc) vectorDesc; - vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo(); - } - - private VectorizationCondition[] createNativeConditions() { - - boolean enabled = vectorMapJoinDesc.getIsVectorizationMapJoinNativeEnabled(); - - String engine = vectorMapJoinDesc.getEngine(); - String engineInSupportedCondName = - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableMapJoinNativeEngines; - boolean engineInSupported = vectorizableMapJoinNativeEngines.contains(engine); - - boolean isFastHashTableEnabled = vectorMapJoinDesc.getIsFastHashTableEnabled(); - boolean isHybridHashJoin = vectorMapJoinDesc.getIsHybridHashJoin(); - - boolean whenFastHashTableThenNoHybrid = - (!isFastHashTableEnabled ? true : !isHybridHashJoin); - - VectorizationCondition[] conditions = new VectorizationCondition[] { - new VectorizationCondition( - enabled, - HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED.varname), - new VectorizationCondition( - engineInSupported, - engineInSupportedCondName), - new VectorizationCondition( - vectorMapJoinDesc.getOneMapJoinCondition(), - "One MapJoin Condition"), - new VectorizationCondition( - !vectorMapJoinDesc.getHasNullSafes(), - "No nullsafe"), - new VectorizationCondition( - vectorMapJoinDesc.getSupportsKeyTypes(), - "Supports Key Types"), - new VectorizationCondition( - !vectorMapJoinDesc.getIsEmptyKey(), - "Not empty key"), - new VectorizationCondition( - whenFastHashTableThenNoHybrid, - "When Fast Hash Table, then requires no Hybrid Hash Join"), - new VectorizationCondition( - vectorMapJoinDesc.getSmallTableExprVectorizes(), - "Small table vectorizes"), - }; - return conditions; - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getNativeConditionsMet() { - if (nativeConditions == null) { - nativeConditions = createNativeConditions(); - } - return VectorizationCondition.getConditionsMet(nativeConditions); - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getNativeConditionsNotMet() { - if (nativeConditions == null) { - nativeConditions = createNativeConditions(); - } - return VectorizationCondition.getConditionsNotMet(nativeConditions); - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getBigTableKeyExpressions() { - if (!isNative) { - return null; - } - return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableKeyExpressions()); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableKeyColumns() { - if (!isNative) { - return null; - } - int[] bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); - if (bigTableKeyColumnMap.length == 0) { - return null; - } - return Arrays.toString(bigTableKeyColumnMap); - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getBigTableValueExpressions() { - if (!isNative) { - return null; - } - return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableValueExpressions()); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableValueColumns() { - if (!isNative) { - return null; - } - int[] bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap(); - if (bigTableValueColumnMap.length == 0) { - return null; - } - return Arrays.toString(bigTableValueColumnMap); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getSmallTableColumns() { - if (!isNative) { - return null; - } - return outputColumnsToStringList(vectorMapJoinInfo.getSmallTableMapping()); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getProjectedOutputColumns() { - if (!isNative) { - return null; - } - return outputColumnsToStringList(vectorMapJoinInfo.getProjectionMapping()); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getBigTableOuterKey() { - if (!isNative || vectorMapJoinDesc.operatorVariation() != OperatorVariation.OUTER) { - return null; - } - return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping()); - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainedColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableRetainedColumns() { - if (!isNative) { - return null; - } - return outputColumnsToStringList(vectorMapJoinInfo.getBigTableRetainedMapping()); - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getNativeNotSupportedKeyTypes() { - return vectorMapJoinDesc.getNotSupportedKeyTypes(); - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public MapJoinOperatorExplainVectorization getMapJoinVectorization() { - if (vectorDesc == null || this instanceof SMBJoinDesc) { - return null; - } - return new MapJoinOperatorExplainVectorization(this, vectorDesc); - } - - public class SMBJoinOperatorExplainVectorization extends OperatorExplainVectorization { - - private final SMBJoinDesc smbJoinDesc; - private final VectorSMBJoinDesc vectorSMBJoinDesc; - - public SMBJoinOperatorExplainVectorization(SMBJoinDesc smbJoinDesc, VectorDesc vectorDesc) { - // Native vectorization NOT supported. - super(vectorDesc, false); - this.smbJoinDesc = smbJoinDesc; - vectorSMBJoinDesc = (VectorSMBJoinDesc) vectorDesc; - } - } - - // Handle dual nature. - @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public SMBJoinOperatorExplainVectorization getSMBJoinVectorization() { - if (vectorDesc == null || !(this instanceof SMBJoinDesc)) { - return null; - } - return new SMBJoinOperatorExplainVectorization((SMBJoinDesc) this, vectorDesc); - } - } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 081c511..5cc3663 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -26,7 +26,6 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -48,10 +47,8 @@ import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; -import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -143,12 +140,6 @@ public class MapWork extends BaseWork { private VectorizedRowBatch vectorizedRowBatch; - private VectorizerReason notEnabledInputFileFormatReason; - - private Set<String> vectorizationInputFileFormatClassNameSet; - private List<String> vectorizationEnabledConditionsMet; - private List<String> vectorizationEnabledConditionsNotMet; - // bitsets can't be correctly serialized by Kryo's default serializer // BitSet::wordsInUse is transient, so force dumping into a lower form private byte[] includedBuckets; @@ -366,7 +357,7 @@ public class MapWork extends BaseWork { return nameToSplitSample; } - @Explain(displayName = "LLAP IO", vectorization = Vectorization.SUMMARY_PATH) + @Explain(displayName = "LLAP IO") public String getLlapIoDesc() { return llapIoDesc; } @@ -438,8 +429,7 @@ public class MapWork extends BaseWork { } } - @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.SUMMARY_PATH) + @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getExecutionMode() { if (vectorMode) { if (llapMode) { @@ -469,8 +459,7 @@ public class MapWork extends BaseWork { } @Override - @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.OPERATOR_PATH) + @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Set<Operator<? extends OperatorDesc>> getAllRootOperators() { Set<Operator<?>> opSet = new LinkedHashSet<Operator<?>>(); @@ -727,86 +716,4 @@ public class MapWork extends BaseWork { public VectorizedRowBatch getVectorizedRowBatch() { return vectorizedRowBatch; } - - /* - * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable - * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated - * vectorizing this node. - * - * When Vectorized Input File Format looks at this flag, it can determine whether it should - * operate vectorized or not. In some modes, the node can be vectorized but use row - * serialization. - */ - public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { - this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; - } - - public boolean getUseVectorizedInputFileFormat() { - return useVectorizedInputFileFormat; - } - - public void setNotEnabledInputFileFormatReason(VectorizerReason notEnabledInputFileFormatReason) { - this.notEnabledInputFileFormatReason = notEnabledInputFileFormatReason; - } - - public VectorizerReason getNotEnabledInputFileFormatReason() { - return notEnabledInputFileFormatReason; - } - - public void setVectorizationInputFileFormatClassNameSet(Set<String> vectorizationInputFileFormatClassNameSet) { - this.vectorizationInputFileFormatClassNameSet = vectorizationInputFileFormatClassNameSet; - } - - public Set<String> getVectorizationInputFileFormatClassNameSet() { - return vectorizationInputFileFormatClassNameSet; - } - - public void setVectorizationEnabledConditionsMet(ArrayList<String> vectorizationEnabledConditionsMet) { - this.vectorizationEnabledConditionsMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsMet, true); - } - - public List<String> getVectorizationEnabledConditionsMet() { - return vectorizationEnabledConditionsMet; - } - - public void setVectorizationEnabledConditionsNotMet(List<String> vectorizationEnabledConditionsNotMet) { - this.vectorizationEnabledConditionsNotMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsNotMet, false); - } - - public List<String> getVectorizationEnabledConditionsNotMet() { - return vectorizationEnabledConditionsNotMet; - } - - public class MapExplainVectorization extends BaseExplainVectorization { - - private final MapWork mapWork; - - public MapExplainVectorization(MapWork mapWork) { - super(mapWork); - this.mapWork = mapWork; - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "inputFileFormats", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public Set<String> inputFileFormats() { - return mapWork.getVectorizationInputFileFormatClassNameSet(); - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> enabledConditionsMet() { - return mapWork.getVectorizationEnabledConditionsMet(); - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabledConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> enabledConditionsNotMet() { - return mapWork.getVectorizationEnabledConditionsNotMet(); - } - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "Map Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public MapExplainVectorization getMapExplainVectorization() { - if (!getVectorizationExamined()) { - return null; - } - return new MapExplainVectorization(this); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java index 76b5138..82143a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java @@ -32,15 +32,13 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** * MapredLocalWork. * */ -@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.SUMMARY_PATH) +@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class MapredLocalWork implements Serializable { private static final long serialVersionUID = 1L; http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index af9adc2..aa7f6ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -24,15 +24,14 @@ import java.util.List; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; + /** * MapredWork. * */ -@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.SUMMARY_PATH) +@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class MapredWork extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -41,8 +40,7 @@ public class MapredWork extends AbstractOperatorDesc { private boolean finalMapRed; - @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.SUMMARY_PATH) + @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public MapWork getMapWork() { return mapWork; } @@ -51,8 +49,7 @@ public class MapredWork extends AbstractOperatorDesc { this.mapWork = mapWork; } - @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.SUMMARY_PATH) + @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ReduceWork getReduceWork() { return reduceWork; } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java deleted file mode 100644 index bdf9859..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.plan; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnMapping; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; - -public class OperatorExplainVectorization { - - protected final VectorDesc vectorDesc; - - protected final boolean isNative; - - public OperatorExplainVectorization(VectorDesc vectorDesc, boolean isNative) { - this.vectorDesc = vectorDesc; - this.isNative = isNative; - } - - public List<String> vectorExpressionsToStringList(VectorExpression[] vectorExpressions) { - if (vectorExpressions == null) { - return null; - } - List<String> vecExprList = new ArrayList<String>(vectorExpressions.length); - for (VectorExpression vecExpr : vectorExpressions) { - vecExprList.add(vecExpr.toString()); - } - return vecExprList; - } - - public String outputColumnsToStringList(VectorColumnMapping vectorColumnMapping) { - final int size = vectorColumnMapping.getCount(); - if (size == 0) { - return null; - } - int[] outputColumns = vectorColumnMapping.getOutputColumns(); - return Arrays.toString(outputColumns); - } - - public List<String> columnMappingToStringList(VectorColumnMapping vectorColumnMapping) { - final int size = vectorColumnMapping.getCount(); - if (size == 0) { - return null; - } - int[] inputColumns = vectorColumnMapping.getInputColumns(); - int[] outputColumns = vectorColumnMapping.getOutputColumns(); - ArrayList<String> result = new ArrayList<String>(size); - for (int i = 0; i < size; i++) { - result.add(inputColumns[i] + " -> " + outputColumns[i]); - } - return result; - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "className", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getClassName() { - return vectorDesc.getVectorOpClass().getSimpleName(); - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "native", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public boolean getNative() { - return isNative; - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index b8c2d42..d7e404c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -19,18 +19,11 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; -import java.util.Arrays; import java.util.EnumSet; -import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.List; -import java.util.Set; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; -import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -128,6 +121,9 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { private static transient Logger LOG = LoggerFactory.getLogger(ReduceSinkDesc.class); + // Extra parameters only for vectorization. + private VectorReduceSinkDesc vectorDesc; + public ReduceSinkDesc() { } @@ -191,6 +187,14 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { return desc; } + public void setVectorDesc(VectorReduceSinkDesc vectorDesc) { + this.vectorDesc = vectorDesc; + } + + public VectorReduceSinkDesc getVectorDesc() { + return vectorDesc; + } + public java.util.ArrayList<java.lang.String> getOutputKeyColumnNames() { return outputKeyColumnNames; } @@ -486,105 +490,4 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { this.hasOrderBy = hasOrderBy; } - // Use LinkedHashSet to give predictable display order. - private static Set<String> vectorizableReduceSinkNativeEngines = - new LinkedHashSet<String>(Arrays.asList("tez", "spark")); - - public class ReduceSinkOperatorExplainVectorization extends OperatorExplainVectorization { - - private final ReduceSinkDesc reduceSinkDesc; - private final VectorReduceSinkDesc vectorReduceSinkDesc; - private final VectorReduceSinkInfo vectorReduceSinkInfo; - - private VectorizationCondition[] nativeConditions; - - public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc, VectorDesc vectorDesc) { - // VectorReduceSinkOperator is not native vectorized. - super(vectorDesc, ((VectorReduceSinkDesc) vectorDesc).reduceSinkKeyType()!= ReduceSinkKeyType.NONE); - this.reduceSinkDesc = reduceSinkDesc; - vectorReduceSinkDesc = (VectorReduceSinkDesc) vectorDesc; - vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo(); - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getKeyExpression() { - if (!isNative) { - return null; - } - return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkKeyExpressions()); - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "valueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getValueExpression() { - if (!isNative) { - return null; - } - return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions()); - } - - private VectorizationCondition[] createNativeConditions() { - - boolean enabled = vectorReduceSinkDesc.getIsVectorizationReduceSinkNativeEnabled(); - - String engine = vectorReduceSinkDesc.getEngine(); - String engineInSupportedCondName = - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableReduceSinkNativeEngines; - boolean engineInSupported = vectorizableReduceSinkNativeEngines.contains(engine); - - VectorizationCondition[] conditions = new VectorizationCondition[] { - new VectorizationCondition( - enabled, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED.varname), - new VectorizationCondition( - engineInSupported, - engineInSupportedCondName), - new VectorizationCondition( - !vectorReduceSinkDesc.getAcidChange(), - "Not ACID UPDATE or DELETE"), - new VectorizationCondition( - !vectorReduceSinkDesc.getHasBuckets(), - "No buckets"), - new VectorizationCondition( - !vectorReduceSinkDesc.getHasTopN(), - "No TopN"), - new VectorizationCondition( - vectorReduceSinkDesc.getUseUniformHash(), - "Uniform Hash"), - new VectorizationCondition( - !vectorReduceSinkDesc.getHasDistinctColumns(), - "No DISTINCT columns"), - new VectorizationCondition( - vectorReduceSinkDesc.getIsKeyBinarySortable(), - "BinarySortableSerDe for keys"), - new VectorizationCondition( - vectorReduceSinkDesc.getIsValueLazyBinary(), - "LazyBinarySerDe for values") - }; - return conditions; - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getNativeConditionsMet() { - if (nativeConditions == null) { - nativeConditions = createNativeConditions(); - } - return VectorizationCondition.getConditionsMet(nativeConditions); - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getNativeConditionsNotMet() { - if (nativeConditions == null) { - nativeConditions = createNativeConditions(); - } - return VectorizationCondition.getConditionsNotMet(nativeConditions); - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Reduce Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public ReduceSinkOperatorExplainVectorization getReduceSinkVectorization() { - if (vectorDesc == null) { - return null; - } - return new ReduceSinkOperatorExplainVectorization(this, vectorDesc); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index f4ab2a0..72fc4ca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -19,23 +19,17 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; -import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; -import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -95,9 +89,6 @@ public class ReduceWork extends BaseWork { private ObjectInspector keyObjectInspector = null; private ObjectInspector valueObjectInspector = null; - private boolean reduceVectorizationEnabled; - private String vectorReduceEngine; - /** * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing * to keySerializeInfo of the ReduceSink @@ -151,8 +142,7 @@ public class ReduceWork extends BaseWork { this.tagToValueDesc = tagToValueDesc; } - @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.SUMMARY_PATH) + @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getExecutionMode() { if (vectorMode) { if (llapMode) { @@ -170,8 +160,7 @@ public class ReduceWork extends BaseWork { return null; } - @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, - vectorization = Vectorization.OPERATOR_PATH) + @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Operator<?> getReducer() { return reducer; } @@ -263,81 +252,4 @@ public class ReduceWork extends BaseWork { public void setMaxReduceTasks(int maxReduceTasks) { this.maxReduceTasks = maxReduceTasks; } - - public void setReduceVectorizationEnabled(boolean reduceVectorizationEnabled) { - this.reduceVectorizationEnabled = reduceVectorizationEnabled; - } - - public boolean getReduceVectorizationEnabled() { - return reduceVectorizationEnabled; - } - - public void setVectorReduceEngine(String vectorReduceEngine) { - this.vectorReduceEngine = vectorReduceEngine; - } - - public String getVectorReduceEngine() { - return vectorReduceEngine; - } - - // Use LinkedHashSet to give predictable display order. - private static Set<String> reduceVectorizableEngines = - new LinkedHashSet<String>(Arrays.asList("tez", "spark")); - - public class ReduceExplainVectorization extends BaseExplainVectorization { - - private final ReduceWork reduceWork; - - private VectorizationCondition[] reduceVectorizationConditions; - - public ReduceExplainVectorization(ReduceWork reduceWork) { - super(reduceWork); - this.reduceWork = reduceWork; - } - - private VectorizationCondition[] createReduceExplainVectorizationConditions() { - - boolean enabled = reduceWork.getReduceVectorizationEnabled(); - - String engine = reduceWork.getVectorReduceEngine(); - String engineInSupportedCondName = - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + reduceVectorizableEngines; - - boolean engineInSupported = reduceVectorizableEngines.contains(engine); - - VectorizationCondition[] conditions = new VectorizationCondition[] { - new VectorizationCondition( - enabled, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED.varname), - new VectorizationCondition( - engineInSupported, - engineInSupportedCondName) - }; - return conditions; - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getEnableConditionsMet() { - if (reduceVectorizationConditions == null) { - reduceVectorizationConditions = createReduceExplainVectorizationConditions(); - } - return VectorizationCondition.getConditionsMet(reduceVectorizationConditions); - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getEnableConditionsNotMet() { - if (reduceVectorizationConditions == null) { - reduceVectorizationConditions = createReduceExplainVectorizationConditions(); - } - return VectorizationCondition.getConditionsNotMet(reduceVectorizationConditions); - } - } - - @Explain(vectorization = Vectorization.SUMMARY, displayName = "Reduce Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public ReduceExplainVectorization getReduceExplainVectorization() { - if (!getVectorizationExamined()) { - return null; - } - return new ReduceExplainVectorization(this); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java index 0601ce0..67a8327 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java @@ -19,11 +19,8 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; - import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; /** @@ -138,36 +135,4 @@ public class SelectDesc extends AbstractOperatorDesc { public void setSelStarNoCompute(boolean selStarNoCompute) { this.selStarNoCompute = selStarNoCompute; } - - - public class SelectOperatorExplainVectorization extends OperatorExplainVectorization { - - private final SelectDesc selectDesc; - private final VectorSelectDesc vectorSelectDesc; - - public SelectOperatorExplainVectorization(SelectDesc selectDesc, VectorDesc vectorDesc) { - // Native vectorization supported. - super(vectorDesc, true); - this.selectDesc = selectDesc; - vectorSelectDesc = (VectorSelectDesc) vectorDesc; - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "selectExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List<String> getSelectExpressions() { - return vectorExpressionsToStringList(vectorSelectDesc.getSelectExpressions()); - } - - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getProjectedOutputColumns() { - return Arrays.toString(vectorSelectDesc.getProjectedOutputColumns()); - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Select Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public SelectOperatorExplainVectorization getSelectVectorization() { - if (vectorDesc == null) { - return null; - } - return new SelectOperatorExplainVectorization(this, vectorDesc); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java index 260bc07..8833ae3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkHashTableSinkDesc.java @@ -18,9 +18,6 @@ package org.apache.hadoop.hive.ql.plan; -import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; - /** * Map Join operator Descriptor implementation. * @@ -46,26 +43,4 @@ public class SparkHashTableSinkDesc extends HashTableSinkDesc { public void setTag(byte tag) { this.tag = tag; } - - public class SparkHashTableSinkOperatorExplainVectorization extends OperatorExplainVectorization { - - private final HashTableSinkDesc filterDesc; - private final VectorSparkHashTableSinkDesc vectorHashTableSinkDesc; - - public SparkHashTableSinkOperatorExplainVectorization(HashTableSinkDesc filterDesc, VectorDesc vectorDesc) { - // Native vectorization supported. - super(vectorDesc, true); - this.filterDesc = filterDesc; - vectorHashTableSinkDesc = (VectorSparkHashTableSinkDesc) vectorDesc; - } - } - - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Spark Hash Table Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public SparkHashTableSinkOperatorExplainVectorization getHashTableSinkVectorization() { - if (vectorDesc == null) { - return null; - } - return new SparkHashTableSinkOperatorExplainVectorization(this, vectorDesc); - } - } http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java index 066e32d..bb5dd79 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -33,7 +34,6 @@ import java.util.Set; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import com.google.common.base.Preconditions; @@ -43,7 +43,7 @@ import com.google.common.base.Preconditions; * roots and and ReduceWork at all other nodes. */ @SuppressWarnings("serial") -@Explain(displayName = "Spark", vectorization = Vectorization.SUMMARY_PATH) +@Explain(displayName = "Spark") public class SparkWork extends AbstractOperatorDesc { private static int counter; private final String name; @@ -76,7 +76,7 @@ public class SparkWork extends AbstractOperatorDesc { /** * @return a map of "vertex name" to BaseWork */ - @Explain(displayName = "Vertices", vectorization = Vectorization.SUMMARY_PATH) + @Explain(displayName = "Vertices") public Map<String, BaseWork> getWorkMap() { Map<String, BaseWork> result = new LinkedHashMap<String, BaseWork>(); for (BaseWork w: getAllWork()) {