http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java new file mode 100644 index 0000000..78d543a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long avg() for a PTF group. + * + * Sum up non-null column values; group result is sum / non-null count. + */ +public class VectorPTFEvaluatorStreamingLongAvg extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long sum; + private int nonNullGroupCount; + protected double avg; + + public VectorPTFEvaluatorStreamingLongAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // Sum all non-null long column values for avg; maintain isGroupResultNull; after last row of + // last group batch compute the group avg when sum is non-null. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + double[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value. + isNull = false; + final double repeatedValue = longColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum += repeatedValue; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous AVG. + outputVector[0] = avg; + } + outputColVector.isRepeating = true; + } + } else if (longColVector.noNulls) { + isNull = false; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + sum += vector[i]; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + if (++i >= size) { + return; + } + } + + isNull = false; + long[] vector = longColVector.vector; + + sum += vector[i]; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i++] = avg; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum += vector[i]; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; + } else { + + // Continue previous AVG. + outputVector[i] = avg; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0; + nonNullGroupCount = 0; + avg = 0; + } +} \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java new file mode 100644 index 0000000..94d19b3 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long max() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingLongMax extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long max; + + public VectorPTFEvaluatorStreamingLongMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine maximum of all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. + final long repeatedMax = longColVector.vector[0]; + + if (isNull) { + max = repeatedMax; + isNull = false; + } else if (repeatedMax > max) { + max = repeatedMax; + } + outputVector[0] = max; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputVector[0] = max; + } + outputColVector.isRepeating = true; + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + final long value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + if (++i >= size) { + return; + } + } + + long[] vector = longColVector.vector; + + final long firstValue = vector[i]; + if (isNull) { + max = firstValue; + isNull = false; + } else if (firstValue > max) { + max = firstValue; + } + + // Output row i max. + outputVector[i++] = max; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + max = 0; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java new file mode 100644 index 0000000..2d7caf3 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long min() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingLongMin extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long min; + + public VectorPTFEvaluatorStreamingLongMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine minimum of all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. + final long repeatedMin = longColVector.vector[0]; + + if (isNull) { + min = repeatedMin; + isNull = false; + } else if (repeatedMin < min) { + min = repeatedMin; + } + outputVector[0] = min; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[0] = min; + } + outputColVector.isRepeating = true; + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + final long value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + outputVector[i] = min; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + if (++i >= size) { + return; + } + } + + long[] vector = longColVector.vector; + + final long firstValue = vector[i]; + if (isNull) { + min = firstValue; + isNull = false; + } else if (firstValue < min) { + min = firstValue; + } + + // Output row i min. + outputVector[i++] = min; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + + // Output row i min. + outputVector[i] = min; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + min = 0; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java new file mode 100644 index 0000000..76bca6b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long sum() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingLongSum extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long sum; + + public VectorPTFEvaluatorStreamingLongSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // Sum all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value. + isNull = false; + final long repeatedValue = longColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum += repeatedValue; + + // Output row i sum. + outputVector[i] = sum; + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[0] = sum; + } + outputColVector.isRepeating = true; + } + } else if (longColVector.noNulls) { + isNull = false; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + sum += vector[i]; + + // Output row i sum. + outputVector[i] = sum; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + if (++i >= size) { + return; + } + } + + isNull = false; + long[] vector = longColVector.vector; + + sum += vector[i]; + + // Output row i sum. + outputVector[i++] = sum; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum += vector[i]; + + // Output row i sum. + outputVector[i] = sum; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java index ff89775..b0340d2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -162,7 +162,10 @@ public class VectorPTFGroupBatches { // Streaming evaluators fill in their results during the evaluate call. for (VectorPTFEvaluatorBase evaluator : evaluators) { - evaluator.evaluateGroupBatch(batch, isLastGroupBatch); + evaluator.evaluateGroupBatch(batch); + if (isLastGroupBatch) { + evaluator.doLastBatchWork(); + } } } @@ -170,7 +173,10 @@ public class VectorPTFGroupBatches { throws HiveException { for (VectorPTFEvaluatorBase evaluator : evaluators) { - evaluator.evaluateGroupBatch(batch, isLastGroupBatch); + evaluator.evaluateGroupBatch(batch); + if (isLastGroupBatch) { + evaluator.doLastBatchWork(); + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 9bb104d..1956125 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -228,6 +228,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.mapred.InputFormat; @@ -2823,6 +2824,18 @@ public class Vectorizer implements PhysicalPlanResolver { setOperatorIssue(functionName + " only UNBOUNDED start frame is supported"); return false; } + List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i]; + final boolean isSingleParameter = + (exprNodeDescList != null && + exprNodeDescList.size() == 1); + final ExprNodeDesc singleExprNodeDesc = + (isSingleParameter ? exprNodeDescList.get(0) : null); + final TypeInfo singleTypeInfo = + (isSingleParameter ? singleExprNodeDesc.getTypeInfo() : null); + final PrimitiveCategory singlePrimitiveCategory = + (singleTypeInfo instanceof PrimitiveTypeInfo ? + ((PrimitiveTypeInfo) singleTypeInfo).getPrimitiveCategory() : null); + switch (windowFrameDef.getWindowType()) { case RANGE: if (!windowFrameDef.getEnd().isCurrentRow()) { @@ -2831,15 +2844,25 @@ public class Vectorizer implements PhysicalPlanResolver { } break; case ROWS: - if (!windowFrameDef.isEndUnbounded()) { - setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type"); - return false; + { + boolean isRowEndCurrent = + (windowFrameDef.getEnd().isCurrentRow() && + (supportedFunctionType == SupportedFunctionType.AVG || + supportedFunctionType == SupportedFunctionType.MAX || + supportedFunctionType == SupportedFunctionType.MIN || + supportedFunctionType == SupportedFunctionType.SUM) && + isSingleParameter && + singlePrimitiveCategory != null); + if (!isRowEndCurrent && !windowFrameDef.isEndUnbounded()) { + setOperatorIssue( + functionName + " UNBOUNDED end frame is required for ROWS window type"); + return false; + } } break; default: throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType()); } - List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i]; if (exprNodeDescList != null && exprNodeDescList.size() > 1) { setOperatorIssue("More than 1 argument expression of aggregation function " + functionName); return false; http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java index 830b8c8..53886fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java @@ -50,6 +50,19 @@ import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongMin; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongSum; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRank; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRowNumber; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongSum; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -138,27 +151,46 @@ public class VectorPTFDesc extends AbstractVectorDesc { WindowFrameDef windowFrameDef, Type columnVectorType, VectorExpression inputVectorExpression, int outputColumnNum) { + final boolean isRowEndCurrent = + (windowFrameDef.getWindowType() == WindowType.ROWS && + windowFrameDef.getEnd().isCurrentRow()); + VectorPTFEvaluatorBase evaluator; switch (functionType) { case ROW_NUMBER: - evaluator = new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); break; case RANK: - evaluator = new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); break; case DENSE_RANK: - evaluator = new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); break; case MIN: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -167,13 +199,25 @@ public class VectorPTFDesc extends AbstractVectorDesc { case MAX: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -182,13 +226,25 @@ public class VectorPTFDesc extends AbstractVectorDesc { case SUM: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -197,13 +253,25 @@ public class VectorPTFDesc extends AbstractVectorDesc { case AVG: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/test/results/clientpositive/llap/ptf.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/ptf.q.out b/ql/src/test/results/clientpositive/llap/ptf.q.out index 808d8c8..3fa2655 100644 --- a/ql/src/test/results/clientpositive/llap/ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/ptf.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -563,7 +563,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1616,7 +1616,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1792,7 +1792,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2029,7 +2029,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3812,7 +3812,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 5 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index e16f843..9f49f2e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -659,7 +659,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -1381,7 +1381,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2106,7 +2106,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2781,16 +2781,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2829,13 +2841,32 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double, col 2:double, col 2:double, col 2:double] + functionNames: [sum, min, max, avg] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0:string] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + streamingColumns: [3, 4, 5, 6] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3439,16 +3470,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -3487,13 +3530,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double, col 2:double, col 2:double, col 2:double] + functionNames: [sum, min, max, avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4, 5, 6] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4100,16 +4163,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double, bigint] Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 3] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -4148,13 +4223,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 3:double, col 3:double, col 3:double, col 3:double] + functionNames: [sum, min, max, avg] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 7, 2, 1, 3] + outputTypes: [double, double, double, double, string, string, double] + partitionExpressions: [ConstantVectorExpression(val 0) -> 8:int] + streamingColumns: [4, 5, 6, 7] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 3, 4, 5, 6, 7] Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/test/results/clientpositive/llap/vector_windowing.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 6a132b8..cf6af00 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -68,16 +68,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -112,13 +124,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -938,7 +971,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1139,7 +1172,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1443,7 +1476,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1786,16 +1819,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -1830,13 +1875,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1962,16 +2028,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2006,13 +2084,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -5422,7 +5521,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported vectorized: false Reduce Operator Tree: Select Operator @@ -8349,16 +8448,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -8385,15 +8496,38 @@ STAGE PLANS: name: min window function: GenericUDAFMinEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin] + functionInputExpressions: [col 3:double, col 3:double] + functionNames: [sum, min] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 0:string, col 1:string] + outputColumns: [4, 5, 1, 0, 2, 3] + outputTypes: [double, double, string, string, int, double] + streamingColumns: [4, 5] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sum_window_0 (type: double), min_window_1 (type: double), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) outputColumnNames: sum_window_0, min_window_1, _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 5, 1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0, 1] + valueColumnNums: [4, 5, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: sum_window_0 (type: double), min_window_1 (type: double), _col5 (type: int), _col7 (type: double) Reducer 3 @@ -8777,16 +8911,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -8807,13 +8953,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 3:double] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 1, 0, 2, 3] + outputTypes: [double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 2) -> 5:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9663,16 +9830,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [string, string] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, string] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: int) outputColumnNames: _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -9693,13 +9872,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 1:int] + functionNames: [sum] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:int] + outputColumns: [2, 1] + outputTypes: [bigint, int] + partitionExpressions: [ConstantVectorExpression(val Manufacturer#6) -> 3:string] + streamingColumns: [2] Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'Manufacturer#6' (type: string), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 2] + selectExpressions: ConstantVectorExpression(val Manufacturer#6) -> 4:string Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out index 2bb7730..5ea866b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -307,16 +307,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:double, VALUE._col4:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -344,13 +356,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:double, col 1:double] + functionNames: [rank, sum] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:double] + outputColumns: [3, 4, 0, 2, 1] + outputTypes: [int, double, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4] Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + selectExpressions: DoubleColSubtractDoubleScalar(col 4:double, val 5.0) -> 5:double Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1538,16 +1571,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: avg UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) outputColumnNames: _col2, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -1568,13 +1613,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double] + functionNames: [avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string, col 0:string] + outputColumns: [3, 0, 1, 2] + outputTypes: [double, string, string, double] + partitionExpressions: [col 0:string] + streamingColumns: [3] Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), avg_window_0 (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
