HIVE-20245: Vectorization: Fix NULL / Wrong Results issues in BETWEEN / IN (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/edc53cc0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/edc53cc0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/edc53cc0 Branch: refs/heads/master Commit: edc53cc0d95e983c371a224943dd866210f0c65c Parents: 65f02d2 Author: Matt McCline <[email protected]> Authored: Mon Jul 30 21:18:44 2018 -0500 Committer: Matt McCline <[email protected]> Committed: Mon Jul 30 21:18:44 2018 -0500 ---------------------------------------------------------------------- .../ExpressionTemplates/ColumnBetween.txt | 162 +++ .../Decimal64ColumnBetween.txt | 50 + .../DecimalColumnBetween.txt | 188 ++++ .../ExpressionTemplates/FilterColumnBetween.txt | 6 +- .../ExpressionTemplates/StringColumnBetween.txt | 191 ++++ .../TimestampColumnBetween.txt | 177 +++ .../TruncStringColumnBetween.txt | 31 + .../hive/ql/exec/vector/VectorExtractRow.java | 13 +- .../ql/exec/vector/VectorizationContext.java | 358 +++++-- .../exec/vector/expressions/CastLongToDate.java | 93 -- .../expressions/ConstantVectorExpression.java | 223 +++- .../expressions/Decimal64ColumnInList.java | 65 ++ .../FilterDecimal64ColumnBetween.java | 68 ++ .../FilterDecimal64ColumnInList.java | 68 ++ .../FilterDecimal64ColumnNotBetween.java | 68 ++ .../expressions/FilterLongColumnInList.java | 5 +- .../vector/expressions/LongColumnInList.java | 4 +- .../vector/expressions/StringColumnInList.java | 7 +- .../vector/expressions/VectorUDFDateLong.java | 35 - .../ql/exec/vector/udf/VectorUDFArgDesc.java | 68 +- .../hive/ql/udf/generic/GenericUDFDate.java | 3 +- .../hive/ql/udf/generic/GenericUDFToDate.java | 3 +- .../ql/exec/vector/VectorRandomRowSource.java | 171 ++- .../expressions/TestVectorArithmetic.java | 4 +- .../vector/expressions/TestVectorBetweenIn.java | 1014 ++++++++++++++++++ .../expressions/TestVectorCastStatement.java | 21 +- .../expressions/TestVectorDateAddSub.java | 6 +- 
.../vector/expressions/TestVectorDateDiff.java | 2 +- .../expressions/TestVectorFilterCompare.java | 2 +- .../TestVectorGenericDateExpressions.java | 7 +- .../expressions/TestVectorIfStatement.java | 6 +- .../vector/expressions/TestVectorNegative.java | 2 +- .../exec/vector/expressions/TestVectorNull.java | 513 +++++++++ .../expressions/TestVectorStringConcat.java | 2 +- .../expressions/TestVectorStringUnary.java | 2 +- .../vector/expressions/TestVectorSubStr.java | 2 +- .../expressions/TestVectorTimestampExtract.java | 2 +- .../llap/vector_annotate_stats_select.q.out | 19 +- .../clientpositive/llap/vector_between_in.q.out | 8 +- .../llap/vector_case_when_2.q.out | 4 +- .../clientpositive/llap/vector_udf_inline.q.out | 2 +- .../clientpositive/llap/vectorization_10.q.out | 2 +- .../clientpositive/llap/vectorization_7.q.out | 4 +- .../clientpositive/llap/vectorization_8.q.out | 4 +- .../llap/vectorization_short_regress.q.out | 4 +- .../clientpositive/llap/vectorized_casts.q.out | 2 +- .../llap/vectorized_date_funcs.q.out | 12 +- .../llap/vectorized_timestamp_funcs.q.out | 4 +- .../parquet_vectorization_10.q.out | 2 +- .../parquet_vectorization_7.q.out | 4 +- .../parquet_vectorization_8.q.out | 4 +- .../spark/parquet_vectorization_10.q.out | 2 +- .../spark/parquet_vectorization_7.q.out | 4 +- .../spark/parquet_vectorization_8.q.out | 4 +- ...k_vectorized_dynamic_partition_pruning.q.out | 2 +- .../spark/vector_between_in.q.out | 8 +- .../clientpositive/spark/vectorization_10.q.out | 2 +- .../spark/vectorization_short_regress.q.out | 4 +- .../spark/vectorized_timestamp_funcs.q.out | 4 +- .../clientpositive/vector_case_when_2.q.out | 4 +- .../clientpositive/vectorization_10.q.out | 2 +- .../clientpositive/vectorization_7.q.out | 4 +- .../clientpositive/vectorization_8.q.out | 4 +- .../clientpositive/vectorized_casts.q.out | 2 +- .../clientpositive/vectorized_date_funcs.q.out | 12 +- .../apache/hadoop/hive/tools/GenVectorCode.java | 89 ++ 66 files changed, 3475 
insertions(+), 389 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt new file mode 100644 index 0000000..1aa398a --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.<InputColumnVectorType>; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Output a boolean value indicating if a column is [NOT] BETWEEN two constants. 
+ */ +public class <ClassName> extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + + protected final <OperandType> leftValue; + protected final <OperandType> rightValue; + + public <ClassName>(int colNum, <OperandType> leftValue, <OperandType> rightValue, int outputColumnNum) { + super(outputColumnNum); + this.colNum = colNum; + this.leftValue = leftValue; + this.rightValue = rightValue; + } + + public <ClassName>() { + super(); + + // Dummy final assignments. + colNum = -1; + leftValue = 0; + rightValue = 0; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + <InputColumnVectorType> inputColVector = (<InputColumnVectorType>) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + int n = batch.size; + <OperandType>[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + final <OperandType> repeatValue = vector[0]; + outputVector[0] = <OptionalNot>(repeatValue < leftValue || repeatValue > rightValue) ? 0 : 1; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + final <OperandType> value = vector[i]; + outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + final <OperandType> value = vector[i]; + outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + final <OperandType> value = vector[i]; + outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1; + } + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; + final <OperandType> value = vector[i]; + outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1; + } + } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + for(int i = 0; i != n; i++) { + final <OperandType> value = vector[i]; + outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 
0 : 1; + } + } + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, colNum) + ", left " + leftValue + ", right " + rightValue; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // return null since this will be handled as a special case in VectorizationContext + return null; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt new file mode 100644 index 0000000..1763cbd --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +public class <ClassName> extends <BaseClassName> { + private static final long serialVersionUID = 1L; + + public <ClassName>() { + super(); + } + + public <ClassName>(int colNum, long leftValue, long rightValue, int outputColumnNum) { + super(colNum, leftValue, rightValue, outputColumnNum); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1]; + HiveDecimalWritable writable1 = new HiveDecimalWritable(); + writable1.deserialize64(leftValue, decimalTypeInfo1.scale()); + + DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2]; + HiveDecimalWritable writable2 = new HiveDecimalWritable(); + writable2.deserialize64(rightValue, decimalTypeInfo2.scale()); + return + getColumnParamString(0, colNum) + + ", decimal64Left " + leftValue + ", decimalLeft " + writable1.toString() + + ", decimal64Right " + rightValue + ", decimalRight " + writable2.toString(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt new file mode 100644 index 0000000..6fd1301 --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; + +import java.util.Arrays; +import java.util.HashSet; + +/** + * Output a boolean value indicating if a decimal column is [NOT] BETWEEN two constants. + */ +public class <ClassName> extends VectorExpression { + private static final long serialVersionUID = 1L; + + private final int inputColumn; + + protected final HiveDecimal leftValue; + protected final HiveDecimal rightValue; + + public <ClassName>() { + super(); + + // Dummy final assignments. + inputColumn = -1; + leftValue = null; + rightValue = null; + } + + /** + * Constructs the expression from the input column, the left and right BETWEEN bounds, and the output column. 
+ */ + public <ClassName>(int colNum, HiveDecimal leftValue, HiveDecimal rightValue, + int outputColumnNum) { + super(outputColumnNum); + this.inputColumn = colNum; + this.leftValue = leftValue; + this.rightValue = rightValue; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColumnVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + int n = batch.size; + HiveDecimalWritable[] vector = inputColumnVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColumnVector.isRepeating) { + if (inputColumnVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + final HiveDecimalWritable repeatValue = vector[0]; + outputVector[0] = + <OptionalNot>(DecimalUtil.compare(repeatValue, leftValue) < 0 || + DecimalUtil.compare(repeatValue, rightValue) > 0) ? 0 : 1; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColumnVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 
0 : 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1; + } + } + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1; + } + } + } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputIsNull[i]) { + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1; + } + } + } + } + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. 
+ return null; + } + + @Override + public String vectorExpressionParameters() { + return + getColumnParamString(0, inputColumn) + + ", left " + leftValue.toString() + ", right " + rightValue.toString(); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt index 0664cbf..47dd42f 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt @@ -34,12 +34,12 @@ public class <ClassName> extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; + protected final int colNum; // The comparison is of the form "column BETWEEN leftValue AND rightValue". // NOTE: These can be set later by FilterColumnBetweenDynamicValue.txt so they are not final. - private <OperandType> leftValue; - private <OperandType> rightValue; + protected <OperandType> leftValue; + protected <OperandType> rightValue; public <ClassName>(int colNum, <OperandType> leftValue, <OperandType> rightValue) { super(); http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt new file mode 100644 index 0000000..798cb95 --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +import java.util.Arrays; + +public class <ClassName> extends VectorExpression { + private static final long serialVersionUID = 1L; + + protected final int inputCol; + + private final byte[] left; + private final byte[] right; + + public <ClassName>() { + super(); + + // Dummy final assignments. 
+ inputCol = -1; + left = null; + right = null; + } + + public <ClassName>(int colNum, byte[] left, byte[] right, int outputColumnNum) { + super(outputColumnNum); + this.inputCol = colNum; + this.left = left; + this.right = right; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] start = inputColVector.start; + int[] length = inputColVector.length; + long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + final byte[] repeatBytes = vector[0]; + final int repeatStart = start[0]; + final int repeatLength = length[0]; + outputVector[0] = + <OptionalNot>(StringExpr.compare(repeatBytes, repeatStart, repeatLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, repeatBytes, repeatStart, repeatLength) < 0) ? 0 : 1; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } + } else { + System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputIsNull[i]) { + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } + } + } + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + @Override + public String vectorExpressionParameters() { + return + getColumnParamString(0, inputCol) + + ", left " + displayUtf8Bytes(left) + ", right " + displayUtf8Bytes(right); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt new file mode 100644 index 0000000..db42577 --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.HashSet; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Output a boolean value indicating if a timestamp column is [NOT] BETWEEN two constants. + */ +public class <ClassName> extends VectorExpression { + private static final long serialVersionUID = 1L; + + private final int inputCol; + + private final Timestamp leftValue; + private final Timestamp rightValue; + + public <ClassName>() { + super(); + + // Dummy final assignments. + inputCol = -1; + leftValue = null; + rightValue = null; + } + + /** + * Constructs the expression from the input column, the left and right BETWEEN bounds, and the output column. 
+ */ + public <ClassName>(int colNum, Timestamp leftValue, Timestamp rightValue, int outputColumnNum) { + super(outputColumnNum); + this.inputCol = colNum; + this.leftValue = leftValue; + this.rightValue = rightValue; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + outputVector[0] = + <OptionalNot>(inputColVector.compareTo(0, leftValue) < 0 || + inputColVector.compareTo(0, rightValue) > 0) ? 0 : 1; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = + <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 
0 : 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = + <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + outputVector[i] = + <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1; + } + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputVector[i] = + <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1; + } + } + } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputIsNull[i]) { + outputVector[i] = + <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1; + } + } + } + } + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. 
+ return null; + } + + @Override + public String vectorExpressionParameters() { + return + getColumnParamString(0, inputCol) + + ", left " + leftValue.toString() + ", right " + rightValue.toString(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt new file mode 100644 index 0000000..4ab8440 --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +public class <ClassName> extends <BaseClassName> { + private static final long serialVersionUID = 1L; + + public <ClassName>() { + super(); + } + + public <ClassName>(int colNum, byte[] left, byte[] right, int outputColumnNum) { + super(colNum, left, right, outputColumnNum); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index f5f4d72..e1482e0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -183,12 +183,19 @@ public class VectorExtractRow { * @param logicalColumnIndex * @return */ - private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) { + private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, + int logicalColumnIndex) { final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; final ColumnVector colVector = batch.cols[projectionColumnNum]; - return extractRowColumn( - colVector, typeInfos[logicalColumnIndex], objectInspectors[logicalColumnIndex], batchIndex); + final TypeInfo typeInfo = typeInfos[logicalColumnIndex]; + // try { + return extractRowColumn( + colVector, typeInfo, objectInspectors[logicalColumnIndex], batchIndex); + // } catch (Exception e){ + // throw new RuntimeException("Error evaluating column number " + projectionColumnNum + + // ", typeInfo " + typeInfo.toString() + ", batchIndex " + batchIndex); + // } } public Object extractRowColumn( 
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index e541217..97e4059 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1381,32 +1381,7 @@ public class VectorizationContext { } } - switch (vectorArgType) { - case INT_FAMILY: - return new ConstantVectorExpression(outCol, ((Number) constantValue).longValue(), typeInfo); - case DATE: - return new ConstantVectorExpression(outCol, DateWritableV2.dateToDays((Date) constantValue), typeInfo); - case TIMESTAMP: - return new ConstantVectorExpression(outCol, - ((org.apache.hadoop.hive.common.type.Timestamp) constantValue).toSqlTimestamp(), typeInfo); - case INTERVAL_YEAR_MONTH: - return new ConstantVectorExpression(outCol, - ((HiveIntervalYearMonth) constantValue).getTotalMonths(), typeInfo); - case INTERVAL_DAY_TIME: - return new ConstantVectorExpression(outCol, (HiveIntervalDayTime) constantValue, typeInfo); - case FLOAT_FAMILY: - return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue(), typeInfo); - case DECIMAL: - return new ConstantVectorExpression(outCol, (HiveDecimal) constantValue, typeInfo); - case STRING: - return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes(), typeInfo); - case CHAR: - return new ConstantVectorExpression(outCol, ((HiveChar) constantValue), typeInfo); - case VARCHAR: - return new ConstantVectorExpression(outCol, ((HiveVarchar) constantValue), typeInfo); - default: - throw new HiveException("Unsupported constant type: " + typeName + ", object class " + constantValue.getClass().getSimpleName()); - } + return 
ConstantVectorExpression.create(outCol, constantValue, typeInfo); } private VectorExpression getDynamicValueVectorExpression(ExprNodeDynamicValueDesc dynamicValueExpr, @@ -1431,33 +1406,30 @@ public class VectorizationContext { */ private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList) throws HiveException { + + if (childExprList.size() != 1) { + return null; + } ExprNodeDesc childExpr = childExprList.get(0); + if (!(childExpr instanceof ExprNodeColumnDesc)) { + + // Some vector operators like VectorSelectOperator optimize IdentityExpression out of + // their vector expression list and don't evaluate the children, so just return the + // child expression here instead of IdentityExpression. + return getVectorExpression(childExpr); + } + int identityCol; TypeInfo identityTypeInfo; DataTypePhysicalVariation identityDataTypePhysicalVariation; - VectorExpression v1 = null; - if (childExpr instanceof ExprNodeGenericFuncDesc) { - v1 = getVectorExpression(childExpr); - identityCol = v1.getOutputColumnNum(); - identityTypeInfo = v1.getOutputTypeInfo(); - identityDataTypePhysicalVariation = v1.getOutputDataTypePhysicalVariation(); - } else if (childExpr instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; - identityCol = getInputColumnIndex(colDesc.getColumn()); - identityTypeInfo = colDesc.getTypeInfo(); - - // CONSIDER: Validation of type information - - identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol); - } else { - throw new HiveException("Expression not supported: "+childExpr); - } - VectorExpression ve = new IdentityExpression(identityCol); + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; + identityCol = getInputColumnIndex(colDesc.getColumn()); + identityTypeInfo = colDesc.getTypeInfo(); - if (v1 != null) { - ve.setChildExpressions(new VectorExpression [] {v1}); - } + identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol); + + 
VectorExpression ve = new IdentityExpression(identityCol); ve.setInputTypeInfos(identityTypeInfo); ve.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation); @@ -1468,7 +1440,6 @@ public class VectorizationContext { return ve; } - private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException { if (exprNodeDesc instanceof ExprNodeColumnDesc) { int colIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc); @@ -1626,6 +1597,20 @@ public class VectorizationContext { VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass); + return createDecimal64VectorExpression( + vectorClass, childExprs, childrenMode, + isDecimal64ScaleEstablished, decimal64ColumnScale, + returnTypeInfo, returnDataTypePhysicalVariation); + } + + private VectorExpression createDecimal64VectorExpression(Class<?> vectorClass, + List<ExprNodeDesc> childExprs, VectorExpressionDescriptor.Mode childrenMode, + boolean isDecimal64ScaleEstablished, int decimal64ColumnScale, + TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation) + throws HiveException { + + final int numChildren = childExprs.size(); + /* * Custom build arguments. */ @@ -1659,8 +1644,7 @@ public class VectorizationContext { children.add(filterExpr); } arguments[i] = colIndex; - } else { - Preconditions.checkState(childExpr instanceof ExprNodeConstantDesc); + } else if (childExpr instanceof ExprNodeConstantDesc) { ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childExpr; if (typeInfo instanceof DecimalTypeInfo) { if (!isDecimal64ScaleEstablished) { @@ -1681,6 +1665,8 @@ public class VectorizationContext { (scalarValue == null) ? 
getConstantVectorExpression(null, typeInfo, childrenMode) : scalarValue; } + } else { + return null; } } @@ -2040,8 +2026,28 @@ public class VectorizationContext { return ve; } + // Handle strange case of TO_DATE(date) or CAST(date to DATE) + private VectorExpression getIdentityForDateToDate(List<ExprNodeDesc> childExprs, + TypeInfo returnTypeInfo) + throws HiveException { + if (childExprs.size() != 1) { + return null; + } + TypeInfo childTypeInfo = childExprs.get(0).getTypeInfo(); + if (childTypeInfo.getCategory() != Category.PRIMITIVE || + ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory() != PrimitiveCategory.DATE) { + return null; + } + if (returnTypeInfo.getCategory() != Category.PRIMITIVE || + ((PrimitiveTypeInfo) returnTypeInfo).getPrimitiveCategory() != PrimitiveCategory.DATE) { + return null; + } + return getIdentityExpression(childExprs); + } + private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, - List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { + List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) + throws HiveException { List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr); childExpr = castedChildren; @@ -2049,8 +2055,8 @@ public class VectorizationContext { //First handle special cases. If one of the special case methods cannot handle it, // it returns null. 
VectorExpression ve = null; - if (udf instanceof GenericUDFBetween && mode == VectorExpressionDescriptor.Mode.FILTER) { - ve = getBetweenFilterExpression(childExpr, mode, returnType); + if (udf instanceof GenericUDFBetween) { + ve = getBetweenExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { ve = getInExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIf) { @@ -2083,6 +2089,8 @@ public class VectorizationContext { ve = getCastToBinary(childExpr, returnType); } else if (udf instanceof GenericUDFTimestamp) { ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + } else if (udf instanceof GenericUDFDate || udf instanceof GenericUDFToDate) { + ve = getIdentityForDateToDate(childExpr, returnType); } if (ve != null) { return ve; @@ -2444,14 +2452,42 @@ public class VectorizationContext { expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); ((IDoubleInExpr) expr).setInListValues(inValsD); } else if (isDecimalFamily(colType)) { - cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDecimalColumnInList.class : DecimalColumnInList.class); - HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()]; - for (int i = 0; i != inValsD.length; i++) { - inValsD[i] = (HiveDecimal) getVectorTypeScalarValue( - (ExprNodeConstantDesc) childrenForInList.get(i)); + + final boolean tryDecimal64 = + checkExprNodeDescForDecimal64(colExpr); + if (tryDecimal64) { + cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? 
+ FilterDecimal64ColumnInList.class : Decimal64ColumnInList.class); + final int scale = ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale(); + expr = createDecimal64VectorExpression( + cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, + /* isDecimal64ScaleEstablished */ true, + /* decimal64ColumnScale */ scale, + returnType, DataTypePhysicalVariation.NONE); + if (expr != null) { + long[] inVals = new long[childrenForInList.size()]; + for (int i = 0; i != inVals.length; i++) { + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childrenForInList.get(i); + HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue(); + final long decimal64Scalar = + new HiveDecimalWritable(hiveDecimal).serialize64(scale); + inVals[i] = decimal64Scalar; + } + ((ILongInExpr) expr).setInListValues(inVals); + } + } + if (expr == null) { + cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? + FilterDecimalColumnInList.class : DecimalColumnInList.class); + expr = createVectorExpression( + cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); + HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()]; + for (int i = 0; i != inValsD.length; i++) { + inValsD[i] = (HiveDecimal) getVectorTypeScalarValue( + (ExprNodeConstantDesc) childrenForInList.get(i)); + } + ((IDecimalInExpr) expr).setInListValues(inValsD); } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); - ((IDecimalInExpr) expr).setInListValues(inValsD); } else if (isDateFamily(colType)) { cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class); long[] inVals = new long[childrenForInList.size()]; @@ -2973,21 +3009,32 @@ public class VectorizationContext { return null; } - /* Get a [NOT] BETWEEN filter expression. 
This is treated as a special case + private VectorExpression tryDecimal64Between(VectorExpressionDescriptor.Mode mode, boolean isNot, + ExprNodeDesc colExpr, List<ExprNodeDesc> childrenAfterNot, TypeInfo returnTypeInfo) + throws HiveException { + final Class<?> cl; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = (isNot ? Decimal64ColumnNotBetween.class : Decimal64ColumnBetween.class); + } else { + cl = (isNot ? FilterDecimal64ColumnNotBetween.class : FilterDecimal64ColumnBetween.class); + } + return + createDecimal64VectorExpression( + cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, + /* isDecimal64ScaleEstablished */ true, + /* decimal64ColumnScale */ ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale(), + returnTypeInfo, DataTypePhysicalVariation.NONE); + } + + /* Get a [NOT] BETWEEN filter or projection expression. This is treated as a special case * because the NOT is actually specified in the expression tree as the first argument, * and we don't want any runtime cost for that. So creating the VectorExpression * needs to be done differently than the standard way where all arguments are * passed to the VectorExpression constructor. */ - private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) - throws HiveException { - - if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - - // Projection mode is not yet supported for [NOT] BETWEEN. Return null so Vectorizer - // knows to revert to row-at-a-time execution. 
- return null; - } + private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr, + VectorExpressionDescriptor.Mode mode, TypeInfo returnType) + throws HiveException { boolean hasDynamicValues = false; @@ -2995,6 +3042,11 @@ public class VectorizationContext { if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) && (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) { hasDynamicValues = true; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + + // Projection mode is not applicable. + return null; + } } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || !(childExpr.get(3) instanceof ExprNodeConstantDesc)) { return null; @@ -3021,7 +3073,7 @@ public class VectorizationContext { } List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>(); - + boolean wereCastUdfs = false; for (ExprNodeDesc desc: childExpr.subList(1, 4)) { if (commonType.equals(desc.getTypeInfo())) { castChildren.add(desc); @@ -3030,6 +3082,7 @@ public class VectorizationContext { ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf, Arrays.asList(new ExprNodeDesc[] { desc })); castChildren.add(engfd); + wereCastUdfs = true; } } String colType = commonType.getTypeName(); @@ -3040,55 +3093,141 @@ public class VectorizationContext { // determine class Class<?> cl = null; if (isIntFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterLongColumnBetweenDynamicValue.class : - FilterLongColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = LongColumnBetween.class; + } else { + cl = (hasDynamicValues ? 
+ FilterLongColumnBetweenDynamicValue.class : + FilterLongColumnBetween.class); + } } else if (isIntFamily(colType) && notKeywordPresent) { - cl = FilterLongColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = LongColumnNotBetween.class; + } else { + cl = FilterLongColumnNotBetween.class; + } } else if (isFloatFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDoubleColumnBetweenDynamicValue.class : - FilterDoubleColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = DoubleColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterDoubleColumnBetweenDynamicValue.class : + FilterDoubleColumnBetween.class); + } } else if (isFloatFamily(colType) && notKeywordPresent) { - cl = FilterDoubleColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = DoubleColumnNotBetween.class; + } else { + cl = FilterDoubleColumnNotBetween.class; + } } else if (colType.equals("string") && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterStringColumnBetweenDynamicValue.class : - FilterStringColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = StringColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterStringColumnBetweenDynamicValue.class : + FilterStringColumnBetween.class); + } } else if (colType.equals("string") && notKeywordPresent) { - cl = FilterStringColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = StringColumnNotBetween.class; + } else { + cl = FilterStringColumnNotBetween.class; + } } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterVarCharColumnBetweenDynamicValue.class : - FilterVarCharColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = VarCharColumnBetween.class; + } else { + cl = (hasDynamicValues ? 
+ FilterVarCharColumnBetweenDynamicValue.class : + FilterVarCharColumnBetween.class); + } } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) { - cl = FilterVarCharColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = VarCharColumnNotBetween.class; + } else { + cl = FilterVarCharColumnNotBetween.class; + } } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterCharColumnBetweenDynamicValue.class : - FilterCharColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = CharColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterCharColumnBetweenDynamicValue.class : + FilterCharColumnBetween.class); + } } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) { - cl = FilterCharColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = CharColumnNotBetween.class; + } else { + cl = FilterCharColumnNotBetween.class; + } } else if (colType.equals("timestamp") && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterTimestampColumnBetweenDynamicValue.class : - FilterTimestampColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = TimestampColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterTimestampColumnBetweenDynamicValue.class : + FilterTimestampColumnBetween.class); + } } else if (colType.equals("timestamp") && notKeywordPresent) { - cl = FilterTimestampColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = TimestampColumnNotBetween.class; + } else { + cl = FilterTimestampColumnNotBetween.class; + } } else if (isDecimalFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? 
- FilterDecimalColumnBetweenDynamicValue.class : - FilterDecimalColumnBetween.class); + final boolean tryDecimal64 = + checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues; + if (tryDecimal64) { + VectorExpression decimal64VecExpr = + tryDecimal64Between( + mode, /* isNot */ false, colExpr, childrenAfterNot, + returnType); + if (decimal64VecExpr != null) { + return decimal64VecExpr; + } + } + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = DecimalColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterDecimalColumnBetweenDynamicValue.class : + FilterDecimalColumnBetween.class); + } } else if (isDecimalFamily(colType) && notKeywordPresent) { - cl = FilterDecimalColumnNotBetween.class; + final boolean tryDecimal64 = + checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues; + if (tryDecimal64) { + VectorExpression decimal64VecExpr = + tryDecimal64Between( + mode, /* isNot */ true, colExpr, childrenAfterNot, returnType); + if (decimal64VecExpr != null) { + return decimal64VecExpr; + } + } + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = DecimalColumnNotBetween.class; + } else { + cl = FilterDecimalColumnNotBetween.class; + } } else if (isDateFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDateColumnBetweenDynamicValue.class : - FilterLongColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = LongColumnBetween.class; + } else { + cl = (hasDynamicValues ? 
+ FilterDateColumnBetweenDynamicValue.class : + FilterLongColumnBetween.class); + } } else if (isDateFamily(colType) && notKeywordPresent) { - cl = FilterLongColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = LongColumnNotBetween.class; + } else { + cl = FilterLongColumnNotBetween.class; + } } - return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression( + cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } private boolean isCondExpr(ExprNodeDesc exprNodeDesc) { @@ -3379,11 +3518,12 @@ public class VectorizationContext { argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn())); } else if (child instanceof ExprNodeConstantDesc) { // this is a constant (or null) - if (child.getTypeInfo().getCategory() != Category.PRIMITIVE) { + if (child.getTypeInfo().getCategory() != Category.PRIMITIVE && + child.getTypeInfo().getCategory() != Category.STRUCT) { // Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant. throw new HiveException( - "Unable to vectorize custom UDF. Complex type constants not supported: " + child); + "Unable to vectorize custom UDF. 
LIST, MAP, and UNION type constants not supported: " + child); } argDescs[i].setConstant((ExprNodeConstantDesc) child); } else if (child instanceof ExprNodeDynamicValueDesc) { @@ -3523,7 +3663,11 @@ public class VectorizationContext { private long getIntFamilyScalarAsLong(ExprNodeConstantDesc constDesc) throws HiveException { Object o = getScalarValue(constDesc); - if (o instanceof Integer) { + if (o instanceof Byte) { + return (Byte) o; + } if (o instanceof Short) { + return (Short) o; + } else if (o instanceof Integer) { return (Integer) o; } else if (o instanceof Long) { return (Long) o; http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java deleted file mode 100644 index f99bd69..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; - -import java.sql.Date; - -/** - * Casts a timestamp and date vector to a date vector. - */ -public class CastLongToDate extends VectorExpression { - private static final long serialVersionUID = 1L; - - private int inputColumn; - private transient Date date = new Date(0); - - public CastLongToDate() { - super(); - } - - public CastLongToDate(int inputColumn, int outputColumnNum) { - super(outputColumnNum); - this.inputColumn = inputColumn; - } - - @Override - public void evaluate(VectorizedRowBatch batch) throws HiveException { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - LongColumnVector inV = (LongColumnVector) batch.cols[inputColumn]; - int[] sel = batch.selected; - int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; - - if (n == 0) { - - // Nothing to do - return; - } - - PrimitiveCategory primitiveCategory = - ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory(); - switch (primitiveCategory) { - case DATE: - inV.copySelected(batch.selectedInUse, batch.selected, batch.size, outV); - break; - default: - throw new Error("Unsupported input type " + primitiveCategory.name()); - } - } - - @Override - public String vectorExpressionParameters() { - return getColumnParamString(0, inputColumn); - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - 
b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DATE) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); - return b.build(); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index 8ae8a54..0a16e08 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -20,14 +20,23 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -45,6 +54,7 @@ public class ConstantVectorExpression extends VectorExpression { private HiveDecimal decimalValue = null; private Timestamp timestampValue = null; private HiveIntervalDayTime intervalDayTimeValue = null; + private ConstantVectorExpression[] structValue; private boolean isNullValue = false; private final ColumnVector.Type type; @@ -122,15 +132,135 @@ public class ConstantVectorExpression extends VectorExpression { } /* + public static VectorExpression createList(int outputColumnNum, Object value, TypeInfo outputTypeInfo) + throws HiveException { + ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo); + result.setListValue(value); + return result; + } + + public static VectorExpression createMap(int outputColumnNum, Object value, TypeInfo outputTypeInfo) + throws HiveException { + ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo); + result.setMapValue(value); + return result; + } + */ + + public static ConstantVectorExpression createStruct(int outputColumnNum, Object value, + TypeInfo outputTypeInfo) + throws HiveException { + ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo); + result.setStructValue(value); + return result; + } + + /* + public static VectorExpression createUnion(int outputColumnNum, Object value, TypeInfo outputTypeInfo) + throws HiveException { + ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo); + result.setUnionValue(value); + return result; + } + */ + + public static ConstantVectorExpression create(int outputColumnNum, Object constantValue, TypeInfo outputTypeInfo) + throws HiveException { + + if (constantValue == null) { + return new ConstantVectorExpression(outputColumnNum, 
outputTypeInfo, true); + } + + Category category = outputTypeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) outputTypeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + case BOOLEAN: + if (((Boolean) constantValue).booleanValue()) { + return new ConstantVectorExpression(outputColumnNum, 1, outputTypeInfo); + } else { + return new ConstantVectorExpression(outputColumnNum, 0, outputTypeInfo); + } + case BYTE: + case SHORT: + case INT: + case LONG: + return new ConstantVectorExpression( + outputColumnNum, ((Number) constantValue).longValue(), outputTypeInfo); + case FLOAT: + case DOUBLE: + return new ConstantVectorExpression( + outputColumnNum, ((Number) constantValue).doubleValue(), outputTypeInfo); + case DATE: + return new ConstantVectorExpression( + outputColumnNum, DateWritableV2.dateToDays((Date) constantValue), outputTypeInfo); + case TIMESTAMP: + return new ConstantVectorExpression( + outputColumnNum, + ((org.apache.hadoop.hive.common.type.Timestamp) constantValue).toSqlTimestamp(), + outputTypeInfo); + case DECIMAL: + return new ConstantVectorExpression( + outputColumnNum, (HiveDecimal) constantValue, outputTypeInfo); + case STRING: + return new ConstantVectorExpression( + outputColumnNum, ((String) constantValue).getBytes(), outputTypeInfo); + case VARCHAR: + return new ConstantVectorExpression( + outputColumnNum, ((HiveVarchar) constantValue), outputTypeInfo); + case CHAR: + return new ConstantVectorExpression( + outputColumnNum, ((HiveChar) constantValue), outputTypeInfo); + case BINARY: + return new ConstantVectorExpression( + outputColumnNum, ((byte[]) constantValue), outputTypeInfo); + case INTERVAL_YEAR_MONTH: + return new ConstantVectorExpression( + outputColumnNum, + ((HiveIntervalYearMonth) constantValue).getTotalMonths(), + outputTypeInfo); + case INTERVAL_DAY_TIME: + return new ConstantVectorExpression( 
+ outputColumnNum, + (HiveIntervalDayTime) constantValue, + outputTypeInfo); + case VOID: + case TIMESTAMPLOCALTZ: + case UNKNOWN: + default: + throw new RuntimeException("Unexpected primitive category " + primitiveCategory); + } + } + // case LIST: + // return ConstantVectorExpression.createList( + // outputColumnNum, constantValue, outputTypeInfo); + // case MAP: + // return ConstantVectorExpression.createMap( + // outputColumnNum, constantValue, outputTypeInfo); + case STRUCT: + return ConstantVectorExpression.createStruct( + outputColumnNum, constantValue, outputTypeInfo); + // case UNION: + // return ConstantVectorExpression.createUnion( + // outputColumnNum, constantValue, outputTypeInfo); + default: + throw new RuntimeException("Unexpected category " + category); + } + } + + /* * In the following evaluate* methods, since we are supporting scratch column reuse, we must * assume the column may have noNulls of false and some isNull entries true. * * So, do a proper assignments. */ - private void evaluateLong(VectorizedRowBatch vrg) { + private void evaluateLong(ColumnVector colVector) { - LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum]; + LongColumnVector cv = (LongColumnVector) colVector; cv.isRepeating = true; if (!isNullValue) { cv.isNull[0] = false; @@ -141,8 +271,8 @@ public class ConstantVectorExpression extends VectorExpression { } } - private void evaluateDouble(VectorizedRowBatch vrg) { - DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum]; + private void evaluateDouble(ColumnVector colVector) { + DoubleColumnVector cv = (DoubleColumnVector) colVector; cv.isRepeating = true; if (!isNullValue) { cv.isNull[0] = false; @@ -153,8 +283,8 @@ public class ConstantVectorExpression extends VectorExpression { } } - private void evaluateBytes(VectorizedRowBatch vrg) { - BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum]; + private void evaluateBytes(ColumnVector colVector) { + BytesColumnVector cv = 
(BytesColumnVector) colVector; cv.isRepeating = true; cv.initBuffer(); if (!isNullValue) { @@ -166,8 +296,8 @@ public class ConstantVectorExpression extends VectorExpression { } } - private void evaluateDecimal(VectorizedRowBatch vrg) { - DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum]; + private void evaluateDecimal(ColumnVector colVector) { + DecimalColumnVector dcv = (DecimalColumnVector) colVector; dcv.isRepeating = true; if (!isNullValue) { dcv.isNull[0] = false; @@ -178,8 +308,8 @@ public class ConstantVectorExpression extends VectorExpression { } } - private void evaluateTimestamp(VectorizedRowBatch vrg) { - TimestampColumnVector tcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; + private void evaluateTimestamp(ColumnVector colVector) { + TimestampColumnVector tcv = (TimestampColumnVector) colVector; tcv.isRepeating = true; if (!isNullValue) { tcv.isNull[0] = false; @@ -190,8 +320,8 @@ public class ConstantVectorExpression extends VectorExpression { } } - private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { - IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum]; + private void evaluateIntervalDayTime(ColumnVector colVector) { + IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) colVector; dcv.isRepeating = true; if (!isNullValue) { dcv.isNull[0] = false; @@ -202,8 +332,23 @@ public class ConstantVectorExpression extends VectorExpression { } } - private void evaluateVoid(VectorizedRowBatch vrg) { - VoidColumnVector voidColVector = (VoidColumnVector) vrg.cols[outputColumnNum]; + private void evaluateStruct(ColumnVector colVector) { + StructColumnVector scv = (StructColumnVector) colVector; + scv.isRepeating = true; + if (!isNullValue) { + scv.isNull[0] = false; + final int size = structValue.length; + for (int i = 0; i < size; i++) { + structValue[i].evaluateColumn(scv.fields[i]); + } + } else { + scv.isNull[0] = true; + scv.noNulls = false; + } + } + + private void 
evaluateVoid(ColumnVector colVector) { + VoidColumnVector voidColVector = (VoidColumnVector) colVector; voidColVector.isRepeating = true; voidColVector.isNull[0] = true; voidColVector.noNulls = false; @@ -211,27 +356,34 @@ public class ConstantVectorExpression extends VectorExpression { @Override public void evaluate(VectorizedRowBatch vrg) { + evaluateColumn(vrg.cols[outputColumnNum]); + } + + private void evaluateColumn(ColumnVector colVector) { switch (type) { case LONG: - evaluateLong(vrg); + evaluateLong(colVector); break; case DOUBLE: - evaluateDouble(vrg); + evaluateDouble(colVector); break; case BYTES: - evaluateBytes(vrg); + evaluateBytes(colVector); break; case DECIMAL: - evaluateDecimal(vrg); + evaluateDecimal(colVector); break; case TIMESTAMP: - evaluateTimestamp(vrg); + evaluateTimestamp(colVector); break; case INTERVAL_DAY_TIME: - evaluateIntervalDayTime(vrg); + evaluateIntervalDayTime(colVector); + break; + case STRUCT: + evaluateStruct(colVector); break; case VOID: - evaluateVoid(vrg); + evaluateVoid(colVector); break; default: throw new RuntimeException("Unexpected column vector type " + type); @@ -287,6 +439,17 @@ public class ConstantVectorExpression extends VectorExpression { return intervalDayTimeValue; } + public void setStructValue(Object structValue) throws HiveException { + StructTypeInfo structTypeInfo = (StructTypeInfo) outputTypeInfo; + ArrayList<TypeInfo> fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos(); + final int size = fieldTypeInfoList.size(); + this.structValue = new ConstantVectorExpression[size]; + List<Object> fieldValueList = (List<Object>) structValue; + for (int i = 0; i < size; i++) { + this.structValue[i] = create(i, fieldValueList.get(i), fieldTypeInfoList.get(i)); + } + } + @Override public String vectorExpressionParameters() { String value; @@ -313,6 +476,24 @@ public class ConstantVectorExpression extends VectorExpression { case INTERVAL_DAY_TIME: value = intervalDayTimeValue.toString(); break; + case 
STRUCT: + { + StringBuilder sb = new StringBuilder(); + sb.append("STRUCT {"); + boolean isFirst = true; + final int size = structValue.length; + for (int i = 0; i < size; i++) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append(structValue[i].toString()); + } + sb.append("}"); + value = sb.toString(); + } + break; default: throw new RuntimeException("Unknown vector column type " + type); } http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java new file mode 100644 index 0000000..5632cfb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +/** + * Output a boolean value indicating if a column is IN a list of constants. + */ +public class Decimal64ColumnInList extends LongColumnInList { + + private static final long serialVersionUID = 1L; + + public Decimal64ColumnInList(int colNum, int outputColumnNum) { + super(colNum, outputColumnNum); + } + + public Decimal64ColumnInList() { + super(); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) inputTypeInfos[0]; + final int scale = decimalTypeInfo.scale(); + HiveDecimalWritable writable = new HiveDecimalWritable(); + StringBuilder sb = new StringBuilder(); + sb.append(getColumnParamString(0, colNum)); + sb.append(", values ["); + for (long value : inListValues) { + writable.deserialize64(value, scale); + sb.append(", decimal64Val "); + sb.append(value); + sb.append(", decimalVal "); + sb.append(writable.toString()); + } + sb.append("]"); + return sb.toString(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // return null since this will be handled as a special case in VectorizationContext + return null; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java new file mode 100644 index 0000000..c26a93a --- /dev/null +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +public class FilterDecimal64ColumnBetween extends FilterLongColumnBetween { + + private static final long serialVersionUID = 1L; + + public FilterDecimal64ColumnBetween(int colNum, long leftValue, long rightValue) { + super(colNum, leftValue, rightValue); + } + + public FilterDecimal64ColumnBetween() { + super(); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1]; + HiveDecimalWritable writable1 = new HiveDecimalWritable(); + writable1.deserialize64(leftValue, decimalTypeInfo1.scale()); + + DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2]; + HiveDecimalWritable writable2 = new 
HiveDecimalWritable(); + writable2.deserialize64(rightValue, decimalTypeInfo2.scale()); + return + getColumnParamString(0, colNum) + + ", decimal64LeftVal " + leftValue + ", decimalLeftVal " + writable1.toString() + + ", decimal64RightVal " + rightValue + ", decimalRightVal " + writable2.toString(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +}
