Repository: hive Updated Branches: refs/heads/master a3060b307 -> 95dadac9f
HIVE-15939: Make cast expressions comply more to sql2011 (Zoltan Haindrich, reviewed by Ashutosh Chauhan) Signed-off-by: Zoltan Haindrich <k...@rxd.hu> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/95dadac9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/95dadac9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/95dadac9 Branch: refs/heads/master Commit: 95dadac9fa81702ce3af0d8759bee5082f2f2013 Parents: c2fc0fb Author: Zoltan Haindrich <k...@rxd.hu> Authored: Wed Dec 6 09:36:54 2017 +0100 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Wed Dec 6 09:47:55 2017 +0100 ---------------------------------------------------------------------- .../ql/exec/vector/VectorizationContext.java | 56 +------ .../vector/expressions/CastStringToBoolean.java | 49 ++++++ .../vector/expressions/FuncStringToLong.java | 145 +++++++++++++++++ .../vector/expressions/VectorExpression.java | 7 - .../apache/hadoop/hive/ql/udf/UDFToBoolean.java | 11 +- .../expressions/TestVectorMathFunctions.java | 24 +++ .../vector/expressions/TestVectorTypeCasts.java | 17 ++ .../queries/clientpositive/udf_to_boolean.q | 6 + .../vector_udf_string_to_boolean.q | 23 +++ .../clientpositive/llap/vectorized_casts.q.out | 4 +- .../results/clientpositive/udf_to_boolean.q.out | 54 +++++++ .../clientpositive/vector_empty_where.q.out | 2 +- .../vector_udf_string_to_boolean.q.out | 156 +++++++++++++++++++ .../clientpositive/vectorized_casts.q.out | 4 +- .../PrimitiveObjectInspectorUtils.java | 59 ++++++- .../TestPrimitiveObjectInspectorUtils.java | 6 + 16 files changed, 549 insertions(+), 74 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 5c7d7ee..0ad6816 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -52,47 +52,11 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType; import org.apache.hadoop.hive.ql.exec.vector.expressions.*; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilter; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilterMerge; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal64ToDecimal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalComplete; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalFinal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalPartial2; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDoubleComplete; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgFinal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLongComplete; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgPartial2; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgTimestampComplete; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDecimal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDouble; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDecimal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDouble; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumDouble; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPartial2; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -100,7 +64,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.*; import org.apache.hadoop.hive.ql.udf.generic.*; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -2734,24 +2697,11 @@ public class VectorizationContext { } // Long and double are handled using descriptors, string needs to be specially handled. if (isStringFamily(inputType)) { - // string casts to false if it is 0 characters long, otherwise true - VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr, - VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.longTypeInfo); - int outputColumnNum = ocm.allocateOutputColumn(TypeInfoFactory.booleanTypeInfo); - VectorExpression lenToBoolExpr = - new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumnNum(), outputColumnNum); + VectorExpression lenExpr = createVectorExpression(CastStringToBoolean.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo); - lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr}); - - lenToBoolExpr.setInputTypeInfos(lenExpr.getOutputTypeInfo()); - lenToBoolExpr.setInputDataTypePhysicalVariations(lenExpr.getOutputDataTypePhysicalVariation()); - - lenToBoolExpr.setOutputTypeInfo(TypeInfoFactory.booleanTypeInfo); - lenToBoolExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); - - ocm.freeOutputColumn(lenExpr.getOutputColumnNum()); - return lenToBoolExpr; + return lenExpr; } return null; } http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java new file mode 100644 index 0000000..7a44035 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; + +/** + * Type cast string to boolean. + */ +public class CastStringToBoolean extends FuncStringToLong { + private static final long serialVersionUID = 1L; + + public CastStringToBoolean() { + super(); + } + + public CastStringToBoolean(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(LongColumnVector outV, BytesColumnVector inV, int offset) { + + int start = inV.start[offset]; + int length = inV.length[offset]; + byte[] s = inV.vector[offset]; + boolean b = PrimitiveObjectInspectorUtils.parseBoolean(s, start, length); + outV.vector[offset] = b ? 1 : 0; + + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java new file mode 100644 index 0000000..5c0a7fa --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Superclass to support vectorized functions that take a long + * and return a string, optionally with additional configuration arguments. + * Used for cast(string), length(string), etc + */ +public abstract class FuncStringToLong extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int inputCol; + private int outputCol; + + public FuncStringToLong(int inputCol, int outputCol) { + super(outputCol); + this.inputCol = inputCol; + this.outputCol = outputCol; + } + + public FuncStringToLong() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inV = (BytesColumnVector) batch.cols[inputCol]; + int[] sel = batch.selected; + int n = batch.size; + LongColumnVector outV = (LongColumnVector) batch.cols[outputCol]; + + if (n == 0) { + //Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for (int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. + outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for (int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + /* Evaluate result for position i (using bytes[] to avoid storage allocation costs) + * and set position i of the output vector to the result. + */ + protected abstract void func(LongColumnVector outV, BytesColumnVector inV, int i); + + public int getOutputCol() { + return outputCol; + } + + public void setOutputCol(int outputCol) { + this.outputCol = outputCol; + } + + public int getInputCol() { + return inputCol; + } + + public void setInputCol(int inputCol) { + this.inputCol = inputCol; + } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1) + .setArgumentTypes(VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) + .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index b5399d6..8c2894b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -21,13 +21,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Map; - -import com.google.common.collect.ImmutableMap; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -35,8 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.metadata.HiveException; /** http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java index d291e36..57949d9 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToBoolean; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToBooleanViaDoubleToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong; @@ -33,8 +34,8 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -47,7 +48,7 @@ import org.apache.hadoop.io.Text; * */ @VectorizedExpressions({CastLongToBooleanViaLongToLong.class, - CastDateToBooleanViaLongToLong.class, CastTimestampToBoolean.class, + CastDateToBooleanViaLongToLong.class, CastTimestampToBoolean.class, CastStringToBoolean.class, CastDoubleToBooleanViaDoubleToLong.class, CastDecimalToBoolean.class, CastStringToLong.class}) public class UDFToBoolean extends UDF { private final BooleanWritable booleanWritable = new BooleanWritable(); @@ -172,10 +173,10 @@ public class UDFToBoolean extends UDF { public BooleanWritable evaluate(Text i) { if (i == null) { return null; - } else { - booleanWritable.set(i.getLength() != 0); - return booleanWritable; } + boolean b = PrimitiveObjectInspectorUtils.parseBoolean(i.getBytes(), 0, i.getLength()); + booleanWritable.set(b); + return booleanWritable; } public BooleanWritable evaluate(DateWritable d) { http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java index fb79116..e89f2e5 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.Arrays; import java.util.Random; @@ -253,6 +254,29 @@ public class TestVectorMathFunctions { return batch; } + public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + BytesColumnVector inV; + LongColumnVector outV; + inV = new BytesColumnVector(); + outV = new LongColumnVector(); + inV.initBuffer(); + inV.setVal(0, StandardCharsets.UTF_8.encode("true").array()); + inV.setVal(1, StandardCharsets.UTF_8.encode("TRUE").array()); + inV.setVal(2, StandardCharsets.UTF_8.encode("TrUe").array()); + inV.setVal(3, StandardCharsets.UTF_8.encode("false").array()); + inV.setVal(4, StandardCharsets.UTF_8.encode("FALSE").array()); + inV.setVal(5, StandardCharsets.UTF_8.encode("FaLsE").array()); + inV.setVal(6, StandardCharsets.UTF_8.encode("").array()); + inV.setVal(7, StandardCharsets.UTF_8.encode("Other").array()); + + batch.cols[0] = inV; + batch.cols[1] = outV; + + batch.size = 8; + return batch; + } + public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) { Random r = new Random(345); VectorizedRowBatch batch = new VectorizedRowBatch(2); http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java index fb8035b..6952b45 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java @@ -128,6 +128,23 @@ public class TestVectorTypeCasts { } @Test + public void testCastStringToBoolean() { + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchStringInLongOut(); + LongColumnVector resultV = (LongColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new CastStringToBoolean(0, 1); + expr.evaluate(b); + Assert.assertEquals(1, resultV.vector[0]); // true + Assert.assertEquals(1, resultV.vector[1]); // true + Assert.assertEquals(1, resultV.vector[2]); // true + Assert.assertEquals(0, resultV.vector[3]); // false + Assert.assertEquals(0, resultV.vector[4]); // false + Assert.assertEquals(0, resultV.vector[5]); // false + Assert.assertEquals(0, resultV.vector[6]); // false + Assert.assertEquals(1, resultV.vector[7]); // true + } + + @Test public void testCastLongToTimestamp() { long[] longValues = new long[500]; VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInTimestampOut(longValues); http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/queries/clientpositive/udf_to_boolean.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/udf_to_boolean.q b/ql/src/test/queries/clientpositive/udf_to_boolean.q index 8bea7ab..1a50d05 100644 --- a/ql/src/test/queries/clientpositive/udf_to_boolean.q +++ b/ql/src/test/queries/clientpositive/udf_to_boolean.q @@ -12,6 +12,9 @@ SELECT CAST(CAST(-8.0 AS DOUBLE) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(-99.0 AS DECIMAL) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST('Foo' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST('2011-05-06 07:08:09' as timestamp) AS BOOLEAN) FROM src tablesample (1 rows); @@ -27,6 +30,9 @@ SELECT CAST(CAST(0.0 AS DOUBLE) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(0.0 AS DECIMAL) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST('' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(0 as timestamp) AS BOOLEAN) FROM src tablesample (1 rows); http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q b/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q new file mode 100644 index 0000000..eeb5ab8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +SET hive.vectorized.execution.enabled = true; +SET hive.int.timestamp.conversion.in.seconds=false; +set hive.fetch.task.conversion=none; + +create table t (s string) stored as orc; + +insert into t values ('false'); +insert into t values ('FALSE'); +insert into t values ('FaLsE'); +insert into t values ('true'); +insert into t values ('TRUE'); +insert into t values ('TrUe'); +insert into t values (''); +insert into t values ('Other'); +insert into t values ('Off'); +insert into t values ('No'); +insert into t values ('0'); +insert into t values ('1'); + +explain select s,cast(s as boolean) from t order by s; + +select s,cast(s as boolean) from t order by s; http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out index ad6d97b..84b4d94 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out @@ -182,8 +182,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 22, 0, 1, 2, 3, 21, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] - selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastLongToBooleanViaLongToLong(col 21:bigint)(children: StringLength(col 6:string) -> 21:bigint) -> 22:boolean, CastDoubleToLong(col 4:float) -> 21:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:st ring, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double + projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] + selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLo ng(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51 :timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStr ingGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double Statistics: Num rows: 6144 Data size: 16362860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/udf_to_boolean.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_to_boolean.q.out b/ql/src/test/results/clientpositive/udf_to_boolean.q.out index ebce364..bee030a 100644 --- a/ql/src/test/results/clientpositive/udf_to_boolean.q.out +++ b/ql/src/test/results/clientpositive/udf_to_boolean.q.out @@ -70,6 +70,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### true +PREHOOK: query: SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true +PREHOOK: query: SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true +PREHOOK: query: SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true PREHOOK: query: SELECT CAST(CAST('2011-05-06 07:08:09' as timestamp) AS BOOLEAN) FROM src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -151,6 +178,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### false +PREHOOK: query: SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +false +PREHOOK: query: SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +false +PREHOOK: query: SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +false PREHOOK: query: SELECT CAST(CAST(0 as timestamp) AS BOOLEAN) FROM src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/vector_empty_where.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_empty_where.q.out b/ql/src/test/results/clientpositive/vector_empty_where.q.out index 609a36c..6b2c7fe 100644 --- a/ql/src/test/results/clientpositive/vector_empty_where.q.out +++ b/ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -26,7 +26,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsTrue(col 14:boolean)(children: CastLongToBooleanViaLongToLong(col 13:bigint)(children: StringLength(col 6:string) -> 13:bigint) -> 14:boolean) + predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastStringToBoolean(col 6) -> 13:boolean) predicate: cstring1 (type: string) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out b/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out new file mode 100644 index 0000000..1b06698 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out @@ -0,0 +1,156 @@ +PREHOOK: query: create table t (s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values ('false') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('false') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('FALSE') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('FALSE') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('FaLsE') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('FaLsE') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('true') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('true') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('TRUE') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('TRUE') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('TrUe') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('TrUe') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('Other') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('Other') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('Off') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('Off') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('No') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('No') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('0') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('0') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('1') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('1') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain select s,cast(s as boolean) from t order by s +PREHOOK: type: QUERY +POSTHOOK: query: explain select s,cast(s as boolean) from t order by s +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), UDFToBoolean(s) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s,cast(s as boolean) from t order by s +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select s,cast(s as boolean) from t order by s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### + false +0 false +1 true +FALSE false +FaLsE false +No false +Off false +Other true +TRUE true +TrUe true +false false +true true http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/vectorized_casts.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vectorized_casts.q.out b/ql/src/test/results/clientpositive/vectorized_casts.q.out index 96a1770..f6f2105 100644 --- a/ql/src/test/results/clientpositive/vectorized_casts.q.out +++ b/ql/src/test/results/clientpositive/vectorized_casts.q.out @@ -179,8 +179,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 22, 0, 1, 2, 3, 21, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] - selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastLongToBooleanViaLongToLong(col 21:bigint)(children: StringLength(col 6:string) -> 21:bigint) -> 22:boolean, CastDoubleToLong(col 4:float) -> 21:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27: int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49: timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double + projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] + selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:times tamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGro upToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index 6a4733f..88e7fb0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -22,6 +22,7 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.nio.charset.CharacterCodingException; +import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.time.DateTimeException; @@ -555,19 +556,69 @@ public final class PrimitiveObjectInspectorUtils { } - private static final String falseBooleans[] = { "false", "no", "off", "0", "" }; + enum FalseValues { + FALSE("false"), OFF("off"), NO("no"), ZERO("0"), EMPTY(""); + + private final byte[] bytes; + private String str; + + FalseValues(String s) { + str = s; + bytes = s.getBytes(StandardCharsets.UTF_8); + } + + public boolean accept(byte[] arr, int st) { + for (int i = 0; i < bytes.length; i++) { + byte b = arr[i + st]; + if (!(b == bytes[i] || b + 'a' - 'A' == bytes[i])) { + return false; + } + } + return true; + } + + public boolean accept(String s) { + return str.equalsIgnoreCase(s); + } + } + /** + * Parses a boolean from string + * + * Accepts "false","off","no","0" and "" as FALSE + * All other values are interpreted as true. + */ + public static boolean parseBoolean(byte[] arr, int st, int len) { + switch (len) { + case 5: + return !FalseValues.FALSE.accept(arr, st); + case 3: + return !FalseValues.OFF.accept(arr, st); + case 2: + return !FalseValues.NO.accept(arr, st); + case 1: + return !FalseValues.ZERO.accept(arr, st); + case 0: + return false; + default: + return true; + } + } + + private static final FalseValues[] FALSE_BOOLEANS = FalseValues.values(); private static boolean parseBoolean(String s) { - for(int i=0;i<falseBooleans.length;i++){ - if(falseBooleans[i].equalsIgnoreCase(s)) + for (int i = 0; i < FALSE_BOOLEANS.length; i++) { + if (FALSE_BOOLEANS[i].accept(s)) { return false; + } } return true; } private static boolean parseBoolean(Text t) { - if(t.getLength()>5) + if(t.getLength()>5) { return true; + } String strVal=t.toString(); return parseBoolean(strVal); } http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java index 9d86a54..cbf19d0 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java @@ -231,10 +231,16 @@ public class TestPrimitiveObjectInspectorUtils extends TestCase { for (String falseStr : mustEvaluateToFalse) { assertFalse(falseStr, PrimitiveObjectInspectorUtils.getBoolean(falseStr, PrimitiveObjectInspectorFactory.javaStringObjectInspector)); + + byte[] b1 = ("asd"+falseStr).getBytes(); + assertFalse(falseStr, PrimitiveObjectInspectorUtils.parseBoolean(b1, 3, falseStr.length())); + } for (String trueStr : mustEvaluateToTrue) { assertTrue(trueStr, PrimitiveObjectInspectorUtils.getBoolean(trueStr, PrimitiveObjectInspectorFactory.javaStringObjectInspector)); + byte[] b1 = ("asd"+trueStr).getBytes(); + assertTrue(trueStr, PrimitiveObjectInspectorUtils.parseBoolean(b1, 3, trueStr.length())); } } }