This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 269dc5d HIVE-20295: Remove !isNumber check after failed constant interpretation (Ivan Suller via Zoltan Haindrich) 269dc5d is described below commit 269dc5dde1e8290da93f204f2bb951bd4af40098 Author: Ivan Suller <isul...@cloudera.com> AuthorDate: Wed Feb 6 10:48:21 2019 +0100 HIVE-20295: Remove !isNumber check after failed constant interpretation (Ivan Suller via Zoltan Haindrich) Signed-off-by: Zoltan Haindrich <k...@rxd.hu> --- .../hadoop/hive/ql/parse/TypeCheckProcFactory.java | 31 +++-- .../hive/ql/parse/TestTypeCheckProcFactory.java | 146 +++++++++++++++++++++ .../results/clientpositive/infer_const_type.q.out | 13 +- .../clientpositive/llap/orc_llap_counters.q.out | 11 +- .../clientpositive/llap/orc_ppd_basic.q.out | 11 +- .../clientpositive/llap/vectorization_0.q.out | 16 +-- .../clientpositive/parquet_vectorization_0.q.out | 14 +- .../spark/parquet_vectorization_0.q.out | 14 +- .../clientpositive/spark/vectorization_0.q.out | 16 +-- 9 files changed, 214 insertions(+), 58 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index b49bb36..a2dd554 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -106,6 +106,7 @@ import org.apache.hive.common.util.DateUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; @@ -1345,7 +1346,8 @@ public class TypeCheckProcFactory { return valueDesc; } - private static ExprNodeDesc interpretNodeAs(PrimitiveTypeInfo colTypeInfo, ExprNodeDesc constChild) { + @VisibleForTesting + protected static ExprNodeDesc interpretNodeAs(PrimitiveTypeInfo colTypeInfo, ExprNodeDesc constChild) { if (constChild instanceof ExprNodeConstantDesc) { // Try to narrow type of constant Object constVal = ((ExprNodeConstantDesc) constChild).getValue(); @@ -1373,32 +1375,36 @@ public class TypeCheckProcFactory { return colTypeInfo; } + private static BigDecimal toBigDecimal(String val) { + if (!NumberUtils.isNumber(val)) { + throw new NumberFormatException("The given string is not a valid number: " + val); + } + return new BigDecimal(val.replaceAll("[dDfFlL]$", "")); + } + private static Object interpretConstantAsPrimitive(PrimitiveTypeInfo colTypeInfo, Object constVal, TypeInfo constTypeInfo) { - String constTypeInfoName = constTypeInfo.getTypeName(); if (constVal instanceof Number || constVal instanceof String) { try { PrimitiveTypeEntry primitiveTypeEntry = colTypeInfo.getPrimitiveTypeEntry(); if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(primitiveTypeEntry)) { - return (new Integer(constVal.toString())); + return toBigDecimal(constVal.toString()).intValueExact(); } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(primitiveTypeEntry)) { - return (new Long(constVal.toString())); + return toBigDecimal(constVal.toString()).longValueExact(); } else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) { - return (new Double(constVal.toString())); + return Double.valueOf(constVal.toString()); } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) { - return (new Float(constVal.toString())); + return Float.valueOf(constVal.toString()); } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(primitiveTypeEntry)) { - return (new Byte(constVal.toString())); + return toBigDecimal(constVal.toString()).byteValueExact(); } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(primitiveTypeEntry)) { - return (new Short(constVal.toString())); + return toBigDecimal(constVal.toString()).shortValueExact(); } else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(primitiveTypeEntry)) { return HiveDecimal.create(constVal.toString()); } - } catch (NumberFormatException nfe) { + } catch (NumberFormatException | ArithmeticException nfe) { LOG.trace("Failed to narrow type of constant", nfe); - if (!NumberUtils.isNumber(constVal.toString())) { - return null; - } + return null; } } @@ -1419,6 +1425,7 @@ public class TypeCheckProcFactory { // if column type is char and constant type is string, then convert the constant to char // type with padded spaces. + String constTypeInfoName = constTypeInfo.getTypeName(); if (constTypeInfoName.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) && colTypeInfo instanceof CharTypeInfo) { final String constValue = constVal.toString(); final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java new file mode 100644 index 0000000..66d024a --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.mockito.Mockito.when; + +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +/** + * Parametrized test for the TypeCheckProcFactory. + * + */ +@RunWith(Parameterized.class) +public class TestTypeCheckProcFactory { + @Mock + private PrimitiveTypeInfo typeInfo; + @Mock + private ExprNodeConstantDesc nodeDesc; + + private DefaultExprProcessor testSubject; + + @Parameters(name = "{1}") + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] {{"127", PrimitiveObjectInspectorUtils.byteTypeEntry, (byte) 127, true}, + {"32767", PrimitiveObjectInspectorUtils.shortTypeEntry, (short) 32767, true}, + {"2147483647", PrimitiveObjectInspectorUtils.intTypeEntry, 2147483647, true}, + {"9223372036854775807", PrimitiveObjectInspectorUtils.longTypeEntry, 9223372036854775807L, true}, + {"111.1", PrimitiveObjectInspectorUtils.floatTypeEntry, 111.1f, false}, + {"111.1", PrimitiveObjectInspectorUtils.doubleTypeEntry, 111.1d, false}}); + } + + private final BigDecimal maxValue; + private final PrimitiveTypeEntry constType; + private final Object expectedValue; + private final boolean intType; + + public TestTypeCheckProcFactory(String maxValue, PrimitiveTypeEntry constType, Object expectedValue, + boolean intType) { + this.maxValue = new BigDecimal(maxValue); + this.constType = constType; + this.expectedValue = expectedValue; + this.intType = intType; + } + + @Before + public void init() { + MockitoAnnotations.initMocks(this); + testSubject = new DefaultExprProcessor(); + } + + public void testOneCase(Object constValue) { + when(nodeDesc.getValue()).thenReturn(constValue); + when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType); + + ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc); + + assertNotNull(result); + assertEquals(expectedValue, result.getValue()); + } + + public void testNullCase(Object constValue) { + when(nodeDesc.getValue()).thenReturn(constValue); + when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType); + + ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc); + + assertNull(result); + } + + @Test + public void testWithSring() { + testOneCase(maxValue.toString()); + } + + @Test + public void testWithLSuffix() { + if (intType) { + testOneCase(maxValue.toString() + "L"); + } + } + + @Test + public void testWithZeroFraction() { + if (intType) { + testOneCase(maxValue.toString() + ".0"); + } + } + + @Test + public void testWithFSuffix() { + testOneCase(maxValue.toString() + "f"); + } + + @Test + public void testWithDSuffix() { + testOneCase(maxValue.toString() + "D"); + } + + @Test + public void testOverflow() { + if (intType) { + testNullCase(maxValue.add(BigDecimal.valueOf(1L)).toString()); + } + } + + @Test + public void testWithNonZeroFraction() { + if (intType) { + testNullCase("100.1"); + } + } + +} diff --git a/ql/src/test/results/clientpositive/infer_const_type.q.out b/ql/src/test/results/clientpositive/infer_const_type.q.out index b736f4b..bbdb5be 100644 --- a/ql/src/test/results/clientpositive/infer_const_type.q.out +++ b/ql/src/test/results/clientpositive/infer_const_type.q.out @@ -108,7 +108,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@infertypes #### A masked pattern was here #### 127 32767 12345 -12345 906.0 -307.0 1234 -WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR @@ -139,10 +138,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: infertypes - filterExpr: ((UDFToDouble(ti) = 128.0D) or (UDFToInteger(si) = 32768) or (UDFToDouble(i) = 2.147483648E9D) or (UDFToDouble(bi) = 9.223372036854776E18D) or null) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(bi) = 9.223372036854776E18D) or (UDFToDouble(i) = 2.147483648E9D) or (UDFToDouble(ti) = 128.0D) or (UDFToInteger(si) = 32768) or null) (type: boolean) + predicate: false (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) @@ -163,7 +161,6 @@ STAGE PLANS: Processor Tree: ListSink -WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR @@ -208,10 +205,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: infertypes - filterExpr: ((UDFToDouble(ti) = 127.0D) or (CAST( si AS decimal(5,0)) = 327) or (UDFToDouble(i) = -100.0D)) (type: boolean) + filterExpr: ((ti = 127Y) or (CAST( si AS decimal(5,0)) = 327) or (i = -100)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((CAST( si AS decimal(5,0)) = 327) or (UDFToDouble(i) = -100.0D) or (UDFToDouble(ti) = 127.0D)) (type: boolean) + predicate: ((CAST( si AS decimal(5,0)) = 327) or (i = -100) or (ti = 127Y)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) @@ -271,10 +268,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: infertypes - filterExpr: ((UDFToDouble(ti) < 127.0D) and (UDFToDouble(i) > 100.0D) and (UDFToDouble(str) = 1.57D)) (type: boolean) + filterExpr: ((ti < 127Y) and (i > 100) and (UDFToDouble(str) = 1.57D)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(i) > 100.0D) and (UDFToDouble(str) = 1.57D) and (UDFToDouble(ti) < 127.0D)) (type: boolean) + predicate: ((UDFToDouble(str) = 1.57D) and (i > 100) and (ti < 127Y)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out index 5627490..dad4b1c 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -898,7 +898,7 @@ Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 - RECORDS_IN_Map_1: 0 + RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 0 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 @@ -908,12 +908,15 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 0 RECORDS_OUT_OPERATOR_SEL_9: 0 - RECORDS_OUT_OPERATOR_TS_0: 0 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: - CACHE_HIT_BYTES: 823 + CACHE_HIT_BYTES: 354 CACHE_MISS_BYTES: 0 METADATA_CACHE_HIT: 2 - SELECTED_ROWGROUPS: 0 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 Stage-1 INPUT COUNTERS: GROUPED_INPUT_SPLITS_Map_1: 1 INPUT_DIRECTORIES_Map_1: 1 diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out index 42c2f5b..53c6cfd 100644 --- a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out @@ -850,7 +850,7 @@ Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 - RECORDS_IN_Map_1: 0 + RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 0 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 @@ -860,12 +860,15 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_3: 0 RECORDS_OUT_OPERATOR_SEL_2: 0 - RECORDS_OUT_OPERATOR_TS_0: 0 + RECORDS_OUT_OPERATOR_TS_0: 2100 Stage-1 LLAP IO COUNTERS: - CACHE_HIT_BYTES: 823 + CACHE_HIT_BYTES: 354 CACHE_MISS_BYTES: 0 METADATA_CACHE_HIT: 2 - SELECTED_ROWGROUPS: 0 + NUM_DECODED_BATCHES: 3 + NUM_VECTOR_BATCHES: 3 + ROWS_EMITTED: 2100 + SELECTED_ROWGROUPS: 3 Stage-1 INPUT COUNTERS: GROUPED_INPUT_SPLITS_Map_1: 1 INPUT_DIRECTORIES_Map_1: 1 diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index ec0712b..4c7fe06 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -1640,7 +1640,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean) + filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean) Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -1649,8 +1649,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col [...] - predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboole [...] + predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) @@ -1658,13 +1658,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 0, 15, 18] - selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double + projectedOutputColumnNums: [3, 4, 0, 18, 21] + selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false @@ -1691,14 +1691,14 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 12 includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(13,3), double, double, double, double, double] + scratchColumnTypeNames: [decimal(13,3), double, bigint, bigint, bigint, double, double, double, double] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out index c83b6e6..e292490 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out @@ -1487,7 +1487,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean) + filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean) Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -1495,8 +1495,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tiny [...] - predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = [...] + predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) @@ -1504,13 +1504,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 0, 15, 18] - selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double + projectedOutputColumnNums: [3, 4, 0, 18, 21] + selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false @@ -1536,7 +1536,7 @@ STAGE PLANS: featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reduce Vectorization: enabled: false diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index 1232957..738f19a 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -1480,7 +1480,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean) + filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -1488,8 +1488,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col [...] - predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboole [...] + predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) @@ -1497,13 +1497,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 0, 15, 18] - selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double + projectedOutputColumnNums: [3, 4, 0, 18, 21] + selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false @@ -1528,7 +1528,7 @@ STAGE PLANS: featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out index ac8675c..c782c13 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -1621,7 +1621,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean) + filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -1630,8 +1630,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col [...] - predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboole [...] + predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) @@ -1639,13 +1639,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 0, 15, 18] - selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double + projectedOutputColumnNums: [3, 4, 0, 18, 21] + selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false @@ -1671,14 +1671,14 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 12 includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(13,3), double, double, double, double, double] + scratchColumnTypeNames: [decimal(13,3), double, bigint, bigint, bigint, double, double, double, double] Reducer 2 Execution mode: vectorized Reduce Vectorization: