This is an automated email from the ASF dual-hosted git repository.

volodymyr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit 017cd95f5a104ec84552d07aea14b3358fca77ea
Author: Volodymyr Vysotskyi <[email protected]>
AuthorDate: Mon Mar 2 14:03:01 2020 +0200

    DRILL-7615: UNION ALL query returns the wrong result for the decimal value

    closes #2006
---
 .../java/org/apache/drill/common/types/Types.java | 55 ++++++++++++++---
 .../drill/exec/physical/impl/join/JoinUtils.java  |  2 +-
 .../physical/impl/union/UnionAllRecordBatch.java  | 70 +++++++++++++++------
 .../exec/store/parquet/TestVarlenDecimal.java     | 71 ++++++++++++++++++----
 .../drill/exec/record/MaterializedField.java      | 10 ---
 5 files changed, 159 insertions(+), 49 deletions(-)

diff --git a/common/src/main/java/org/apache/drill/common/types/Types.java b/common/src/main/java/org/apache/drill/common/types/Types.java
index 8660989..596199a 100644
--- a/common/src/main/java/org/apache/drill/common/types/Types.java
+++ b/common/src/main/java/org/apache/drill/common/types/Types.java
@@ -20,6 +20,7 @@ package org.apache.drill.common.types;
 import static org.apache.drill.common.types.TypeProtos.DataMode.REPEATED;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
@@ -132,6 +133,17 @@ public class Types {
   }
 
   /**
+   * Returns true if all specified types are decimal data types.
+   *
+   * @param types types to check
+   * @return true if all specified types are decimal data type.
+   */
+  public static boolean areDecimalTypes(MinorType... types) {
+    return Arrays.stream(types)
+        .allMatch(Types::isDecimalType);
+  }
+
+  /**
    * Returns true if specified type is decimal data type.
    *
    * @param type type to check
@@ -795,15 +807,29 @@ public class Types {
    */
   public static MajorType.Builder calculateTypePrecisionAndScale(MajorType leftType, MajorType rightType, MajorType.Builder typeBuilder) {
     if (leftType.getMinorType().equals(rightType.getMinorType())) {
-      final boolean isScalarString = Types.isScalarStringType(leftType) && Types.isScalarStringType(rightType);
-      final boolean isDecimal = isDecimalType(leftType);
+      boolean isScalarString = Types.isScalarStringType(leftType) && Types.isScalarStringType(rightType);
+      boolean isDecimal = isDecimalType(leftType);
 
-      if ((isScalarString || isDecimal) && leftType.hasPrecision() && rightType.hasPrecision()) {
+      if (isScalarString && leftType.hasPrecision() && rightType.hasPrecision()) {
         typeBuilder.setPrecision(Math.max(leftType.getPrecision(), rightType.getPrecision()));
       }
 
-      if (isDecimal && leftType.hasScale() && rightType.hasScale()) {
-        typeBuilder.setScale(Math.max(leftType.getScale(), rightType.getScale()));
+      if (isDecimal) {
+        int scale = Math.max(leftType.getScale(), rightType.getScale());
+        // resulting precision should take into account resulting scale value and be calculated as
+        // sum of two components:
+        // - max integer digits number (precision - scale) for left and right;
+        // - resulting scale.
+        // So for the case of cast(9999 as decimal(4,0)) and cast(1.23 as decimal(3,2))
+        // resulting scale would be Max(0, 2) = 2 and resulting precision
+        // would be Max(4 - 0, 3 - 2) + 2 = 6.
+        // In this case, both values would fit into decimal(6, 2): 9999.00, 1.23
+        int leftNumberOfDigits = leftType.getPrecision() - leftType.getScale();
+        int rightNumberOfDigits = rightType.getPrecision() - rightType.getScale();
+        int precision = Math.max(leftNumberOfDigits, rightNumberOfDigits) + scale;
+
+        typeBuilder.setPrecision(precision);
+        typeBuilder.setScale(scale);
       }
     }
     return typeBuilder;
@@ -815,18 +841,31 @@ public class Types {
    *
    * @param type1 first type
   * @param type2 second type
-   * @return true if the two types are are the same minor type, mode,
+   * @return true if the two types have the same minor type, mode,
    *         precision and scale
    */
   public static boolean isSameType(MajorType type1, MajorType type2) {
-    return type1.getMinorType() == type2.getMinorType() &&
-        type1.getMode() == type2.getMode() &&
+    return isSameTypeAndMode(type1, type2) &&
         type1.getScale() == type2.getScale() &&
         type1.getPrecision() == type2.getPrecision();
   }
 
   /**
+   * Check if two "core" types have the same minor type and data mode,
+   * ignoring subtypes and children. Primarily for non-complex types.
+   *
+   * @param first first type to check
+   * @param second second type to check
+   * @return {@code true} if the two types have the same minor type and mode,
+   *         {@code false} otherwise
+   */
+  public static boolean isSameTypeAndMode(MajorType first, MajorType second) {
+    return first.getMinorType() == second.getMinorType()
+        && first.getMode() == second.getMode();
+  }
+
+  /**
    * Requires full type equality, including fields such as precision and scale.
    * But, unset fields are equivalent to 0. Can't use the protobuf-provided
    * isEquals() which treats set and unset fields as different.
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java
index 90e8558..f3e8dc0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java
@@ -159,7 +159,7 @@ public class JoinUtils {
     // or both of them are decimal
     if (TypeCastRules.isNumericType(input1) && TypeCastRules.isNumericType(input2) &&
         ((!Types.isDecimalType(input1) && !Types.isDecimalType(input2))
-          || Types.isDecimalType(input1) && Types.isDecimalType(input2))) {
+          || Types.areDecimalTypes(input1, input2))) {
       return true;
     }
 
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/union/UnionAllRecordBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/union/UnionAllRecordBatch.java
index 2039e79..ce68fa4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/union/UnionAllRecordBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/union/UnionAllRecordBatch.java
@@ -30,6 +30,7 @@ import org.apache.drill.common.expression.ErrorCollectorImpl;
 import org.apache.drill.common.expression.LogicalExpression;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
 import org.apache.drill.common.types.Types;
 import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.exception.OutOfMemoryException;
@@ -175,7 +176,7 @@ public class UnionAllRecordBatch extends AbstractBinaryRecordBatch<UnionAll> {
     transfers.clear();
     allocationVectors.clear();
 
-    final ClassGenerator<UnionAller> cg = CodeGenerator.getRoot(UnionAller.TEMPLATE_DEFINITION, context.getOptions());
+    ClassGenerator<UnionAller> cg = CodeGenerator.getRoot(UnionAller.TEMPLATE_DEFINITION, context.getOptions());
     cg.getCodeGenerator().plainJavaCapable(true);
     // cg.getCodeGenerator().saveCodeForDebugging(true);
 
@@ -184,38 +185,37 @@ public class UnionAllRecordBatch extends AbstractBinaryRecordBatch<UnionAll> {
       ValueVector vvIn = vw.getValueVector();
       ValueVector vvOut = container.getValueVector(index).getValueVector();
 
-      final ErrorCollector collector = new ErrorCollectorImpl();
-      // According to input data names, Minortypes, Datamodes, choose to
+      MaterializedField inField = vvIn.getField();
+      MaterializedField outputField = vvOut.getField();
+
+      ErrorCollector collector = new ErrorCollectorImpl();
+      // According to input data names, MinorTypes, DataModes, choose to
       // transfer directly,
       // rename columns or
-      // cast data types (Minortype or DataMode)
-      if (container.getSchema().getColumn(index).hasSameTypeAndMode(vvIn.getField())
-          && vvIn.getField().getType().getMinorType() != TypeProtos.MinorType.MAP // Per DRILL-5521, existing bug for map transfer
-      ) {
+      // cast data types (MinorType or DataMode)
+      if (areAssignableTypes(inField.getType(), outputField.getType())) {
         // Transfer column
         TransferPair tp = vvIn.makeTransferPair(vvOut);
         transfers.add(tp);
-      } else if (vvIn.getField().getType().getMinorType() == TypeProtos.MinorType.NULL) {
+      } else if (inField.getType().getMinorType() == TypeProtos.MinorType.NULL) {
         continue;
       } else { // Copy data in order to rename the column
-        SchemaPath inputPath = SchemaPath.getSimplePath(vvIn.getField().getName());
-        MaterializedField inField = vvIn.getField();
-        MaterializedField outputField = vvOut.getField();
+        SchemaPath inputPath = SchemaPath.getSimplePath(inField.getName());
         LogicalExpression expr = ExpressionTreeMaterializer.materialize(inputPath, inputBatch, collector, context.getFunctionRegistry());
         collector.reportErrors(logger);
 
         // If the inputs' DataMode is required and the outputs' DataMode is not required
         // cast to the one with the least restriction
-        if(inField.getType().getMode() == TypeProtos.DataMode.REQUIRED
+        if (inField.getType().getMode() == TypeProtos.DataMode.REQUIRED
             && outputField.getType().getMode() != TypeProtos.DataMode.REQUIRED) {
           expr = ExpressionTreeMaterializer.convertToNullableType(expr, inField.getType().getMinorType(), context.getFunctionRegistry(), collector);
           collector.reportErrors(logger);
         }
 
-        // If two inputs' MinorTypes are different,
-        // Insert a cast before the Union operation
-        if(inField.getType().getMinorType() != outputField.getType().getMinorType()) {
+        // If two inputs' MinorTypes are different or types are decimal with different scales,
+        // inserts a cast before the Union operation
+        if (isCastRequired(inField.getType(), outputField.getType())) {
          expr = ExpressionTreeMaterializer.addCastExpression(expr, outputField.getType(), context.getFunctionRegistry(), collector);
          collector.reportErrors(logger);
        }
@@ -239,6 +239,38 @@ public class UnionAllRecordBatch extends AbstractBinaryRecordBatch<UnionAll> {
     }
   }
 
+  /**
+   * Checks whether cast should be added for transitioning values from the one type to another one.
+   * {@code true} will be returned if minor types differ or scales differ for the decimal data type.
+   *
+   * @param first first type
+   * @param second second type
+   * @return {@code true} if cast should be added, {@code false} otherwise
+   */
+  private boolean isCastRequired(MajorType first, MajorType second) {
+    return first.getMinorType() != second.getMinorType()
+        || (Types.areDecimalTypes(second.getMinorType(), first.getMinorType())
+            && second.getScale() != first.getScale());
+  }
+
+  /**
+   * Checks whether data may be transitioned between specified types without using casts.
+   * {@code true} will be returned if minor types and data modes are the same for non-decimal data types,
+   * or if minor types, data modes and scales are the same for decimal data types.
+   *
+   * @param first first type
+   * @param second second type
+   * @return {@code true} if data may be transitioned between specified types without using casts,
+   *         {@code false} otherwise
+   */
+  private boolean areAssignableTypes(MajorType first, MajorType second) {
+    boolean areDecimalTypes = Types.areDecimalTypes(first.getMinorType(), second.getMinorType());
+
+    return Types.isSameTypeAndMode(first, second)
+        && second.getMinorType() != TypeProtos.MinorType.MAP // Per DRILL-5521, existing bug for map transfer
+        && (!areDecimalTypes || first.getScale() == second.getScale()); // scale should match for decimal data types
+  }
+
   // The output table's column names always follow the left table,
   // where the output type is chosen based on DRILL's implicit casting rules
   private void inferOutputFieldsBothSide(final BatchSchema leftSchema, final BatchSchema rightSchema) {
@@ -250,8 +282,10 @@ public class UnionAllRecordBatch extends AbstractBinaryRecordBatch<UnionAll> {
       MaterializedField leftField = leftIter.next();
       MaterializedField rightField = rightIter.next();
 
-      if (leftField.hasSameTypeAndMode(rightField)) {
-        TypeProtos.MajorType.Builder builder = TypeProtos.MajorType.newBuilder().setMinorType(leftField.getType().getMinorType()).setMode(leftField.getDataMode());
+      if (Types.isSameTypeAndMode(leftField.getType(), rightField.getType())) {
+        MajorType.Builder builder = MajorType.newBuilder()
+            .setMinorType(leftField.getType().getMinorType())
+            .setMode(leftField.getDataMode());
         builder = Types.calculateTypePrecisionAndScale(leftField.getType(), rightField.getType(), builder);
         container.addOrGet(MaterializedField.create(leftField.getName(), builder.build()), callBack);
       } else if (Types.isUntypedNull(rightField.getType())) {
@@ -261,7 +295,7 @@ public class UnionAllRecordBatch extends AbstractBinaryRecordBatch<UnionAll> {
       } else {
         // If the output type is not the same,
         // cast the column of one of the table to a data type which is the Least Restrictive
-        TypeProtos.MajorType.Builder builder = TypeProtos.MajorType.newBuilder();
+        MajorType.Builder builder = MajorType.newBuilder();
         if (leftField.getType().getMinorType() == rightField.getType().getMinorType()) {
           builder.setMinorType(leftField.getType().getMinorType());
           builder = Types.calculateTypePrecisionAndScale(leftField.getType(), rightField.getType(), builder);
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestVarlenDecimal.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestVarlenDecimal.java
index 072fa98..1afd3b3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestVarlenDecimal.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestVarlenDecimal.java
@@ -18,11 +18,18 @@
 package org.apache.drill.exec.store.parquet;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.drill.categories.ParquetTest;
 import org.apache.drill.categories.UnlikelyTest;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.types.Types;
 import org.apache.drill.exec.ExecConstants;
-import org.apache.drill.test.BaseTestQuery;
 import org.apache.drill.exec.planner.physical.PlannerSettings;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
 import org.hamcrest.CoreMatchers;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -32,18 +39,25 @@ import org.junit.experimental.categories.Category;
 
 import java.math.BigDecimal;
 import java.nio.file.Paths;
+import java.util.Collections;
+import java.util.List;
 
 @Category({ParquetTest.class, UnlikelyTest.class})
-public class TestVarlenDecimal extends BaseTestQuery {
+public class TestVarlenDecimal extends ClusterTest {
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    startCluster(ClusterFixture.builder(dirTestWatcher));
+  }
 
   @BeforeClass
   public static void enableDecimalDataType() {
-    setSessionOption(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY, true);
+    client.alterSession(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY, true);
   }
 
   @AfterClass
   public static void disableDecimalDataType() {
-    resetSessionOption(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY);
+    client.resetSession(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY);
   }
 
   private static final String DATAFILE = "cp.`parquet/varlenDecimal.parquet`";
@@ -89,11 +103,11 @@ public class TestVarlenDecimal extends BaseTestQuery {
     String tableName = "jsonWithDecimals";
 
     try {
-      alterSession(ExecConstants.OUTPUT_FORMAT_OPTION, "json");
+      client.alterSession(ExecConstants.OUTPUT_FORMAT_OPTION, "json");
 
       String bigDecimalValue = "987654321987654321987654321.987654321";
 
-      test(
+      run(
          "create table dfs.tmp.%s as\n" +
              "select cast('%s' as decimal(36, 9)) dec36", tableName, bigDecimalValue);
 
@@ -110,8 +124,8 @@ public class TestVarlenDecimal extends BaseTestQuery {
           .baselineValues(new BigDecimal(bigDecimalValue).doubleValue())
           .go();
     } finally {
-      resetSessionOption(ExecConstants.OUTPUT_FORMAT_OPTION);
-      test("drop table if exists dfs.tmp.%s", tableName);
+      client.resetSession(ExecConstants.OUTPUT_FORMAT_OPTION);
+      run("drop table if exists dfs.tmp.%s", tableName);
     }
   }
 
@@ -119,11 +133,11 @@ public class TestVarlenDecimal extends BaseTestQuery {
   public void testWriteReadCsv() throws Exception {
     String tableName = "csvWithDecimals";
     try {
-      alterSession(ExecConstants.OUTPUT_FORMAT_OPTION, "csvh");
+      client.alterSession(ExecConstants.OUTPUT_FORMAT_OPTION, "csvh");
 
       String bigDecimalValue = "987654321987654321987654321.987654321";
 
-      test(
+      run(
          "create table dfs.tmp.%s as\n" +
              "select cast('%s' as decimal(36, 9)) dec36", tableName, bigDecimalValue);
 
@@ -139,8 +153,41 @@ public class TestVarlenDecimal extends BaseTestQuery {
           .baselineValues(new BigDecimal(bigDecimalValue))
           .go();
     } finally {
-      resetSessionOption(ExecConstants.OUTPUT_FORMAT_OPTION);
-      test("drop table if exists dfs.tmp.%s", tableName);
+      client.resetSession(ExecConstants.OUTPUT_FORMAT_OPTION);
+      run("drop table if exists dfs.tmp.%s", tableName);
+    }
+  }
+
+  @Test
+  public void testUnionAllWithDifferentScales() throws Exception {
+    try {
+      run("create table dfs.tmp.t as select cast(999999999999999 as decimal(15,0)) as d");
+
+      String query = "select cast(1000 as decimal(10,1)) as d\n" +
+          "union all \n" +
+          "select 596.000 as d \n" +
+          "union all \n" +
+          "select d from dfs.tmp.t";
+
+      testBuilder()
+          .sqlQuery(query)
+          .unOrdered()
+          .baselineColumns("d")
+          .baselineValues(new BigDecimal("1000.000"))
+          .baselineValues(new BigDecimal("596.000"))
+          .baselineValues(new BigDecimal("999999999999999.000"))
+          .go();
+
+      List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema = Collections.singletonList(Pair.of(
+          SchemaPath.getSimplePath("d"),
+          Types.withPrecisionAndScale(MinorType.VARDECIMAL, DataMode.REQUIRED, 18, 3)));
+
+      testBuilder()
+          .sqlQuery(query)
+          .schemaBaseLine(expectedSchema)
+          .go();
+    } finally {
+      run("drop table if exists dfs.tmp.t");
     }
   }
 }
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
index bb734de..4e7b298 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
@@ -451,16 +451,6 @@ public class MaterializedField {
     return toString(true);
   }
 
-  /**
-   * Return true if two fields have identical MinorType and Mode.
-   * @param that
-   * @return
-   */
-  public boolean hasSameTypeAndMode(MaterializedField that) {
-    return (getType().getMinorType() == that.getType().getMinorType())
-        && (getType().getMode() == that.getType().getMode());
-  }
-
   private String toString(Collection<?> collection, int maxLen) {
     final StringBuilder builder = new StringBuilder();
     int i = 0;
