DRILL-577: Two way implicit casts
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/cd4f281e Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/cd4f281e Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/cd4f281e Branch: refs/heads/master Commit: cd4f281e0c41707f79ccf3777d6245a13befeb18 Parents: f2ff2c9 Author: Mehant Baid <meha...@gmail.com> Authored: Wed May 7 12:25:39 2014 -0700 Committer: Jacques Nadeau <jacq...@apache.org> Committed: Wed May 7 18:43:18 2014 -0700 ---------------------------------------------------------------------- .../exec/expr/ExpressionTreeMaterializer.java | 10 ++- .../exec/resolver/ResolverTypePrecedence.java | 55 +++++++++++++- .../drill/exec/resolver/TypeCastRules.java | 45 +++++++++-- .../physical/impl/TestReverseImplicitCast.java | 78 ++++++++++++++++++++ .../functions/cast/two_way_implicit_cast.json | 36 +++++++++ 5 files changed, 217 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/cd4f281e/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java index a602d82..95d341b 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java @@ -156,7 +156,15 @@ public class ExpressionTreeMaterializer { List<LogicalExpression> castArgs = Lists.newArrayList(); castArgs.add(call.args.get(i)); //input_expr - if (parmType.getMinorType().name().startsWith("DECIMAL")) { + if (!Types.isFixedWidthType(parmType)) { + + /* We are implicitly casting to VARCHAR so we don't have a max length, + * using an arbitrary value. We trim down the size of the stored bytes + * to the actual size so this size doesn't really matter. + */ + castArgs.add(new ValueExpressions.LongExpression(65536, null)); + } + else if (parmType.getMinorType().name().startsWith("DECIMAL")) { // Add the scale and precision to the arguments of the implicit cast castArgs.add(new ValueExpressions.LongExpression(currentArg.getMajorType().getPrecision(), null)); castArgs.add(new ValueExpressions.LongExpression(currentArg.getMajorType().getScale(), null)); http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/cd4f281e/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java index f6d83e2..71bf616 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java @@ -19,7 +19,9 @@ package org.apache.drill.exec.resolver; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; @@ -27,7 +29,9 @@ import org.apache.drill.common.types.TypeProtos.MinorType; public class ResolverTypePrecedence { -public static final Map<MinorType, Integer> precedenceMap; + public static final Map<MinorType, Integer> precedenceMap; + public static final Map<MinorType, Set<MinorType>> secondaryImplicitCastRules; + public static int MAX_IMPLICIT_CAST_COST; static { /* The precedenceMap is used to decide whether it's allowed to implicitly "promote" @@ -74,6 +78,55 @@ public static final Map<MinorType, Integer> precedenceMap; precedenceMap.put(MinorType.INTERVALDAY, i+= 2); precedenceMap.put(MinorType.INTERVALYEAR, i+= 2); precedenceMap.put(MinorType.INTERVAL, i+= 2); + + MAX_IMPLICIT_CAST_COST = i; + + /* Currently implicit cast follows the precedence rules. + * It may be useful to perform an implicit cast in + * the opposite direction as specified by the precedence rules. + * + * For example: As per the precedence rules we can implicitly cast + * from VARCHAR ---> BIGINT , but based upon some functions (eg: substr, concat) + * it may be useful to implicitly cast from BIGINT ---> VARCHAR. + * + * To allow for such cases we have a secondary set of rules which will allow the reverse + * implicit casts. Currently we only allow the reverse implicit cast to VARCHAR so we don't + * need any cost associated with it, if we add more of these that may collide we can add costs. + */ + secondaryImplicitCastRules = new HashMap<>(); + HashSet<MinorType> rule = new HashSet<>(); + + // Following cast functions should exist + rule.add(MinorType.TINYINT); + rule.add(MinorType.SMALLINT); + rule.add(MinorType.INT); + rule.add(MinorType.BIGINT); + rule.add(MinorType.UINT1); + rule.add(MinorType.UINT2); + rule.add(MinorType.UINT4); + rule.add(MinorType.UINT8); + rule.add(MinorType.DECIMAL9); + rule.add(MinorType.DECIMAL18); + rule.add(MinorType.DECIMAL28SPARSE); + rule.add(MinorType.DECIMAL28DENSE); + rule.add(MinorType.DECIMAL38SPARSE); + rule.add(MinorType.DECIMAL38DENSE); + rule.add(MinorType.MONEY); + rule.add(MinorType.FLOAT4); + rule.add(MinorType.FLOAT8); + rule.add(MinorType.BIT); + rule.add(MinorType.FIXEDCHAR); + rule.add(MinorType.FIXED16CHAR); + rule.add(MinorType.VARCHAR); + rule.add(MinorType.DATE); + rule.add(MinorType.TIME); + rule.add(MinorType.TIMESTAMP); + rule.add(MinorType.TIMESTAMPTZ); + rule.add(MinorType.INTERVAL); + rule.add(MinorType.INTERVALYEAR); + rule.add(MinorType.INTERVALDAY); + + secondaryImplicitCastRules.put(MinorType.VARCHAR, rule); } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/cd4f281e/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java index 3aab08f..515843d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java @@ -778,6 +778,8 @@ public class TypeCastRules { (rules.get(to.getMinorType()) == null ? false : rules.get(to.getMinorType()).contains(from.getMinorType())); } + private static final int DATAMODE_CAST_COST = 1; + /* * code decide whether it's legal to do implicit cast. -1 : not allowed for * implicit cast > 0: cost associated with implicit cast. ==0: parms are @@ -789,7 +791,13 @@ public class TypeCastRules { if (call.args.size() != holder.getParamCount()) { return -1; } - + + // Indicates whether we used secondary cast rules + boolean secondaryCast = false; + + // number of arguments that could implicitly casts using precedence map or didn't require casting at all + int nCasts = 0; + for (int i = 0; i < holder.getParamCount(); i++) { MajorType argType = call.args.get(i).getMajorType(); MajorType parmType = holder.getParmMajorType(i); @@ -816,9 +824,18 @@ public class TypeCastRules { } if (parmVal - argVal < 0) { - return -1; + + /* Precedence rules does not allow to implicitly cast, however check + * if the seconday rules allow us to cast + */ + Set<MinorType> rules; + if ((rules = (ResolverTypePrecedence.secondaryImplicitCastRules.get(parmType.getMinorType()))) != null && + rules.contains(argType.getMinorType()) != false) { + secondaryCast = true; + } else { + return -1; + } } - // Check null vs non-null, using same logic as that in Types.softEqual() // Only when the function uses NULL_IF_NULL, nullable and non-nullable are inter-changable. // Otherwise, the function implementation is not a match. @@ -839,12 +856,30 @@ public class TypeCastRules { return -1; } else if (parmType.getMode() == DataMode.OPTIONAL && argType.getMode() == DataMode.REQUIRED) { - cost++; + cost+= DATAMODE_CAST_COST; } } } - cost += (parmVal - argVal); + int castCost; + + if ((castCost = (parmVal - argVal)) >= 0) { + nCasts++; + cost += castCost; + } + } + + if (secondaryCast) { + // We have a secondary cast for one or more of the arguments, determine the cost associated + int secondaryCastCost = Integer.MAX_VALUE - 1; + + // Subtract maximum possible implicit costs from the secondary cast cost + secondaryCastCost -= (nCasts * (ResolverTypePrecedence.MAX_IMPLICIT_CAST_COST + DATAMODE_CAST_COST)); + + // Add cost of implicitly casting the rest of the arguments that didn't use secondary casting + secondaryCastCost += cost; + + return secondaryCastCost; } return cost; http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/cd4f281e/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestReverseImplicitCast.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestReverseImplicitCast.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestReverseImplicitCast.java new file mode 100644 index 0000000..a0b77a5 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestReverseImplicitCast.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl; + +import com.google.common.base.Charsets; +import com.google.common.io.Files; +import mockit.Injectable; +import org.apache.drill.common.util.FileUtils; +import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.pop.PopUnitTestBase; +import org.apache.drill.exec.proto.UserProtos; +import org.apache.drill.exec.record.RecordBatchLoader; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.rpc.user.QueryResultBatch; +import org.apache.drill.exec.rpc.user.UserServer; +import org.apache.drill.exec.server.Drillbit; +import org.apache.drill.exec.server.DrillbitContext; +import org.apache.drill.exec.server.RemoteServiceSet; +import org.apache.drill.exec.vector.ValueVector; +import org.junit.Test; + +import java.util.Iterator; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestReverseImplicitCast extends PopUnitTestBase { + + @Test + public void twoWayCast(@Injectable final DrillbitContext bitContext, + @Injectable UserServer.UserClientConnection connection) throws Throwable { + + // Function checks for casting from Float, Double to Decimal data types + try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet(); + Drillbit bit = new Drillbit(CONFIG, serviceSet); + DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) { + + // run query. + bit.run(); + client.connect(); + List<QueryResultBatch> results = client.runQuery(UserProtos.QueryType.PHYSICAL, + Files.toString(FileUtils.getResourceAsFile("/functions/cast/two_way_implicit_cast.json"), Charsets.UTF_8)); + + RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator()); + + QueryResultBatch batch = results.get(0); + assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData())); + + Iterator<VectorWrapper<?>> itr = batchLoader.iterator(); + + ValueVector.Accessor intAccessor1 = itr.next().getValueVector().getAccessor(); + ValueVector.Accessor varcharAccessor1 = itr.next().getValueVector().getAccessor(); + + for (int i = 0; i < intAccessor1.getValueCount(); i++) { + System.out.println(intAccessor1.getObject(i)); + assertEquals(intAccessor1.getObject(i), 10); + System.out.println(varcharAccessor1.getObject(i)); + assertEquals(varcharAccessor1.getObject(i), "101"); + } + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/cd4f281e/exec/java-exec/src/test/resources/functions/cast/two_way_implicit_cast.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/functions/cast/two_way_implicit_cast.json b/exec/java-exec/src/test/resources/functions/cast/two_way_implicit_cast.json new file mode 100644 index 0000000..31cf541 --- /dev/null +++ b/exec/java-exec/src/test/resources/functions/cast/two_way_implicit_cast.json @@ -0,0 +1,36 @@ +{ + head:{ + type:"APACHE_DRILL_PHYSICAL", + version:"1", + generator:{ + type:"manual" + } + }, + graph:[ + { + @id:1, + pop:"mock-scan", + url: "http://apache.org", + entries:[ + {records: 1, types: [ + {name: "col1", type: "FLOAT4", mode: "REQUIRED"}, + {name: "col2", type: "FLOAT8", mode: "REQUIRED"} + ]} + ] + }, + { + @id:2, + child: 1, + pop:"project", + exprs: [ + {ref: "str_to_int_cast", expr:"8 + '2'" }, + {ref: "int_to_str_cast", expr:"substr(10123, 1, 3)" } + ] + }, + { + @id: 3, + child: 2, + pop: "screen" + } + ] +}