Repository: drill Updated Branches: refs/heads/master 68aa81f47 -> a2fc406c0
DRILL-2862: Convert_to/Convert_From throw assertion when an incorrect encoding type is specified Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/a2fc406c Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/a2fc406c Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/a2fc406c Branch: refs/heads/master Commit: a2fc406c06aa6c553dc449dd904cef5400675b9c Parents: 68aa81f Author: Parth Chandra <par...@apache.org> Authored: Mon Jun 22 14:01:59 2015 -0700 Committer: Parth Chandra <par...@apache.org> Committed: Fri Jul 10 14:22:46 2015 -0700 ---------------------------------------------------------------------- .../planner/logical/PreProcessLogicalRel.java | 72 ++++++++++++++++- .../exec/util/ApproximateStringMatcher.java | 83 ++++++++++++++++++++ .../exec/util/TestApproximateStringMatcher.java | 44 +++++++++++ 3 files changed, 195 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/a2fc406c/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/PreProcessLogicalRel.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/PreProcessLogicalRel.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/PreProcessLogicalRel.java index 0f8e45a..8d4c1b4 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/PreProcessLogicalRel.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/PreProcessLogicalRel.java @@ -18,11 +18,14 @@ package org.apache.drill.exec.planner.logical; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import org.apache.drill.common.exceptions.UserException; import org.apache.drill.exec.exception.UnsupportedOperatorCollector; import org.apache.drill.exec.planner.StarColumnHelper; import org.apache.drill.exec.planner.sql.DrillOperatorTable; +import org.apache.drill.exec.util.ApproximateStringMatcher; import org.apache.drill.exec.work.foreman.SqlUnsupportedException; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.logical.LogicalAggregate; @@ -51,6 +54,9 @@ import org.apache.calcite.util.NlsString; * output type and we will fire/ ignore certain rules (merge project rule) based on this fact. */ public class PreProcessLogicalRel extends RelShuttleImpl { + + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PreProcessLogicalRel.class); + private RelDataTypeFactory factory; private DrillOperatorTable table; private UnsupportedOperatorCollector unsupportedOperatorCollector; @@ -94,8 +100,26 @@ public class PreProcessLogicalRel extends RelShuttleImpl { // check if its a convert_from or convert_to function if (functionName.equalsIgnoreCase("convert_from") || functionName.equalsIgnoreCase("convert_to")) { - assert nArgs == 2 && function.getOperands().get(1) instanceof RexLiteral; - String literal = ((NlsString) (((RexLiteral) function.getOperands().get(1)).getValue())).getValue(); + String literal; + if (nArgs == 2) { + if (function.getOperands().get(1) instanceof RexLiteral) { + try { + literal = ((NlsString) (((RexLiteral) function.getOperands().get(1)).getValue())).getValue(); + } catch (final ClassCastException e) { + // Caused by user entering a value with a non-string literal + throw getConvertFunctionInvalidTypeException(function); + } + } else { + // caused by user entering a non-literal + throw getConvertFunctionInvalidTypeException(function); + } + } else { + // Second operand is missing + throw UserException.parseError() + .message("'%s' expects a string literal as a second argument.", functionName) + .build(logger); + } + RexBuilder builder = new RexBuilder(factory); // construct the new function name based on the input argument @@ -103,7 +127,10 @@ public class PreProcessLogicalRel extends RelShuttleImpl { // Look up the new function name in the drill operator table List<SqlOperator> operatorList = table.getSqlOperator(newFunctionName); - assert operatorList.size() > 0; + if (operatorList.size() == 0) { + // User typed in an invalid type name + throw getConvertFunctionException(functionName, literal); + } SqlFunction newFunction = null; // Find the SqlFunction with the correct args @@ -113,7 +140,10 @@ public class PreProcessLogicalRel extends RelShuttleImpl { break; } } - assert newFunction != null; + if (newFunction == null) { + // we are here because we found some dummy convert function. (See DummyConvertFrom and DummyConvertTo) + throw getConvertFunctionException(functionName, literal); + } // create the new expression to be used in the rewritten project newExpr = builder.makeCall(newFunction, function.getOperands().subList(0, 1)); @@ -147,6 +177,40 @@ public class PreProcessLogicalRel extends RelShuttleImpl { return visitChildren(union); } + private UserException getConvertFunctionInvalidTypeException(final RexCall function) { + // Caused by user entering a value with a numeric type + final String functionName = function.getOperator().getName(); + final String typeName = function.getOperands().get(1).getType().getFullTypeString(); + return UserException.parseError() + .message("Invalid type %s passed as second argument to function '%s'. " + + "The function expects a literal argument.", + typeName, + functionName) + .build(logger); + } + + private UserException getConvertFunctionException(final String functionName, final String typeName) { + final String newFunctionName = functionName + typeName; + final String typeNameToPrint = typeName.length()==0 ? "<empty_string>" : typeName; + final UserException.Builder exceptionBuilder = UserException.unsupportedError() + .message("%s does not support conversion %s type '%s'.", functionName, functionName.substring(8).toLowerCase(), typeNameToPrint); + // Build a nice error message + if (typeName.length()>0) { + List<String> ops = new ArrayList<>(); + for (SqlOperator op : table.getOperatorList()) { + ops.add(op.getName()); + } + final String bestMatch = ApproximateStringMatcher.getBestMatch(ops, newFunctionName); + if (bestMatch != null && bestMatch.length() > 0 && bestMatch.toLowerCase().startsWith("convert")) { + final StringBuilder s = new StringBuilder("Did you mean ") + .append(bestMatch.substring(functionName.length())) + .append("?"); + exceptionBuilder.addContext(s.toString()); + } + } + return exceptionBuilder.build(logger); + } + public void convertException() throws SqlUnsupportedException { unsupportedOperatorCollector.convertException(); } http://git-wip-us.apache.org/repos/asf/drill/blob/a2fc406c/exec/java-exec/src/main/java/org/apache/drill/exec/util/ApproximateStringMatcher.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/util/ApproximateStringMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/util/ApproximateStringMatcher.java new file mode 100644 index 0000000..9650bcc --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/util/ApproximateStringMatcher.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.util; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class ApproximateStringMatcher { + // From https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance. + // This function is not performant and should only be used for small lists. It is useful to + // detect typos in queries entered by the user but not appropriate to do approximate string matching + // on large data sets + private static int LevenshteinDistance(final String s0, final String s1) { + final int len0 = s0.length() + 1; + final int len1 = s1.length() + 1; + + // the array of distances + int[] cost = new int[len0]; + int[] newcost = new int[len0]; + + // initial cost of skipping prefix in String s0 + for (int i = 0; i < len0; i++) { + cost[i] = i; + } + + // dynamically computing the array of distances + + // transformation cost for each letter in s1 + for (int j = 1; j < len1; j++) { + // initial cost of skipping prefix in String s1 + newcost[0] = j; + + // transformation cost for each letter in s0 + for (int i = 1; i < len0; i++) { + // matching current letters in both strings + final int match = (s0.charAt(i - 1) == s1.charAt(j - 1)) ? 0 : 1; + + // computing cost for each transformation + int cost_replace = cost[i - 1] + match; + int cost_insert = cost[i] + 1; + int cost_delete = newcost[i - 1] + 1; + + // keep minimum cost + newcost[i] = Math.min(Math.min(cost_insert, cost_delete), cost_replace); + } + + // swap cost/newcost arrays + final int[] swap = cost; + cost = newcost; + newcost = swap; + } + + // the distance is the cost for transforming all letters in both strings + return cost[len0 - 1]; + } + + public static String getBestMatch(final List<String> namesToSearch, final String nameToMatch) { + final List<Integer> editDistances = new ArrayList<>(); + for (final String name : namesToSearch) { + final int dist = ApproximateStringMatcher.LevenshteinDistance(nameToMatch, name); + editDistances.add(dist); + } + final int minIndex = editDistances.indexOf(Collections.min(editDistances)); + final String bestMatch = namesToSearch.get(minIndex); + return bestMatch; + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/a2fc406c/exec/java-exec/src/test/java/org/apache/drill/exec/util/TestApproximateStringMatcher.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/util/TestApproximateStringMatcher.java b/exec/java-exec/src/test/java/org/apache/drill/exec/util/TestApproximateStringMatcher.java new file mode 100644 index 0000000..930a30b --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/util/TestApproximateStringMatcher.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.util; + +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class TestApproximateStringMatcher { + @Test + public void testStringMatcher() { + List<String> names = new ArrayList<>(); + names.add("a"); + names.add("little"); + names.add("sql"); + names.add("for"); + names.add("your"); + names.add("nosql"); + + String name = "squeal"; + String bestMatch = ApproximateStringMatcher.getBestMatch(names, name); + + assertEquals("sql", bestMatch); + } +} +