DRILL-6154: NaN, Infinity issues - changed comparison rules for NaN, Infinity values. For now NaN is the biggest value, Infinity - second biggest value - fixed min, max, trunc functions for NaN, Infinity values - made drill use original sqrt function instead of power(x,0.5) substitution
close apache/drill#1123 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/bcd358b3 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/bcd358b3 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/bcd358b3 Branch: refs/heads/master Commit: bcd358b3c73f34381b543789dfc368bfaefa4ea9 Parents: b4d2a77 Author: vladimir tkach <vovatkac...@gmail.com> Authored: Fri Feb 16 16:36:49 2018 +0200 Committer: Aman Sinha <asi...@maprtech.com> Committed: Fri Feb 23 14:18:15 2018 -0800 ---------------------------------------------------------------------- .../codegen/templates/AggrTypeFunctions1.java | 23 ++++++-- .../codegen/templates/ComparisonFunctions.java | 13 ++++- .../main/codegen/templates/MathFunctions.java | 16 ++++-- .../exec/planner/sql/DrillConvertletTable.java | 15 ++++++ .../org/apache/drill/TestExampleQueries.java | 1 - .../fn/impl/TestMathFunctionsWithNanInf.java | 22 +++++--- .../vector/complex/writer/TestJsonNanInf.java | 57 ++++++++++++++++++++ 7 files changed, 129 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java b/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java index b363cd1..fa4e8f1 100644 --- a/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java +++ b/exec/java-exec/src/main/codegen/templates/AggrTypeFunctions1.java @@ -71,9 +71,9 @@ public static class ${type.inputType}${aggrtype.className} implements DrillAggFu <#elseif type.runningType?starts_with("BigInt")> value.value = Long.MAX_VALUE; <#elseif type.runningType?starts_with("Float4")> - value.value = Float.MAX_VALUE; + value.value = Float.NaN; <#elseif type.runningType?starts_with("Float8")> - value.value = Double.MAX_VALUE; + value.value = Double.NaN; </#if> <#elseif aggrtype.funcName == "max"> <#if type.runningType?starts_with("Bit")> @@ -101,8 +101,21 @@ public static class ${type.inputType}${aggrtype.className} implements DrillAggFu } </#if> nonNullCount.value = 1; + // For min/max functions: NaN is the biggest value, + // Infinity is the second biggest value + // -Infinity is the smallest value <#if aggrtype.funcName == "min"> - value.value = Math.min(value.value, in.value); + <#if type.inputType?contains("Float4")> + if(!Float.isNaN(in.value)) { + value.value = Float.isNaN(value.value) ? in.value : Math.min(value.value, in.value); + } + <#elseif type.inputType?contains("Float8")> + if(!Double.isNaN(in.value)) { + value.value = Double.isNaN(value.value) ? in.value : Math.min(value.value, in.value); + } + <#else> + value.value = Math.min(value.value, in.value); + </#if> <#elseif aggrtype.funcName == "max"> value.value = Math.max(value.value, in.value); <#elseif aggrtype.funcName == "sum"> @@ -138,9 +151,9 @@ public static class ${type.inputType}${aggrtype.className} implements DrillAggFu <#elseif type.runningType?starts_with("BigInt")> value.value = Long.MAX_VALUE; <#elseif type.runningType?starts_with("Float4")> - value.value = Float.MAX_VALUE; + value.value = Float.NaN; <#elseif type.runningType?starts_with("Float8")> - value.value = Double.MAX_VALUE; + value.value = Double.NaN; </#if> <#elseif aggrtype.funcName == "max"> <#if type.runningType?starts_with("Int")> http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java index 633bb56..12a4ef2 100644 --- a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java +++ b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java @@ -118,9 +118,18 @@ { <@compareNullsSubblock leftType=leftType rightType=rightType output="out.value" breakTarget="outside" nullCompare=true nullComparesHigh=nullComparesHigh /> - + // NaN is the biggest possible value, and NaN == NaN <#if mode == "primitive"> - ${output} = left.value < right.value ? -1 : (left.value == right.value ? 0 : 1); + if(Double.isNaN(left.value) && Double.isNaN(right.value)) { + ${output} = 0; + } else if (!Double.isNaN(left.value) && Double.isNaN(right.value)) { + ${output} = -1; + } else if (Double.isNaN(left.value) && !Double.isNaN(right.value)) { + ${output} = 1; + } else { + ${output} = left.value < right.value ? -1 : (left.value == right.value ? 0 : 1); + } + <#elseif mode == "varString"> ${output} = org.apache.drill.exec.expr.fn.impl.ByteFunctionHelpers.compare( left.buffer, left.start, left.end, right.buffer, right.start, right.end ); http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/main/codegen/templates/MathFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/templates/MathFunctions.java b/exec/java-exec/src/main/codegen/templates/MathFunctions.java index 548c10e..0745aee 100644 --- a/exec/java-exec/src/main/codegen/templates/MathFunctions.java +++ b/exec/java-exec/src/main/codegen/templates/MathFunctions.java @@ -67,7 +67,11 @@ public class GMathFunctions{ public static class ${func.className}${type.input} implements DrillSimpleFunc { @Param ${type.input}Holder in; + <#if func.funcName == 'sqrt'> + @Output Float8Holder out; + <#else> @Output ${type.outputType}Holder out; + </#if> public void setup() { } @@ -76,12 +80,16 @@ public class GMathFunctions{ <#if func.funcName=='trunc'> <#if type.roundingRequired ??> - java.math.BigDecimal bd = java.math.BigDecimal.valueOf(in.value).setScale(0, java.math.BigDecimal.ROUND_DOWN); - out.value = <#if type.extraCast ??>(${type.extraCast})</#if>bd.${type.castType}Value(); + if (Double.isInfinite(in.value) || Double.isNaN(in.value)){ + out.value = <#if type.input='Float8'> Double.NaN <#else> Float.NaN </#if>; + } else { + java.math.BigDecimal bd = java.math.BigDecimal.valueOf(in.value).setScale(0, java.math.BigDecimal.ROUND_DOWN); + out.value = <#if type.extraCast ??>(${type.extraCast})</#if>bd.${type.castType}Value(); + } <#else> - out.value = (${type.castType}) (in.value);</#if> + out.value = <#if func.funcName!='sqrt'>(${type.castType})</#if> (in.value);</#if> <#else> - out.value = (${type.castType}) ${func.javaFunc}(in.value); + out.value = <#if func.funcName!='sqrt'>(${type.castType})</#if> ${func.javaFunc}(in.value); </#if> } } http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java index 7b66d12..bacf5c2 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/DrillConvertletTable.java @@ -19,11 +19,17 @@ package org.apache.drill.exec.planner.sql; import java.util.HashMap; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlBasicCall; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql2rel.SqlRexContext; import org.apache.calcite.sql2rel.SqlRexConvertlet; import org.apache.calcite.sql2rel.SqlRexConvertletTable; import org.apache.calcite.sql2rel.StandardConvertletTable; @@ -34,10 +40,19 @@ public class DrillConvertletTable implements SqlRexConvertletTable{ public static HashMap<SqlOperator, SqlRexConvertlet> map = new HashMap<>(); public static SqlRexConvertletTable INSTANCE = new DrillConvertletTable(); + private static SqlRexConvertlet sqrtConvertlet = new SqlRexConvertlet() { + public RexNode convertCall(SqlRexContext cx, SqlCall call) { + RexNode operand = cx.convertExpression(call.operand(0)); + return cx.getRexBuilder().makeCall(SqlStdOperatorTable.SQRT, operand); + } + }; static { // Use custom convertlet for extract function map.put(SqlStdOperatorTable.EXTRACT, DrillExtractConvertlet.INSTANCE); + // sqrt needs it's own convertlet because calcite overrides it to power(x,0.5) + // which is not suitable for Infinity value case + map.put(SqlStdOperatorTable.SQRT, sqrtConvertlet); map.put(SqlStdOperatorTable.AVG, new DrillAvgVarianceConvertlet(SqlKind.AVG)); map.put(SqlStdOperatorTable.STDDEV_POP, new DrillAvgVarianceConvertlet(SqlKind.STDDEV_POP)); map.put(SqlStdOperatorTable.STDDEV_SAMP, new DrillAvgVarianceConvertlet(SqlKind.STDDEV_SAMP)); http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java index 7de64c9..732884c 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java @@ -28,7 +28,6 @@ import org.apache.drill.categories.PlannerTest; import org.apache.drill.categories.SqlFunctionTest; import org.apache.drill.categories.UnlikelyTest; import org.apache.drill.common.types.TypeProtos; -import org.apache.drill.common.util.DrillFileUtils; import org.apache.drill.exec.ExecConstants; import org.apache.drill.test.BaseTestQuery; import org.junit.BeforeClass; http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java index 4003bbc..b692c9f 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java @@ -299,13 +299,23 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery { @Test public void testSqrtFunction() throws Exception { String table_name = "nan_test.json"; - String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}"; - String query = String.format("select sqrt(nan_col) as nan_col, sqrt(inf_col) as inf_col from dfs.`%s`", table_name); - String[] columns = {"nan_col", "inf_col"}; - Object[] values = {Double.NaN, Double.POSITIVE_INFINITY}; + String json = "{\"nan_col\":NaN, \"inf_col\":-Infinity, \"pos_inf_col\":Infinity}"; + String query = String.format("select sqrt(nan_col) as nan_col, sqrt(inf_col) as inf_col, sqrt(pos_inf_col) as pos_inf from dfs.`%s`", table_name); + String[] columns = {"nan_col", "inf_col", "pos_inf"}; + Object[] values = {Double.NaN, Double.NaN, Double.POSITIVE_INFINITY}; evalTest(table_name, json, query, columns, values); } + @Test + public void testSqrtNestedFunction() throws Exception { + String table_name = "nan_test.json"; + String json = "{\"nan_col\":NaN, \"inf_col\":-Infinity, \"pos_inf_col\":Infinity}"; + String query = String.format("select sqrt(sin(nan_col)) as nan_col, sqrt(sin(inf_col)) as inf_col, sqrt(sin(pos_inf_col)) as pos_inf from dfs.`%s`", table_name); + String[] columns = {"nan_col", "inf_col", "pos_inf"}; + Object[] values = {Double.NaN, Double.NaN, Double.NaN}; + evalTest(table_name, json, query, columns, values); + } + @Test public void testCeilFunction() throws Exception { String table_name = "nan_test.json"; @@ -390,7 +400,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery { public void testTruncFunction() throws Exception { String table_name = "nan_test.json"; String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}"; - String query = String.format("select trunc(nan_col,3) as nan_col, trunc(inf_col,3) as inf_col from dfs.`%s`", table_name); + String query = String.format("select trunc(nan_col,3) as nan_col, trunc(inf_col) as inf_col from dfs.`%s`", table_name); String[] columns = {"nan_col", "inf_col"}; Object[] values = {Double.NaN, Double.NaN}; evalTest(table_name, json, query, columns, values); @@ -496,7 +506,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery { "{\"nan_col\":5.0, \"inf_col\":5.0}]"; String query = String.format("select min(nan_col) as nan_col, min(inf_col) as inf_col from dfs.`%s`", table_name); String[] columns = {"nan_col", "inf_col"}; - Object[] values = {Double.NaN, 5.0}; + Object[] values = {5.0, 5.0}; evalTest(table_name, json, query, columns, values); } http://git-wip-us.apache.org/repos/asf/drill/blob/bcd358b3/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java index 2d19c17..95848c4 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java @@ -274,4 +274,61 @@ public class TestJsonNanInf extends BaseTestQuery { .run(); } + @Test + public void testOrderByWithNaN() throws Exception { + String table_name = "nan_test.json"; + String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n" + + "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n" + + "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}"; + String query = String.format("SELECT name, attr4 from dfs.`%s` order by name, attr4 ", table_name); + + File file = new File(dirTestWatcher.getRootDir(), table_name); + try { + FileUtils.writeStringToFile(file, json); + test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + testBuilder() + .sqlQuery(query) + .ordered() + .baselineColumns("name", "attr4") + .baselineValues("obj1", Double.POSITIVE_INFINITY) + .baselineValues("obj1", Double.NaN) + .baselineValues("obj2", Double.NEGATIVE_INFINITY) + .baselineValues("obj2", Double.NaN) + .build() + .run(); + } finally { + test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + FileUtils.deleteQuietly(file); + } + } + + @Test + public void testInnerJoinWithNaN() throws Exception { + String table_name = "nan_test.json"; + String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n" + + "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n" + + "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}"; + String query = String.format("select distinct t.name from dfs.`%s` t inner join dfs.`%s` " + + " tt on t.attr4 = tt.attr4 ", table_name, table_name); + + File file = new File(dirTestWatcher.getRootDir(), table_name); + try { + FileUtils.writeStringToFile(file, json); + test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + testBuilder() + .sqlQuery(query) + .ordered() + .baselineColumns("name") + .baselineValues("obj1") + .baselineValues("obj2") + .build() + .run(); + } finally { + test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + FileUtils.deleteQuietly(file); + } + } + }