DRILL-6217: NaN/Inf NestedLoopJoin processes NaN values incorrectly - Changed loggic for equality functions to handle NaN values as the biggest ones
closes #1154 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d09efb93 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d09efb93 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d09efb93 Branch: refs/heads/master Commit: d09efb931f183d989516d1dd098ade546bbe3f16 Parents: f47af65 Author: Vladimir Tkach <vovatkac...@gmail.com> Authored: Tue Mar 6 11:42:02 2018 +0200 Committer: Ben-Zvi <bben-...@mapr.com> Committed: Fri Mar 9 18:51:15 2018 -0800 ---------------------------------------------------------------------- .../codegen/templates/ComparisonFunctions.java | 39 +++++++++ .../fn/impl/TestMathFunctionsWithNanInf.java | 6 +- .../vector/complex/writer/TestJsonNanInf.java | 88 ++++++++++++++++++-- 3 files changed, 123 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java index 12a4ef2..502418e 100644 --- a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java +++ b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java @@ -249,7 +249,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} { public void eval() { <#if typeGroup.mode == "primitive"> + // NaN is the biggest possible value, and NaN == NaN + if (Double.isNaN(left.value) || ( Double.isNaN(left.value) && Double.isNaN(right.value))) { + out.value=0; + } else if (Double.isNaN(right.value) && !Double.isNaN(left.value)) { + out.value = 1; + } else { out.value = left.value < right.value ? 1 : 0; + } <#elseif typeGroup.mode == "varString" || typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" > int cmp; @@ -280,7 +287,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} { public void eval() { <#if typeGroup.mode == "primitive"> + // NaN is the biggest possible value, and NaN == NaN + if (Double.isNaN(right.value)){ + out.value = 1; + } else if (!Double.isNaN(right.value) && Double.isNaN(left.value)) { + out.value = 0; + } else { out.value = left.value <= right.value ? 1 : 0; + } <#elseif typeGroup.mode == "varString" || typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" > int cmp; @@ -311,7 +325,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} { public void eval() { <#if typeGroup.mode == "primitive"> + // NaN is the biggest possible value, and NaN == NaN + if (Double.isNaN(right.value) || ( Double.isNaN(left.value) && Double.isNaN(right.value))) { + out.value = 0; + } else if (Double.isNaN(left.value) && !Double.isNaN(right.value)) { + out.value = 1; + } else { out.value = left.value > right.value ? 1 : 0; + } <#elseif typeGroup.mode == "varString" || typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" > int cmp; @@ -342,7 +363,15 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} { public void eval() { <#if typeGroup.mode == "primitive"> + // NaN is the biggest possible value, and NaN == NaN + if (Double.isNaN(left.value)){ + out.value=1; + } else if (!Double.isNaN(left.value) && Double.isNaN(right.value)) { + out.value = 0; + } else { out.value = left.value >= right.value ? 1 : 0; + } + <#elseif typeGroup.mode == "varString" || typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" > int cmp; @@ -373,7 +402,12 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} { public void eval() { <#if typeGroup.mode == "primitive"> + // NaN is the biggest possible value, and NaN == NaN + if (Double.isNaN(left.value) && Double.isNaN(right.value)) { + out.value = 1; + } else { out.value = left.value == right.value ? 1 : 0; + } <#elseif typeGroup.mode == "varString" > out.value = org.apache.drill.exec.expr.fn.impl.ByteFunctionHelpers.equal( left.buffer, left.start, left.end, right.buffer, right.start, right.end); @@ -406,7 +440,12 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} { public void eval() { <#if typeGroup.mode == "primitive"> + // NaN is the biggest possible value, and NaN == NaN + if (Double.isNaN(left.value) && Double.isNaN(right.value)) { + out.value = 0; + } else { out.value = left.value != right.value ? 1 : 0; + } <#elseif typeGroup.mode == "varString" || typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" > int cmp; http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java index b692c9f..8131195 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java @@ -54,7 +54,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery { String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}"; String query = String.format("select equal(nan_col, nan_col) as nan_col, equal(inf_col, inf_col) as inf_col from dfs.`%s`", table_name); String[] columns = {"nan_col", "inf_col"}; - Object[] values = {false, true}; + Object[] values = {true, true}; evalTest(table_name, json, query, columns, values); } @@ -65,7 +65,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery { String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}"; String query = String.format("select not_equal(nan_col, nan_col) as nan_col, not_equal(inf_col, inf_col) as inf_col from dfs.`%s`", table_name); String[] columns = {"nan_col", "inf_col"}; - Object[] values = {true, false}; + Object[] values = {false, false}; evalTest(table_name, json, query, columns, values); } @@ -85,7 +85,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery { String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}"; String query = String.format("select greater_than(nan_col, 5) as nan_col, greater_than(inf_col, 5) as inf_col from dfs.`%s`", table_name); String[] columns = {"nan_col", "inf_col"}; - Object[] values = {false, true}; + Object[] values = {true, true}; evalTest(table_name, json, query, columns, values); } http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java index 95848c4..3ff5ba2 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java @@ -18,6 +18,8 @@ package org.apache.drill.exec.vector.complex.writer; import org.apache.commons.io.FileUtils; +import org.apache.drill.exec.physical.impl.join.JoinTestBase; +import org.apache.drill.exec.planner.physical.PlannerSettings; import org.apache.drill.test.BaseTestQuery; import org.apache.drill.common.exceptions.UserRemoteException; import org.apache.drill.common.expression.SchemaPath; @@ -304,12 +306,20 @@ public class TestJsonNanInf extends BaseTestQuery { } @Test - public void testInnerJoinWithNaN() throws Exception { + public void testNestedLoopJoinWithNaN() throws Exception { String table_name = "nan_test.json"; - String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + - "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n" + - "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n" + - "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}"; + String json = "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"object2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}\n" + + "{\"name\":\"object2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}\n" + + "{\"name\":\"object3\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}\n" + + "{\"name\":\"object3\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}\n" + + "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}"; + JoinTestBase.enableJoin(false, false, true); String query = String.format("select distinct t.name from dfs.`%s` t inner join dfs.`%s` " + " tt on t.attr4 = tt.attr4 ", table_name, table_name); @@ -321,12 +331,76 @@ public class TestJsonNanInf extends BaseTestQuery { .sqlQuery(query) .ordered() .baselineColumns("name") - .baselineValues("obj1") - .baselineValues("obj2") + .baselineValues("object1") + .baselineValues("object2") + .baselineValues("object3") + .baselineValues("object4") .build() .run(); } finally { test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + JoinTestBase.resetJoinOptions(); + FileUtils.deleteQuietly(file); + } + } + + @Test + public void testHashJoinWithNaN() throws Exception { + String table_name = "nan_test.json"; + String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n" + + "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n" + + "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}"; + JoinTestBase.enableJoin(true, false, false); + String query = String.format("select distinct t.name from dfs.`%s` t inner join dfs.`%s` " + + " tt on t.attr4 = tt.attr4 ", table_name, table_name); + + File file = new File(dirTestWatcher.getRootDir(), table_name); + try { + FileUtils.writeStringToFile(file, json); + test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + testBuilder() + .sqlQuery(query) + .ordered() + .baselineColumns("name") + .baselineValues("obj1") + .baselineValues("obj2") + .build() + .run(); + } finally { + test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + JoinTestBase.resetJoinOptions(); + FileUtils.deleteQuietly(file); + } + } + + + @Test + public void testMergeJoinWithNaN() throws Exception { + String table_name = "nan_test.json"; + String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n" + + "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n" + + "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n" + + "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}"; + JoinTestBase.enableJoin(false, true, false); + String query = String.format("select distinct t.name from dfs.`%s` t inner join dfs.`%s` " + + " tt on t.attr4 = tt.attr4 ", table_name, table_name); + + File file = new File(dirTestWatcher.getRootDir(), table_name); + try { + FileUtils.writeStringToFile(file, json); + test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + testBuilder() + .sqlQuery(query) + .ordered() + .baselineColumns("name") + .baselineValues("obj1") + .baselineValues("obj2") + .build() + .run(); + } finally { + test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + JoinTestBase.resetJoinOptions(); FileUtils.deleteQuietly(file); } }