DRILL-6217: NaN/Inf NestedLoopJoin processes NaN values incorrectly

- Changed loggic for equality functions to handle NaN values as the biggest ones

closes #1154


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d09efb93
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d09efb93
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d09efb93

Branch: refs/heads/master
Commit: d09efb931f183d989516d1dd098ade546bbe3f16
Parents: f47af65
Author: Vladimir Tkach <vovatkac...@gmail.com>
Authored: Tue Mar 6 11:42:02 2018 +0200
Committer: Ben-Zvi <bben-...@mapr.com>
Committed: Fri Mar 9 18:51:15 2018 -0800

----------------------------------------------------------------------
 .../codegen/templates/ComparisonFunctions.java  | 39 +++++++++
 .../fn/impl/TestMathFunctionsWithNanInf.java    |  6 +-
 .../vector/complex/writer/TestJsonNanInf.java   | 88 ++++++++++++++++++--
 3 files changed, 123 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java 
b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
index 12a4ef2..502418e 100644
--- a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
+++ b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
@@ -249,7 +249,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
       public void eval() {
 
         <#if typeGroup.mode == "primitive">
+        // NaN is the biggest possible value, and NaN == NaN
+        if (Double.isNaN(left.value) || ( Double.isNaN(left.value) && 
Double.isNaN(right.value))) {
+          out.value=0;
+        } else if (Double.isNaN(right.value) && !Double.isNaN(left.value)) {
+          out.value = 1;
+        } else {
           out.value = left.value < right.value ? 1 : 0;
+        }
         <#elseif typeGroup.mode == "varString"
             || typeGroup.mode == "intervalNameThis" || typeGroup.mode == 
"intervalDay" >
           int cmp;
@@ -280,7 +287,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
       public void eval() {
 
         <#if typeGroup.mode == "primitive">
+        // NaN is the biggest possible value, and NaN == NaN
+        if (Double.isNaN(right.value)){
+          out.value = 1;
+        } else if (!Double.isNaN(right.value) && Double.isNaN(left.value)) {
+          out.value = 0;
+        } else {
           out.value = left.value <= right.value ? 1 : 0;
+        }
         <#elseif typeGroup.mode == "varString"
             || typeGroup.mode == "intervalNameThis" || typeGroup.mode == 
"intervalDay" >
           int cmp;
@@ -311,7 +325,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
       public void eval() {
 
         <#if typeGroup.mode == "primitive">
+        // NaN is the biggest possible value, and NaN == NaN
+        if (Double.isNaN(right.value) || ( Double.isNaN(left.value) && 
Double.isNaN(right.value))) {
+          out.value = 0;
+        } else if (Double.isNaN(left.value) && !Double.isNaN(right.value)) {
+          out.value = 1;
+        } else {
           out.value = left.value > right.value ? 1 : 0;
+        }
         <#elseif typeGroup.mode == "varString"
             || typeGroup.mode == "intervalNameThis" || typeGroup.mode == 
"intervalDay" >
           int cmp;
@@ -342,7 +363,15 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
       public void eval() {
 
         <#if typeGroup.mode == "primitive">
+        // NaN is the biggest possible value, and NaN == NaN
+        if (Double.isNaN(left.value)){
+          out.value=1;
+        } else if (!Double.isNaN(left.value) && Double.isNaN(right.value)) {
+          out.value = 0;
+        } else {
           out.value = left.value >= right.value ? 1 : 0;
+        }
+
         <#elseif typeGroup.mode == "varString"
             || typeGroup.mode == "intervalNameThis" || typeGroup.mode == 
"intervalDay" >
           int cmp;
@@ -373,7 +402,12 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
       public void eval() {
 
         <#if typeGroup.mode == "primitive">
+        // NaN is the biggest possible value, and NaN == NaN
+        if (Double.isNaN(left.value) && Double.isNaN(right.value)) {
+          out.value = 1;
+        } else {
           out.value = left.value == right.value ? 1 : 0;
+        }
         <#elseif typeGroup.mode == "varString" >
           out.value = 
org.apache.drill.exec.expr.fn.impl.ByteFunctionHelpers.equal(
               left.buffer, left.start, left.end, right.buffer, right.start, 
right.end);
@@ -406,7 +440,12 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
       public void eval() {
 
         <#if typeGroup.mode == "primitive">
+        // NaN is the biggest possible value, and NaN == NaN
+        if (Double.isNaN(left.value) && Double.isNaN(right.value)) {
+          out.value = 0;
+        } else {
           out.value = left.value != right.value ? 1 : 0;
+        }
         <#elseif typeGroup.mode == "varString"
             || typeGroup.mode == "intervalNameThis" || typeGroup.mode == 
"intervalDay" >
           int cmp;

http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
index b692c9f..8131195 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
@@ -54,7 +54,7 @@ public class TestMathFunctionsWithNanInf extends 
BaseTestQuery {
       String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
       String query = String.format("select equal(nan_col, nan_col) as nan_col, 
equal(inf_col, inf_col) as inf_col from dfs.`%s`", table_name);
       String[] columns = {"nan_col", "inf_col"};
-      Object[] values = {false, true};
+      Object[] values = {true, true};
       evalTest(table_name, json, query, columns, values);
 
     }
@@ -65,7 +65,7 @@ public class TestMathFunctionsWithNanInf extends 
BaseTestQuery {
       String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
       String query = String.format("select not_equal(nan_col, nan_col) as 
nan_col, not_equal(inf_col, inf_col) as inf_col from dfs.`%s`", table_name);
       String[] columns = {"nan_col", "inf_col"};
-      Object[] values = {true, false};
+      Object[] values = {false, false};
       evalTest(table_name, json, query, columns, values);
     }
 
@@ -85,7 +85,7 @@ public class TestMathFunctionsWithNanInf extends 
BaseTestQuery {
       String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
       String query = String.format("select greater_than(nan_col, 5) as 
nan_col, greater_than(inf_col, 5) as inf_col from dfs.`%s`", table_name);
       String[] columns = {"nan_col", "inf_col"};
-      Object[] values = {false, true};
+      Object[] values = {true, true};
       evalTest(table_name, json, query, columns, values);
     }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
index 95848c4..3ff5ba2 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
@@ -18,6 +18,8 @@
 package org.apache.drill.exec.vector.complex.writer;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.drill.exec.physical.impl.join.JoinTestBase;
+import org.apache.drill.exec.planner.physical.PlannerSettings;
 import org.apache.drill.test.BaseTestQuery;
 import org.apache.drill.common.exceptions.UserRemoteException;
 import org.apache.drill.common.expression.SchemaPath;
@@ -304,12 +306,20 @@ public class TestJsonNanInf extends BaseTestQuery {
   }
 
   @Test
-  public void testInnerJoinWithNaN() throws Exception {
+  public void testNestedLoopJoinWithNaN() throws Exception {
     String table_name = "nan_test.json";
-    String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}\n" +
-        "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, 
\"attr4\":Infinity}\n" +
-        "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, 
\"attr4\":-Infinity}\n" +
-        "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}";
+    String json = "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, 
\"attr3\":3, \"attr4\":NaN}\n" +
+            "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}\n" +
+            "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}\n" +
+            "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}\n" +
+            "{\"name\":\"object2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":Infinity}\n" +
+            "{\"name\":\"object2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":Infinity}\n" +
+            "{\"name\":\"object3\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":Infinity}\n" +
+            "{\"name\":\"object3\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":Infinity}\n" +
+            "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}\n" +
+            "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}\n" +
+            "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":Infinity}";
+    JoinTestBase.enableJoin(false, false, true);
     String query = String.format("select distinct t.name from dfs.`%s` t inner 
join dfs.`%s` " +
         " tt on t.attr4 = tt.attr4 ", table_name, table_name);
 
@@ -321,12 +331,76 @@ public class TestJsonNanInf extends BaseTestQuery {
           .sqlQuery(query)
           .ordered()
           .baselineColumns("name")
-          .baselineValues("obj1")
-          .baselineValues("obj2")
+          .baselineValues("object1")
+          .baselineValues("object2")
+          .baselineValues("object3")
+          .baselineValues("object4")
           .build()
           .run();
     } finally {
       test("alter session set `%s` = false", 
ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      JoinTestBase.resetJoinOptions();
+      FileUtils.deleteQuietly(file);
+    }
+  }
+
+  @Test
+  public void testHashJoinWithNaN() throws Exception {
+    String table_name = "nan_test.json";
+    String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}\n" +
+            "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, 
\"attr4\":Infinity}\n" +
+            "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, 
\"attr4\":-Infinity}\n" +
+            "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}";
+    JoinTestBase.enableJoin(true, false, false);
+    String query = String.format("select distinct t.name from dfs.`%s` t inner 
join dfs.`%s` " +
+            " tt on t.attr4 = tt.attr4 ", table_name, table_name);
+
+    File file = new File(dirTestWatcher.getRootDir(), table_name);
+    try {
+      FileUtils.writeStringToFile(file, json);
+      test("alter session set `%s` = true", 
ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      testBuilder()
+              .sqlQuery(query)
+              .ordered()
+              .baselineColumns("name")
+              .baselineValues("obj1")
+              .baselineValues("obj2")
+              .build()
+              .run();
+    } finally {
+      test("alter session set `%s` = false", 
ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      JoinTestBase.resetJoinOptions();
+      FileUtils.deleteQuietly(file);
+    }
+  }
+
+
+  @Test
+  public void testMergeJoinWithNaN() throws Exception {
+    String table_name = "nan_test.json";
+    String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}\n" +
+            "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, 
\"attr4\":Infinity}\n" +
+            "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, 
\"attr4\":-Infinity}\n" +
+            "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, 
\"attr4\":NaN}";
+    JoinTestBase.enableJoin(false, true, false);
+    String query = String.format("select distinct t.name from dfs.`%s` t inner 
join dfs.`%s` " +
+            " tt on t.attr4 = tt.attr4 ", table_name, table_name);
+
+    File file = new File(dirTestWatcher.getRootDir(), table_name);
+    try {
+      FileUtils.writeStringToFile(file, json);
+      test("alter session set `%s` = true", 
ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      testBuilder()
+              .sqlQuery(query)
+              .ordered()
+              .baselineColumns("name")
+              .baselineValues("obj1")
+              .baselineValues("obj2")
+              .build()
+              .run();
+    } finally {
+      test("alter session set `%s` = false", 
ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+      JoinTestBase.resetJoinOptions();
       FileUtils.deleteQuietly(file);
     }
   }

Reply via email to