[CARBONDATA-1703] Fix null handling in "IN" and "NOT IN" expressions

This closes #1650


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/082a3559
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/082a3559
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/082a3559

Branch: refs/heads/fgdatamap
Commit: 082a3559aa4d36b1263234e290ea77e5eead8db2
Parents: 62ce5b5
Author: Geetika Gupta <[email protected]>
Authored: Wed Dec 13 12:03:22 2017 +0530
Committer: Venkata Ramana G <[email protected]>
Committed: Tue Jan 9 19:48:03 2018 +0530

----------------------------------------------------------------------
 .../ExpressionWithNullTestCase.scala            | 21 +++++++++++++++-----
 .../spark/sql/optimizer/CarbonFilters.scala     | 21 ++++++++++++++------
 .../BooleanDataTypesFilterTest.scala            |  4 ++--
 3 files changed, 33 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/082a3559/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala
index 2024018..a2d124d 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala
@@ -48,9 +48,9 @@ class ExpressionWithNullTestCase extends QueryTest with 
BeforeAndAfterAll {
     checkAnswer(sql("select * from expression_test where id in (1,2,'', NULL, 
' ')"), sql("select * from expression_test_hive where id in (1,2,' ', NULL, ' 
')"))
     checkAnswer(sql("select * from expression_test where id in (1,2,'')"), 
sql("select * from expression_test_hive where id in (1,2,'')"))
     checkAnswer(sql("select * from expression_test where id in ('')"), 
sql("select * from expression_test_hive where id in ('')"))
-//    checkAnswer(sql("select * from expression_test where number in (null)"), 
sql("select * from expression_test_hive where number in (null)"))
+    checkAnswer(sql("select * from expression_test where number in (NULL)"), 
sql("select * from expression_test_hive where number in (null)"))
     checkAnswer(sql("select * from expression_test where number in (2)"), 
sql("select * from expression_test_hive where number in (2)"))
-//    checkAnswer(sql("select * from expression_test where number in 
(1,null)"), sql("select * from expression_test_hive where number in (1,null)"))
+    checkAnswer(sql("select * from expression_test where number in (1,null)"), 
sql("select * from expression_test_hive where number in (1,null)"))
     checkAnswer(sql("select * from expression where number in (1,null)"), 
sql("select * from expression_hive where number in (1,null)"))
     checkAnswer(sql("select * from expression where id in (3)"), sql("select * 
from expression_hive where id in (3)"))
     checkAnswer(sql("select * from expression where id in ('2')"), sql("select 
* from expression_hive where id in ('2')"))
@@ -59,14 +59,20 @@ class ExpressionWithNullTestCase extends QueryTest with 
BeforeAndAfterAll {
     checkAnswer(sql("select * from expression_test where id in ('2')"), 
sql("select * from expression_test_hive where id in ('2')"))
     checkAnswer(sql("select * from expression_test where id in (cast('2' as 
int))"), sql("select * from expression_test_hive where id in (cast('2' as 
int))"))
     checkAnswer(sql("select * from expression_test where id in (cast('null' as 
int))"), sql("select * from expression_test_hive where id in (cast('null' as 
int))"))
+    checkAnswer(sql("select * from expression_test where id in (1,2,NULL)"), 
sql("select * from expression_test_hive where id in (1,2,NULL)"))
+    checkAnswer(sql("select * from expression_test where id in (NULL)"), 
sql("select * from expression_test_hive where id in (NULL)"))
+    checkAnswer(sql("select * from expression_test where number in 
(cast('null' as int))"), sql("select * from expression_test_hive where number 
in (cast('null' as int))"))
+    checkAnswer(sql("select * from expression_test where number in (1,2, 
cast('NULL' as int), cast('3' as int))"), sql("select * from 
expression_test_hive where number in (1,2, cast('NULL' as int),cast('3' as 
int))"))
+    checkAnswer(sql("select * from expression_test where cast(number as int) 
IN(1,null)"), sql("select * from expression_test_hive where cast(number as int) 
IN(1,null)"))
+
   }
 
   test("test to check not in expression with null values") {
     checkAnswer(sql("select * from expression_test where id not in (1,2,'', 
NULL, ' ')"), sql("select * from expression_test_hive where id not in (1,2,' ', 
NULL, ' ')"))
     checkAnswer(sql("select * from expression_test where id not in (1,2,'')"), 
sql("select * from expression_test_hive where id not in (1,2,'')"))
     checkAnswer(sql("select * from expression_test where id not in ('')"), 
sql("select * from expression_test_hive where id not in ('')"))
-//    checkAnswer(sql("select * from expression_test where number not in 
(null)"), sql("select * from expression_test_hive where number not in (null)"))
-//    checkAnswer(sql("select * from expression_test where number not in 
(1,null)"), sql("select * from expression_test_hive where number not in 
(1,null)"))
+    checkAnswer(sql("select * from expression_test where number not in 
(null)"), sql("select * from expression_test_hive where number not in (null)"))
+    checkAnswer(sql("select * from expression_test where number not in 
(1,null)"), sql("select * from expression_test_hive where number not in 
(1,null)"))
     checkAnswer(sql("select * from expression where number not in (1,null)"), 
sql("select * from expression_hive where number not in (1,null)"))
     checkAnswer(sql("select * from expression where id not in (3)"), 
sql("select * from expression_hive where id not in (3)"))
     checkAnswer(sql("select * from expression where id not in ('2')"), 
sql("select * from expression_hive where id not in ('2')"))
@@ -74,7 +80,12 @@ class ExpressionWithNullTestCase extends QueryTest with 
BeforeAndAfterAll {
     checkAnswer(sql("select * from expression_test where id not in (3)"), 
sql("select * from expression_test_hive where id not in (3)"))
     checkAnswer(sql("select * from expression_test where id not in ('2')"), 
sql("select * from expression_test_hive where id not in ('2')"))
     checkAnswer(sql("select * from expression_test where id not in (cast('2' 
as int))"), sql("select * from expression_test_hive where id not in (cast('2' 
as int))"))
-//    checkAnswer(sql("select * from expression_test where id not in 
(cast('null' as int))"), sql("select * from expression_test_hive where id not 
in (cast('null' as int))"))
+    checkAnswer(sql("select * from expression_test where id not in 
(cast('null' as int))"), sql("select * from expression_test_hive where id not 
in (cast('null' as int))"))
+    checkAnswer(sql("select * from expression_test where id not in 
(1,2,NULL)"), sql("select * from expression_test_hive where id not in 
(1,2,NULL)"))
+    checkAnswer(sql("select * from expression_test where id not in (NULL)"), 
sql("select * from expression_test_hive where id not in (NULL)"))
+    checkAnswer(sql("select * from expression_test where number not in (2, 
null)"), sql("select * from expression_test_hive where number not in (2, 
null)"))
+    checkAnswer(sql("select * from expression_test where number not in 
(cast('2' as int), cast('null' as int))"), sql("select * from 
expression_test_hive where number not in (cast('2' as int), cast('null' as 
int))"))
+
   }
 
   test("test to check equals expression with null values") {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/082a3559/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala
index 5027a66..341b513 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala
@@ -80,13 +80,22 @@ object CarbonFilters {
           Some(new LessThanEqualToExpression(getCarbonExpression(name),
             getCarbonLiteralExpression(name, value)))
         case sources.In(name, values) =>
-          Some(new InExpression(getCarbonExpression(name),
-            new ListExpression(
-              convertToJavaList(values.map(f => 
getCarbonLiteralExpression(name, f)).toList))))
+          if (values.length == 1 && values(0) == null) {
+            Some(new FalseExpression(getCarbonExpression(name)))
+          } else {
+            Some(new InExpression(getCarbonExpression(name),
+              new ListExpression(
+                convertToJavaList(values.filterNot(_ == null)
+                  .map(filterValues => getCarbonLiteralExpression(name, 
filterValues)).toList))))
+          }
         case sources.Not(sources.In(name, values)) =>
-          Some(new NotInExpression(getCarbonExpression(name),
-            new ListExpression(
-              convertToJavaList(values.map(f => 
getCarbonLiteralExpression(name, f)).toList))))
+          if (values.contains(null)) {
+            Some(new FalseExpression(getCarbonExpression(name)))
+          } else {
+            Some(new NotInExpression(getCarbonExpression(name),
+              new ListExpression(
+                convertToJavaList(values.map(f => 
getCarbonLiteralExpression(name, f)).toList))))
+          }
         case sources.IsNull(name) =>
           Some(new EqualToExpression(getCarbonExpression(name),
             getCarbonLiteralExpression(name, null), true))

http://git-wip-us.apache.org/repos/asf/carbondata/blob/082a3559/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala
 
b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala
index 6149ac9..cdcf018 100644
--- 
a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala
+++ 
b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala
@@ -159,10 +159,10 @@ class BooleanDataTypesFilterTest extends QueryTest with 
BeforeAndAfterEach with
       Row(4))
 
     checkAnswer(sql("select * from carbon_table where booleanField not in 
(null)"),
-      Seq(Row(true), Row(true), Row(true), Row(true), Row(false), Row(false), 
Row(false), Row(false)))
+      Seq.empty)
 
     checkAnswer(sql("select count(*) from carbon_table where booleanField not 
in (null)"),
-      Row(8))
+      Row(0))
 
     checkAnswer(sql("select count(*) from carbon_table where booleanField not 
in (true,false)"),
       Row(0))

Reply via email to