[CARBONDATA-1703] Fix null handling in "IN" and "NOT IN" expressions
This closes #1650 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/082a3559 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/082a3559 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/082a3559 Branch: refs/heads/fgdatamap Commit: 082a3559aa4d36b1263234e290ea77e5eead8db2 Parents: 62ce5b5 Author: Geetika Gupta <[email protected]> Authored: Wed Dec 13 12:03:22 2017 +0530 Committer: Venkata Ramana G <[email protected]> Committed: Tue Jan 9 19:48:03 2018 +0530 ---------------------------------------------------------------------- .../ExpressionWithNullTestCase.scala | 21 +++++++++++++++----- .../spark/sql/optimizer/CarbonFilters.scala | 21 ++++++++++++++------ .../BooleanDataTypesFilterTest.scala | 4 ++-- 3 files changed, 33 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/082a3559/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala index 2024018..a2d124d 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/ExpressionWithNullTestCase.scala @@ -48,9 +48,9 @@ class ExpressionWithNullTestCase extends QueryTest with BeforeAndAfterAll { checkAnswer(sql("select * from expression_test where id in (1,2,'', NULL, ' ')"), sql("select * from expression_test_hive where id in (1,2,' ', NULL, ' ')")) checkAnswer(sql("select * from expression_test where id in (1,2,'')"), sql("select * from expression_test_hive where id in (1,2,'')")) checkAnswer(sql("select * from expression_test where id in ('')"), sql("select * from expression_test_hive where id in ('')")) -// checkAnswer(sql("select * from expression_test where number in (null)"), sql("select * from expression_test_hive where number in (null)")) + checkAnswer(sql("select * from expression_test where number in (NULL)"), sql("select * from expression_test_hive where number in (null)")) checkAnswer(sql("select * from expression_test where number in (2)"), sql("select * from expression_test_hive where number in (2)")) -// checkAnswer(sql("select * from expression_test where number in (1,null)"), sql("select * from expression_test_hive where number in (1,null)")) + checkAnswer(sql("select * from expression_test where number in (1,null)"), sql("select * from expression_test_hive where number in (1,null)")) checkAnswer(sql("select * from expression where number in (1,null)"), sql("select * from expression_hive where number in (1,null)")) checkAnswer(sql("select * from expression where id in (3)"), sql("select * from expression_hive where id in (3)")) checkAnswer(sql("select * from expression where id in ('2')"), sql("select * from expression_hive where id in ('2')")) @@ -59,14 +59,20 @@ class ExpressionWithNullTestCase extends QueryTest with BeforeAndAfterAll { checkAnswer(sql("select * from expression_test where id in ('2')"), sql("select * from expression_test_hive where id in ('2')")) checkAnswer(sql("select * from expression_test where id in (cast('2' as int))"), sql("select * from expression_test_hive where id in (cast('2' as int))")) checkAnswer(sql("select * from expression_test where id in (cast('null' as int))"), sql("select * from expression_test_hive where id in (cast('null' as int))")) + checkAnswer(sql("select * from expression_test where id in (1,2,NULL)"), sql("select * from expression_test_hive where id in (1,2,NULL)")) + checkAnswer(sql("select * from expression_test where id in (NULL)"), sql("select * from expression_test_hive where id in (NULL)")) + checkAnswer(sql("select * from expression_test where number in (cast('null' as int))"), sql("select * from expression_test_hive where number in (cast('null' as int))")) + checkAnswer(sql("select * from expression_test where number in (1,2, cast('NULL' as int), cast('3' as int))"), sql("select * from expression_test_hive where number in (1,2, cast('NULL' as int),cast('3' as int))")) + checkAnswer(sql("select * from expression_test where cast(number as int) IN(1,null)"), sql("select * from expression_test_hive where cast(number as int) IN(1,null)")) + } test("test to check not in expression with null values") { checkAnswer(sql("select * from expression_test where id not in (1,2,'', NULL, ' ')"), sql("select * from expression_test_hive where id not in (1,2,' ', NULL, ' ')")) checkAnswer(sql("select * from expression_test where id not in (1,2,'')"), sql("select * from expression_test_hive where id not in (1,2,'')")) checkAnswer(sql("select * from expression_test where id not in ('')"), sql("select * from expression_test_hive where id not in ('')")) -// checkAnswer(sql("select * from expression_test where number not in (null)"), sql("select * from expression_test_hive where number not in (null)")) -// checkAnswer(sql("select * from expression_test where number not in (1,null)"), sql("select * from expression_test_hive where number not in (1,null)")) + checkAnswer(sql("select * from expression_test where number not in (null)"), sql("select * from expression_test_hive where number not in (null)")) + checkAnswer(sql("select * from expression_test where number not in (1,null)"), sql("select * from expression_test_hive where number not in (1,null)")) checkAnswer(sql("select * from expression where number not in (1,null)"), sql("select * from expression_hive where number not in (1,null)")) checkAnswer(sql("select * from expression where id not in (3)"), sql("select * from expression_hive where id not in (3)")) checkAnswer(sql("select * from expression where id not in ('2')"), sql("select * from expression_hive where id not in ('2')")) @@ -74,7 +80,12 @@ class ExpressionWithNullTestCase extends QueryTest with BeforeAndAfterAll { checkAnswer(sql("select * from expression_test where id not in (3)"), sql("select * from expression_test_hive where id not in (3)")) checkAnswer(sql("select * from expression_test where id not in ('2')"), sql("select * from expression_test_hive where id not in ('2')")) checkAnswer(sql("select * from expression_test where id not in (cast('2' as int))"), sql("select * from expression_test_hive where id not in (cast('2' as int))")) -// checkAnswer(sql("select * from expression_test where id not in (cast('null' as int))"), sql("select * from expression_test_hive where id not in (cast('null' as int))")) + checkAnswer(sql("select * from expression_test where id not in (cast('null' as int))"), sql("select * from expression_test_hive where id not in (cast('null' as int))")) + checkAnswer(sql("select * from expression_test where id not in (1,2,NULL)"), sql("select * from expression_test_hive where id not in (1,2,NULL)")) + checkAnswer(sql("select * from expression_test where id not in (NULL)"), sql("select * from expression_test_hive where id not in (NULL)")) + checkAnswer(sql("select * from expression_test where number not in (2, null)"), sql("select * from expression_test_hive where number not in (2, null)")) + checkAnswer(sql("select * from expression_test where number not in (cast('2' as int), cast('null' as int))"), sql("select * from expression_test_hive where number not in (cast('2' as int), cast('null' as int))")) + } test("test to check equals expression with null values") { http://git-wip-us.apache.org/repos/asf/carbondata/blob/082a3559/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala index 5027a66..341b513 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala @@ -80,13 +80,22 @@ object CarbonFilters { Some(new LessThanEqualToExpression(getCarbonExpression(name), getCarbonLiteralExpression(name, value))) case sources.In(name, values) => - Some(new InExpression(getCarbonExpression(name), - new ListExpression( - convertToJavaList(values.map(f => getCarbonLiteralExpression(name, f)).toList)))) + if (values.length == 1 && values(0) == null) { + Some(new FalseExpression(getCarbonExpression(name))) + } else { + Some(new InExpression(getCarbonExpression(name), + new ListExpression( + convertToJavaList(values.filterNot(_ == null) + .map(filterValues => getCarbonLiteralExpression(name, filterValues)).toList)))) + } case sources.Not(sources.In(name, values)) => - Some(new NotInExpression(getCarbonExpression(name), - new ListExpression( - convertToJavaList(values.map(f => getCarbonLiteralExpression(name, f)).toList)))) + if (values.contains(null)) { + Some(new FalseExpression(getCarbonExpression(name))) + } else { + Some(new NotInExpression(getCarbonExpression(name), + new ListExpression( + convertToJavaList(values.map(f => getCarbonLiteralExpression(name, f)).toList)))) + } case sources.IsNull(name) => Some(new EqualToExpression(getCarbonExpression(name), getCarbonLiteralExpression(name, null), true)) http://git-wip-us.apache.org/repos/asf/carbondata/blob/082a3559/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala index 6149ac9..cdcf018 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesFilterTest.scala @@ -159,10 +159,10 @@ class BooleanDataTypesFilterTest extends QueryTest with BeforeAndAfterEach with Row(4)) checkAnswer(sql("select * from carbon_table where booleanField not in (null)"), - Seq(Row(true), Row(true), Row(true), Row(true), Row(false), Row(false), Row(false), Row(false))) + Seq.empty) checkAnswer(sql("select count(*) from carbon_table where booleanField not in (null)"), - Row(8)) + Row(0)) checkAnswer(sql("select count(*) from carbon_table where booleanField not in (true,false)"), Row(0))
