Repository: spark Updated Branches: refs/heads/branch-2.0 f8ce884e6 -> 0caab3ef2
[SPARK-18436][SQL] isin causing SQL syntax error with JDBC ## What changes were proposed in this pull request? The expression `in(empty seq)` is invalid in some data sources. Since `in(empty seq)` is always false, we should convert `in(empty seq)` to a false literal in the optimizer. The SQL `SELECT * FROM t WHERE a IN ()` throws a `ParseException`, which is consistent with Hive, so we don't need to change that behavior. ## How was this patch tested? Added a new test case in `OptimizeInSuite`. Author: jiangxingbo <[email protected]> Closes #15977 from jiangxb1987/isin-empty. (cherry picked from commit e2fb9fd365466da888ab8b3a2a0836049a65f8c8) Signed-off-by: Herman van Hovell <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0caab3ef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0caab3ef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0caab3ef Branch: refs/heads/branch-2.0 Commit: 0caab3ef2c532432297e38345d50b07bb5776d77 Parents: f8ce884 Author: jiangxingbo <[email protected]> Authored: Fri Nov 25 12:44:34 2016 -0800 Committer: Herman van Hovell <[email protected]> Committed: Fri Nov 25 12:44:59 2016 -0800 ---------------------------------------------------------------------- .../catalyst/expressions/PredicateSuite.scala | 24 +++++++++++--------- .../execution/datasources/jdbc/JDBCRDD.scala | 2 ++ .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 2 ++ 3 files changed, 17 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/0caab3ef/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index c226b01..7e0191f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -35,7 +35,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test(s"3VL $name") { truthTable.foreach { case (l, r, answer) => - val expr = op(Literal.create(l, BooleanType), Literal.create(r, BooleanType)) + val expr = op(NonFoldableLiteral(l, BooleanType), NonFoldableLiteral(r, BooleanType)) checkEvaluation(expr, answer) } } @@ -72,7 +72,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (false, true) :: (null, null) :: Nil notTrueTable.foreach { case (v, answer) => - checkEvaluation(Not(Literal.create(v, BooleanType)), answer) + checkEvaluation(Not(NonFoldableLiteral(v, BooleanType)), answer) } checkConsistencyBetweenInterpretedAndCodegen(Not, BooleanType) } @@ -120,12 +120,14 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (null, null, null) :: Nil) test("IN") { - checkEvaluation(In(Literal.create(null, IntegerType), Seq(Literal(1), Literal(2))), null) - checkEvaluation(In(Literal.create(null, IntegerType), Seq(Literal.create(null, IntegerType))), - null) - checkEvaluation(In(Literal(1), Seq(Literal.create(null, IntegerType))), null) - checkEvaluation(In(Literal(1), Seq(Literal(1), Literal.create(null, IntegerType))), true) - checkEvaluation(In(Literal(2), Seq(Literal(1), Literal.create(null, IntegerType))), null) + checkEvaluation(In(NonFoldableLiteral(null, IntegerType), Seq(Literal(1), Literal(2))), null) + checkEvaluation(In(NonFoldableLiteral(null, IntegerType), + Seq(NonFoldableLiteral(null, IntegerType))), null) + checkEvaluation(In(NonFoldableLiteral(null, IntegerType), Seq.empty), null) + checkEvaluation(In(Literal(1), Seq.empty), false) + 
checkEvaluation(In(Literal(1), Seq(NonFoldableLiteral(null, IntegerType))), null) + checkEvaluation(In(Literal(1), Seq(Literal(1), NonFoldableLiteral(null, IntegerType))), true) + checkEvaluation(In(Literal(2), Seq(Literal(1), NonFoldableLiteral(null, IntegerType))), null) checkEvaluation(In(Literal(1), Seq(Literal(1), Literal(2))), true) checkEvaluation(In(Literal(2), Seq(Literal(1), Literal(2))), true) checkEvaluation(In(Literal(3), Seq(Literal(1), Literal(2))), false) @@ -133,7 +135,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { And(In(Literal(1), Seq(Literal(1), Literal(2))), In(Literal(2), Seq(Literal(1), Literal(2)))), true) - val ns = Literal.create(null, StringType) + val ns = NonFoldableLiteral(null, StringType) checkEvaluation(In(ns, Seq(Literal("1"), Literal("2"))), null) checkEvaluation(In(ns, Seq(ns)), null) checkEvaluation(In(Literal("a"), Seq(ns)), null) @@ -153,7 +155,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { case _ => value } } - val input = inputData.map(Literal.create(_, t)) + val input = inputData.map(NonFoldableLiteral(_, t)) val expected = if (inputData(0) == null) { null } else if (inputData.slice(1, 10).contains(inputData(0))) { @@ -277,7 +279,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test("BinaryComparison: null test") { // Use -1 (default value for codegen) which can trigger some weird bugs, e.g. 
SPARK-14757 val normalInt = Literal(-1) - val nullInt = Literal.create(null, IntegerType) + val nullInt = NonFoldableLiteral(null, IntegerType) def nullTest(op: (Expression, Expression) => Expression): Unit = { checkEvaluation(op(normalInt, nullInt), null) http://git-wip-us.apache.org/repos/asf/spark/blob/0caab3ef/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 82bc9d7..95058cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -216,6 +216,8 @@ object JDBCRDD extends Logging { case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'" case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'" case StringContains(attr, value) => s"${attr} LIKE '%${value}%'" + case In(attr, value) if value.isEmpty => + s"CASE WHEN ${attr} IS NULL THEN NULL ELSE FALSE END" case In(attr, value) => s"$attr IN (${compileValue(value)})" case Not(f) => compileFilter(f).map(p => s"(NOT ($p))").getOrElse(null) case Or(f1, f2) => http://git-wip-us.apache.org/repos/asf/spark/blob/0caab3ef/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 1a6dba8..50a8dfb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -620,6 +620,8 @@ class JDBCSuite extends SparkFunSuite assert(doCompileFilter(GreaterThan("col0", 3)) === 
"col0 > 3") assert(doCompileFilter(GreaterThanOrEqual("col0", 3)) === "col0 >= 3") assert(doCompileFilter(In("col1", Array("jkl"))) === "col1 IN ('jkl')") + assert(doCompileFilter(In("col1", Array.empty)) === + "CASE WHEN col1 IS NULL THEN NULL ELSE FALSE END") assert(doCompileFilter(Not(In("col1", Array("mno", "pqr")))) === "(NOT (col1 IN ('mno', 'pqr')))") assert(doCompileFilter(IsNull("col1")) === "col1 IS NULL") --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
