This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9164865  [SPARK-31553][SQL][TESTS][FOLLOWUP] Tests for collection elem 
types of `isInCollection`
9164865 is described below

commit 91648654da259c63178f3fb3f94e3e62e1ef1e45
Author: Max Gekk <[email protected]>
AuthorDate: Thu Apr 30 03:20:10 2020 +0000

    [SPARK-31553][SQL][TESTS][FOLLOWUP] Tests for collection elem types of 
`isInCollection`
    
    ### What changes were proposed in this pull request?
    - Add tests for different element types of collections that can be passed 
to `isInCollection`. The tests cover the types that pass the check in 
`In.checkInputDataTypes()`.
    - Test different switch thresholds in the `isInCollection: Scala 
Collection` test.
    
    ### Why are the changes needed?
    To prevent regressions like the one introduced by 
https://github.com/apache/spark/pull/25754 and reverted by 
https://github.com/apache/spark/pull/28388
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    By existing and new tests in `ColumnExpressionSuite`
    
    Closes #28405 from MaxGekk/test-isInCollection.
    
    Authored-by: Max Gekk <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../apache/spark/sql/ColumnExpressionSuite.scala   | 93 +++++++++++++++++-----
 1 file changed, 75 insertions(+), 18 deletions(-)

diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 8d3b562..4bf19532 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql
 
-import java.sql.Date
+import java.sql.{Date, Timestamp}
 import java.util.Locale
 
 import scala.collection.JavaConverters._
@@ -454,26 +454,83 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSparkSession {
   }
 
   test("isInCollection: Scala Collection") {
-    val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
-    // Test with different types of collections
-    checkAnswer(df.filter($"a".isInCollection(Seq(3, 1))),
-      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
-    checkAnswer(df.filter($"a".isInCollection(Seq(1, 2).toSet)),
-      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
-    checkAnswer(df.filter($"a".isInCollection(Seq(3, 2).toArray)),
-      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
-    checkAnswer(df.filter($"a".isInCollection(Seq(3, 1).toList)),
-      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
+    Seq(0, 1, 10).foreach { optThreshold =>
+      Seq(0, 1, 10).foreach { switchThreshold =>
+        withSQLConf(
+          SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> 
optThreshold.toString,
+          SQLConf.OPTIMIZER_INSET_SWITCH_THRESHOLD.key -> 
switchThreshold.toString) {
+          val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
+          // Test with different types of collections
+          checkAnswer(df.filter($"a".isInCollection(Seq(3, 1))),
+            df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 
1))
+          checkAnswer(df.filter($"a".isInCollection(Seq(1, 2).toSet)),
+            df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 
2))
+          checkAnswer(df.filter($"a".isInCollection(Seq(3, 2).toArray)),
+            df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 
2))
+          checkAnswer(df.filter($"a".isInCollection(Seq(3, 1).toList)),
+            df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 
1))
+
+          val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
 
-    val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
-
-    val e = intercept[AnalysisException] {
-      df2.filter($"a".isInCollection(Seq($"b")))
+          val e = intercept[AnalysisException] {
+            df2.filter($"a".isInCollection(Seq($"b")))
+          }
+          Seq("cannot resolve", "due to data type mismatch: Arguments must be 
same type but were")
+            .foreach { s =>
+              
assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
+            }
+        }
+      }
     }
-    Seq("cannot resolve", "due to data type mismatch: Arguments must be same 
type but were")
-      .foreach { s =>
-        
assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
+  }
+
+  test("SPARK-31553: isInCollection - collection element types") {
+    val expected = Seq(Row(true), Row(false))
+    Seq(0, 1, 10).foreach { optThreshold =>
+      Seq(0, 1, 10).foreach { switchThreshold =>
+        withSQLConf(
+          SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> 
optThreshold.toString,
+          SQLConf.OPTIMIZER_INSET_SWITCH_THRESHOLD.key -> 
switchThreshold.toString) {
+          checkAnswer(Seq(0).toDS.select($"value".isInCollection(Seq(null))), 
Seq(Row(null)))
+          checkAnswer(
+            Seq(true).toDS.select($"value".isInCollection(Seq(true, false))),
+            Seq(Row(true)))
+          checkAnswer(
+            Seq(0.toByte, 
1.toByte).toDS.select($"value".isInCollection(Seq(0.toByte, 2.toByte))),
+            expected)
+          checkAnswer(
+            Seq(0.toShort, 1.toShort).toDS
+              .select($"value".isInCollection(Seq(0.toShort, 2.toShort))),
+            expected)
+          checkAnswer(Seq(0, 1).toDS.select($"value".isInCollection(Seq(0, 
2))), expected)
+          checkAnswer(Seq(0L, 1L).toDS.select($"value".isInCollection(Seq(0L, 
2L))), expected)
+          checkAnswer(Seq(0.0f, 1.0f).toDS
+            .select($"value".isInCollection(Seq(0.0f, 2.0f))), expected)
+          checkAnswer(Seq(0.0D, 1.0D).toDS
+            .select($"value".isInCollection(Seq(0.0D, 2.0D))), expected)
+          checkAnswer(
+            Seq(BigDecimal(0), BigDecimal(2)).toDS
+              .select($"value".isInCollection(Seq(BigDecimal(0), 
BigDecimal(1)))),
+            expected)
+          checkAnswer(
+            Seq("abc", "def").toDS.select($"value".isInCollection(Seq("abc", 
"xyz"))),
+            expected)
+          checkAnswer(
+            Seq(Date.valueOf("2020-04-29"), Date.valueOf("2020-05-01")).toDS
+              .select($"value".isInCollection(
+                Seq(Date.valueOf("2020-04-29"), Date.valueOf("2020-04-30")))),
+            expected)
+          checkAnswer(
+            Seq(new Timestamp(0), new Timestamp(2)).toDS
+              .select($"value".isInCollection(Seq(new Timestamp(0), new 
Timestamp(1)))),
+            expected)
+          checkAnswer(
+            Seq(Array("a", "b"), Array("c", "d")).toDS
+              .select($"value".isInCollection(Seq(Array("a", "b"), Array("x", 
"z")))),
+            expected)
+        }
       }
+    }
   }
 
   test("&&") {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to