Github user dbtsai commented on a diff in the pull request:
https://github.com/apache/spark/pull/21416#discussion_r191317978
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala ---
@@ -392,9 +396,97 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
- intercept[AnalysisException] {
+ val e = intercept[AnalysisException] {
df2.filter($"a".isin($"b"))
}
+ Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were")
+ .foreach { s =>
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
+ }
+ }
+
+ test("isInCollection: Scala Collection") {
+ val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
+ checkAnswer(df.filter($"a".isInCollection(Seq(1, 2))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isInCollection(Seq(3, 2))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isInCollection(Seq(3, 1))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
+
+ // Auto casting should work with mixture of different types in collections
+ checkAnswer(df.filter($"a".isInCollection(Seq(1.toShort, "2"))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isInCollection(Seq("3", 2.toLong))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isInCollection(Seq(3, "1"))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
+
+ checkAnswer(df.filter($"b".isInCollection(Seq("y", "x"))),
+ df.collect().toSeq.filter(r => r.getString(1) == "y" || r.getString(1) == "x"))
+ checkAnswer(df.filter($"b".isInCollection(Seq("z", "x"))),
+ df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "x"))
+ checkAnswer(df.filter($"b".isInCollection(Seq("z", "y"))),
+ df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "y"))
+
+ // Test with different types of collections
+ checkAnswer(df.filter($"a".isInCollection(Seq(1, 2).toSet)),
+ df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isInCollection(Seq(3, 2).toArray)),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isInCollection(Seq(3, 1).toList)),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
+
+ val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
+
+ val e = intercept[AnalysisException] {
+ df2.filter($"a".isInCollection(Seq($"b")))
+ }
+ Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were")
+ .foreach { s =>
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
+ }
+ }
+
+ test("isInCollection: Java Collection") {
+ val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
--- End diff --
Done.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]