GitHub user dbtsai commented on a diff in the pull request:
https://github.com/apache/spark/pull/21416#discussion_r190470525
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala ---
@@ -397,6 +399,68 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
}
}
+ test("isinSet: Scala Set") {
+ val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
+ checkAnswer(df.filter($"a".isinSet(Set(1, 2))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isinSet(Set(3, 2))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isinSet(Set(3, 1))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
+
+ // Auto casting should work with mixture of different types in Set
+ checkAnswer(df.filter($"a".isinSet(Set(1.toShort, "2"))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isinSet(Set("3", 2.toLong))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".isinSet(Set(3, "1"))),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
+
+ checkAnswer(df.filter($"b".isinSet(Set("y", "x"))),
+ df.collect().toSeq.filter(r => r.getString(1) == "y" ||
r.getString(1) == "x"))
+ checkAnswer(df.filter($"b".isinSet(Set("z", "x"))),
+ df.collect().toSeq.filter(r => r.getString(1) == "z" ||
r.getString(1) == "x"))
+ checkAnswer(df.filter($"b".isinSet(Set("z", "y"))),
+ df.collect().toSeq.filter(r => r.getString(1) == "z" ||
r.getString(1) == "y"))
+
+ val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
+
+ intercept[AnalysisException] {
+ df2.filter($"a".isinSet(Set($"b")))
+ }
--- End diff --
Addressed
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]