Github user dbtsai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21416#discussion_r191317978
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala ---
    @@ -392,9 +396,97 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
     
         val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
     
    -    intercept[AnalysisException] {
    +    val e = intercept[AnalysisException] {
           df2.filter($"a".isin($"b"))
         }
    +    Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were")
    +      .foreach { s =>
    +        assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
    +      }
    +  }
    +
    +  test("isInCollection: Scala Collection") {
    +    val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
    +    checkAnswer(df.filter($"a".isInCollection(Seq(1, 2))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 2))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 1))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +
    +    // Auto casting should work with mixture of different types in collections
    +    checkAnswer(df.filter($"a".isInCollection(Seq(1.toShort, "2"))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq("3", 2.toLong))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, "1"))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +
    +    checkAnswer(df.filter($"b".isInCollection(Seq("y", "x"))),
    +      df.collect().toSeq.filter(r => r.getString(1) == "y" || r.getString(1) == "x"))
    +    checkAnswer(df.filter($"b".isInCollection(Seq("z", "x"))),
    +      df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "x"))
    +    checkAnswer(df.filter($"b".isInCollection(Seq("z", "y"))),
    +      df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "y"))
    +
    +    // Test with different types of collections
    +    checkAnswer(df.filter($"a".isInCollection(Seq(1, 2).toSet)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 2).toArray)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 1).toList)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +
    +    val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
    +
    +    val e = intercept[AnalysisException] {
    +      df2.filter($"a".isInCollection(Seq($"b")))
    +    }
    +    Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were")
    +      .foreach { s =>
    +        assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
    +      }
    +  }
    +
    +  test("isInCollection: Java Collection") {
    +    val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
    --- End diff --
    
    Done.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to