GitHub user holdenk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21416#discussion_r191488193
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala ---
    @@ -390,11 +394,67 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
         checkAnswer(df.filter($"b".isin("z", "y")),
           df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "y"))
     
    +    // Auto casting should work with mixture of different types in collections
    +    checkAnswer(df.filter($"a".isin(1.toShort, "2")),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isin("3", 2.toLong)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isin(3, "1")),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +
         val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
     
    -    intercept[AnalysisException] {
    +    val e = intercept[AnalysisException] {
           df2.filter($"a".isin($"b"))
         }
    +    Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were")
    +      .foreach { s =>
    +        assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
    +      }
    +  }
    +
    +  test("isInCollection: Scala Collection") {
    +    val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
    +    // Test with different types of collections
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 1))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(1, 2).toSet)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 2).toArray)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 1).toList)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +
    +    val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
    +
    +    val e = intercept[AnalysisException] {
    +      df2.filter($"a".isInCollection(Seq($"b")))
    +    }
    +    Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were")
    +      .foreach { s =>
    +        assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
    +      }
    +  }
    +
    +  test("isInCollection: Java Collection") {
    --- End diff --
    
    As noted above, it might make sense to write this test in Java, but it's your call.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to