This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 4de4ed10ebd5 [SPARK-48935][SQL][TESTS] Make `checkEvaluation` directly 
check the `Collation` expression itself in UT
4de4ed10ebd5 is described below

commit 4de4ed10ebd5ddc949d4d29fe77f1834e1447794
Author: panbingkun <[email protected]>
AuthorDate: Wed Jul 24 14:44:11 2024 +0800

    [SPARK-48935][SQL][TESTS] Make `checkEvaluation` directly check the 
`Collation` expression itself in UT
    
    ### What changes were proposed in this pull request?
    This PR aims to:
    - make `checkEvaluation` directly check the `Collation` expression itself 
in UT, rather than `Collation(...).replacement`.
    - fix a missed check in UT (a comparison that was not wrapped in `assert`).
    
    ### Why are the changes needed?
    When checking a `RuntimeReplaceable` expression in UT, there is no need 
to write `checkEvaluation(Collation(Literal("abc")).replacement, 
"UTF8_BINARY")`, because `checkEvaluation` already applies a similar 
replacement internally.
    
https://github.com/apache/spark/blob/1a428c1606645057ef94ac8a6cadbb947b9208a6/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala#L75
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    - Update existing UT.
    - Pass GA.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #47401 from panbingkun/SPARK-48935.
    
    Authored-by: panbingkun <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../expressions/CollationExpressionSuite.scala     |  12 +-
 .../CollationRegexpExpressionsSuite.scala          | 121 +++++++++++++--------
 2 files changed, 84 insertions(+), 49 deletions(-)

diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala
index a4651c6c4c7e..175dd05d5911 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala
@@ -28,14 +28,14 @@ class CollationExpressionSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     assert(collationId == 0)
     val collateExpr = Collate(Literal("abc"), "UTF8_BINARY")
     assert(collateExpr.dataType === StringType(collationId))
-    collateExpr.dataType.asInstanceOf[StringType].collationId == 0
+    assert(collateExpr.dataType.asInstanceOf[StringType].collationId == 0)
     checkEvaluation(collateExpr, "abc")
   }
 
   test("collate against literal") {
     val collateExpr = Collate(Literal("abc"), "UTF8_LCASE")
     val collationId = CollationFactory.collationNameToId("UTF8_LCASE")
-    assert(collateExpr.dataType == StringType(collationId))
+    assert(collateExpr.dataType === StringType(collationId))
     checkEvaluation(collateExpr, "abc")
   }
 
@@ -67,16 +67,16 @@ class CollationExpressionSuite extends SparkFunSuite with 
ExpressionEvalHelper {
   }
 
   test("collation on non-explicit default collation") {
-    checkEvaluation(Collation(Literal("abc")).replacement, "UTF8_BINARY")
+    checkEvaluation(Collation(Literal("abc")), "UTF8_BINARY")
   }
 
   test("collation on explicitly collated string") {
     checkEvaluation(
       Collation(Literal.create("abc",
-        StringType(CollationFactory.UTF8_LCASE_COLLATION_ID))).replacement,
+        StringType(CollationFactory.UTF8_LCASE_COLLATION_ID))),
       "UTF8_LCASE")
     checkEvaluation(
-      Collation(Collate(Literal("abc"), "UTF8_LCASE")).replacement,
+      Collation(Collate(Literal("abc"), "UTF8_LCASE")),
       "UTF8_LCASE")
   }
 
@@ -212,7 +212,7 @@ class CollationExpressionSuite extends SparkFunSuite with 
ExpressionEvalHelper {
       ("sR_cYRl_sRb", "sr_Cyrl_SRB")
     ).foreach {
       case (collation, normalized) =>
-        checkEvaluation(Collation(Literal.create("abc", 
StringType(collation))).replacement,
+        checkEvaluation(Collation(Literal.create("abc", 
StringType(collation))),
           normalized)
     }
   }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala
index 6f0d0c13b32a..2c1244eec365 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.util.CollationFactory
 import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
 
 class CollationRegexpExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 
@@ -47,7 +48,7 @@ class CollationRegexpExpressionsSuite extends SparkFunSuite 
with ExpressionEvalH
       // ILike
       checkEvaluation(ILike(
         Literal.create(t.l, 
StringType(CollationFactory.collationNameToId(t.collation))),
-        Literal.create(t.regexLike, StringType), '\\').replacement, 
t.expectedILike)
+        Literal.create(t.regexLike, StringType), '\\'), t.expectedILike)
       // RLike
       checkEvaluation(RLike(
         Literal.create(t.l, 
StringType(CollationFactory.collationNameToId(t.collation))),
@@ -106,7 +107,7 @@ class CollationRegexpExpressionsSuite extends SparkFunSuite 
with ExpressionEvalH
       // RegExpCount
       checkEvaluation(RegExpCount(
         Literal.create(t.l, 
StringType(CollationFactory.collationNameToId(t.collation))),
-        Literal.create(t.r, StringType)).replacement, t.expectedCount)
+        Literal.create(t.r, StringType)), t.expectedCount)
       // RegExpInStr
       def expectedInStr(count: Any): Any = count match {
         case null => null
@@ -120,50 +121,84 @@ class CollationRegexpExpressionsSuite extends 
SparkFunSuite with ExpressionEvalH
   }
 
   test("MultiLikeBase regexp expressions with collated strings") {
-    val nullStr = Literal.create(null, StringType)
-    // Supported collations (StringTypeBinaryLcase)
-    val binaryCollation = 
StringType(CollationFactory.collationNameToId("UTF8_BINARY"))
-    val lowercaseCollation = 
StringType(CollationFactory.collationNameToId("UTF8_LCASE"))
     // LikeAll
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", 
"%oo"), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", 
"%bar%"), false)
-    checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAll("%foo%", 
"%oo"), true)
-    checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAll("%foo%", 
"%bar%"), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", 
"%oo"), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", 
"%bar%"), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", 
nullStr), null)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%feo%", 
nullStr), false)
-    checkEvaluation(Literal.create(null, binaryCollation).likeAll("%foo%", 
"%oo"), null)
+    case class LikeAllTestCase[R](l: String, p1: String, p2: String, 
collation: String,
+      expectedLikeAll: R)
+    val likeAllTestCases = Seq(
+      LikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", true),
+      LikeAllTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", false),
+      LikeAllTestCase("Foo", "%foo%", "%oo", "UTF8_LCASE", true),
+      LikeAllTestCase("Foo", "%foo%", "%bar%", "UTF8_LCASE", false),
+      LikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", true),
+      LikeAllTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", false),
+      LikeAllTestCase("foo", "%foo%", null, "UTF8_BINARY", null),
+      LikeAllTestCase("foo", "%feo%", null, "UTF8_BINARY", false),
+      LikeAllTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
+    )
+    likeAllTestCases.foreach(t => {
+      checkEvaluation(LikeAll(
+        Literal.create(t.l, 
StringType(CollationFactory.collationNameToId(t.collation))),
+          Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))), 
t.expectedLikeAll)
+    })
+
     // NotLikeAll
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", 
"%oo"), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%goo%", 
"%bar%"), true)
-    checkEvaluation(Literal.create("Foo", 
lowercaseCollation).notLikeAll("%foo%", "%oo"), false)
-    checkEvaluation(Literal.create("Foo", 
lowercaseCollation).notLikeAll("%goo%", "%bar%"), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", 
"%oo"), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%goo%", 
"%bar%"), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", 
nullStr), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%feo%", 
nullStr), null)
-    checkEvaluation(Literal.create(null, binaryCollation).notLikeAll("%foo%", 
"%oo"), null)
+    case class NotLikeAllTestCase[R](l: String, p1: String, p2: String, 
collation: String,
+      expectedNotLikeAll: R)
+    val notLikeAllTestCases = Seq(
+      NotLikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", false),
+      NotLikeAllTestCase("foo", "%goo%", "%bar%", "UTF8_BINARY", true),
+      NotLikeAllTestCase("Foo", "%foo%", "%oo", "UTF8_LCASE", false),
+      NotLikeAllTestCase("Foo", "%goo%", "%bar%", "UTF8_LCASE", true),
+      NotLikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", false),
+      NotLikeAllTestCase("foo", "%goo%", "%bar%", "UTF8_BINARY", true),
+      NotLikeAllTestCase("foo", "%foo%", null, "UTF8_BINARY", false),
+      NotLikeAllTestCase("foo", "%feo%", null, "UTF8_BINARY", null),
+      NotLikeAllTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
+    )
+    notLikeAllTestCases.foreach(t => {
+      checkEvaluation(NotLikeAll(
+        Literal.create(t.l, 
StringType(CollationFactory.collationNameToId(t.collation))),
+        Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))), 
t.expectedNotLikeAll)
+    })
+
     // LikeAny
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%goo%", 
"%hoo"), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", 
"%bar%"), true)
-    checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAny("%goo%", 
"%hoo"), false)
-    checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAny("%foo%", 
"%bar%"), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%goo%", 
"%hoo"), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", 
"%bar%"), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", 
nullStr), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%feo%", 
nullStr), null)
-    checkEvaluation(Literal.create(null, binaryCollation).likeAny("%foo%", 
"%oo"), null)
+    case class LikeAnyTestCase[R](l: String, p1: String, p2: String, 
collation: String,
+      expectedLikeAny: R)
+    val likeAnyTestCases = Seq(
+      LikeAnyTestCase("foo", "%goo%", "%hoo", "UTF8_BINARY", false),
+      LikeAnyTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", true),
+      LikeAnyTestCase("Foo", "%goo%", "%hoo", "UTF8_LCASE", false),
+      LikeAnyTestCase("Foo", "%foo%", "%bar%", "UTF8_LCASE", true),
+      LikeAnyTestCase("foo", "%goo%", "%hoo", "UTF8_BINARY", false),
+      LikeAnyTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", true),
+      LikeAnyTestCase("foo", "%foo%", null, "UTF8_BINARY", true),
+      LikeAnyTestCase("foo", "%feo%", null, "UTF8_BINARY", null),
+      LikeAnyTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
+    )
+    likeAnyTestCases.foreach(t => {
+      checkEvaluation(LikeAny(
+        Literal.create(t.l, 
StringType(CollationFactory.collationNameToId(t.collation))),
+        Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))), 
t.expectedLikeAny)
+    })
+
     // NotLikeAny
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", 
"%hoo"), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", 
"%oo%"), false)
-    checkEvaluation(Literal.create("Foo", 
lowercaseCollation).notLikeAny("%Foo%", "%hoo"), true)
-    checkEvaluation(Literal.create("Foo", 
lowercaseCollation).notLikeAny("%foo%", "%oo%"), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%Foo%", 
"%hoo"), true)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", 
"%oo%"), false)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", 
nullStr), null)
-    checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%feo%", 
nullStr), true)
-    checkEvaluation(Literal.create(null, binaryCollation).notLikeAny("%foo%", 
"%oo"), null)
+    case class NotLikeAnyTestCase[R](l: String, p1: String, p2: String, 
collation: String,
+      expectedNotLikeAny: R)
+    val notLikeAnyTestCases = Seq(
+      NotLikeAnyTestCase("foo", "%foo%", "%hoo", "UTF8_BINARY", true),
+      NotLikeAnyTestCase("foo", "%foo%", "%oo%", "UTF8_BINARY", false),
+      NotLikeAnyTestCase("Foo", "%Foo%", "%hoo", "UTF8_LCASE", true),
+      NotLikeAnyTestCase("Foo", "%foo%", "%oo%", "UTF8_LCASE", false),
+      NotLikeAnyTestCase("foo", "%Foo%", "%hoo", "UTF8_BINARY", true),
+      NotLikeAnyTestCase("foo", "%foo%", "%oo%", "UTF8_BINARY", false),
+      NotLikeAnyTestCase("foo", "%foo%", null, "UTF8_BINARY", null),
+      NotLikeAnyTestCase("foo", "%feo%", null, "UTF8_BINARY", true),
+      NotLikeAnyTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
+    )
+    notLikeAnyTestCases.foreach(t => {
+      checkEvaluation(NotLikeAny(
+        Literal.create(t.l, 
StringType(CollationFactory.collationNameToId(t.collation))),
+        Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))), 
t.expectedNotLikeAny)
+    })
   }
-
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to