This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4de4ed10ebd5 [SPARK-48935][SQL][TESTS] Make `checkEvaluation` directly
check the `Collation` expression itself in UT
4de4ed10ebd5 is described below
commit 4de4ed10ebd5ddc949d4d29fe77f1834e1447794
Author: panbingkun <[email protected]>
AuthorDate: Wed Jul 24 14:44:11 2024 +0800
[SPARK-48935][SQL][TESTS] Make `checkEvaluation` directly check the
`Collation` expression itself in UT
### What changes were proposed in this pull request?
This PR aims to:
- make `checkEvaluation` directly check the `Collation` expression itself
in UT, rather than `Collation(...).replacement`.
- fix a missing `assert` in a UT (a comparison whose result was previously discarded).
### Why are the changes needed?
When checking a `RuntimeReplaceable` expression in a UT, there is no need
to write `checkEvaluation(Collation(Literal("abc")).replacement,
"UTF8_BINARY")`, because `checkEvaluation` already performs an equivalent
replacement internally.
https://github.com/apache/spark/blob/1a428c1606645057ef94ac8a6cadbb947b9208a6/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala#L75
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- Updated existing UTs.
- Passed GA (GitHub Actions).
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47401 from panbingkun/SPARK-48935.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../expressions/CollationExpressionSuite.scala | 12 +-
.../CollationRegexpExpressionsSuite.scala | 121 +++++++++++++--------
2 files changed, 84 insertions(+), 49 deletions(-)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala
index a4651c6c4c7e..175dd05d5911 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala
@@ -28,14 +28,14 @@ class CollationExpressionSuite extends SparkFunSuite with
ExpressionEvalHelper {
assert(collationId == 0)
val collateExpr = Collate(Literal("abc"), "UTF8_BINARY")
assert(collateExpr.dataType === StringType(collationId))
- collateExpr.dataType.asInstanceOf[StringType].collationId == 0
+ assert(collateExpr.dataType.asInstanceOf[StringType].collationId == 0)
checkEvaluation(collateExpr, "abc")
}
test("collate against literal") {
val collateExpr = Collate(Literal("abc"), "UTF8_LCASE")
val collationId = CollationFactory.collationNameToId("UTF8_LCASE")
- assert(collateExpr.dataType == StringType(collationId))
+ assert(collateExpr.dataType === StringType(collationId))
checkEvaluation(collateExpr, "abc")
}
@@ -67,16 +67,16 @@ class CollationExpressionSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
test("collation on non-explicit default collation") {
- checkEvaluation(Collation(Literal("abc")).replacement, "UTF8_BINARY")
+ checkEvaluation(Collation(Literal("abc")), "UTF8_BINARY")
}
test("collation on explicitly collated string") {
checkEvaluation(
Collation(Literal.create("abc",
- StringType(CollationFactory.UTF8_LCASE_COLLATION_ID))).replacement,
+ StringType(CollationFactory.UTF8_LCASE_COLLATION_ID))),
"UTF8_LCASE")
checkEvaluation(
- Collation(Collate(Literal("abc"), "UTF8_LCASE")).replacement,
+ Collation(Collate(Literal("abc"), "UTF8_LCASE")),
"UTF8_LCASE")
}
@@ -212,7 +212,7 @@ class CollationExpressionSuite extends SparkFunSuite with
ExpressionEvalHelper {
("sR_cYRl_sRb", "sr_Cyrl_SRB")
).foreach {
case (collation, normalized) =>
- checkEvaluation(Collation(Literal.create("abc",
StringType(collation))).replacement,
+ checkEvaluation(Collation(Literal.create("abc",
StringType(collation))),
normalized)
}
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala
index 6f0d0c13b32a..2c1244eec365 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
class CollationRegexpExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
@@ -47,7 +48,7 @@ class CollationRegexpExpressionsSuite extends SparkFunSuite
with ExpressionEvalH
// ILike
checkEvaluation(ILike(
Literal.create(t.l,
StringType(CollationFactory.collationNameToId(t.collation))),
- Literal.create(t.regexLike, StringType), '\\').replacement,
t.expectedILike)
+ Literal.create(t.regexLike, StringType), '\\'), t.expectedILike)
// RLike
checkEvaluation(RLike(
Literal.create(t.l,
StringType(CollationFactory.collationNameToId(t.collation))),
@@ -106,7 +107,7 @@ class CollationRegexpExpressionsSuite extends SparkFunSuite
with ExpressionEvalH
// RegExpCount
checkEvaluation(RegExpCount(
Literal.create(t.l,
StringType(CollationFactory.collationNameToId(t.collation))),
- Literal.create(t.r, StringType)).replacement, t.expectedCount)
+ Literal.create(t.r, StringType)), t.expectedCount)
// RegExpInStr
def expectedInStr(count: Any): Any = count match {
case null => null
@@ -120,50 +121,84 @@ class CollationRegexpExpressionsSuite extends
SparkFunSuite with ExpressionEvalH
}
test("MultiLikeBase regexp expressions with collated strings") {
- val nullStr = Literal.create(null, StringType)
- // Supported collations (StringTypeBinaryLcase)
- val binaryCollation =
StringType(CollationFactory.collationNameToId("UTF8_BINARY"))
- val lowercaseCollation =
StringType(CollationFactory.collationNameToId("UTF8_LCASE"))
// LikeAll
- checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%",
"%oo"), true)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%",
"%bar%"), false)
- checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAll("%foo%",
"%oo"), true)
- checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAll("%foo%",
"%bar%"), false)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%",
"%oo"), true)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%",
"%bar%"), false)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%",
nullStr), null)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%feo%",
nullStr), false)
- checkEvaluation(Literal.create(null, binaryCollation).likeAll("%foo%",
"%oo"), null)
+ case class LikeAllTestCase[R](l: String, p1: String, p2: String,
collation: String,
+ expectedLikeAll: R)
+ val likeAllTestCases = Seq(
+ LikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", true),
+ LikeAllTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", false),
+ LikeAllTestCase("Foo", "%foo%", "%oo", "UTF8_LCASE", true),
+ LikeAllTestCase("Foo", "%foo%", "%bar%", "UTF8_LCASE", false),
+ LikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", true),
+ LikeAllTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", false),
+ LikeAllTestCase("foo", "%foo%", null, "UTF8_BINARY", null),
+ LikeAllTestCase("foo", "%feo%", null, "UTF8_BINARY", false),
+ LikeAllTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
+ )
+ likeAllTestCases.foreach(t => {
+ checkEvaluation(LikeAll(
+ Literal.create(t.l,
StringType(CollationFactory.collationNameToId(t.collation))),
+ Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))),
t.expectedLikeAll)
+ })
+
// NotLikeAll
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%",
"%oo"), false)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%goo%",
"%bar%"), true)
- checkEvaluation(Literal.create("Foo",
lowercaseCollation).notLikeAll("%foo%", "%oo"), false)
- checkEvaluation(Literal.create("Foo",
lowercaseCollation).notLikeAll("%goo%", "%bar%"), true)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%",
"%oo"), false)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%goo%",
"%bar%"), true)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%",
nullStr), false)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%feo%",
nullStr), null)
- checkEvaluation(Literal.create(null, binaryCollation).notLikeAll("%foo%",
"%oo"), null)
+ case class NotLikeAllTestCase[R](l: String, p1: String, p2: String,
collation: String,
+ expectedNotLikeAll: R)
+ val notLikeAllTestCases = Seq(
+ NotLikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", false),
+ NotLikeAllTestCase("foo", "%goo%", "%bar%", "UTF8_BINARY", true),
+ NotLikeAllTestCase("Foo", "%foo%", "%oo", "UTF8_LCASE", false),
+ NotLikeAllTestCase("Foo", "%goo%", "%bar%", "UTF8_LCASE", true),
+ NotLikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", false),
+ NotLikeAllTestCase("foo", "%goo%", "%bar%", "UTF8_BINARY", true),
+ NotLikeAllTestCase("foo", "%foo%", null, "UTF8_BINARY", false),
+ NotLikeAllTestCase("foo", "%feo%", null, "UTF8_BINARY", null),
+ NotLikeAllTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
+ )
+ notLikeAllTestCases.foreach(t => {
+ checkEvaluation(NotLikeAll(
+ Literal.create(t.l,
StringType(CollationFactory.collationNameToId(t.collation))),
+ Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))),
t.expectedNotLikeAll)
+ })
+
// LikeAny
- checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%goo%",
"%hoo"), false)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%",
"%bar%"), true)
- checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAny("%goo%",
"%hoo"), false)
- checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAny("%foo%",
"%bar%"), true)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%goo%",
"%hoo"), false)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%",
"%bar%"), true)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%",
nullStr), true)
- checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%feo%",
nullStr), null)
- checkEvaluation(Literal.create(null, binaryCollation).likeAny("%foo%",
"%oo"), null)
+ case class LikeAnyTestCase[R](l: String, p1: String, p2: String,
collation: String,
+ expectedLikeAny: R)
+ val likeAnyTestCases = Seq(
+ LikeAnyTestCase("foo", "%goo%", "%hoo", "UTF8_BINARY", false),
+ LikeAnyTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", true),
+ LikeAnyTestCase("Foo", "%goo%", "%hoo", "UTF8_LCASE", false),
+ LikeAnyTestCase("Foo", "%foo%", "%bar%", "UTF8_LCASE", true),
+ LikeAnyTestCase("foo", "%goo%", "%hoo", "UTF8_BINARY", false),
+ LikeAnyTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", true),
+ LikeAnyTestCase("foo", "%foo%", null, "UTF8_BINARY", true),
+ LikeAnyTestCase("foo", "%feo%", null, "UTF8_BINARY", null),
+ LikeAnyTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
+ )
+ likeAnyTestCases.foreach(t => {
+ checkEvaluation(LikeAny(
+ Literal.create(t.l,
StringType(CollationFactory.collationNameToId(t.collation))),
+ Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))),
t.expectedLikeAny)
+ })
+
// NotLikeAny
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%",
"%hoo"), true)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%",
"%oo%"), false)
- checkEvaluation(Literal.create("Foo",
lowercaseCollation).notLikeAny("%Foo%", "%hoo"), true)
- checkEvaluation(Literal.create("Foo",
lowercaseCollation).notLikeAny("%foo%", "%oo%"), false)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%Foo%",
"%hoo"), true)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%",
"%oo%"), false)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%",
nullStr), null)
- checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%feo%",
nullStr), true)
- checkEvaluation(Literal.create(null, binaryCollation).notLikeAny("%foo%",
"%oo"), null)
+ case class NotLikeAnyTestCase[R](l: String, p1: String, p2: String,
collation: String,
+ expectedNotLikeAny: R)
+ val notLikeAnyTestCases = Seq(
+ NotLikeAnyTestCase("foo", "%foo%", "%hoo", "UTF8_BINARY", true),
+ NotLikeAnyTestCase("foo", "%foo%", "%oo%", "UTF8_BINARY", false),
+ NotLikeAnyTestCase("Foo", "%Foo%", "%hoo", "UTF8_LCASE", true),
+ NotLikeAnyTestCase("Foo", "%foo%", "%oo%", "UTF8_LCASE", false),
+ NotLikeAnyTestCase("foo", "%Foo%", "%hoo", "UTF8_BINARY", true),
+ NotLikeAnyTestCase("foo", "%foo%", "%oo%", "UTF8_BINARY", false),
+ NotLikeAnyTestCase("foo", "%foo%", null, "UTF8_BINARY", null),
+ NotLikeAnyTestCase("foo", "%feo%", null, "UTF8_BINARY", true),
+ NotLikeAnyTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
+ )
+ notLikeAnyTestCases.foreach(t => {
+ checkEvaluation(NotLikeAny(
+ Literal.create(t.l,
StringType(CollationFactory.collationNameToId(t.collation))),
+ Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))),
t.expectedNotLikeAny)
+ })
}
-
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]