nikolamand-db commented on code in PR #46077:
URL: https://github.com/apache/spark/pull/46077#discussion_r1570959180
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala:
##########
@@ -34,288 +34,380 @@ class CollationRegexpExpressionsSuite
// Supported collations
case class LikeTestCase[R](l: String, r: String, c: String, result: R)
val testCases = Seq(
- LikeTestCase("ABC", "%B%", "UTF8_BINARY", true)
+ LikeTestCase("ABC", "%B%", "UTF8_BINARY", true),
+ LikeTestCase("AḂC", "%ḃ%", "UTF8_BINARY_LCASE", true), //
scalastyle:ignore
+ LikeTestCase("ABC", "%b%", "UNICODE", false)
)
testCases.foreach(t => {
- val query = s"SELECT like(collate('${t.l}', '${t.c}'), collate('${t.r}',
'${t.c}'))"
+ val query = s"SELECT like(collate('${t.l}', '${t.c}'), '${t.r}')"
// Result & data type
checkAnswer(sql(query), Row(t.result))
assert(sql(query).schema.fields.head.dataType.sameType(BooleanType))
- // TODO: Implicit casting (not currently supported)
})
// Unsupported collations
case class LikeTestFail(l: String, r: String, c: String)
val failCases = Seq(
- LikeTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"),
- LikeTestFail("ABC", "%B%", "UNICODE"),
LikeTestFail("ABC", "%b%", "UNICODE_CI")
)
failCases.foreach(t => {
- val query = s"SELECT like(collate('${t.l}', '${t.c}'), collate('${t.r}',
'${t.c}'))"
+ val query = s"SELECT like(collate('${t.l}', '${t.c}'), '${t.r}')"
val unsupportedCollation = intercept[AnalysisException] { sql(query) }
assert(unsupportedCollation.getErrorClass ===
"DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE")
})
- // TODO: Collation mismatch (not currently supported)
}
test("Support ILike string expression with collation") {
// Supported collations
case class ILikeTestCase[R](l: String, r: String, c: String, result: R)
val testCases = Seq(
- ILikeTestCase("ABC", "%b%", "UTF8_BINARY", true)
+ ILikeTestCase("ABC", "%b%", "UTF8_BINARY", true),
+ ILikeTestCase("AḂC", "%ḃ%", "UTF8_BINARY_LCASE", true), //
scalastyle:ignore
Review Comment:
Let's wrap the entire class in a scalastyle non-ASCII check ignore, rather than adding a per-line `scalastyle:ignore` comment to each test case, as is done in
https://github.com/apache/spark/blob/6232085227ee2cc4e831996a1ac84c27868a1595/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala#L27
##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java:
##########
@@ -143,7 +145,24 @@ public static boolean execICU(final UTF8String l, final
UTF8String r,
* Collation-aware regexp expressions.
*/
- // TODO: Add more collation-aware regexp expressions.
+ public static boolean supportsLowercaseRegex(final int collationId) {
Review Comment:
Should we add thorough unit tests for these functions in
`CollationSupportSuite`?
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala:
##########
@@ -161,4 +162,40 @@ class CollationExpressionSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(ArrayExcept(left, right), out)
}
}
+
+ test("MultiLikeBase regexp expressions with collated strings") {
Review Comment:
Why do we need this test if we already have checks for `LikeAll`, `LikeAny`,
etc. in `CollationRegexpExpressionsSuite`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]