nikolamand-db commented on code in PR #46077:
URL: https://github.com/apache/spark/pull/46077#discussion_r1570959180


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala:
##########
@@ -34,288 +34,380 @@ class CollationRegexpExpressionsSuite
     // Supported collations
     case class LikeTestCase[R](l: String, r: String, c: String, result: R)
     val testCases = Seq(
-      LikeTestCase("ABC", "%B%", "UTF8_BINARY", true)
+      LikeTestCase("ABC", "%B%", "UTF8_BINARY", true),
+      LikeTestCase("AḂC", "%ḃ%", "UTF8_BINARY_LCASE", true), // scalastyle:ignore
+      LikeTestCase("ABC", "%b%", "UNICODE", false)
     )
     testCases.foreach(t => {
-      val query = s"SELECT like(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))"
+      val query = s"SELECT like(collate('${t.l}', '${t.c}'), '${t.r}')"
       // Result & data type
       checkAnswer(sql(query), Row(t.result))
       assert(sql(query).schema.fields.head.dataType.sameType(BooleanType))
-      // TODO: Implicit casting (not currently supported)
     })
     // Unsupported collations
     case class LikeTestFail(l: String, r: String, c: String)
     val failCases = Seq(
-      LikeTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"),
-      LikeTestFail("ABC", "%B%", "UNICODE"),
       LikeTestFail("ABC", "%b%", "UNICODE_CI")
     )
     failCases.foreach(t => {
-      val query = s"SELECT like(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))"
+      val query = s"SELECT like(collate('${t.l}', '${t.c}'), '${t.r}')"
       val unsupportedCollation = intercept[AnalysisException] { sql(query) }
       assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE")
     })
-    // TODO: Collation mismatch (not currently supported)
   }
 
   test("Support ILike string expression with collation") {
     // Supported collations
     case class ILikeTestCase[R](l: String, r: String, c: String, result: R)
     val testCases = Seq(
-      ILikeTestCase("ABC", "%b%", "UTF8_BINARY", true)
+      ILikeTestCase("ABC", "%b%", "UTF8_BINARY", true),
+      ILikeTestCase("AḂC", "%ḃ%", "UTF8_BINARY_LCASE", true), // scalastyle:ignore

Review Comment:
   Let's wrap the entire class in a scalastyle non-ASCII check ignore instead of annotating individual lines, as is done in 
https://github.com/apache/spark/blob/6232085227ee2cc4e831996a1ac84c27868a1595/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala#L27



##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java:
##########
@@ -143,7 +145,24 @@ public static boolean execICU(final UTF8String l, final UTF8String r,
    * Collation-aware regexp expressions.
    */
 
-  // TODO: Add more collation-aware regexp expressions.
+  public static boolean supportsLowercaseRegex(final int collationId) {

Review Comment:
   Should we add thorough unit tests for these functions in 
`CollationSupportSuite`?



##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala:
##########
@@ -161,4 +162,40 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
       checkEvaluation(ArrayExcept(left, right), out)
     }
   }
+
+  test("MultiLikeBase regexp expressions with collated strings") {

Review Comment:
   Why do we need this test if we already have checks for `LikeAll`, `LikeAny`, 
etc. in `CollationRegexpExpressionsSuite`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to