uros-db commented on code in PR #45856:
URL: https://github.com/apache/spark/pull/45856#discussion_r1565671721


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala:
##########
@@ -116,26 +116,37 @@ class CollationRegexpExpressionsSuite
 
   test("Support StringSplit string expression with collation") {
     // Supported collations
-    case class StringSplitTestCase[R](l: String, r: String, c: String, result: 
R)
+    case class StringSplitTestCase[R](l: String, r: String, c: String, result: 
R, limit: Int = -1)
     val testCases = Seq(
-      StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C"))
+      StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C")),
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY", Seq("ABC")),
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C")),
+      StringSplitTestCase("AAA", "[a]", "UTF8_BINARY_LCASE", Seq("", "", "", 
"")),
+      StringSplitTestCase("AAA", "[b]", "UTF8_BINARY_LCASE", Seq("AAA")),
+      StringSplitTestCase("aAbB", "[ab]", "UTF8_BINARY_LCASE", Seq("", "", "", 
"", "")),
+      StringSplitTestCase("", "", "UTF8_BINARY_LCASE", Seq("")),
+      StringSplitTestCase("", "[a]", "UTF8_BINARY_LCASE", Seq("")),
+      StringSplitTestCase("xAxBxaxbx", "[AB]", "UTF8_BINARY_LCASE", Seq("x", 
"x", "x", "x", "x")),
+      StringSplitTestCase("ABC", "", "UTF8_BINARY_LCASE", Seq("A", "B", "C")),
+      // test split with limit
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("ABC"), 1),
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C"), 2),
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C"), 3),
+      StringSplitTestCase("ABC", "[B]", "UNICODE", Seq("A", "C")),
+      StringSplitTestCase("ABC", "[b]", "UNICODE", Seq("ABC"))
     )
     testCases.foreach(t => {
-      val query = s"SELECT split(collate('${t.l}', '${t.c}'), 
collate('${t.r}', '${t.c}'))"
+      val query = s"SELECT split(collate('${t.l}', '${t.c}'), '${t.r}', 
${t.limit})"
       // Result & data type
       checkAnswer(sql(query), Row(t.result))
       
assert(sql(query).schema.fields.head.dataType.sameType(ArrayType(StringType(t.c))))
       // TODO: Implicit casting (not currently supported)

Review Comment:
   Don't leave any TODOs.
   
   If there is no casting to be done for this expression, note that in the comment here and explain why that's the case.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to