nikolamand-db commented on code in PR #45856:
URL: https://github.com/apache/spark/pull/45856#discussion_r1565804085


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala:
##########
@@ -116,26 +116,37 @@ class CollationRegexpExpressionsSuite
 
   test("Support StringSplit string expression with collation") {
     // Supported collations
-    case class StringSplitTestCase[R](l: String, r: String, c: String, result: 
R)
+    case class StringSplitTestCase[R](l: String, r: String, c: String, result: 
R, limit: Int = -1)
     val testCases = Seq(
-      StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C"))
+      StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C")),
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY", Seq("ABC")),
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C")),
+      StringSplitTestCase("AAA", "[a]", "UTF8_BINARY_LCASE", Seq("", "", "", 
"")),
+      StringSplitTestCase("AAA", "[b]", "UTF8_BINARY_LCASE", Seq("AAA")),
+      StringSplitTestCase("aAbB", "[ab]", "UTF8_BINARY_LCASE", Seq("", "", "", 
"", "")),
+      StringSplitTestCase("", "", "UTF8_BINARY_LCASE", Seq("")),
+      StringSplitTestCase("", "[a]", "UTF8_BINARY_LCASE", Seq("")),
+      StringSplitTestCase("xAxBxaxbx", "[AB]", "UTF8_BINARY_LCASE", Seq("x", 
"x", "x", "x", "x")),
+      StringSplitTestCase("ABC", "", "UTF8_BINARY_LCASE", Seq("A", "B", "C")),
+      // test split with limit
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("ABC"), 1),
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C"), 2),
+      StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C"), 3),
+      StringSplitTestCase("ABC", "[B]", "UNICODE", Seq("A", "C")),
+      StringSplitTestCase("ABC", "[b]", "UNICODE", Seq("ABC"))
     )
     testCases.foreach(t => {
-      val query = s"SELECT split(collate('${t.l}', '${t.c}'), 
collate('${t.r}', '${t.c}'))"
+      val query = s"SELECT split(collate('${t.l}', '${t.c}'), '${t.r}', 
${t.limit})"
       // Result & data type
       checkAnswer(sql(query), Row(t.result))
       
assert(sql(query).schema.fields.head.dataType.sameType(ArrayType(StringType(t.c))))
       // TODO: Implicit casting (not currently supported)

Review Comment:
   Removed both TODOs: string split doesn't have any custom collation cast 
logic, because the collation of the regex parameter is irrelevant.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to