uros-db commented on code in PR #45856:
URL: https://github.com/apache/spark/pull/45856#discussion_r1565671721
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala:
##########
@@ -116,26 +116,37 @@ class CollationRegexpExpressionsSuite
test("Support StringSplit string expression with collation") {
// Supported collations
- case class StringSplitTestCase[R](l: String, r: String, c: String, result:
R)
+ case class StringSplitTestCase[R](l: String, r: String, c: String, result:
R, limit: Int = -1)
val testCases = Seq(
- StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C"))
+ StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C")),
+ StringSplitTestCase("ABC", "[b]", "UTF8_BINARY", Seq("ABC")),
+ StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C")),
+ StringSplitTestCase("AAA", "[a]", "UTF8_BINARY_LCASE", Seq("", "", "",
"")),
+ StringSplitTestCase("AAA", "[b]", "UTF8_BINARY_LCASE", Seq("AAA")),
+ StringSplitTestCase("aAbB", "[ab]", "UTF8_BINARY_LCASE", Seq("", "", "",
"", "")),
+ StringSplitTestCase("", "", "UTF8_BINARY_LCASE", Seq("")),
+ StringSplitTestCase("", "[a]", "UTF8_BINARY_LCASE", Seq("")),
+ StringSplitTestCase("xAxBxaxbx", "[AB]", "UTF8_BINARY_LCASE", Seq("x",
"x", "x", "x", "x")),
+ StringSplitTestCase("ABC", "", "UTF8_BINARY_LCASE", Seq("A", "B", "C")),
+ // test split with limit
+ StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("ABC"), 1),
+ StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C"), 2),
+ StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C"), 3),
+ StringSplitTestCase("ABC", "[B]", "UNICODE", Seq("A", "C")),
+ StringSplitTestCase("ABC", "[b]", "UNICODE", Seq("ABC"))
)
testCases.foreach(t => {
- val query = s"SELECT split(collate('${t.l}', '${t.c}'),
collate('${t.r}', '${t.c}'))"
+ val query = s"SELECT split(collate('${t.l}', '${t.c}'), '${t.r}',
${t.limit})"
// Result & data type
checkAnswer(sql(query), Row(t.result))
assert(sql(query).schema.fields.head.dataType.sameType(ArrayType(StringType(t.c))))
// TODO: Implicit casting (not currently supported)
Review Comment:
Don't leave any TODOs.
If there is no implicit casting to be done for this expression, note that in the
comment here and explain why that's the case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]