uros-db commented on code in PR #46040:
URL: https://github.com/apache/spark/pull/46040#discussion_r1568746724
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -212,6 +212,49 @@ class CollationStringExpressionsSuite
})
}
+ test("Support Left/Right/Substr with collation") {
+ case class SubstringTestCase(query: String, collation: String, result: Row)
+ val checks = Seq("utf8_binary_lcase", "utf8_binary", "unicode",
"unicode_ci").flatMap(
+ c => Seq(
+ SubstringTestCase("select substr('example' collate " + c + ", 1,
100)", c, Row("example")),
+ SubstringTestCase("select substr('example' collate " + c + ", 2, 2)",
c, Row("xa")),
+ SubstringTestCase("select substr('example' collate " + c + ", 0, 0)",
c, Row("")),
+ SubstringTestCase("select substr('example' collate " + c + ", -3, 2)",
c, Row("pl")),
+ SubstringTestCase("select substr(' a世a ' collate " + c + ", 2, 3)", c,
Row("a世a")), // scalastyle:ignore
+ SubstringTestCase("select left(' a世a ' collate " + c + ", 3)", c,
Row(" a世")), // scalastyle:ignore
+ SubstringTestCase("select right(' a世a ' collate " + c + ", 3)", c,
Row("世a ")), // scalastyle:ignore
+ SubstringTestCase("select left('AaAaAaAa000000' collate " + c + ",
3)", c, Row("AaA")),
+ SubstringTestCase("select right('AaAaAaAa000000' collate " + c + ",
3)", c, Row("000")),
+ SubstringTestCase("select substr('' collate " + c + ", 1, 1)", c,
Row("")),
+ SubstringTestCase("select left('' collate " + c + ", 1)", c, Row("")),
+ SubstringTestCase("select right('' collate " + c + ", 1)", c, Row("")),
+ // improper values
+ SubstringTestCase("select left(null collate " + c + ", 1)", c,
Row(null)),
+ SubstringTestCase("select right(null collate " + c + ", 1)", c,
Row(null)),
+ SubstringTestCase("select substr(null collate " + c + ", 1)", c,
Row(null)),
+ SubstringTestCase("select substr(null collate " + c + ", 1, 1)", c,
Row(null)),
+ SubstringTestCase("select left(null collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select right(null collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select substr(null collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select substr(null collate " + c + ", null, null)",
c, Row(null)),
+ SubstringTestCase("select left('AaAaAaAa000000' collate " + c + ",
null)", c, Row(null)),
+ SubstringTestCase("select right('AaAaAaAa000000' collate " + c + ",
null)", c, Row(null)),
+ SubstringTestCase("select substr('AaAaAaAa000000' collate " + c + ",
null)", c, Row(null)),
+ SubstringTestCase("select substr('AaAaAaAa0' collate " + c + ", null,
null)", c, Row(null)),
+ SubstringTestCase("select right('' collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select substr('' collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select substr('' collate " + c + ", null, null)",
c, Row(null)),
+ SubstringTestCase("select left('' collate " + c + ", null)", c,
Row(null))
Review Comment:
28 cases * 4 collations = 112 tests.
I'd say we don't need that many SQL tests. There's no need to do
`Seq("utf8_binary_lcase", "utf8_binary", "unicode", "unicode_ci").flatMap`;
only 4 tests (with valid values) per function (substring/left/right) should be
enough.
A couple of additional tests for improper values are fine as well, but we
don't need to test every possible pair of collation & function.
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -212,6 +212,49 @@ class CollationStringExpressionsSuite
})
}
+ test("Support Left/Right/Substr with collation") {
+ case class SubstringTestCase(query: String, collation: String, result: Row)
+ val checks = Seq("utf8_binary_lcase", "utf8_binary", "unicode",
"unicode_ci").flatMap(
+ c => Seq(
+ SubstringTestCase("select substr('example' collate " + c + ", 1,
100)", c, Row("example")),
+ SubstringTestCase("select substr('example' collate " + c + ", 2, 2)",
c, Row("xa")),
+ SubstringTestCase("select substr('example' collate " + c + ", 0, 0)",
c, Row("")),
+ SubstringTestCase("select substr('example' collate " + c + ", -3, 2)",
c, Row("pl")),
+ SubstringTestCase("select substr(' a世a ' collate " + c + ", 2, 3)", c,
Row("a世a")), // scalastyle:ignore
+ SubstringTestCase("select left(' a世a ' collate " + c + ", 3)", c,
Row(" a世")), // scalastyle:ignore
+ SubstringTestCase("select right(' a世a ' collate " + c + ", 3)", c,
Row("世a ")), // scalastyle:ignore
+ SubstringTestCase("select left('AaAaAaAa000000' collate " + c + ",
3)", c, Row("AaA")),
+ SubstringTestCase("select right('AaAaAaAa000000' collate " + c + ",
3)", c, Row("000")),
+ SubstringTestCase("select substr('' collate " + c + ", 1, 1)", c,
Row("")),
+ SubstringTestCase("select left('' collate " + c + ", 1)", c, Row("")),
+ SubstringTestCase("select right('' collate " + c + ", 1)", c, Row("")),
+ // improper values
+ SubstringTestCase("select left(null collate " + c + ", 1)", c,
Row(null)),
+ SubstringTestCase("select right(null collate " + c + ", 1)", c,
Row(null)),
+ SubstringTestCase("select substr(null collate " + c + ", 1)", c,
Row(null)),
+ SubstringTestCase("select substr(null collate " + c + ", 1, 1)", c,
Row(null)),
+ SubstringTestCase("select left(null collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select right(null collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select substr(null collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select substr(null collate " + c + ", null, null)",
c, Row(null)),
+ SubstringTestCase("select left('AaAaAaAa000000' collate " + c + ",
null)", c, Row(null)),
+ SubstringTestCase("select right('AaAaAaAa000000' collate " + c + ",
null)", c, Row(null)),
+ SubstringTestCase("select substr('AaAaAaAa000000' collate " + c + ",
null)", c, Row(null)),
+ SubstringTestCase("select substr('AaAaAaAa0' collate " + c + ", null,
null)", c, Row(null)),
+ SubstringTestCase("select right('' collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select substr('' collate " + c + ", null)", c,
Row(null)),
+ SubstringTestCase("select substr('' collate " + c + ", null, null)",
c, Row(null)),
+ SubstringTestCase("select left('' collate " + c + ", null)", c,
Row(null))
Review Comment:
Since we don't have unit tests for these functions, let's use a smaller
number of tests here, but make them cover more things. For example, I
don't see any case/accent variation here, nor a wider variety of
variable-length characters, etc.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]