stefankandic commented on code in PR #45725:
URL: https://github.com/apache/spark/pull/45725#discussion_r1544730418
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -73,6 +73,84 @@ class CollationStringExpressionsSuite extends QueryTest
})
}
+ test("SUBSTRING_INDEX check result on explicitly collated strings") {
+ def testSubstringIndex(str: String, delim: String, cnt: Integer,
+ collationId: Integer, expected: String): Unit = {
+ val string = Literal.create(str, StringType(collationId))
+ val delimiter = Literal.create(delim, StringType(collationId))
+ val count = Literal(cnt)
+
+ checkEvaluation(SubstringIndex(string, delimiter, count), expected)
+ }
+
+ testSubstringIndex("wwwgapachegorg", "g", -3, 0, "apachegorg")
+ testSubstringIndex("www||apache||org", "||", 2, 0, "www||apache")
+ // UTF8_BINARY_LCASE
Review Comment:
instead of separating these blocks with comments describing the collation
used how about using a variable denoting the current collation
```
var collationId = CollationFactory.nameToId("UTF8_BINARY")
testcases ...
collationId = CollationFactory.nameToId("UNICODE")
testcases ...
```
this way we won't have to litter the code with [magic
constants](https://en.wikipedia.org/wiki/Magic_number_(programming)) which
should make it more readable
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]