GideonPotok commented on code in PR #46040:
URL: https://github.com/apache/spark/pull/46040#discussion_r1570170214


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -212,6 +212,119 @@ class CollationStringExpressionsSuite
     })
   }
 
+  test("Support Left/Right/Substr with collation") {
+    case class SubstringTestCase(query: String, collation: String, result: Row)
+    val checks = Seq(
+      SubstringTestCase(
+        "select substr('example' collate " + "utf8_binary_lcase" + ", 1, 100)",
+        "utf8_binary_lcase",
+        Row("example")),
+      SubstringTestCase(
+        "select substr('example' collate " + "utf8_binary" + ", 2, 2)",
+        "utf8_binary",
+        Row("xa")),
+      SubstringTestCase(
+        "select right('' collate " + "utf8_binary_lcase" + ", 1)",
+        "utf8_binary_lcase",
+        Row("")),
+      SubstringTestCase(
+        "select substr('example' collate " + "unicode" + ", 0, 0)",
+        "unicode",
+        Row("")),
+      SubstringTestCase(
+        "select substr('example' collate " + "unicode_ci" + ", -3, 2)",
+        "unicode_ci",
+        Row("pl")),
+      SubstringTestCase(
+        "select substr(' a世a ' collate " + "utf8_binary_lcase" + ", 2, 3)", // 
scalastyle:ignore
+        "utf8_binary_lcase",
+        Row("a世a")), // scalastyle:ignore
+      SubstringTestCase(
+        "select left(' a世a ' collate " + "utf8_binary" + ", 3)", // 
scalastyle:ignore
+        "utf8_binary",
+        Row(" a世")), // scalastyle:ignore
+      SubstringTestCase(
+        "select right(' a世a ' collate " + "unicode" + ", 3)", // 
scalastyle:ignore
+        "unicode",
+        Row("世a ")), // scalastyle:ignore
+      SubstringTestCase(
+        "select left('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "unicode_ci" + ", 
3)", // scalastyle:ignore
+        "unicode_ci",
+        Row("ÀÃÂ")), // scalastyle:ignore
+      SubstringTestCase(
+        "select right('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary_lcase" 
+ ", 3)", // scalastyle:ignore
+        "utf8_binary_lcase",
+        Row("ǢǼÆ")), // scalastyle:ignore
+      SubstringTestCase(
+        "select substr('' collate " + "utf8_binary_lcase" + ", 1, 1)",
+        "utf8_binary_lcase",
+        Row("")),
+      SubstringTestCase(
+        "select substr('' collate " + "unicode" + ", 1, 1)",
+        "unicode",
+        Row("")),
+      SubstringTestCase(
+        "select left('' collate " + "utf8_binary" + ", 1)",
+        "utf8_binary",
+        Row("")),
+        // improper values
+      SubstringTestCase(
+        "select left(null collate " + "utf8_binary_lcase" + ", 1)",
+        "utf8_binary_lcase",
+        Row(null)),
+      SubstringTestCase(
+        "select right(null collate " + "unicode" + ", 1)",
+        "unicode",
+        Row(null)),
+      SubstringTestCase(
+        "select substr(null collate " + "utf8_binary" + ", 1)",
+        "utf8_binary",
+        Row(null)),
+      SubstringTestCase(
+        "select substr(null collate " + "unicode_ci" + ", 1, 1)",
+        "unicode_ci",
+        Row(null)),
+      SubstringTestCase(
+        "select left(null collate " + "utf8_binary_lcase" + ", null)",
+        "utf8_binary_lcase",
+        Row(null)),
+      SubstringTestCase(
+        "select right(null collate " + "unicode" + ", null)",
+        "unicode",
+        Row(null)),
+      SubstringTestCase(
+        "select substr(null collate " + "utf8_binary" + ", null)",
+        "utf8_binary",
+        Row(null)),
+      SubstringTestCase(
+        "select substr(null collate " + "unicode_ci" + ", null, null)",
+        "unicode_ci",
+        Row(null)),
+      SubstringTestCase(
+        "select left('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary_lcase" + 
", null)", // scalastyle:ignore
+        "utf8_binary_lcase",
+        Row(null)),
+      SubstringTestCase(
+        "select right('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "unicode" + ", 
null)", // scalastyle:ignore
+        "unicode",
+        Row(null)),
+      SubstringTestCase(
+        "select substr('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary" + ", 
null)", // scalastyle:ignore
+        "utf8_binary",
+        Row(null)),
+      SubstringTestCase(
+        "select substr('' collate " + "unicode_ci" + ", null, null)",
+        "unicode_ci",
+        Row(null))

Review Comment:
   @uros-db I will change it accordingly. Please advise: do you want three 
case classes, or one case class with a parameter for the function name? If the 
latter (one case class), how do you want me to handle the third parameter 
(`len`), which `left` and `right` do not have, and which is optional for 
`substr`? Maybe with an `Option[String]`?
   
   Is the quantity of tests satisfactory? I got it down from 112 tests to 25 
tests: 13 for valid values and 12 for invalid values. I can get it down 
to 12 valid test cases and, say, six invalid ones if you prefer. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to