uros-db commented on code in PR #46991:
URL: https://github.com/apache/spark/pull/46991#discussion_r1642228436


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -208,6 +208,50 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
     }
   }
 
+  test("[SPARK-48472] Enable reflect expressions with collated strings") {
+    // be aware that output of java.util.UUID.fromString is always lowercase
+    Seq(
+      ("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary",
+        "b53c2312-7f23-1234-bac2-b345acd5afd2", "utf8_binary", false),
+
+      ("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary",
+        "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", true),
+      ("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary",
+        "A5Cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", false),
+
+      ("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "utf8_binary",
+        "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_lcase", true),
+      ("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "utf8_binary",
+        "A5Cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_lcase", true),
+
+      ("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "unicode",
+        "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "unicode", true),
+      ("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "utf8_binary",
+        "A5Cf6c42-0c85-418f-af6c-3e4e5b1328f2", "unicode", false),
+
+      ("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "unicode",
+        "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "unicode_ci", true),
+      ("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "utf8_binary",
+        "A5Cf6c42-0c85-418f-af6c-3e4e5b1328f2", "unicode_ci", true),
+    ).foreach {
+      case (left, leftCollation, right, rightCollation, expected) =>
+        checkAnswer(sql(s"SELECT REFLECT('java.util.UUID', 'fromString'," +
+          s" collate('$left', '$leftCollation'))=" +
+          s" collate('$right', '$rightCollation');"),
+          Row(expected))
+    }
+    checkError(
+      exception = intercept[SparkException] {
+        sql("SELECT REFLECT('java.util.UUID', 'fromString'," +
+          " collate('a5cf6c42-0c85-418f-af6c-3e4e5b1328f2', 'utf8_binary'))=" +
+          " collate('a5cf6c42-0c85-418f-af6c-3e4e5b1328f2', 'utf8_BS');")
+      },
+      errorClass = "COLLATION_INVALID_NAME",
+      sqlState = "42704",
+      parameters = Map("collationName" -> "utf8_BS")
+    )

Review Comment:
   Let's focus only on the `reflect` expression (the scope of this PR).
   We already have tests that check whether collation names are valid,
   so the `checkError` case for the invalid collation name can be removed.
   
   However, we should probably check what happens when the arguments have two
   different _valid_ collations. The function may or may not enforce some
   collation type-casting rules, but either way that behavior should be
   reflected in our tests.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to