Re: [PR] [SPARK-50032][SQL] Allow use of fully qualified collation name [spark]

via GitHub Fri, 18 Oct 2024 07:44:22 -0700


stefankandic commented on code in PR #48546:
URL: https://github.com/apache/spark/pull/48546#discussion_r1806622013



##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2015,4 +2015,35 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
     checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
       Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
   }
+
+  test("fully qualified name") {
+    // Make sure that the collation expression returns the correct fully 
qualified name.
+    Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", 
"UNICODE_CI_AI").foreach { collation =>
+      val df = sql(s"SELECT collation('a' collate $collation)")
+      checkAnswer(df,
+        
Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation")))
+    }
+
+    // Make sure the user can specify the fully qualified name as a collation 
name.
+    Seq[String]("contains", "startswith", "endswith").foreach{ binaryFunction 
=>
+      Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", 
"UNICODE_CI_AI").foreach { collation =>
+        val dfRegularName = sql(
+          s"SELECT $binaryFunction('a' collate $collation, 'A' collate 
$collation)")
+        val dfFullyQualifiedName = sql(
+          s"SELECT $binaryFunction('a' collate SYSTEM.BUILTIN.$collation, 'A' 
collate $collation)")
+        checkAnswer(dfRegularName, dfFullyQualifiedName.collect())
+      }
+    }

Review Comment:
   Can we also add a test case for `collate('a', collation('b'))`?



##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2015,4 +2015,35 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
     checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
       Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
   }
+
+  test("fully qualified name") {
+    // Make sure that the collation expression returns the correct fully 
qualified name.
+    Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", 
"UNICODE_CI_AI").foreach { collation =>
+      val df = sql(s"SELECT collation('a' collate $collation)")
+      checkAnswer(df,
+        
Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation")))
+    }
+
+    // Make sure the user can specify the fully qualified name as a collation 
name.
+    Seq[String]("contains", "startswith", "endswith").foreach{ binaryFunction 
=>
+      Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", 
"UNICODE_CI_AI").foreach { collation =>
+        val dfRegularName = sql(
+          s"SELECT $binaryFunction('a' collate $collation, 'A' collate 
$collation)")
+        val dfFullyQualifiedName = sql(
+          s"SELECT $binaryFunction('a' collate SYSTEM.BUILTIN.$collation, 'A' 
collate $collation)")

Review Comment:
   From what I see, catalog and schema names are usually written in lowercase;
can we change that here as well?



##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2015,4 +2015,35 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
     checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
       Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
   }
+
+  test("fully qualified name") {
+    // Make sure that the collation expression returns the correct fully 
qualified name.
+    Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", 
"UNICODE_CI_AI").foreach { collation =>

Review Comment:
   Can we extract this sequence of collation names into a val and reuse it?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] [SPARK-50032][SQL] Allow use of fully qualified collation name [spark]

Reply via email to