MaxGekk commented on code in PR #48546:
URL: https://github.com/apache/spark/pull/48546#discussion_r1806617446


##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java:
##########
@@ -1154,9 +1154,33 @@ public static StringSearch getStringSearch(
    * Returns the collation ID for the given collation name.
    */
   public static int collationNameToId(String collationName) throws SparkException {
+    // If collation name is given as a fully qualified name, extract the actual collation name as
+    // the last part of the [catalog].[schema].[collation_name] name.
+    long numDots = collationName.chars().filter(ch -> ch == '.').count();

Review Comment:
   What about a '.' inside backticks, as in:
   ```
   `catalog.name`.schema.collation_name
   ```
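   
   To make the concern concrete, here is a minimal sketch (not code from the PR) of how a plain `split("\\.")` behaves on a backtick-quoted name versus a quoting-aware parser such as `CatalystSqlParser.parseMultipartIdentifier`; whether that parser is usable from `CollationFactory`'s module is an assumption, not something verified here.
   ```scala
   import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

   val qualified = "`catalog.name`.schema.collation_name"

   // A naive split on '.' yields four parts and keeps the backticks:
   //   Array("`catalog", "name`", "schema", "collation_name")
   val naive = qualified.split("\\.")

   // A quoting-aware parser returns the intended three parts:
   //   Seq("catalog.name", "schema", "collation_name")
   val parsed = CatalystSqlParser.parseMultipartIdentifier(qualified)
   ```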



##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2039,4 +2039,35 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
     checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
       Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
   }
+
+  test("fully qualified name") {
+    // Make sure that the collation expression returns the correct fully qualified name.
+    Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI_AI").foreach { collation =>
+      val df = sql(s"SELECT collation('a' collate $collation)")
+      checkAnswer(df,
+        Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation")))
+    }
+
+    // Make sure the user can specify the fully qualified name as a collation name.
+    Seq[String]("contains", "startswith", "endswith").foreach{ binaryFunction =>
+      Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI_AI").foreach { collation =>
+        val dfRegularName = sql(
+          s"SELECT $binaryFunction('a' collate $collation, 'A' collate $collation)")
+        val dfFullyQualifiedName = sql(
+          s"SELECT $binaryFunction('a' collate SYSTEM.BUILTIN.$collation, 'A' collate $collation)")
+        checkAnswer(dfRegularName, dfFullyQualifiedName.collect())
+      }
+    }
+
+    // Wrong collation names raise a Spark runtime exception.

Review Comment:
   Do you mean `SparkRuntimeException`? Why do you check `SparkException` then?
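   
   For reference: `SparkException` extends `Exception` while `SparkRuntimeException` extends `RuntimeException`, and they only share the `SparkThrowable` interface, so `intercept[SparkException]` would not catch a `SparkRuntimeException`. A small sketch (assuming the statement keeps failing at analysis time) that pins down the concrete type:
   ```scala
   import org.apache.spark.{SparkException, SparkThrowable}

   // Catch whatever SparkThrowable comes out, then assert the concrete class,
   // so the code comment and the assertion stay consistent.
   val e = intercept[SparkThrowable] {
     sql("SELECT 'a' COLLATE SYSTEM.BUILTIN2.UTF8_BINARY")
   }
   assert(e.isInstanceOf[SparkException])
   ```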



##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java:
##########
@@ -1154,9 +1154,33 @@ public static StringSearch getStringSearch(
    * Returns the collation ID for the given collation name.
    */
   public static int collationNameToId(String collationName) throws SparkException {
+    // If collation name is given as a fully qualified name, extract the actual collation name as
+    // the last part of the [catalog].[schema].[collation_name] name.
+    long numDots = collationName.chars().filter(ch -> ch == '.').count();
+    if (numDots > 0) {
+      String[] nameParts = collationName.split("\\.");

Review Comment:
   Can't you reuse some existing helper function like `parseAttributeName`?
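   
   A sketch of the suggested reuse, assuming `UnresolvedAttribute.parseAttributeName` keeps its `String => Seq[String]` behavior and leaving aside whether `common/unsafe` may depend on catalyst:
   ```scala
   import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute

   // parseAttributeName splits on dots while respecting backtick quoting, e.g.
   //   "SYSTEM.BUILTIN.UNICODE"         -> Seq("SYSTEM", "BUILTIN", "UNICODE")
   //   "`catalog.name`.BUILTIN.UNICODE" -> Seq("catalog.name", "BUILTIN", "UNICODE")
   val parts = UnresolvedAttribute.parseAttributeName("SYSTEM.BUILTIN.UNICODE")
   val collationName = parts.last // "UNICODE"
   ```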



##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2039,4 +2039,35 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
     checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
       Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
   }
+
+  test("fully qualified name") {
+    // Make sure that the collation expression returns the correct fully qualified name.
+    Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI_AI").foreach { collation =>
+      val df = sql(s"SELECT collation('a' collate $collation)")
+      checkAnswer(df,
+        Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation")))
+    }
+
+    // Make sure the user can specify the fully qualified name as a collation name.
+    Seq[String]("contains", "startswith", "endswith").foreach{ binaryFunction =>
+      Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI_AI").foreach { collation =>
+        val dfRegularName = sql(
+          s"SELECT $binaryFunction('a' collate $collation, 'A' collate $collation)")
+        val dfFullyQualifiedName = sql(
+          s"SELECT $binaryFunction('a' collate SYSTEM.BUILTIN.$collation, 'A' collate $collation)")
+        checkAnswer(dfRegularName, dfFullyQualifiedName.collect())
+      }
+    }
+
+    // Wrong collation names raise a Spark runtime exception.
+    intercept[SparkException](sql("SELECT 'a' COLLATE SYSTEM.BUILTIN2.UTF8_BINARY"))

Review Comment:
   Could you also check the error condition, using `checkError`?
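   
   For example, a hedged sketch of such a check; the condition name and the parameter map are assumptions to verify against error-conditions.json and the actual exception, and on some branches the named argument is still `errorClass` rather than `condition`:
   ```scala
   checkError(
     exception = intercept[SparkException] {
       sql("SELECT 'a' COLLATE SYSTEM.BUILTIN2.UTF8_BINARY")
     },
     condition = "COLLATION_INVALID_NAME", // assumed condition name
     // assumed keys/values; the real error may carry more parameters (e.g. proposals)
     parameters = Map("collationName" -> "BUILTIN2.UTF8_BINARY")
   )
   ```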


