This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0f4d289b7932 [SPARK-48906][SQL] Introduce `SHOW COLLATIONS LIKE ...`
syntax to show all collations
0f4d289b7932 is described below
commit 0f4d289b7932c91186d2da66095ebb41b6cd58c0
Author: panbingkun <[email protected]>
AuthorDate: Thu Sep 12 02:11:28 2024 +0200
[SPARK-48906][SQL] Introduce `SHOW COLLATIONS LIKE ...` syntax to show all
collations
### What changes were proposed in this pull request?
This PR aims to introduce the `SHOW COLLATIONS LIKE ...` syntax to show all
collations.
### Why are the changes needed?
End-users will be able to obtain the `collations` currently supported by
Spark through SQL.
Other databases, such as `MySQL`, also have similar syntax,
ref: https://dev.mysql.com/doc/refman/9.0/en/show-collation.html
<img width="958" alt="image"
src="https://github.com/user-attachments/assets/1d5106b3-f8b8-42c5-b3ad-0f35c61ad5e2">
postgresql:
https://database.guide/how-to-return-a-list-of-available-collations-in-postgresql/
### Does this PR introduce _any_ user-facing change?
Yes, end-users will be able to obtain the `collations` currently supported by
Spark through commands similar to the following:
|name|provider|version|binaryEquality|binaryOrdering|lowercaseEquality|
| --------- | ----------- | ----------- | ----------- | ----------- |
----------- |
```
spark-sql (default)> SHOW COLLATIONS;
UTF8_BINARY spark 1.0 true true false
UTF8_LCASE spark 1.0 false false true
ff_Adlm icu 153.120.0.0 false false false
ff_Adlm_CI icu 153.120.0.0 false false false
ff_Adlm_AI icu 153.120.0.0 false false false
ff_Adlm_CI_AI icu 153.120.0.0 false false false
...
spark-sql (default)> SHOW COLLATIONS LIKE '*UTF8_BINARY*';
UTF8_BINARY spark 1.0 true true false
Time taken: 0.043 seconds, Fetched 1 row(s)
```
<img width="513" alt="image"
src="https://github.com/user-attachments/assets/d5765e32-718d-4236-857d-d508f5473329">
### How was this patch tested?
Add new UT.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47364 from panbingkun/show_collation_syntax.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../spark/sql/catalyst/util/CollationFactory.java | 143 ++++++++++++++++++++-
docs/sql-ref-ansi-compliance.md | 1 +
.../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 +
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 2 +
.../sql/catalyst/catalog/SessionCatalog.scala | 15 ++-
.../resources/ansi-sql-2016-reserved-keywords.txt | 1 +
.../spark/sql/execution/SparkSqlParser.scala | 12 ++
.../execution/command/ShowCollationsCommand.scala | 62 +++++++++
.../sql-tests/results/ansi/keywords.sql.out | 2 +
.../resources/sql-tests/results/keywords.sql.out | 1 +
.../org/apache/spark/sql/CollationSuite.scala | 42 ++++++
.../ThriftServerWithSparkContextSuite.scala | 2 +-
12 files changed, 278 insertions(+), 6 deletions(-)
diff --git
a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
index 5640a2468d02..4b88e15e8ed7 100644
---
a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
+++
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
@@ -23,12 +23,14 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.BiFunction;
import java.util.function.ToLongFunction;
+import java.util.stream.Stream;
+import com.ibm.icu.text.CollationKey;
+import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.StringSearch;
import com.ibm.icu.util.ULocale;
-import com.ibm.icu.text.CollationKey;
-import com.ibm.icu.text.Collator;
+import com.ibm.icu.util.VersionInfo;
import org.apache.spark.SparkException;
import org.apache.spark.unsafe.types.UTF8String;
@@ -88,6 +90,17 @@ public final class CollationFactory {
}
}
+ public record CollationMeta(
+ String catalog,
+ String schema,
+ String collationName,
+ String language,
+ String country,
+ String icuVersion,
+ String padAttribute,
+ boolean accentSensitivity,
+ boolean caseSensitivity) { }
+
/**
* Entry encapsulating all information about a collation.
*/
@@ -342,6 +355,23 @@ public final class CollationFactory {
}
protected abstract Collation buildCollation();
+
+ protected abstract CollationMeta buildCollationMeta();
+
+ static List<CollationIdentifier> listCollations() {
+ return Stream.concat(
+ CollationSpecUTF8.listCollations().stream(),
+ CollationSpecICU.listCollations().stream()).toList();
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier
collationIdentifier) {
+ CollationMeta collationSpecUTF8 =
+ CollationSpecUTF8.loadCollationMeta(collationIdentifier);
+ if (collationSpecUTF8 == null) {
+ return CollationSpecICU.loadCollationMeta(collationIdentifier);
+ }
+ return collationSpecUTF8;
+ }
}
private static class CollationSpecUTF8 extends CollationSpec {
@@ -364,6 +394,9 @@ public final class CollationFactory {
*/
private static final int CASE_SENSITIVITY_MASK = 0b1;
+ private static final String UTF8_BINARY_COLLATION_NAME = "UTF8_BINARY";
+ private static final String UTF8_LCASE_COLLATION_NAME = "UTF8_LCASE";
+
private static final int UTF8_BINARY_COLLATION_ID =
new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED).collationId;
private static final int UTF8_LCASE_COLLATION_ID =
@@ -406,7 +439,7 @@ public final class CollationFactory {
protected Collation buildCollation() {
if (collationId == UTF8_BINARY_COLLATION_ID) {
return new Collation(
- "UTF8_BINARY",
+ UTF8_BINARY_COLLATION_NAME,
PROVIDER_SPARK,
null,
UTF8String::binaryCompare,
@@ -417,7 +450,7 @@ public final class CollationFactory {
/* supportsLowercaseEquality = */ false);
} else {
return new Collation(
- "UTF8_LCASE",
+ UTF8_LCASE_COLLATION_NAME,
PROVIDER_SPARK,
null,
CollationAwareUTF8String::compareLowerCase,
@@ -428,6 +461,52 @@ public final class CollationFactory {
/* supportsLowercaseEquality = */ true);
}
}
+
+ @Override
+ protected CollationMeta buildCollationMeta() {
+ if (collationId == UTF8_BINARY_COLLATION_ID) {
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ UTF8_BINARY_COLLATION_NAME,
+ /* language = */ null,
+ /* country = */ null,
+ /* icuVersion = */ null,
+ COLLATION_PAD_ATTRIBUTE,
+ /* accentSensitivity = */ true,
+ /* caseSensitivity = */ true);
+ } else {
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ UTF8_LCASE_COLLATION_NAME,
+ /* language = */ null,
+ /* country = */ null,
+ /* icuVersion = */ null,
+ COLLATION_PAD_ATTRIBUTE,
+ /* accentSensitivity = */ true,
+ /* caseSensitivity = */ false);
+ }
+ }
+
+ static List<CollationIdentifier> listCollations() {
+ CollationIdentifier UTF8_BINARY_COLLATION_IDENT =
+ new CollationIdentifier(PROVIDER_SPARK, UTF8_BINARY_COLLATION_NAME,
"1.0");
+ CollationIdentifier UTF8_LCASE_COLLATION_IDENT =
+ new CollationIdentifier(PROVIDER_SPARK, UTF8_LCASE_COLLATION_NAME,
"1.0");
+ return Arrays.asList(UTF8_BINARY_COLLATION_IDENT,
UTF8_LCASE_COLLATION_IDENT);
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier
collationIdentifier) {
+ try {
+ int collationId = CollationSpecUTF8.collationNameToId(
+ collationIdentifier.name, collationIdentifier.name.toUpperCase());
+ return
CollationSpecUTF8.fromCollationId(collationId).buildCollationMeta();
+ } catch (SparkException ignored) {
+ // ignore
+ return null;
+ }
+ }
}
private static class CollationSpecICU extends CollationSpec {
@@ -684,6 +763,20 @@ public final class CollationFactory {
/* supportsLowercaseEquality = */ false);
}
+ @Override
+ protected CollationMeta buildCollationMeta() {
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ collationName(),
+ ICULocaleMap.get(locale).getDisplayLanguage(),
+ ICULocaleMap.get(locale).getDisplayCountry(),
+ VersionInfo.ICU_VERSION.toString(),
+ COLLATION_PAD_ATTRIBUTE,
+ caseSensitivity == CaseSensitivity.CS,
+ accentSensitivity == AccentSensitivity.AS);
+ }
+
/**
* Compute normalized collation name. Components of collation name are
given in order:
* - Locale name
@@ -704,6 +797,37 @@ public final class CollationFactory {
}
return builder.toString();
}
+
+ private static List<String> allCollationNames() {
+ List<String> collationNames = new ArrayList<>();
+ for (String locale: ICULocaleToId.keySet()) {
+ // CaseSensitivity.CS + AccentSensitivity.AS
+ collationNames.add(locale);
+ // CaseSensitivity.CS + AccentSensitivity.AI
+ collationNames.add(locale + "_AI");
+ // CaseSensitivity.CI + AccentSensitivity.AS
+ collationNames.add(locale + "_CI");
+ // CaseSensitivity.CI + AccentSensitivity.AI
+ collationNames.add(locale + "_CI_AI");
+ }
+ return collationNames.stream().sorted().toList();
+ }
+
+ static List<CollationIdentifier> listCollations() {
+ return allCollationNames().stream().map(name ->
+ new CollationIdentifier(PROVIDER_ICU, name,
VersionInfo.ICU_VERSION.toString())).toList();
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier
collationIdentifier) {
+ try {
+ int collationId = CollationSpecICU.collationNameToId(
+ collationIdentifier.name, collationIdentifier.name.toUpperCase());
+ return
CollationSpecICU.fromCollationId(collationId).buildCollationMeta();
+ } catch (SparkException ignored) {
+ // ignore
+ return null;
+ }
+ }
}
/**
@@ -730,9 +854,12 @@ public final class CollationFactory {
}
}
+ public static final String CATALOG = "SYSTEM";
+ public static final String SCHEMA = "BUILTIN";
public static final String PROVIDER_SPARK = "spark";
public static final String PROVIDER_ICU = "icu";
public static final List<String> SUPPORTED_PROVIDERS =
List.of(PROVIDER_SPARK, PROVIDER_ICU);
+ public static final String COLLATION_PAD_ATTRIBUTE = "NO_PAD";
public static final int UTF8_BINARY_COLLATION_ID =
Collation.CollationSpecUTF8.UTF8_BINARY_COLLATION_ID;
@@ -923,4 +1050,12 @@ public final class CollationFactory {
return String.join(", ", suggestions);
}
+
+ public static List<CollationIdentifier> listCollations() {
+ return Collation.CollationSpec.listCollations();
+ }
+
+ public static CollationMeta loadCollationMeta(CollationIdentifier
collationIdentifier) {
+ return Collation.CollationSpec.loadCollationMeta(collationIdentifier);
+ }
}
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 3fa67036fd04..fe5ddf27bf6c 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -442,6 +442,7 @@ Below is a list of all the keywords in Spark SQL.
|CODEGEN|non-reserved|non-reserved|non-reserved|
|COLLATE|reserved|non-reserved|reserved|
|COLLATION|reserved|non-reserved|reserved|
+|COLLATIONS|reserved|non-reserved|reserved|
|COLLECTION|non-reserved|non-reserved|non-reserved|
|COLUMN|reserved|non-reserved|reserved|
|COLUMNS|non-reserved|non-reserved|non-reserved|
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
index 28ebaeaaed6d..9ea213f3bf4a 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
@@ -162,6 +162,7 @@ CLUSTERED: 'CLUSTERED';
CODEGEN: 'CODEGEN';
COLLATE: 'COLLATE';
COLLATION: 'COLLATION';
+COLLATIONS: 'COLLATIONS';
COLLECTION: 'COLLECTION';
COLUMN: 'COLUMN';
COLUMNS: 'COLUMNS';
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index e9fc6c3ca4f2..42f0094de351 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -260,6 +260,7 @@ statement
| SHOW PARTITIONS identifierReference partitionSpec?
#showPartitions
| SHOW identifier? FUNCTIONS ((FROM | IN) ns=identifierReference)?
(LIKE? (legacy=multipartIdentifier | pattern=stringLit))?
#showFunctions
+ | SHOW COLLATIONS (LIKE? pattern=stringLit)?
#showCollations
| SHOW CREATE TABLE identifierReference (AS SERDE)?
#showCreateTable
| SHOW CURRENT namespace
#showCurrentNamespace
| SHOW CATALOGS (LIKE? pattern=stringLit)?
#showCatalogs
@@ -1837,6 +1838,7 @@ nonReserved
| CODEGEN
| COLLATE
| COLLATION
+ | COLLATIONS
| COLLECTION
| COLUMN
| COLUMNS
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index d3a6cb6ae284..5c14e261fafc 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -24,6 +24,7 @@ import java.util.concurrent.TimeUnit
import javax.annotation.concurrent.GuardedBy
import scala.collection.mutable
+import scala.jdk.CollectionConverters.CollectionHasAsScala
import scala.util.{Failure, Success, Try}
import com.google.common.cache.{Cache, CacheBuilder}
@@ -39,7 +40,8 @@ import org.apache.spark.sql.catalyst.expressions.{Alias,
Cast, Expression, Expre
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser,
ParserInterface}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project,
SubqueryAlias, View}
import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils}
+import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory,
StringUtils}
+import org.apache.spark.sql.catalyst.util.CollationFactory.CollationMeta
import org.apache.spark.sql.connector.catalog.CatalogManager
import
org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
import org.apache.spark.sql.errors.{QueryCompilationErrors,
QueryExecutionErrors}
@@ -1899,6 +1901,17 @@ class SessionCatalog(
.filter(isTemporaryFunction)
}
+ /**
+ * List all built-in collations with the given pattern.
+ */
+ def listCollations(pattern: Option[String]): Seq[CollationMeta] = {
+ val collationIdentifiers = CollationFactory.listCollations().asScala.toSeq
+ val filteredCollationNames = StringUtils.filterPattern(
+ collationIdentifiers.map(_.getName), pattern.getOrElse("*")).toSet
+ collationIdentifiers.filter(ident =>
filteredCollationNames.contains(ident.getName)).map(
+ CollationFactory.loadCollationMeta)
+ }
+
// -----------------
// | Other methods |
// -----------------
diff --git
a/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
index 46da60b7897b..452cf930525b 100644
--- a/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
+++ b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
@@ -48,6 +48,7 @@ CLOSE
COALESCE
COLLATE
COLLATION
+COLLATIONS
COLLECT
COLUMN
COMMIT
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index a8261e5d98ba..640abaea58ab 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -1096,4 +1096,16 @@ class SparkSqlAstBuilder extends AstBuilder {
withIdentClause(ctx.identifierReference(), UnresolvedNamespace(_)),
cleanedProperties)
}
+
+ /**
+ * Create a [[ShowCollationsCommand]] command.
+ * Expected format:
+ * {{{
+ * SHOW COLLATIONS (LIKE? pattern=stringLit)?;
+ * }}}
+ */
+ override def visitShowCollations(ctx: ShowCollationsContext): LogicalPlan =
withOrigin(ctx) {
+ val pattern = Option(ctx.pattern).map(x => string(visitStringLit(x)))
+ ShowCollationsCommand(pattern)
+ }
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala
new file mode 100644
index 000000000000..179a841b013b
--- /dev/null
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.catalyst.expressions.{Attribute,
AttributeReference}
+import org.apache.spark.sql.catalyst.util.CollationFactory.CollationMeta
+import org.apache.spark.sql.types.StringType
+
+/**
+ * A command for `SHOW COLLATIONS`.
+ *
+ * The syntax of this command is:
+ * {{{
+ * SHOW COLLATIONS (LIKE? pattern=stringLit)?;
+ * }}}
+ */
+case class ShowCollationsCommand(pattern: Option[String]) extends
LeafRunnableCommand {
+
+ override val output: Seq[Attribute] = Seq(
+ AttributeReference("COLLATION_CATALOG", StringType, nullable = false)(),
+ AttributeReference("COLLATION_SCHEMA", StringType, nullable = false)(),
+ AttributeReference("COLLATION_NAME", StringType, nullable = false)(),
+ AttributeReference("LANGUAGE", StringType)(),
+ AttributeReference("COUNTRY", StringType)(),
+ AttributeReference("ACCENT_SENSITIVITY", StringType, nullable = false)(),
+ AttributeReference("CASE_SENSITIVITY", StringType, nullable = false)(),
+ AttributeReference("PAD_ATTRIBUTE", StringType, nullable = false)(),
+ AttributeReference("ICU_VERSION", StringType)())
+
+ override def run(sparkSession: SparkSession): Seq[Row] = {
+ val systemCollations: Seq[CollationMeta] =
+ sparkSession.sessionState.catalog.listCollations(pattern)
+
+ systemCollations.map(m => Row(
+ m.catalog,
+ m.schema,
+ m.collationName,
+ m.language,
+ m.country,
+ if (m.accentSensitivity) "ACCENT_SENSITIVE" else "ACCENT_INSENSITIVE",
+ if (m.caseSensitivity) "CASE_SENSITIVE" else "CASE_INSENSITIVE",
+ m.padAttribute,
+ m.icuVersion
+ ))
+ }
+}
diff --git
a/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out
b/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out
index e6a36ac2445c..81ccc0f9efc1 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out
@@ -48,6 +48,7 @@ CLUSTERED false
CODEGEN false
COLLATE true
COLLATION true
+COLLATIONS true
COLLECTION false
COLUMN true
COLUMNS false
@@ -381,6 +382,7 @@ CAST
CHECK
COLLATE
COLLATION
+COLLATIONS
COLUMN
CONSTRAINT
CREATE
diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
index 19816c8252c9..e145c57332eb 100644
--- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
@@ -48,6 +48,7 @@ CLUSTERED false
CODEGEN false
COLLATE false
COLLATION false
+COLLATIONS false
COLLECTION false
COLUMN false
COLUMNS false
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
index a61be9eca8c3..b25cddb80762 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
@@ -1624,4 +1624,46 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
}
}
}
+
+ test("show collations") {
+ assert(sql("SHOW COLLATIONS").collect().length >= 562)
+
+ // verify that the output ordering is as expected (UTF8_BINARY,
UTF8_LCASE, etc.)
+ val df = sql("SHOW COLLATIONS").limit(10)
+ checkAnswer(df,
+ Seq(Row("SYSTEM", "BUILTIN", "UTF8_BINARY", null, null,
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", null),
+ Row("SYSTEM", "BUILTIN", "UTF8_LCASE", null, null,
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", null),
+ Row("SYSTEM", "BUILTIN", "UNICODE", "", "",
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "UNICODE_AI", "", "",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "",
+ "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "UNICODE_CI_AI", "", "",
+ "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af", "Afrikaans", "",
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af_AI", "Afrikaans", "",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "",
+ "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af_CI_AI", "Afrikaans", "",
+ "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
+
+ checkAnswer(sql("SHOW COLLATIONS LIKE '*UTF8_BINARY*'"),
+ Row("SYSTEM", "BUILTIN", "UTF8_BINARY", null, null,
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", null))
+
+ checkAnswer(sql("SHOW COLLATIONS '*zh_Hant_HKG*'"),
+ Seq(Row("SYSTEM", "BUILTIN", "zh_Hant_HKG", "Chinese", "Hong Kong SAR
China",
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_AI", "Chinese", "Hong Kong SAR
China",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR
China",
+ "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI_AI", "Chinese", "Hong Kong
SAR China",
+ "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
+ }
}
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
index 6f0b6bccac30..edef6371be8a 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
@@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends
SharedThriftServer {
val sessionHandle = client.openSession(user, "")
val infoValue = client.getInfo(sessionHandle,
GetInfoType.CLI_ODBC_KEYWORDS)
// scalastyle:off line.size.limit
- assert(infoValue.getStringValue ==
"ADD,AFTER,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DAT
[...]
+ assert(infoValue.getStringValue ==
"ADD,AFTER,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,
[...]
// scalastyle:on line.size.limit
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]