This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0f4d289b7932 [SPARK-48906][SQL] Introduce `SHOW COLLATIONS LIKE ...`
syntax to show all collations
0f4d289b7932 is described below
commit 0f4d289b7932c91186d2da66095ebb41b6cd58c0
Author: panbingkun <[email protected]>
AuthorDate: Thu Sep 12 02:11:28 2024 +0200
[SPARK-48906][SQL] Introduce `SHOW COLLATIONS LIKE ...` syntax to show all
collations
### What changes were proposed in this pull request?
This PR aims to introduce the `SHOW COLLATIONS LIKE ...` syntax to show all
collations.
### Why are the changes needed?
End-users will be able to obtain the `collations` currently supported by
Spark through SQL.
Other databases, such as `MySQL`, also have similar syntax,
ref: https://dev.mysql.com/doc/refman/9.0/en/show-collation.html
<img width="958" alt="image"
src="https://github.com/user-attachments/assets/1d5106b3-f8b8-42c5-b3ad-0f35c61ad5e2">
postgresql:
https://database.guide/how-to-return-a-list-of-available-collations-in-postgresql/
### Does this PR introduce _any_ user-facing change?
Yes, end-users will be able to obtain the `collations` currently supported by
Spark through commands similar to the following:
|name|provider|version|binaryEquality|binaryOrdering|lowercaseEquality|
| --------- | ----------- | ----------- | ----------- | ----------- |
----------- |
```
spark-sql (default)> SHOW COLLATIONS;
UTF8_BINARY spark 1.0 true true false
UTF8_LCASE spark 1.0 false false true
ff_Adlm icu 153.120.0.0 false false false
ff_Adlm_CI icu 153.120.0.0 false false false
ff_Adlm_AI icu 153.120.0.0 false false false
ff_Adlm_CI_AI icu 153.120.0.0 false false false
...
spark-sql (default)> SHOW COLLATIONS LIKE '*UTF8_BINARY*';
UTF8_BINARY spark 1.0 true true false
Time taken: 0.043 seconds, Fetched 1 row(s)
```
<img width="513" alt="image"
src="https://github.com/user-attachments/assets/d5765e32-718d-4236-857d-d508f5473329">
### How was this patch tested?
Add new UT.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47364 from panbingkun/show_collation_syntax.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../spark/sql/catalyst/util/CollationFactory.java | 143 ++++++++++++++++++++-
docs/sql-ref-ansi-compliance.md | 1 +
.../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 +
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 2 +
.../sql/catalyst/catalog/SessionCatalog.scala | 15 ++-
.../resources/ansi-sql-2016-reserved-keywords.txt | 1 +
.../spark/sql/execution/SparkSqlParser.scala | 12 ++
.../execution/command/ShowCollationsCommand.scala | 62 +++++++++
.../sql-tests/results/ansi/keywords.sql.out | 2 +
.../resources/sql-tests/results/keywords.sql.out | 1 +
.../org/apache/spark/sql/CollationSuite.scala | 42 ++++++
.../ThriftServerWithSparkContextSuite.scala | 2 +-
12 files changed, 278 insertions(+), 6 deletions(-)
diff --git
a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
index 5640a2468d02..4b88e15e8ed7 100644
---
a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
+++
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
@@ -23,12 +23,14 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.BiFunction;
import java.util.function.ToLongFunction;
+import java.util.stream.Stream;
+import com.ibm.icu.text.CollationKey;
+import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.StringSearch;
import com.ibm.icu.util.ULocale;
-import com.ibm.icu.text.CollationKey;
-import com.ibm.icu.text.Collator;
+import com.ibm.icu.util.VersionInfo;
import org.apache.spark.SparkException;
import org.apache.spark.unsafe.types.UTF8String;
@@ -88,6 +90,17 @@ public final class CollationFactory {
}
}
+ public record CollationMeta(
+ String catalog,
+ String schema,
+ String collationName,
+ String language,
+ String country,
+ String icuVersion,
+ String padAttribute,
+ boolean accentSensitivity,
+ boolean caseSensitivity) { }
+
/**
* Entry encapsulating all information about a collation.
*/
@@ -342,6 +355,23 @@ public final class CollationFactory {
}
protected abstract Collation buildCollation();
+
+ protected abstract CollationMeta buildCollationMeta();
+
+ static List<CollationIdentifier> listCollations() {
+ return Stream.concat(
+ CollationSpecUTF8.listCollations().stream(),
+ CollationSpecICU.listCollations().stream()).toList();
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier
collationIdentifier) {
+ CollationMeta collationSpecUTF8 =
+ CollationSpecUTF8.loadCollationMeta(collationIdentifier);
+ if (collationSpecUTF8 == null) {
+ return CollationSpecICU.loadCollationMeta(collationIdentifier);
+ }
+ return collationSpecUTF8;
+ }
}
private static class CollationSpecUTF8 extends CollationSpec {
@@ -364,6 +394,9 @@ public final class CollationFactory {
*/
private static final int CASE_SENSITIVITY_MASK = 0b1;
+ private static final String UTF8_BINARY_COLLATION_NAME = "UTF8_BINARY";
+ private static final String UTF8_LCASE_COLLATION_NAME = "UTF8_LCASE";
+
private static final int UTF8_BINARY_COLLATION_ID =
new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED).collationId;
private static final int UTF8_LCASE_COLLATION_ID =
@@ -406,7 +439,7 @@ public final class CollationFactory {
protected Collation buildCollation() {
if (collationId == UTF8_BINARY_COLLATION_ID) {
return new Collation(
- "UTF8_BINARY",
+ UTF8_BINARY_COLLATION_NAME,
PROVIDER_SPARK,
null,
UTF8String::binaryCompare,
@@ -417,7 +450,7 @@ public final class CollationFactory {
/* supportsLowercaseEquality = */ false);
} else {
return new Collation(
- "UTF8_LCASE",
+ UTF8_LCASE_COLLATION_NAME,
PROVIDER_SPARK,
null,
CollationAwareUTF8String::compareLowerCase,
@@ -428,6 +461,52 @@ public final class CollationFactory {
/* supportsLowercaseEquality = */ true);
}
}
+
+ @Override
+ protected CollationMeta buildCollationMeta() {
+ if (collationId == UTF8_BINARY_COLLATION_ID) {
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ UTF8_BINARY_COLLATION_NAME,
+ /* language = */ null,
+ /* country = */ null,
+ /* icuVersion = */ null,
+ COLLATION_PAD_ATTRIBUTE,
+ /* accentSensitivity = */ true,
+ /* caseSensitivity = */ true);
+ } else {
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ UTF8_LCASE_COLLATION_NAME,
+ /* language = */ null,
+ /* country = */ null,
+ /* icuVersion = */ null,
+ COLLATION_PAD_ATTRIBUTE,
+ /* accentSensitivity = */ true,
+ /* caseSensitivity = */ false);
+ }
+ }
+
+ static List<CollationIdentifier> listCollations() {
+ CollationIdentifier UTF8_BINARY_COLLATION_IDENT =
+ new CollationIdentifier(PROVIDER_SPARK, UTF8_BINARY_COLLATION_NAME,
"1.0");
+ CollationIdentifier UTF8_LCASE_COLLATION_IDENT =
+ new CollationIdentifier(PROVIDER_SPARK, UTF8_LCASE_COLLATION_NAME,
"1.0");
+ return Arrays.asList(UTF8_BINARY_COLLATION_IDENT,
UTF8_LCASE_COLLATION_IDENT);
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier
collationIdentifier) {
+ try {
+ int collationId = CollationSpecUTF8.collationNameToId(
+ collationIdentifier.name, collationIdentifier.name.toUpperCase());
+ return
CollationSpecUTF8.fromCollationId(collationId).buildCollationMeta();
+ } catch (SparkException ignored) {
+ // ignore
+ return null;
+ }
+ }
}
private static class CollationSpecICU extends CollationSpec {
@@ -684,6 +763,20 @@ public final class CollationFactory {
/* supportsLowercaseEquality = */ false);
}
+ @Override
+ protected CollationMeta buildCollationMeta() {
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ collationName(),
+ ICULocaleMap.get(locale).getDisplayLanguage(),
+ ICULocaleMap.get(locale).getDisplayCountry(),
+ VersionInfo.ICU_VERSION.toString(),
+ COLLATION_PAD_ATTRIBUTE,
+ caseSensitivity == CaseSensitivity.CS,
+ accentSensitivity == AccentSensitivity.AS);
+ }
+
/**
* Compute normalized collation name. Components of collation name are
given in order:
* - Locale name
@@ -704,6 +797,37 @@ public final class CollationFactory {
}
return builder.toString();
}
+
+ private static List<String> allCollationNames() {
+ List<String> collationNames = new ArrayList<>();
+ for (String locale: ICULocaleToId.keySet()) {
+ // CaseSensitivity.CS + AccentSensitivity.AS
+ collationNames.add(locale);
+ // CaseSensitivity.CS + AccentSensitivity.AI
+ collationNames.add(locale + "_AI");
+ // CaseSensitivity.CI + AccentSensitivity.AS
+ collationNames.add(locale + "_CI");
+ // CaseSensitivity.CI + AccentSensitivity.AI
+ collationNames.add(locale + "_CI_AI");
+ }
+ return collationNames.stream().sorted().toList();
+ }
+
+ static List<CollationIdentifier> listCollations() {
+ return allCollationNames().stream().map(name ->
+ new CollationIdentifier(PROVIDER_ICU, name,
VersionInfo.ICU_VERSION.toString())).toList();
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier
collationIdentifier) {
+ try {
+ int collationId = CollationSpecICU.collationNameToId(
+ collationIdentifier.name, collationIdentifier.name.toUpperCase());
+ return
CollationSpecICU.fromCollationId(collationId).buildCollationMeta();
+ } catch (SparkException ignored) {
+ // ignore
+ return null;
+ }
+ }
}
/**
@@ -730,9 +854,12 @@ public final class CollationFactory {
}
}
+ public static final String CATALOG = "SYSTEM";
+ public static final String SCHEMA = "BUILTIN";
public static final String PROVIDER_SPARK = "spark";
public static final String PROVIDER_ICU = "icu";
public static final List<String> SUPPORTED_PROVIDERS =
List.of(PROVIDER_SPARK, PROVIDER_ICU);
+ public static final String COLLATION_PAD_ATTRIBUTE = "NO_PAD";
public static final int UTF8_BINARY_COLLATION_ID =
Collation.CollationSpecUTF8.UTF8_BINARY_COLLATION_ID;
@@ -923,4 +1050,12 @@ public final class CollationFactory {
return String.join(", ", suggestions);
}
+
+ public static List<CollationIdentifier> listCollations() {
+ return Collation.CollationSpec.listCollations();
+ }
+
+ public static CollationMeta loadCollationMeta(CollationIdentifier
collationIdentifier) {
+ return Collation.CollationSpec.loadCollationMeta(collationIdentifier);
+ }
}
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 3fa67036fd04..fe5ddf27bf6c 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -442,6 +442,7 @@ Below is a list of all the keywords in Spark SQL.
|CODEGEN|non-reserved|non-reserved|non-reserved|
|COLLATE|reserved|non-reserved|reserved|
|COLLATION|reserved|non-reserved|reserved|
+|COLLATIONS|reserved|non-reserved|reserved|
|COLLECTION|non-reserved|non-reserved|non-reserved|
|COLUMN|reserved|non-reserved|reserved|
|COLUMNS|non-reserved|non-reserved|non-reserved|
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
index 28ebaeaaed6d..9ea213f3bf4a 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
@@ -162,6 +162,7 @@ CLUSTERED: 'CLUSTERED';
CODEGEN: 'CODEGEN';
COLLATE: 'COLLATE';
COLLATION: 'COLLATION';
+COLLATIONS: 'COLLATIONS';
COLLECTION: 'COLLECTION';
COLUMN: 'COLUMN';
COLUMNS: 'COLUMNS';
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index e9fc6c3ca4f2..42f0094de351 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -260,6 +260,7 @@ statement
| SHOW PARTITIONS identifierReference partitionSpec?
#showPartitions
| SHOW identifier? FUNCTIONS ((FROM | IN) ns=identifierReference)?
(LIKE? (legacy=multipartIdentifier | pattern=stringLit))?
#showFunctions
+ | SHOW COLLATIONS (LIKE? pattern=stringLit)?
#showCollations
| SHOW CREATE TABLE identifierReference (AS SERDE)?
#showCreateTable
| SHOW CURRENT namespace
#showCurrentNamespace
| SHOW CATALOGS (LIKE? pattern=stringLit)?
#showCatalogs
@@ -1837,6 +1838,7 @@ nonReserved
| CODEGEN
| COLLATE
| COLLATION
+ | COLLATIONS
| COLLECTION
| COLUMN
| COLUMNS
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index d3a6cb6ae284..5c14e261fafc 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -24,6 +24,7 @@ import java.util.concurrent.TimeUnit
import javax.annotation.concurrent.GuardedBy
import scala.collection.mutable
+import scala.jdk.CollectionConverters.CollectionHasAsScala
import scala.util.{Failure, Success, Try}
import com.google.common.cache.{Cache, CacheBuilder}
@@ -39,7 +40,8 @@ import org.apache.spark.sql.catalyst.expressions.{Alias,
Cast, Expression, Expre
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser,
ParserInterface}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project,
SubqueryAlias, View}
import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils}
+import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory,
StringUtils}
+import org.apache.spark.sql.catalyst.util.CollationFactory.CollationMeta
import org.apache.spark.sql.connector.catalog.CatalogManager
import
org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
import org.apache.spark.sql.errors.{QueryCompilationErrors,
QueryExecutionErrors}
@@ -1899,6 +1901,17 @@ class SessionCatalog(
.filter(isTemporaryFunction)
}
+ /**
+ * List all built-in collations with the given pattern.
+ */
+ def listCollations(pattern: Option[String]): Seq[CollationMeta] = {
+ val collationIdentifiers = CollationFactory.listCollations().asScala.toSeq
+ val filteredCollationNames = StringUtils.filterPattern(
+ collationIdentifiers.map(_.getName), pattern.getOrElse("*")).toSet
+ collationIdentifiers.filter(ident =>
filteredCollationNames.contains(ident.getName)).map(
+ CollationFactory.loadCollationMeta)
+ }
+
// -----------------
// | Other methods |
// -----------------
diff --git
a/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
index 46da60b7897b..452cf930525b 100644
--- a/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
+++ b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
@@ -48,6 +48,7 @@ CLOSE
COALESCE
COLLATE
COLLATION
+COLLATIONS
COLLECT
COLUMN
COMMIT
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index a8261e5d98ba..640abaea58ab 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -1096,4 +1096,16 @@ class SparkSqlAstBuilder extends AstBuilder {
withIdentClause(ctx.identifierReference(), UnresolvedNamespace(_)),
cleanedProperties)
}
+
+ /**
+ * Create a [[ShowCollationsCommand]] command.
+ * Expected format:
+ * {{{
+ * SHOW COLLATIONS (LIKE? pattern=stringLit)?;
+ * }}}
+ */
+ override def visitShowCollations(ctx: ShowCollationsContext): LogicalPlan =
withOrigin(ctx) {
+ val pattern = Option(ctx.pattern).map(x => string(visitStringLit(x)))
+ ShowCollationsCommand(pattern)
+ }
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala
new file mode 100644
index 000000000000..179a841b013b
--- /dev/null
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.catalyst.expressions.{Attribute,
AttributeReference}
+import org.apache.spark.sql.catalyst.util.CollationFactory.CollationMeta
+import org.apache.spark.sql.types.StringType
+
+/**
+ * A command for `SHOW COLLATIONS`.
+ *
+ * The syntax of this command is:
+ * {{{
+ * SHOW COLLATIONS (LIKE? pattern=stringLit)?;
+ * }}}
+ */
+case class ShowCollationsCommand(pattern: Option[String]) extends
LeafRunnableCommand {
+
+ override val output: Seq[Attribute] = Seq(
+ AttributeReference("COLLATION_CATALOG", StringType, nullable = false)(),
+ AttributeReference("COLLATION_SCHEMA", StringType, nullable = false)(),
+ AttributeReference("COLLATION_NAME", StringType, nullable = false)(),
+ AttributeReference("LANGUAGE", StringType)(),
+ AttributeReference("COUNTRY", StringType)(),
+ AttributeReference("ACCENT_SENSITIVITY", StringType, nullable = false)(),
+ AttributeReference("CASE_SENSITIVITY", StringType, nullable = false)(),
+ AttributeReference("PAD_ATTRIBUTE", StringType, nullable = false)(),
+ AttributeReference("ICU_VERSION", StringType)())
+
+ override def run(sparkSession: SparkSession): Seq[Row] = {
+ val systemCollations: Seq[CollationMeta] =
+ sparkSession.sessionState.catalog.listCollations(pattern)
+
+ systemCollations.map(m => Row(
+ m.catalog,
+ m.schema,
+ m.collationName,
+ m.language,
+ m.country,
+ if (m.accentSensitivity) "ACCENT_SENSITIVE" else "ACCENT_INSENSITIVE",
+ if (m.caseSensitivity) "CASE_SENSITIVE" else "CASE_INSENSITIVE",
+ m.padAttribute,
+ m.icuVersion
+ ))
+ }
+}
diff --git
a/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out
b/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out
index e6a36ac2445c..81ccc0f9efc1 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out
@@ -48,6 +48,7 @@ CLUSTERED false
CODEGEN false
COLLATE true
COLLATION true
+COLLATIONS true
COLLECTION false
COLUMN true
COLUMNS false
@@ -381,6 +382,7 @@ CAST
CHECK
COLLATE
COLLATION
+COLLATIONS
COLUMN
CONSTRAINT
CREATE
diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
index 19816c8252c9..e145c57332eb 100644
--- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
@@ -48,6 +48,7 @@ CLUSTERED false
CODEGEN false
COLLATE false
COLLATION false
+COLLATIONS false
COLLECTION false
COLUMN false
COLUMNS false
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
index a61be9eca8c3..b25cddb80762 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
@@ -1624,4 +1624,46 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
}
}
}
+
+ test("show collations") {
+ assert(sql("SHOW COLLATIONS").collect().length >= 562)
+
+ // verify that the output ordering is as expected (UTF8_BINARY,
UTF8_LCASE, etc.)
+ val df = sql("SHOW COLLATIONS").limit(10)
+ checkAnswer(df,
+ Seq(Row("SYSTEM", "BUILTIN", "UTF8_BINARY", null, null,
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", null),
+ Row("SYSTEM", "BUILTIN", "UTF8_LCASE", null, null,
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", null),
+ Row("SYSTEM", "BUILTIN", "UNICODE", "", "",
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "UNICODE_AI", "", "",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "",
+ "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "UNICODE_CI_AI", "", "",
+ "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af", "Afrikaans", "",
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af_AI", "Afrikaans", "",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "",
+ "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af_CI_AI", "Afrikaans", "",
+ "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
+
+ checkAnswer(sql("SHOW COLLATIONS LIKE '*UTF8_BINARY*'"),
+ Row("SYSTEM", "BUILTIN", "UTF8_BINARY", null, null,
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", null))
+
+ checkAnswer(sql("SHOW COLLATIONS '*zh_Hant_HKG*'"),
+ Seq(Row("SYSTEM", "BUILTIN", "zh_Hant_HKG", "Chinese", "Hong Kong SAR
China",
+ "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_AI", "Chinese", "Hong Kong SAR
China",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR
China",
+ "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI_AI", "Chinese", "Hong Kong
SAR China",
+ "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
+ }
}
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
index 6f0b6bccac30..edef6371be8a 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
@@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends
SharedThriftServer {
val sessionHandle = client.openSession(user, "")
val infoValue = client.getInfo(sessionHandle,
GetInfoType.CLI_ODBC_KEYWORDS)
// scalastyle:off line.size.limit
- assert(infoValue.getStringValue ==
"ADD,AFTER,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DAT
[...]
+ assert(infoValue.getStringValue ==
"ADD,AFTER,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,
[...]
// scalastyle:on line.size.limit
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]