MaxGekk commented on code in PR #48546:
URL: https://github.com/apache/spark/pull/48546#discussion_r1822850662
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2046,4 +2046,44 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
}
+
+ test("fully qualified name") {
+ Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE",
"UNICODE_CI_AI").foreach { collation =>
+ // Make sure that the collation expression returns the correct fully
qualified name.
+ val df = sql(s"SELECT collation('a' collate $collation)")
+ checkAnswer(df,
+
Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation")))
+
+ // Make sure the user can specify the fully qualified name as a
collation name.
+ Seq[String]("contains", "startswith", "endswith").foreach{
binaryFunction =>
Review Comment:
```suggestion
Seq("contains", "startswith", "endswith").foreach { binaryFunction =>
```
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2046,4 +2046,44 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
}
+
+ test("fully qualified name") {
+ Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE",
"UNICODE_CI_AI").foreach { collation =>
+ // Make sure that the collation expression returns the correct fully
qualified name.
+ val df = sql(s"SELECT collation('a' collate $collation)")
+ checkAnswer(df,
+
Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation")))
+
+ // Make sure the user can specify the fully qualified name as a
collation name.
+ Seq[String]("contains", "startswith", "endswith").foreach{
binaryFunction =>
+ val dfRegularName = sql(
+ s"SELECT $binaryFunction('a' collate $collation, 'A' collate
$collation)")
+ val dfFullyQualifiedName = sql(
+ s"SELECT $binaryFunction('a' collate system.builtin.$collation, 'A'
collate $collation)")
+ checkAnswer(dfRegularName, dfFullyQualifiedName.collect())
+ }
+ }
+
+ // Wrong collation names raise a Spark exception.
+ Seq[(String, String)](
Review Comment:
nit: the types should be inferred automatically:
```suggestion
Seq(
```
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2046,4 +2046,44 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
}
+
+ test("fully qualified name") {
+ Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE",
"UNICODE_CI_AI").foreach { collation =>
+ // Make sure that the collation expression returns the correct fully
qualified name.
+ val df = sql(s"SELECT collation('a' collate $collation)")
+ checkAnswer(df,
+
Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation")))
+
+ // Make sure the user can specify the fully qualified name as a
collation name.
+ Seq[String]("contains", "startswith", "endswith").foreach{
binaryFunction =>
+ val dfRegularName = sql(
+ s"SELECT $binaryFunction('a' collate $collation, 'A' collate
$collation)")
+ val dfFullyQualifiedName = sql(
+ s"SELECT $binaryFunction('a' collate system.builtin.$collation, 'A'
collate $collation)")
+ checkAnswer(dfRegularName, dfFullyQualifiedName.collect())
Review Comment:
You can just compare the two DataFrames directly:
```suggestion
checkAnswer(dfRegularName, dfFullyQualifiedName)
```
##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java:
##########
@@ -1180,9 +1180,33 @@ public static StringSearch getStringSearch(
* Returns the collation ID for the given collation name.
*/
public static int collationNameToId(String collationName) throws
SparkException {
+ // If collation name is given as a fully qualified name, extract the
actual collation name as
+ // the last part of the [catalog].[schema].[collation_name] name.
+ long numDots = collationName.chars().filter(ch -> ch == '.').count();
+ if (numDots > 0) {
+ String[] nameParts = collationName.split("\\.");
+ // Currently only predefined collations are supported.
+ if (numDots != 2 ||
!CollationFactory.CATALOG.equalsIgnoreCase(nameParts[0]) ||
+ !CollationFactory.SCHEMA.equalsIgnoreCase(nameParts[1])) {
+ throw
CollationFactory.Collation.CollationSpec.collationInvalidNameException(nameParts[nameParts.length
- 1]);
+ }
+ return Collation.CollationSpec.collationNameToId(nameParts[2]);
+ }
return Collation.CollationSpec.collationNameToId(collationName);
}
+ /**
+ * Returns the fully qualified collation name for the given collation ID.
+ */
+ public static String fullyQualifiedName(int collationId) {
+ Collation.CollationSpec.DefinitionOrigin definitionOrigin =
+ Collation.CollationSpec.getDefinitionOrigin(collationId);
+ // Currently only predefined collations are supported.
+ assert definitionOrigin ==
Collation.CollationSpec.DefinitionOrigin.PREDEFINED;
+ return String.format("%s.%s.%s", CATALOG, SCHEMA,
+ Collation.CollationSpec.fetchCollation(collationId).collationName);
Review Comment:
```suggestion
Collation.CollationSpec.fetchCollation(collationId).collationName);
```
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -2046,4 +2046,44 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE")))
}
+
+ test("fully qualified name") {
+ Seq[String]("UTF8_BINARY", "UTF8_LCASE", "UNICODE",
"UNICODE_CI_AI").foreach { collation =>
+ // Make sure that the collation expression returns the correct fully
qualified name.
+ val df = sql(s"SELECT collation('a' collate $collation)")
+ checkAnswer(df,
+
Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation")))
+
+ // Make sure the user can specify the fully qualified name as a
collation name.
+ Seq[String]("contains", "startswith", "endswith").foreach{
binaryFunction =>
+ val dfRegularName = sql(
+ s"SELECT $binaryFunction('a' collate $collation, 'A' collate
$collation)")
+ val dfFullyQualifiedName = sql(
+ s"SELECT $binaryFunction('a' collate system.builtin.$collation, 'A'
collate $collation)")
+ checkAnswer(dfRegularName, dfFullyQualifiedName.collect())
+ }
+ }
+
+ // Wrong collation names raise a Spark exception.
+ Seq[(String, String)](
+ ("system.builtin2.UTF8_BINARY", "UTF8_BINARY"),
+ ("system.UTF8_BINARY", "UTF8_BINARY"),
+ ("builtin.UTF8_LCASE", "UTF8_LCASE")
+ ).foreach { collation =>
Review Comment:
Just use names instead of `collation._2`:
```suggestion
).foreach { case (collationName, proposals) =>
```
##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java:
##########
@@ -1154,9 +1154,33 @@ public static StringSearch getStringSearch(
* Returns the collation ID for the given collation name.
*/
public static int collationNameToId(String collationName) throws
SparkException {
+ // If collation name is given as a fully qualified name, extract the
actual collation name as
+ // the last part of the [catalog].[schema].[collation_name] name.
+ long numDots = collationName.chars().filter(ch -> ch == '.').count();
Review Comment:
Please move the parsing code to a common place and reuse it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]