This is an automated email from the ASF dual-hosted git repository.

rubenql pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new fa83490  [CALCITE-3951] Support different string comparison based on SqlCollation
fa83490 is described below

commit fa8349069d141d3c75bafa06d5fb8800711ec8d6
Author: rubenada <[email protected]>
AuthorDate: Wed Apr 22 12:19:06 2020 +0200

    [CALCITE-3951] Support different string comparison based on SqlCollation
---
 .../calcite/adapter/enumerable/EnumUtils.java      |  27 +++
 .../calcite/adapter/enumerable/PhysTypeImpl.java   |  53 +++--
 .../calcite/adapter/enumerable/RexImpTable.java    |   6 +
 .../calcite/config/CalciteSystemProperty.java      |   4 +-
 .../org/apache/calcite/jdbc/JavaCollation.java     |  65 ++++++
 .../java/org/apache/calcite/rex/RexBuilder.java    |   9 +-
 .../org/apache/calcite/runtime/SqlFunctions.java   |  31 +++
 .../java/org/apache/calcite/runtime/Utilities.java |  30 +++
 .../java/org/apache/calcite/sql/SqlCollation.java  |  65 ++++--
 .../main/java/org/apache/calcite/sql/SqlUtil.java  |   6 +-
 .../apache/calcite/sql/parser/SqlParserUtil.java   |   2 +-
 .../calcite/sql/type/SqlTypeFactoryImpl.java       |   7 +-
 .../org/apache/calcite/util/BuiltInMethod.java     |   7 +
 .../java/org/apache/calcite/util/NlsString.java    |   5 +-
 .../enumerable/EnumerableStringComparisonTest.java | 249 +++++++++++++++++++++
 .../apache/calcite/linq4j/function/Functions.java  |  26 +++
 16 files changed, 543 insertions(+), 49 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
index 77779a4..edf544b 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
@@ -47,6 +47,8 @@ import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexProgramBuilder;
 import org.apache.calcite.runtime.SortedMultiMap;
 import org.apache.calcite.runtime.SqlFunctions;
+import org.apache.calcite.runtime.Utilities;
+import org.apache.calcite.sql.SqlCollation;
 import org.apache.calcite.util.BuiltInMethod;
 import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.Util;
@@ -63,6 +65,7 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 /**
@@ -1064,4 +1067,28 @@ public class EnumUtils {
       }
     };
   }
+
+  public static Expression generateCollatorExpression(SqlCollation collation) {
+    if (collation == null || collation.getCollator() == null) {
+      return null;
+    }
+
+    // Utilities.generateCollator(
+    //      new Locale(
+    //          collation.getLocale().getLanguage(),
+    //          collation.getLocale().getCountry(),
+    //          collation.getLocale().getVariant()),
+    //      collation.getCollator().getStrength());
+    final Locale locale = collation.getLocale();
+    final int strength = collation.getCollator().getStrength();
+    return Expressions.call(
+        Utilities.class,
+        "generateCollator",
+        Expressions.new_(
+            Locale.class,
+            Expressions.constant(locale.getLanguage()),
+            Expressions.constant(locale.getCountry()),
+            Expressions.constant(locale.getVariant())),
+        Expressions.constant(strength));
+  }
 }
diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java
index c29fbba..d6f23b3 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java
@@ -40,7 +40,6 @@ import org.apache.calcite.util.Util;
 
 import com.google.common.collect.ImmutableList;
 
-import java.lang.reflect.Method;
 import java.lang.reflect.Modifier;
 import java.lang.reflect.Type;
 import java.util.AbstractList;
@@ -48,6 +47,7 @@ import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 
+import static org.apache.calcite.adapter.enumerable.EnumUtils.generateCollatorExpression;
 import static org.apache.calcite.adapter.enumerable.EnumUtils.overridingMethodDecl;
 
 /** Implementation of {@link PhysType}. */
@@ -261,6 +261,10 @@ public class PhysTypeImpl implements PhysType {
     final Expression selector;
     if (collations.size() == 1) {
       RelFieldCollation collation = collations.get(0);
+      RelDataType fieldType = rowType.getFieldList() == null || rowType.getFieldList().isEmpty()
+          ? rowType
+          : rowType.getFieldList().get(collation.getFieldIndex()).getType();
+      Expression fieldComparator = generateCollatorExpression(fieldType.getCollation());
       ParameterExpression parameter =
           Expressions.parameter(javaRowClass, "v");
       selector =
@@ -270,13 +274,16 @@ public class PhysTypeImpl implements PhysType {
               parameter);
       return Pair.of(selector,
           Expressions.call(
-              BuiltInMethod.NULLS_COMPARATOR.method,
-              Expressions.constant(
-                  collation.nullDirection
-                      == RelFieldCollation.NullDirection.FIRST),
-              Expressions.constant(
-                  collation.getDirection()
-                      == RelFieldCollation.Direction.DESCENDING)));
+              fieldComparator == null ? BuiltInMethod.NULLS_COMPARATOR.method
+                  : BuiltInMethod.NULLS_COMPARATOR2.method,
+              Expressions.list(
+                  (Expression) Expressions.constant(
+                      collation.nullDirection
+                          == RelFieldCollation.NullDirection.FIRST),
+                  Expressions.constant(
+                      collation.direction
+                          == RelFieldCollation.Direction.DESCENDING))
+                  .appendIfNotNull(fieldComparator)));
     }
     selector =
         Expressions.call(BuiltInMethod.IDENTITY_SELECTOR.method);
@@ -297,6 +304,8 @@ public class PhysTypeImpl implements PhysType {
     body.add(Expressions.declare(mod, parameterC, null));
     for (RelFieldCollation collation : collations) {
       final int index = collation.getFieldIndex();
+      final RelDataType fieldType = rowType.getFieldList().get(index).getType();
+      final Expression fieldComparator = generateCollatorExpression(fieldType.getCollation());
       Expression arg0 = fieldReference(parameterV0, index);
       Expression arg1 = fieldReference(parameterV1, index);
       switch (Primitive.flavor(fieldClass(index))) {
@@ -310,19 +319,21 @@ public class PhysTypeImpl implements PhysType {
       final boolean descending =
           collation.getDirection()
               == RelFieldCollation.Direction.DESCENDING;
-      final Method method = (fieldNullable(index)
-          ? (nullsFirst ^ descending
-              ? BuiltInMethod.COMPARE_NULLS_FIRST
-              : BuiltInMethod.COMPARE_NULLS_LAST)
-          : BuiltInMethod.COMPARE).method;
       body.add(
           Expressions.statement(
               Expressions.assign(
                   parameterC,
-                  Expressions.call(method.getDeclaringClass(),
-                      method.getName(),
-                      arg0,
-                      arg1))));
+                  Expressions.call(
+                      Utilities.class,
+                      fieldNullable(index)
+                          ? (nullsFirst != descending
+                          ? "compareNullsFirst"
+                          : "compareNullsLast")
+                          : "compare",
+                      Expressions.list(
+                          arg0,
+                          arg1)
+                          .appendIfNotNull(fieldComparator)))));
       body.add(
           Expressions.ifThen(
               Expressions.notEqual(
@@ -396,6 +407,8 @@ public class PhysTypeImpl implements PhysType {
     body.add(Expressions.declare(mod, parameterC, null));
     for (RelFieldCollation fieldCollation : collation.getFieldCollations()) {
       final int index = fieldCollation.getFieldIndex();
+      final RelDataType fieldType = rowType.getFieldList().get(index).getType();
+      final Expression fieldComparator = generateCollatorExpression(fieldType.getCollation());
       Expression arg0 = fieldReference(parameterV0, index);
       Expression arg1 = fieldReference(parameterV1, index);
       switch (Primitive.flavor(fieldClass(index))) {
@@ -420,8 +433,10 @@ public class PhysTypeImpl implements PhysType {
                           ? "compareNullsFirst"
                           : "compareNullsLast")
                           : "compare",
-                      arg0,
-                      arg1))));
+                      Expressions.list(
+                          arg0,
+                          arg1)
+                          .appendIfNotNull(fieldComparator)))));
       body.add(
           Expressions.ifThen(
               Expressions.notEqual(
diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 230e9de..839826d 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -83,6 +83,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.function.Supplier;
 
+import static org.apache.calcite.adapter.enumerable.EnumUtils.generateCollatorExpression;
 import static org.apache.calcite.linq4j.tree.ExpressionType.Add;
 import static org.apache.calcite.linq4j.tree.ExpressionType.AndAlso;
 import static org.apache.calcite.linq4j.tree.ExpressionType.Divide;
@@ -2504,6 +2505,11 @@ public class RexImpTable {
         final Type type0 = expressions.get(0).getType();
         final Type type1 = expressions.get(1).getType();
         final SqlBinaryOperator op = (SqlBinaryOperator) call.getOperator();
+        final RelDataType relDataType0 = call.getOperands().get(0).getType();
+        final Expression fieldComparator = generateCollatorExpression(relDataType0.getCollation());
+        if (fieldComparator != null) {
+          expressions.add(fieldComparator);
+        }
         final Primitive primitive = Primitive.ofBoxOr(type0);
         if (primitive == null
             || type1 == BigDecimal.class
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
index 3e93a5b..d15952b 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
@@ -288,12 +288,14 @@ public final class CalciteSystemProperty<T> {
 
   /**
    * The strength of the default collation.
+   * Allowed values (as defined in {@link java.text.Collator}) are: primary, secondary,
+   * tertiary, identical.
    *
    * <p>It is used in {@link org.apache.calcite.sql.SqlCollation} and
    * {@link org.apache.calcite.sql.SqlLiteral#SqlLiteral}.</p>
    */
   // TODO review zabetak:
-  // What are the allowed values? What happens if a wrong value is specified?
+  // What happens if a wrong value is specified?
   public static final CalciteSystemProperty<String> DEFAULT_COLLATION_STRENGTH =
       stringProperty("calcite.default.collation.strength", "primary");
 
diff --git a/core/src/main/java/org/apache/calcite/jdbc/JavaCollation.java b/core/src/main/java/org/apache/calcite/jdbc/JavaCollation.java
new file mode 100644
index 0000000..468c40f
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/jdbc/JavaCollation.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.jdbc;
+
+import org.apache.calcite.sql.SqlCollation;
+
+import java.nio.charset.Charset;
+import java.text.Collator;
+import java.util.Locale;
+
+/**
+ * Collation that uses a specific {@link Collator} for comparison.
+ */
+public class JavaCollation extends SqlCollation {
+  private final Collator collator;
+
+  public JavaCollation(Coercibility coercibility, Locale locale, Charset charset, int strength) {
+    super(coercibility, locale, charset, getStrengthString(strength));
+    collator = Collator.getInstance(locale);
+    collator.setStrength(strength);
+  }
+
+  // Strength values
+  private static final String STRENGTH_PRIMARY = "primary";
+  private static final String STRENGTH_SECONDARY = "secondary";
+  private static final String STRENGTH_TERTIARY = "tertiary";
+  private static final String STRENGTH_IDENTICAL = "identical";
+
+  private static String getStrengthString(int strengthValue) {
+    switch (strengthValue) {
+    case Collator.PRIMARY:
+      return STRENGTH_PRIMARY;
+    case Collator.SECONDARY:
+      return STRENGTH_SECONDARY;
+    case Collator.TERTIARY:
+      return STRENGTH_TERTIARY;
+    case Collator.IDENTICAL:
+      return STRENGTH_IDENTICAL;
+    default:
+      throw new IllegalArgumentException("Incorrect strength value.");
+    }
+  }
+
+  @Override protected String generateCollationName(Charset charset) {
+    return super.generateCollationName(charset) + "$JAVA_COLLATOR";
+  }
+
+  @Override public Collator getCollator() {
+    return collator;
+  }
+}
diff --git a/core/src/main/java/org/apache/calcite/rex/RexBuilder.java b/core/src/main/java/org/apache/calcite/rex/RexBuilder.java
index d26c778..e58fe32 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexBuilder.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexBuilder.java
@@ -919,9 +919,12 @@ public class RexBuilder {
       // from the type if necessary.
       assert o instanceof NlsString;
       NlsString nlsString = (NlsString) o;
-      if ((nlsString.getCollation() == null)
-          || (nlsString.getCharset() == null)) {
-        assert type.getSqlTypeName() == SqlTypeName.CHAR;
+      if (nlsString.getCollation() == null
+          || nlsString.getCharset() == null
+          || !nlsString.getCharset().equals(type.getCharset())
+          || !nlsString.getCollation().equals(type.getCollation())) {
+        assert type.getSqlTypeName() == SqlTypeName.CHAR
+            || type.getSqlTypeName() == SqlTypeName.VARCHAR;
         assert type.getCharset().name() != null;
         assert type.getCollation() != null;
         o = new NlsString(
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 8b9b59d..b4fca2c 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -59,6 +59,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Base64;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -637,6 +638,11 @@ public class SqlFunctions {
     return b0.equals(b1);
   }
 
+  /** SQL <code>=</code> operator applied to String values with a certain Comparator. */
+  public static boolean eq(String s0, String s1, Comparator<String> comparator) {
+    return comparator.compare(s0, s1) == 0;
+  }
+
   /** SQL <code>=</code> operator applied to Object values (at least one operand
    * has ANY type; neither may be null). */
   public static boolean eqAny(Object b0, Object b1) {
@@ -676,6 +682,11 @@ public class SqlFunctions {
     return !eq(b0, b1);
   }
 
+  /** SQL <code>&lt;&gt;</code> operator applied to String values with a certain Comparator. */
+  public static boolean ne(String s0, String s1, Comparator<String> comparator) {
+    return !eq(s0, s1, comparator);
+  }
+
   /** SQL <code>&lt;gt;</code> operator applied to Object values (at least one
    *  operand has ANY type, including String; neither may be null). */
   public static boolean neAny(Object b0, Object b1) {
@@ -694,6 +705,11 @@ public class SqlFunctions {
     return b0.compareTo(b1) < 0;
   }
 
+  /** SQL <code>&lt;</code> operator applied to String values. */
+  public static boolean lt(String b0, String b1, Comparator<String> comparator) {
+    return comparator.compare(b0, b1) < 0;
+  }
+
   /** SQL <code>&lt;</code> operator applied to ByteString values. */
   public static boolean lt(ByteString b0, ByteString b1) {
     return b0.compareTo(b1) < 0;
@@ -729,6 +745,11 @@ public class SqlFunctions {
     return b0.compareTo(b1) <= 0;
   }
 
+  /** SQL <code>&le;</code> operator applied to String values. */
+  public static boolean le(String b0, String b1, Comparator<String> comparator) {
+    return comparator.compare(b0, b1) <= 0;
+  }
+
   /** SQL <code>&le;</code> operator applied to ByteString values. */
   public static boolean le(ByteString b0, ByteString b1) {
     return b0.compareTo(b1) <= 0;
@@ -765,6 +786,11 @@ public class SqlFunctions {
     return b0.compareTo(b1) > 0;
   }
 
+  /** SQL <code>&gt;</code> operator applied to String values. */
+  public static boolean gt(String b0, String b1, Comparator<String> comparator) {
+    return comparator.compare(b0, b1) > 0;
+  }
+
   /** SQL <code>&gt;</code> operator applied to ByteString values. */
   public static boolean gt(ByteString b0, ByteString b1) {
     return b0.compareTo(b1) > 0;
@@ -801,6 +827,11 @@ public class SqlFunctions {
     return b0.compareTo(b1) >= 0;
   }
 
+  /** SQL <code>&ge;</code> operator applied to String values. */
+  public static boolean ge(String b0, String b1, Comparator<String> comparator) {
+    return comparator.compare(b0, b1) >= 0;
+  }
+
   /** SQL <code>&ge;</code> operator applied to ByteString values. */
   public static boolean ge(ByteString b0, ByteString b1) {
     return b0.compareTo(b1) >= 0;
diff --git a/core/src/main/java/org/apache/calcite/runtime/Utilities.java b/core/src/main/java/org/apache/calcite/runtime/Utilities.java
index a79cbb6..68bfd3b 100644
--- a/core/src/main/java/org/apache/calcite/runtime/Utilities.java
+++ b/core/src/main/java/org/apache/calcite/runtime/Utilities.java
@@ -16,8 +16,11 @@
  */
 package org.apache.calcite.runtime;
 
+import java.text.Collator;
+import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Objects;
 
 /**
@@ -212,6 +215,27 @@ public class Utilities {
                 : v0.compareTo(v1);
   }
 
+  public static int compare(Comparable v0, Comparable v1, Comparator comparator) {
+    //noinspection unchecked
+    return comparator.compare(v0, v1);
+  }
+
+  public static int compareNullsFirst(Comparable v0, Comparable v1, Comparator comparator) {
+    //noinspection unchecked
+    return v0 == v1 ? 0
+        : v0 == null ? -1
+            : v1 == null ? 1
+                : comparator.compare(v0, v1);
+  }
+
+  public static int compareNullsLast(Comparable v0, Comparable v1, Comparator comparator) {
+    //noinspection unchecked
+    return v0 == v1 ? 0
+        : v0 == null ? 1
+            : v1 == null ? -1
+                : comparator.compare(v0, v1);
+  }
+
   public static int compareNullsLast(List v0, List v1) {
     //noinspection unchecked
     return v0 == v1 ? 0
@@ -224,4 +248,10 @@ public class Utilities {
   public static Pattern.PatternBuilder patternBuilder() {
     return Pattern.builder();
   }
+
+  public static Collator generateCollator(Locale locale, int strength) {
+    final Collator collator = Collator.getInstance(locale);
+    collator.setStrength(strength);
+    return collator;
+  }
 }
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlCollation.java b/core/src/main/java/org/apache/calcite/sql/SqlCollation.java
index 6a700f1..f1e3366 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlCollation.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlCollation.java
@@ -24,6 +24,7 @@ import org.apache.calcite.util.Util;
 
 import java.io.Serializable;
 import java.nio.charset.Charset;
+import java.text.Collator;
 import java.util.Locale;
 
 import static org.apache.calcite.util.Static.RESOURCE;
@@ -73,7 +74,19 @@ public class SqlCollation implements Serializable {
   //~ Constructors -----------------------------------------------------------
 
   /**
-   * Creates a Collation by its name and its coercibility
+   * Creates a SqlCollation with the default collation name and the given
+   * coercibility.
+   *
+   * @param coercibility Coercibility
+   */
+  public SqlCollation(Coercibility coercibility) {
+    this(
+        CalciteSystemProperty.DEFAULT_COLLATION.value(),
+        coercibility);
+  }
+
+  /**
+   * Creates a Collation by its name and its coercibility.
    *
    * @param collation    Collation specification
    * @param coercibility Coercibility
@@ -86,31 +99,30 @@ public class SqlCollation implements Serializable {
         SqlParserUtil.parseCollation(collation);
     Charset charset = parseValues.getCharset();
     this.wrappedCharset = SerializableCharset.forCharset(charset);
-    locale = parseValues.getLocale();
-    strength = parseValues.getStrength();
-    String c =
-        charset.name().toUpperCase(Locale.ROOT) + "$" + locale.toString();
-    if ((strength != null) && (strength.length() > 0)) {
-      c += "$" + strength;
-    }
-    collationName = c;
+    this.locale = parseValues.getLocale();
+    this.strength = parseValues.getStrength().toLowerCase(Locale.ROOT);
+    this.collationName = generateCollationName(charset);
   }
 
   /**
-   * Creates a SqlCollation with the default collation name and the given
-   * coercibility.
-   *
-   * @param coercibility Coercibility
+   * Creates a Collation by its coercibility, locale, charset and strength.
    */
-  public SqlCollation(Coercibility coercibility) {
-    this(
-        CalciteSystemProperty.DEFAULT_COLLATION.value(),
-        coercibility);
+  public SqlCollation(
+      Coercibility coercibility,
+      Locale locale,
+      Charset charset,
+      String strength) {
+    this.coercibility = coercibility;
+    charset = SqlUtil.getCharset(charset.name());
+    this.wrappedCharset = SerializableCharset.forCharset(charset);
+    this.locale = locale;
+    this.strength = strength.toLowerCase(Locale.ROOT);
+    this.collationName = generateCollationName(charset);
   }
 
   //~ Methods ----------------------------------------------------------------
 
-  public boolean equals(Object o) {
+  @Override public boolean equals(Object o) {
     return this == o
         || o instanceof SqlCollation
         && collationName.equals(((SqlCollation) o).collationName);
@@ -120,6 +132,10 @@ public class SqlCollation implements Serializable {
     return collationName.hashCode();
   }
 
+  protected String generateCollationName(Charset charset) {
+    return charset.name().toUpperCase(Locale.ROOT) + "$" + locale.toString() + "$" + strength;
+  }
+
   /**
    * Returns the collating sequence (the collation name) and the coercibility
    * for the resulting value of a dyadic operator.
@@ -279,4 +295,17 @@ public class SqlCollation implements Serializable {
   public final SqlCollation.Coercibility getCoercibility() {
     return coercibility;
   }
+
+  public final Locale getLocale() {
+    return locale;
+  }
+
+  /**
+   * @return the {@link Collator} to compare values having the current collation,
+   * or {@code null} if no specific {@link Collator} is needed, in which case
+   * {@link String#compareTo} will be used.
+   */
+  public Collator getCollator() {
+    return null;
+  }
 }
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlUtil.java b/core/src/main/java/org/apache/calcite/sql/SqlUtil.java
index 67df931..16d463b 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlUtil.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlUtil.java
@@ -915,13 +915,13 @@ public abstract class SqlUtil {
       return "Big5";
     case "LATIN1":
       return "ISO-8859-1";
-    case "GB2312":
-    case "GBK":
-      return name;
     case "UTF8":
       return "UTF-8";
     case "UTF16":
+    case "UTF-16":
       return ConversionUtil.NATIVE_UTF16_CHARSET_NAME;
+    case "GB2312":
+    case "GBK":
     case "UTF-16BE":
     case "UTF-16LE":
     case "ISO-8859-1":
diff --git a/core/src/main/java/org/apache/calcite/sql/parser/SqlParserUtil.java b/core/src/main/java/org/apache/calcite/sql/parser/SqlParserUtil.java
index 4f309d7..0cf9738 100644
--- a/core/src/main/java/org/apache/calcite/sql/parser/SqlParserUtil.java
+++ b/core/src/main/java/org/apache/calcite/sql/parser/SqlParserUtil.java
@@ -563,7 +563,7 @@ public final class SqlParserUtil {
           CalciteSystemProperty.DEFAULT_COLLATION_STRENGTH.value();
     }
 
-    Charset charset = Charset.forName(charsetStr);
+    Charset charset = SqlUtil.getCharset(charsetStr);
     String[] localeParts = localeStr.split("_");
     Locale locale;
     if (1 == localeParts.length) {
diff --git a/core/src/main/java/org/apache/calcite/sql/type/SqlTypeFactoryImpl.java b/core/src/main/java/org/apache/calcite/sql/type/SqlTypeFactoryImpl.java
index 2b9d102..7709985 100644
--- a/core/src/main/java/org/apache/calcite/sql/type/SqlTypeFactoryImpl.java
+++ b/core/src/main/java/org/apache/calcite/sql/type/SqlTypeFactoryImpl.java
@@ -300,7 +300,6 @@ public class SqlTypeFactoryImpl extends RelDataTypeFactoryImpl {
         SqlCollation collation1 = type.getCollation();
         SqlCollation collation2 = resultType.getCollation();
 
-        // TODO:  refine collation combination rules
         final int precision =
             SqlTypeUtil.maxPrecision(resultType.getPrecision(),
                 type.getPrecision());
@@ -338,6 +337,10 @@ public class SqlTypeFactoryImpl extends RelDataTypeFactoryImpl {
                   precision);
         }
         Charset charset = null;
+        // TODO:  refine collation combination rules
+        SqlCollation collation0 = collation1 != null && collation2 != null
+            ? SqlCollation.getCoercibilityDyadicOperator(collation1, collation2)
+            : null;
         SqlCollation collation = null;
         if ((charset1 != null) || (charset2 != null)) {
           if (charset1 == null) {
@@ -362,7 +365,7 @@ public class SqlTypeFactoryImpl extends RelDataTypeFactoryImpl {
               createTypeWithCharsetAndCollation(
                   resultType,
                   charset,
-                  collation);
+                  collation0 != null ? collation0 : collation);
         }
       } else if (SqlTypeUtil.isExactNumeric(type)) {
         if (SqlTypeUtil.isExactNumeric(resultType)) {
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index f0b8f81..f4f18f0 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -236,6 +236,8 @@ public enum BuiltInMethod {
   EMPTY_ENUMERABLE(Linq4j.class, "emptyEnumerable"),
   NULLS_COMPARATOR(Functions.class, "nullsComparator", boolean.class,
       boolean.class),
+  NULLS_COMPARATOR2(Functions.class, "nullsComparator", boolean.class,
+      boolean.class, Comparator.class),
   ARRAY_COMPARER(Functions.class, "arrayComparer"),
   FUNCTION0_APPLY(Function0.class, "apply"),
   FUNCTION1_APPLY(Function1.class, "apply", Object.class),
@@ -501,6 +503,11 @@ public enum BuiltInMethod {
       Comparable.class),
   COMPARE_NULLS_LAST(Utilities.class, "compareNullsLast", Comparable.class,
       Comparable.class),
+  COMPARE2(Utilities.class, "compare", Comparable.class, Comparable.class, Comparator.class),
+  COMPARE_NULLS_FIRST2(Utilities.class, "compareNullsFirst", Comparable.class,
+      Comparable.class, Comparator.class),
+  COMPARE_NULLS_LAST2(Utilities.class, "compareNullsLast", Comparable.class,
+      Comparable.class, Comparator.class),
   ROUND_LONG(SqlFunctions.class, "round", long.class, long.class),
   ROUND_INT(SqlFunctions.class, "round", int.class, int.class),
   DATE_TO_INT(SqlFunctions.class, "toInt", java.util.Date.class),
diff --git a/core/src/main/java/org/apache/calcite/util/NlsString.java b/core/src/main/java/org/apache/calcite/util/NlsString.java
index bf51677..8c68256 100644
--- a/core/src/main/java/org/apache/calcite/util/NlsString.java
+++ b/core/src/main/java/org/apache/calcite/util/NlsString.java
@@ -171,8 +171,9 @@ public class NlsString implements Comparable<NlsString>, Cloneable {
   }
 
   @Override public int compareTo(NlsString other) {
-    // TODO jvs 18-Jan-2006:  Actual collation support.  This just uses
-    // the default collation.
+    if (collation != null && collation.getCollator() != null) {
+      return collation.getCollator().compare(getValue(), other.getValue());
+    }
     return getValue().compareTo(other.getValue());
   }
 
diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
new file mode 100644
index 0000000..1027807
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.test.enumerable;
+
+import org.apache.calcite.adapter.enumerable.EnumerableRules;
+import org.apache.calcite.adapter.java.ReflectiveSchema;
+import org.apache.calcite.config.CalciteConnectionProperty;
+import org.apache.calcite.config.Lex;
+import org.apache.calcite.jdbc.JavaCollation;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.sql.SqlCollation;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.test.JdbcTest;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.Util;
+
+import org.junit.jupiter.api.Test;
+
+import java.text.Collator;
+import java.util.Collections;
+import java.util.Locale;
+import java.util.function.Consumer;
+
+import static org.apache.calcite.sql.fun.SqlStdOperatorTable.EQUALS;
+import static org.apache.calcite.sql.fun.SqlStdOperatorTable.GREATER_THAN;
+import static org.apache.calcite.sql.fun.SqlStdOperatorTable.LESS_THAN;
+import static org.apache.calcite.sql.fun.SqlStdOperatorTable.NOT_EQUALS;
+
+/**
+ * Test cases for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-3951">[CALCITE-3951]
+ * Support different string comparison based on SqlCollation</a>.
+ */
+class EnumerableStringComparisonTest {
+
+  private static final SqlCollation SPECIAL_COLLATION_PRIMARY =
+      new JavaCollation(SqlCollation.Coercibility.IMPLICIT, Locale.US,
+          Util.getDefaultCharset(), Collator.PRIMARY);
+
+  private static final SqlCollation SPECIAL_COLLATION_SECONDARY =
+      new JavaCollation(SqlCollation.Coercibility.IMPLICIT, Locale.US,
+          Util.getDefaultCharset(), Collator.SECONDARY);
+
+  private static final SqlCollation SPECIAL_COLLATION_TERTIARY =
+      new JavaCollation(SqlCollation.Coercibility.IMPLICIT, Locale.US,
+          Util.getDefaultCharset(), Collator.TERTIARY);
+
+  private static final SqlCollation SPECIAL_COLLATION_IDENTICAL =
+      new JavaCollation(SqlCollation.Coercibility.IMPLICIT, Locale.US,
+          Util.getDefaultCharset(), Collator.IDENTICAL);
+
+  private RelDataType createRecordVarcharSpecialCollation(RelBuilder builder) {
+    return builder.getTypeFactory().builder()
+        .add(
+            "name",
+            builder.getTypeFactory().createTypeWithCharsetAndCollation(
+                builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
+                builder.getTypeFactory().getDefaultCharset(),
+                SPECIAL_COLLATION_TERTIARY))
+        .build();
+  }
+
+  private RelDataType createVarcharSpecialCollation(RelBuilder builder, SqlCollation collation) {
+    return builder.getTypeFactory().createTypeWithCharsetAndCollation(
+        builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
+        builder.getTypeFactory().getDefaultCharset(),
+        collation);
+  }
+
+  @Test void testSortStringDefault() {
+    tester()
+        .query("?")
+        .withRel(builder -> builder
+            .values(
+                builder.getTypeFactory().builder()
+                    .add("name",
+                        builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)).build(),
+                "Legal", "presales", "hr", "Administration", "MARKETING")
+            .sort(
+                builder.field(1, 0, "name"))
+            .build())
+        .explainHookMatches(""
+            + "EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+            + "  EnumerableValues(tuples=[[{ 'Legal' }, { 'presales' }, { 'hr' }, { 'Administration' }, { 'MARKETING' }]])\n")
+        .returnsOrdered("name=Administration\n"
+            + "name=Legal\n"
+            + "name=MARKETING\n"
+            + "name=hr\n"
+            + "name=presales");
+  }
+
+  @Test void testSortStringSpecialCollation() {
+    tester()
+        .query("?")
+        .withRel(builder -> builder
+            .values(
+                createRecordVarcharSpecialCollation(builder),
+                "Legal", "presales", "hr", "Administration", "MARKETING")
+            .sort(
+                builder.field(1, 0, "name"))
+            .build())
+        .explainHookMatches(""
+            + "EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+            + "  EnumerableValues(tuples=[[{ 'Legal' }, { 'presales' }, { 'hr' }, { 'Administration' }, { 'MARKETING' }]])\n")
+        .returnsOrdered("name=Administration\n"
+            + "name=hr\n"
+            + "name=Legal\n"
+            + "name=MARKETING\n"
+            + "name=presales");
+  }
+
+  @Test void testMergeJoinOnStringSpecialCollation() {
+    tester()
+        .query("?")
+        .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> {
+          planner.addRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE);
+          planner.removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
+        })
+        .withRel(builder -> builder
+              .values(createRecordVarcharSpecialCollation(builder),
+                  "Legal", "presales", "HR", "Administration", "Marketing").as("v1")
+              .values(createRecordVarcharSpecialCollation(builder),
+                  "Marketing", "bureaucracy", "Sales", "HR").as("v2")
+              .join(JoinRelType.INNER,
+                  builder.equals(
+                      builder.field(2, 0, "name"),
+                      builder.field(2, 1, "name")))
+              .project(
+                  builder.field("v1", "name"),
+                  builder.field("v2", "name"))
+              .build())
+        .explainHookMatches("" // It is important that we have MergeJoin in the plan
+            + "EnumerableMergeJoin(condition=[=($0, $1)], joinType=[inner])\n"
+            + "  EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+            + "    EnumerableValues(tuples=[[{ 'Legal' }, { 'presales' }, { 'HR' }, { 'Administration' }, { 'Marketing' }]])\n"
+            + "  EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+            + "    EnumerableValues(tuples=[[{ 'Marketing' }, { 'bureaucracy' }, { 'Sales' }, { 'HR' }]])\n")
+        .returnsOrdered("name=HR; name0=HR\n"
+            + "name=Marketing; name0=Marketing");
+  }
+
+  @Test void testStringComparison() {
+    testStringComparison("a", "A", LESS_THAN, true);
+    testStringComparison("a", "A", GREATER_THAN, false);
+    testStringComparison("A", "a", LESS_THAN, false);
+    testStringComparison("A", "a", GREATER_THAN, true);
+
+    testStringComparison("aaa", "AAA", EQUALS, false);
+    testStringComparison("aaa", "AAA", NOT_EQUALS, true);
+    testStringComparison("AAA", "AAA", EQUALS, true);
+    testStringComparison("AAA", "AAA", NOT_EQUALS, false);
+    testStringComparison("AAA", "BBB", EQUALS, false);
+    testStringComparison("AAA", "BBB", NOT_EQUALS, true);
+
+    testStringComparison("a", "b", LESS_THAN, true);
+    testStringComparison("A", "B", LESS_THAN, true);
+    testStringComparison("a", "B", LESS_THAN, true);
+    testStringComparison("A", "b", LESS_THAN, true);
+    testStringComparison("a", "b", GREATER_THAN, false);
+    testStringComparison("A", "B", GREATER_THAN, false);
+    testStringComparison("a", "B", GREATER_THAN, false);
+    testStringComparison("A", "b", GREATER_THAN, false);
+
+    testStringComparison("b", "a", GREATER_THAN, true);
+    testStringComparison("B", "A", GREATER_THAN, true);
+    testStringComparison("B", "a", GREATER_THAN, true);
+    testStringComparison("b", "A", GREATER_THAN, true);
+    testStringComparison("b", "a", LESS_THAN, false);
+    testStringComparison("B", "A", LESS_THAN, false);
+    testStringComparison("B", "a", LESS_THAN, false);
+    testStringComparison("b", "A", LESS_THAN, false);
+
+    // Check differences regarding strength:
+
+    testStringComparison("ABC", "ABC", EQUALS, SPECIAL_COLLATION_PRIMARY, true);
+    testStringComparison("ABC", "ABC", EQUALS, SPECIAL_COLLATION_SECONDARY, true);
+    testStringComparison("ABC", "ABC", EQUALS, SPECIAL_COLLATION_TERTIARY, true);
+    testStringComparison("ABC", "ABC", EQUALS, SPECIAL_COLLATION_IDENTICAL, true);
+
+    testStringComparison("abc", "ÀBC", EQUALS, SPECIAL_COLLATION_PRIMARY, true);
+    testStringComparison("abc", "ÀBC", EQUALS, SPECIAL_COLLATION_SECONDARY, false);
+    testStringComparison("abc", "ÀBC", EQUALS, SPECIAL_COLLATION_TERTIARY, false);
+    testStringComparison("abc", "ÀBC", EQUALS, SPECIAL_COLLATION_IDENTICAL, false);
+
+    testStringComparison("abc", "ABC", EQUALS, SPECIAL_COLLATION_PRIMARY, true);
+    testStringComparison("abc", "ABC", EQUALS, SPECIAL_COLLATION_SECONDARY, true);
+    testStringComparison("abc", "ABC", EQUALS, SPECIAL_COLLATION_TERTIARY, false);
+    testStringComparison("abc", "ABC", EQUALS, SPECIAL_COLLATION_IDENTICAL, false);
+
+    testStringComparison("\u0001", "\u0002", EQUALS, SPECIAL_COLLATION_PRIMARY, true);
+    testStringComparison("\u0001", "\u0002", EQUALS, SPECIAL_COLLATION_SECONDARY, true);
+    testStringComparison("\u0001", "\u0002", EQUALS, SPECIAL_COLLATION_TERTIARY, true);
+    testStringComparison("\u0001", "\u0002", EQUALS, SPECIAL_COLLATION_IDENTICAL, false);
+  }
+
+  private void testStringComparison(String str1, String str2,
+                                    SqlOperator operator, boolean expectedResult) {
+    testStringComparison(str1, str2, operator, SPECIAL_COLLATION_TERTIARY, expectedResult);
+  }
+
+  private void testStringComparison(String str1, String str2,
+                                    SqlOperator operator, SqlCollation col,
+                                    boolean expectedResult) {
+    tester()
+        .query("?")
+        .withRel(builder -> {
+          final RexBuilder rexBuilder = builder.getRexBuilder();
+          final RelDataType varcharSpecialCollation = createVarcharSpecialCollation(builder, col);
+          return builder
+              .values(new String[]{"aux"}, false)
+              .project(
+                  Collections.singletonList(
+                      builder.call(
+                          operator,
+                          rexBuilder.makeCast(varcharSpecialCollation, builder.literal(str1)),
+                          rexBuilder.makeCast(varcharSpecialCollation, builder.literal(str2)))),
+                  Collections.singletonList("result"))
+              .build();
+        })
+        .returnsUnordered("result=" + expectedResult);
+  }
+
+  private CalciteAssert.AssertThat tester() {
+    return CalciteAssert.that()
+        .with(CalciteConnectionProperty.LEX, Lex.JAVA)
+        .with(CalciteConnectionProperty.FORCE_DECORRELATE, false)
+        .withSchema("s", new ReflectiveSchema(new JdbcTest.HrSchema()));
+  }
+}
diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/function/Functions.java b/linq4j/src/main/java/org/apache/calcite/linq4j/function/Functions.java
index 4406bfb..539ea50 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/function/Functions.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/function/Functions.java
@@ -438,6 +438,32 @@ public abstract class Functions {
   }
 
   /**
+   * Returns a {@link Comparator} that handles null values.
+   *
+   * @param nullsFirst Whether nulls come before all other values
+   * @param reverse Whether to reverse the usual order of {@link Comparable}s
+   * @param comparator Comparator to be used for comparison
+   */
+  @SuppressWarnings("unchecked")
+  public static <T extends Comparable<T>> Comparator<T> nullsComparator(
+      boolean nullsFirst,
+      boolean reverse,
+      Comparator<T> comparator) {
+    return (T o1, T o2) -> {
+      if (o1 == o2) {
+        return 0;
+      }
+      if (o1 == null) {
+        return nullsFirst ? -1 : 1;
+      }
+      if (o2 == null) {
+        return nullsFirst ? 1 : -1;
+      }
+      return reverse ? -comparator.compare(o1, o2) : comparator.compare(o1, o2);
+    };
+  }
+
+  /**
    * Returns an {@link EqualityComparer} that uses object identity and hash
    * code.
    */

Reply via email to