This is an automated email from the ASF dual-hosted git repository.
rubenql pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new fa83490 [CALCITE-3951] Support different string comparison based on
SqlCollation
fa83490 is described below
commit fa8349069d141d3c75bafa06d5fb8800711ec8d6
Author: rubenada <[email protected]>
AuthorDate: Wed Apr 22 12:19:06 2020 +0200
[CALCITE-3951] Support different string comparison based on SqlCollation
---
.../calcite/adapter/enumerable/EnumUtils.java | 27 +++
.../calcite/adapter/enumerable/PhysTypeImpl.java | 53 +++--
.../calcite/adapter/enumerable/RexImpTable.java | 6 +
.../calcite/config/CalciteSystemProperty.java | 4 +-
.../org/apache/calcite/jdbc/JavaCollation.java | 65 ++++++
.../java/org/apache/calcite/rex/RexBuilder.java | 9 +-
.../org/apache/calcite/runtime/SqlFunctions.java | 31 +++
.../java/org/apache/calcite/runtime/Utilities.java | 30 +++
.../java/org/apache/calcite/sql/SqlCollation.java | 65 ++++--
.../main/java/org/apache/calcite/sql/SqlUtil.java | 6 +-
.../apache/calcite/sql/parser/SqlParserUtil.java | 2 +-
.../calcite/sql/type/SqlTypeFactoryImpl.java | 7 +-
.../org/apache/calcite/util/BuiltInMethod.java | 7 +
.../java/org/apache/calcite/util/NlsString.java | 5 +-
.../enumerable/EnumerableStringComparisonTest.java | 249 +++++++++++++++++++++
.../apache/calcite/linq4j/function/Functions.java | 26 +++
16 files changed, 543 insertions(+), 49 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
index 77779a4..edf544b 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
@@ -47,6 +47,8 @@ import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexProgramBuilder;
import org.apache.calcite.runtime.SortedMultiMap;
import org.apache.calcite.runtime.SqlFunctions;
+import org.apache.calcite.runtime.Utilities;
+import org.apache.calcite.sql.SqlCollation;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;
@@ -63,6 +65,7 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
/**
@@ -1064,4 +1067,28 @@ public class EnumUtils {
}
};
}
+
+ public static Expression generateCollatorExpression(SqlCollation collation) {
+ if (collation == null || collation.getCollator() == null) {
+ return null;
+ }
+
+ // Utilities.generateCollator(
+ // new Locale(
+ // collation.getLocale().getLanguage(),
+ // collation.getLocale().getCountry(),
+ // collation.getLocale().getVariant()),
+ // collation.getCollator().getStrength());
+ final Locale locale = collation.getLocale();
+ final int strength = collation.getCollator().getStrength();
+ return Expressions.call(
+ Utilities.class,
+ "generateCollator",
+ Expressions.new_(
+ Locale.class,
+ Expressions.constant(locale.getLanguage()),
+ Expressions.constant(locale.getCountry()),
+ Expressions.constant(locale.getVariant())),
+ Expressions.constant(strength));
+ }
}
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java
index c29fbba..d6f23b3 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java
@@ -40,7 +40,6 @@ import org.apache.calcite.util.Util;
import com.google.common.collect.ImmutableList;
-import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.lang.reflect.Type;
import java.util.AbstractList;
@@ -48,6 +47,7 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
+import static
org.apache.calcite.adapter.enumerable.EnumUtils.generateCollatorExpression;
import static
org.apache.calcite.adapter.enumerable.EnumUtils.overridingMethodDecl;
/** Implementation of {@link PhysType}. */
@@ -261,6 +261,10 @@ public class PhysTypeImpl implements PhysType {
final Expression selector;
if (collations.size() == 1) {
RelFieldCollation collation = collations.get(0);
+ RelDataType fieldType = rowType.getFieldList() == null ||
rowType.getFieldList().isEmpty()
+ ? rowType
+ : rowType.getFieldList().get(collation.getFieldIndex()).getType();
+ Expression fieldComparator =
generateCollatorExpression(fieldType.getCollation());
ParameterExpression parameter =
Expressions.parameter(javaRowClass, "v");
selector =
@@ -270,13 +274,16 @@ public class PhysTypeImpl implements PhysType {
parameter);
return Pair.of(selector,
Expressions.call(
- BuiltInMethod.NULLS_COMPARATOR.method,
- Expressions.constant(
- collation.nullDirection
- == RelFieldCollation.NullDirection.FIRST),
- Expressions.constant(
- collation.getDirection()
- == RelFieldCollation.Direction.DESCENDING)));
+ fieldComparator == null ? BuiltInMethod.NULLS_COMPARATOR.method
+ : BuiltInMethod.NULLS_COMPARATOR2.method,
+ Expressions.list(
+ (Expression) Expressions.constant(
+ collation.nullDirection
+ == RelFieldCollation.NullDirection.FIRST),
+ Expressions.constant(
+ collation.direction
+ == RelFieldCollation.Direction.DESCENDING))
+ .appendIfNotNull(fieldComparator)));
}
selector =
Expressions.call(BuiltInMethod.IDENTITY_SELECTOR.method);
@@ -297,6 +304,8 @@ public class PhysTypeImpl implements PhysType {
body.add(Expressions.declare(mod, parameterC, null));
for (RelFieldCollation collation : collations) {
final int index = collation.getFieldIndex();
+ final RelDataType fieldType =
rowType.getFieldList().get(index).getType();
+ final Expression fieldComparator =
generateCollatorExpression(fieldType.getCollation());
Expression arg0 = fieldReference(parameterV0, index);
Expression arg1 = fieldReference(parameterV1, index);
switch (Primitive.flavor(fieldClass(index))) {
@@ -310,19 +319,21 @@ public class PhysTypeImpl implements PhysType {
final boolean descending =
collation.getDirection()
== RelFieldCollation.Direction.DESCENDING;
- final Method method = (fieldNullable(index)
- ? (nullsFirst ^ descending
- ? BuiltInMethod.COMPARE_NULLS_FIRST
- : BuiltInMethod.COMPARE_NULLS_LAST)
- : BuiltInMethod.COMPARE).method;
body.add(
Expressions.statement(
Expressions.assign(
parameterC,
- Expressions.call(method.getDeclaringClass(),
- method.getName(),
- arg0,
- arg1))));
+ Expressions.call(
+ Utilities.class,
+ fieldNullable(index)
+ ? (nullsFirst != descending
+ ? "compareNullsFirst"
+ : "compareNullsLast")
+ : "compare",
+ Expressions.list(
+ arg0,
+ arg1)
+ .appendIfNotNull(fieldComparator)))));
body.add(
Expressions.ifThen(
Expressions.notEqual(
@@ -396,6 +407,8 @@ public class PhysTypeImpl implements PhysType {
body.add(Expressions.declare(mod, parameterC, null));
for (RelFieldCollation fieldCollation : collation.getFieldCollations()) {
final int index = fieldCollation.getFieldIndex();
+ final RelDataType fieldType =
rowType.getFieldList().get(index).getType();
+ final Expression fieldComparator =
generateCollatorExpression(fieldType.getCollation());
Expression arg0 = fieldReference(parameterV0, index);
Expression arg1 = fieldReference(parameterV1, index);
switch (Primitive.flavor(fieldClass(index))) {
@@ -420,8 +433,10 @@ public class PhysTypeImpl implements PhysType {
? "compareNullsFirst"
: "compareNullsLast")
: "compare",
- arg0,
- arg1))));
+ Expressions.list(
+ arg0,
+ arg1)
+ .appendIfNotNull(fieldComparator)))));
body.add(
Expressions.ifThen(
Expressions.notEqual(
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 230e9de..839826d 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -83,6 +83,7 @@ import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
+import static
org.apache.calcite.adapter.enumerable.EnumUtils.generateCollatorExpression;
import static org.apache.calcite.linq4j.tree.ExpressionType.Add;
import static org.apache.calcite.linq4j.tree.ExpressionType.AndAlso;
import static org.apache.calcite.linq4j.tree.ExpressionType.Divide;
@@ -2504,6 +2505,11 @@ public class RexImpTable {
final Type type0 = expressions.get(0).getType();
final Type type1 = expressions.get(1).getType();
final SqlBinaryOperator op = (SqlBinaryOperator) call.getOperator();
+ final RelDataType relDataType0 = call.getOperands().get(0).getType();
+ final Expression fieldComparator =
generateCollatorExpression(relDataType0.getCollation());
+ if (fieldComparator != null) {
+ expressions.add(fieldComparator);
+ }
final Primitive primitive = Primitive.ofBoxOr(type0);
if (primitive == null
|| type1 == BigDecimal.class
diff --git
a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
index 3e93a5b..d15952b 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
@@ -288,12 +288,14 @@ public final class CalciteSystemProperty<T> {
/**
* The strength of the default collation.
+ * Allowed values (as defined in {@link java.text.Collator}) are: primary, secondary,
+ * tertiary, identical.
*
* <p>It is used in {@link org.apache.calcite.sql.SqlCollation} and
* {@link org.apache.calcite.sql.SqlLiteral#SqlLiteral}.</p>
*/
// TODO review zabetak:
- // What are the allowed values? What happens if a wrong value is specified?
+ // What happens if a wrong value is specified?
public static final CalciteSystemProperty<String> DEFAULT_COLLATION_STRENGTH
=
stringProperty("calcite.default.collation.strength", "primary");
diff --git a/core/src/main/java/org/apache/calcite/jdbc/JavaCollation.java
b/core/src/main/java/org/apache/calcite/jdbc/JavaCollation.java
new file mode 100644
index 0000000..468c40f
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/jdbc/JavaCollation.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.jdbc;
+
+import org.apache.calcite.sql.SqlCollation;
+
+import java.nio.charset.Charset;
+import java.text.Collator;
+import java.util.Locale;
+
+/**
+ * Collation that uses a specific {@link Collator} for comparison.
+ */
+public class JavaCollation extends SqlCollation {
+ private final Collator collator;
+
+ public JavaCollation(Coercibility coercibility, Locale locale, Charset
charset, int strength) {
+ super(coercibility, locale, charset, getStrengthString(strength));
+ collator = Collator.getInstance(locale);
+ collator.setStrength(strength);
+ }
+
+ // Strength values
+ private static final String STRENGTH_PRIMARY = "primary";
+ private static final String STRENGTH_SECONDARY = "secondary";
+ private static final String STRENGTH_TERTIARY = "tertiary";
+ private static final String STRENGTH_IDENTICAL = "identical";
+
+ private static String getStrengthString(int strengthValue) {
+ switch (strengthValue) {
+ case Collator.PRIMARY:
+ return STRENGTH_PRIMARY;
+ case Collator.SECONDARY:
+ return STRENGTH_SECONDARY;
+ case Collator.TERTIARY:
+ return STRENGTH_TERTIARY;
+ case Collator.IDENTICAL:
+ return STRENGTH_IDENTICAL;
+ default:
+ throw new IllegalArgumentException("Incorrect strength value.");
+ }
+ }
+
+ @Override protected String generateCollationName(Charset charset) {
+ return super.generateCollationName(charset) + "$JAVA_COLLATOR";
+ }
+
+ @Override public Collator getCollator() {
+ return collator;
+ }
+}
diff --git a/core/src/main/java/org/apache/calcite/rex/RexBuilder.java
b/core/src/main/java/org/apache/calcite/rex/RexBuilder.java
index d26c778..e58fe32 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexBuilder.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexBuilder.java
@@ -919,9 +919,12 @@ public class RexBuilder {
// from the type if necessary.
assert o instanceof NlsString;
NlsString nlsString = (NlsString) o;
- if ((nlsString.getCollation() == null)
- || (nlsString.getCharset() == null)) {
- assert type.getSqlTypeName() == SqlTypeName.CHAR;
+ if (nlsString.getCollation() == null
+ || nlsString.getCharset() == null
+ || !nlsString.getCharset().equals(type.getCharset())
+ || !nlsString.getCollation().equals(type.getCollation())) {
+ assert type.getSqlTypeName() == SqlTypeName.CHAR
+ || type.getSqlTypeName() == SqlTypeName.VARCHAR;
assert type.getCharset().name() != null;
assert type.getCollation() != null;
o = new NlsString(
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 8b9b59d..b4fca2c 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -59,6 +59,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collection;
+import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -637,6 +638,11 @@ public class SqlFunctions {
return b0.equals(b1);
}
+ /** SQL <code>=</code> operator applied to String values with a certain Comparator. */
+ public static boolean eq(String s0, String s1, Comparator<String>
comparator) {
+ return comparator.compare(s0, s1) == 0;
+ }
+
/** SQL <code>=</code> operator applied to Object values (at least one
operand
* has ANY type; neither may be null). */
public static boolean eqAny(Object b0, Object b1) {
@@ -676,6 +682,11 @@ public class SqlFunctions {
return !eq(b0, b1);
}
+ /** SQL <code>&lt;&gt;</code> operator applied to String values with a certain Comparator. */
+ public static boolean ne(String s0, String s1, Comparator<String>
comparator) {
+ return !eq(s0, s1, comparator);
+ }
+
/** SQL <code>&lt;&gt;</code> operator applied to Object values (at least one
* operand has ANY type, including String; neither may be null). */
public static boolean neAny(Object b0, Object b1) {
@@ -694,6 +705,11 @@ public class SqlFunctions {
return b0.compareTo(b1) < 0;
}
+ /** SQL <code>&lt;</code> operator applied to String values with a certain Comparator. */
+ public static boolean lt(String b0, String b1, Comparator<String>
comparator) {
+ return comparator.compare(b0, b1) < 0;
+ }
+
/** SQL <code><</code> operator applied to ByteString values. */
public static boolean lt(ByteString b0, ByteString b1) {
return b0.compareTo(b1) < 0;
@@ -729,6 +745,11 @@ public class SqlFunctions {
return b0.compareTo(b1) <= 0;
}
+ /** SQL <code>≤</code> operator applied to String values with a certain Comparator. */
+ public static boolean le(String b0, String b1, Comparator<String>
comparator) {
+ return comparator.compare(b0, b1) <= 0;
+ }
+
/** SQL <code>≤</code> operator applied to ByteString values. */
public static boolean le(ByteString b0, ByteString b1) {
return b0.compareTo(b1) <= 0;
@@ -765,6 +786,11 @@ public class SqlFunctions {
return b0.compareTo(b1) > 0;
}
+ /** SQL <code>&gt;</code> operator applied to String values with a certain Comparator. */
+ public static boolean gt(String b0, String b1, Comparator<String>
comparator) {
+ return comparator.compare(b0, b1) > 0;
+ }
+
/** SQL <code>></code> operator applied to ByteString values. */
public static boolean gt(ByteString b0, ByteString b1) {
return b0.compareTo(b1) > 0;
@@ -801,6 +827,11 @@ public class SqlFunctions {
return b0.compareTo(b1) >= 0;
}
+ /** SQL <code>≥</code> operator applied to String values with a certain Comparator. */
+ public static boolean ge(String b0, String b1, Comparator<String>
comparator) {
+ return comparator.compare(b0, b1) >= 0;
+ }
+
/** SQL <code>≥</code> operator applied to ByteString values. */
public static boolean ge(ByteString b0, ByteString b1) {
return b0.compareTo(b1) >= 0;
diff --git a/core/src/main/java/org/apache/calcite/runtime/Utilities.java
b/core/src/main/java/org/apache/calcite/runtime/Utilities.java
index a79cbb6..68bfd3b 100644
--- a/core/src/main/java/org/apache/calcite/runtime/Utilities.java
+++ b/core/src/main/java/org/apache/calcite/runtime/Utilities.java
@@ -16,8 +16,11 @@
*/
package org.apache.calcite.runtime;
+import java.text.Collator;
+import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import java.util.Objects;
/**
@@ -212,6 +215,27 @@ public class Utilities {
: v0.compareTo(v1);
}
+ public static int compare(Comparable v0, Comparable v1, Comparator
comparator) {
+ //noinspection unchecked
+ return comparator.compare(v0, v1);
+ }
+
+ public static int compareNullsFirst(Comparable v0, Comparable v1, Comparator
comparator) {
+ //noinspection unchecked
+ return v0 == v1 ? 0
+ : v0 == null ? -1
+ : v1 == null ? 1
+ : comparator.compare(v0, v1);
+ }
+
+ public static int compareNullsLast(Comparable v0, Comparable v1, Comparator
comparator) {
+ //noinspection unchecked
+ return v0 == v1 ? 0
+ : v0 == null ? 1
+ : v1 == null ? -1
+ : comparator.compare(v0, v1);
+ }
+
public static int compareNullsLast(List v0, List v1) {
//noinspection unchecked
return v0 == v1 ? 0
@@ -224,4 +248,10 @@ public class Utilities {
public static Pattern.PatternBuilder patternBuilder() {
return Pattern.builder();
}
+
+ public static Collator generateCollator(Locale locale, int strength) {
+ final Collator collator = Collator.getInstance(locale);
+ collator.setStrength(strength);
+ return collator;
+ }
}
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlCollation.java
b/core/src/main/java/org/apache/calcite/sql/SqlCollation.java
index 6a700f1..f1e3366 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlCollation.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlCollation.java
@@ -24,6 +24,7 @@ import org.apache.calcite.util.Util;
import java.io.Serializable;
import java.nio.charset.Charset;
+import java.text.Collator;
import java.util.Locale;
import static org.apache.calcite.util.Static.RESOURCE;
@@ -73,7 +74,19 @@ public class SqlCollation implements Serializable {
//~ Constructors -----------------------------------------------------------
/**
- * Creates a Collation by its name and its coercibility
+ * Creates a SqlCollation with the default collation name and the given
+ * coercibility.
+ *
+ * @param coercibility Coercibility
+ */
+ public SqlCollation(Coercibility coercibility) {
+ this(
+ CalciteSystemProperty.DEFAULT_COLLATION.value(),
+ coercibility);
+ }
+
+ /**
+ * Creates a Collation by its name and its coercibility.
*
* @param collation Collation specification
* @param coercibility Coercibility
@@ -86,31 +99,30 @@ public class SqlCollation implements Serializable {
SqlParserUtil.parseCollation(collation);
Charset charset = parseValues.getCharset();
this.wrappedCharset = SerializableCharset.forCharset(charset);
- locale = parseValues.getLocale();
- strength = parseValues.getStrength();
- String c =
- charset.name().toUpperCase(Locale.ROOT) + "$" + locale.toString();
- if ((strength != null) && (strength.length() > 0)) {
- c += "$" + strength;
- }
- collationName = c;
+ this.locale = parseValues.getLocale();
+ this.strength = parseValues.getStrength().toLowerCase(Locale.ROOT);
+ this.collationName = generateCollationName(charset);
}
/**
- * Creates a SqlCollation with the default collation name and the given
- * coercibility.
- *
- * @param coercibility Coercibility
+ * Creates a Collation by its coercibility, locale, charset and strength.
*/
- public SqlCollation(Coercibility coercibility) {
- this(
- CalciteSystemProperty.DEFAULT_COLLATION.value(),
- coercibility);
+ public SqlCollation(
+ Coercibility coercibility,
+ Locale locale,
+ Charset charset,
+ String strength) {
+ this.coercibility = coercibility;
+ charset = SqlUtil.getCharset(charset.name());
+ this.wrappedCharset = SerializableCharset.forCharset(charset);
+ this.locale = locale;
+ this.strength = strength.toLowerCase(Locale.ROOT);
+ this.collationName = generateCollationName(charset);
}
//~ Methods ----------------------------------------------------------------
- public boolean equals(Object o) {
+ @Override public boolean equals(Object o) {
return this == o
|| o instanceof SqlCollation
&& collationName.equals(((SqlCollation) o).collationName);
@@ -120,6 +132,10 @@ public class SqlCollation implements Serializable {
return collationName.hashCode();
}
+ protected String generateCollationName(Charset charset) {
+ return charset.name().toUpperCase(Locale.ROOT) + "$" + locale.toString() +
"$" + strength;
+ }
+
/**
* Returns the collating sequence (the collation name) and the coercibility
* for the resulting value of a dyadic operator.
@@ -279,4 +295,17 @@ public class SqlCollation implements Serializable {
public final SqlCollation.Coercibility getCoercibility() {
return coercibility;
}
+
+ public final Locale getLocale() {
+ return locale;
+ }
+
+ /**
+ * @return the {@link Collator} to compare values having the current collation,
+ * or {@code null} if no specific {@link Collator} is needed, in which case
+ * {@link String#compareTo} will be used.
+ */
+ public Collator getCollator() {
+ return null;
+ }
}
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlUtil.java
b/core/src/main/java/org/apache/calcite/sql/SqlUtil.java
index 67df931..16d463b 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlUtil.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlUtil.java
@@ -915,13 +915,13 @@ public abstract class SqlUtil {
return "Big5";
case "LATIN1":
return "ISO-8859-1";
- case "GB2312":
- case "GBK":
- return name;
case "UTF8":
return "UTF-8";
case "UTF16":
+ case "UTF-16":
return ConversionUtil.NATIVE_UTF16_CHARSET_NAME;
+ case "GB2312":
+ case "GBK":
case "UTF-16BE":
case "UTF-16LE":
case "ISO-8859-1":
diff --git
a/core/src/main/java/org/apache/calcite/sql/parser/SqlParserUtil.java
b/core/src/main/java/org/apache/calcite/sql/parser/SqlParserUtil.java
index 4f309d7..0cf9738 100644
--- a/core/src/main/java/org/apache/calcite/sql/parser/SqlParserUtil.java
+++ b/core/src/main/java/org/apache/calcite/sql/parser/SqlParserUtil.java
@@ -563,7 +563,7 @@ public final class SqlParserUtil {
CalciteSystemProperty.DEFAULT_COLLATION_STRENGTH.value();
}
- Charset charset = Charset.forName(charsetStr);
+ Charset charset = SqlUtil.getCharset(charsetStr);
String[] localeParts = localeStr.split("_");
Locale locale;
if (1 == localeParts.length) {
diff --git
a/core/src/main/java/org/apache/calcite/sql/type/SqlTypeFactoryImpl.java
b/core/src/main/java/org/apache/calcite/sql/type/SqlTypeFactoryImpl.java
index 2b9d102..7709985 100644
--- a/core/src/main/java/org/apache/calcite/sql/type/SqlTypeFactoryImpl.java
+++ b/core/src/main/java/org/apache/calcite/sql/type/SqlTypeFactoryImpl.java
@@ -300,7 +300,6 @@ public class SqlTypeFactoryImpl extends
RelDataTypeFactoryImpl {
SqlCollation collation1 = type.getCollation();
SqlCollation collation2 = resultType.getCollation();
- // TODO: refine collation combination rules
final int precision =
SqlTypeUtil.maxPrecision(resultType.getPrecision(),
type.getPrecision());
@@ -338,6 +337,10 @@ public class SqlTypeFactoryImpl extends
RelDataTypeFactoryImpl {
precision);
}
Charset charset = null;
+ // TODO: refine collation combination rules
+ SqlCollation collation0 = collation1 != null && collation2 != null
+ ? SqlCollation.getCoercibilityDyadicOperator(collation1,
collation2)
+ : null;
SqlCollation collation = null;
if ((charset1 != null) || (charset2 != null)) {
if (charset1 == null) {
@@ -362,7 +365,7 @@ public class SqlTypeFactoryImpl extends
RelDataTypeFactoryImpl {
createTypeWithCharsetAndCollation(
resultType,
charset,
- collation);
+ collation0 != null ? collation0 : collation);
}
} else if (SqlTypeUtil.isExactNumeric(type)) {
if (SqlTypeUtil.isExactNumeric(resultType)) {
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index f0b8f81..f4f18f0 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -236,6 +236,8 @@ public enum BuiltInMethod {
EMPTY_ENUMERABLE(Linq4j.class, "emptyEnumerable"),
NULLS_COMPARATOR(Functions.class, "nullsComparator", boolean.class,
boolean.class),
+ NULLS_COMPARATOR2(Functions.class, "nullsComparator", boolean.class,
+ boolean.class, Comparator.class),
ARRAY_COMPARER(Functions.class, "arrayComparer"),
FUNCTION0_APPLY(Function0.class, "apply"),
FUNCTION1_APPLY(Function1.class, "apply", Object.class),
@@ -501,6 +503,11 @@ public enum BuiltInMethod {
Comparable.class),
COMPARE_NULLS_LAST(Utilities.class, "compareNullsLast", Comparable.class,
Comparable.class),
+ COMPARE2(Utilities.class, "compare", Comparable.class, Comparable.class,
Comparator.class),
+ COMPARE_NULLS_FIRST2(Utilities.class, "compareNullsFirst", Comparable.class,
+ Comparable.class, Comparator.class),
+ COMPARE_NULLS_LAST2(Utilities.class, "compareNullsLast", Comparable.class,
+ Comparable.class, Comparator.class),
ROUND_LONG(SqlFunctions.class, "round", long.class, long.class),
ROUND_INT(SqlFunctions.class, "round", int.class, int.class),
DATE_TO_INT(SqlFunctions.class, "toInt", java.util.Date.class),
diff --git a/core/src/main/java/org/apache/calcite/util/NlsString.java
b/core/src/main/java/org/apache/calcite/util/NlsString.java
index bf51677..8c68256 100644
--- a/core/src/main/java/org/apache/calcite/util/NlsString.java
+++ b/core/src/main/java/org/apache/calcite/util/NlsString.java
@@ -171,8 +171,9 @@ public class NlsString implements Comparable<NlsString>,
Cloneable {
}
@Override public int compareTo(NlsString other) {
- // TODO jvs 18-Jan-2006: Actual collation support. This just uses
- // the default collation.
+ if (collation != null && collation.getCollator() != null) {
+ return collation.getCollator().compare(getValue(), other.getValue());
+ }
return getValue().compareTo(other.getValue());
}
diff --git
a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
new file mode 100644
index 0000000..1027807
--- /dev/null
+++
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.test.enumerable;
+
+import org.apache.calcite.adapter.enumerable.EnumerableRules;
+import org.apache.calcite.adapter.java.ReflectiveSchema;
+import org.apache.calcite.config.CalciteConnectionProperty;
+import org.apache.calcite.config.Lex;
+import org.apache.calcite.jdbc.JavaCollation;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.sql.SqlCollation;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.test.JdbcTest;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.Util;
+
+import org.junit.jupiter.api.Test;
+
+import java.text.Collator;
+import java.util.Collections;
+import java.util.Locale;
+import java.util.function.Consumer;
+
+import static org.apache.calcite.sql.fun.SqlStdOperatorTable.EQUALS;
+import static org.apache.calcite.sql.fun.SqlStdOperatorTable.GREATER_THAN;
+import static org.apache.calcite.sql.fun.SqlStdOperatorTable.LESS_THAN;
+import static org.apache.calcite.sql.fun.SqlStdOperatorTable.NOT_EQUALS;
+
+/**
+ * Test cases for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-3951">[CALCITE-3951]
+ * Support different string comparison based on SqlCollation</a>.
+ */
+class EnumerableStringComparisonTest {
+
+ private static final SqlCollation SPECIAL_COLLATION_PRIMARY =
+ new JavaCollation(SqlCollation.Coercibility.IMPLICIT, Locale.US,
+ Util.getDefaultCharset(), Collator.PRIMARY);
+
+ private static final SqlCollation SPECIAL_COLLATION_SECONDARY =
+ new JavaCollation(SqlCollation.Coercibility.IMPLICIT, Locale.US,
+ Util.getDefaultCharset(), Collator.SECONDARY);
+
+ private static final SqlCollation SPECIAL_COLLATION_TERTIARY =
+ new JavaCollation(SqlCollation.Coercibility.IMPLICIT, Locale.US,
+ Util.getDefaultCharset(), Collator.TERTIARY);
+
+ private static final SqlCollation SPECIAL_COLLATION_IDENTICAL =
+ new JavaCollation(SqlCollation.Coercibility.IMPLICIT, Locale.US,
+ Util.getDefaultCharset(), Collator.IDENTICAL);
+
+ private RelDataType createRecordVarcharSpecialCollation(RelBuilder builder) {
+ return builder.getTypeFactory().builder()
+ .add(
+ "name",
+ builder.getTypeFactory().createTypeWithCharsetAndCollation(
+ builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
+ builder.getTypeFactory().getDefaultCharset(),
+ SPECIAL_COLLATION_TERTIARY))
+ .build();
+ }
+
+ private RelDataType createVarcharSpecialCollation(RelBuilder builder, SqlCollation collation) {
+ return builder.getTypeFactory().createTypeWithCharsetAndCollation(
+ builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
+ builder.getTypeFactory().getDefaultCharset(),
+ collation);
+ }
+
+ @Test void testSortStringDefault() {
+ tester()
+ .query("?")
+ .withRel(builder -> builder
+ .values(
+ builder.getTypeFactory().builder()
+ .add("name",
+ builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)).build(),
+ "Legal", "presales", "hr", "Administration", "MARKETING")
+ .sort(
+ builder.field(1, 0, "name"))
+ .build())
+ .explainHookMatches(""
+ + "EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+ + " EnumerableValues(tuples=[[{ 'Legal' }, { 'presales' }, { 'hr' }, { 'Administration' }, { 'MARKETING' }]])\n")
+ .returnsOrdered("name=Administration\n"
+ + "name=Legal\n"
+ + "name=MARKETING\n"
+ + "name=hr\n"
+ + "name=presales");
+ }
+
+ @Test void testSortStringSpecialCollation() {
+ tester()
+ .query("?")
+ .withRel(builder -> builder
+ .values(
+ createRecordVarcharSpecialCollation(builder),
+ "Legal", "presales", "hr", "Administration", "MARKETING")
+ .sort(
+ builder.field(1, 0, "name"))
+ .build())
+ .explainHookMatches(""
+ + "EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+ + " EnumerableValues(tuples=[[{ 'Legal' }, { 'presales' }, { 'hr' }, { 'Administration' }, { 'MARKETING' }]])\n")
+ .returnsOrdered("name=Administration\n"
+ + "name=hr\n"
+ + "name=Legal\n"
+ + "name=MARKETING\n"
+ + "name=presales");
+ }
+
+ @Test void testMergeJoinOnStringSpecialCollation() {
+ tester()
+ .query("?")
+ .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> {
+ planner.addRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE);
+ planner.removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
+ })
+ .withRel(builder -> builder
+ .values(createRecordVarcharSpecialCollation(builder),
+ "Legal", "presales", "HR", "Administration", "Marketing").as("v1")
+ .values(createRecordVarcharSpecialCollation(builder),
+ "Marketing", "bureaucracy", "Sales", "HR").as("v2")
+ .join(JoinRelType.INNER,
+ builder.equals(
+ builder.field(2, 0, "name"),
+ builder.field(2, 1, "name")))
+ .project(
+ builder.field("v1", "name"),
+ builder.field("v2", "name"))
+ .build())
+ .explainHookMatches("" // It is important that we have MergeJoin in the plan
+ + "EnumerableMergeJoin(condition=[=($0, $1)], joinType=[inner])\n"
+ + " EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+ + " EnumerableValues(tuples=[[{ 'Legal' }, { 'presales' }, { 'HR' }, { 'Administration' }, { 'Marketing' }]])\n"
+ + " EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+ + " EnumerableValues(tuples=[[{ 'Marketing' }, { 'bureaucracy' }, { 'Sales' }, { 'HR' }]])\n")
+ .returnsOrdered("name=HR; name0=HR\n"
+ + "name=Marketing; name0=Marketing");
+ }
+
+ @Test void testStringComparison() {
+ testStringComparison("a", "A", LESS_THAN, true);
+ testStringComparison("a", "A", GREATER_THAN, false);
+ testStringComparison("A", "a", LESS_THAN, false);
+ testStringComparison("A", "a", GREATER_THAN, true);
+
+ testStringComparison("aaa", "AAA", EQUALS, false);
+ testStringComparison("aaa", "AAA", NOT_EQUALS, true);
+ testStringComparison("AAA", "AAA", EQUALS, true);
+ testStringComparison("AAA", "AAA", NOT_EQUALS, false);
+ testStringComparison("AAA", "BBB", EQUALS, false);
+ testStringComparison("AAA", "BBB", NOT_EQUALS, true);
+
+ testStringComparison("a", "b", LESS_THAN, true);
+ testStringComparison("A", "B", LESS_THAN, true);
+ testStringComparison("a", "B", LESS_THAN, true);
+ testStringComparison("A", "b", LESS_THAN, true);
+ testStringComparison("a", "b", GREATER_THAN, false);
+ testStringComparison("A", "B", GREATER_THAN, false);
+ testStringComparison("a", "B", GREATER_THAN, false);
+ testStringComparison("A", "b", GREATER_THAN, false);
+
+ testStringComparison("b", "a", GREATER_THAN, true);
+ testStringComparison("B", "A", GREATER_THAN, true);
+ testStringComparison("B", "a", GREATER_THAN, true);
+ testStringComparison("b", "A", GREATER_THAN, true);
+ testStringComparison("b", "a", LESS_THAN, false);
+ testStringComparison("B", "A", LESS_THAN, false);
+ testStringComparison("B", "a", LESS_THAN, false);
+ testStringComparison("b", "A", LESS_THAN, false);
+
+ // Check differences regarding strength:
+
+ testStringComparison("ABC", "ABC", EQUALS, SPECIAL_COLLATION_PRIMARY, true);
+ testStringComparison("ABC", "ABC", EQUALS, SPECIAL_COLLATION_SECONDARY, true);
+ testStringComparison("ABC", "ABC", EQUALS, SPECIAL_COLLATION_TERTIARY, true);
+ testStringComparison("ABC", "ABC", EQUALS, SPECIAL_COLLATION_IDENTICAL, true);
+
+ testStringComparison("abc", "ÀBC", EQUALS, SPECIAL_COLLATION_PRIMARY, true);
+ testStringComparison("abc", "ÀBC", EQUALS, SPECIAL_COLLATION_SECONDARY, false);
+ testStringComparison("abc", "ÀBC", EQUALS, SPECIAL_COLLATION_TERTIARY, false);
+ testStringComparison("abc", "ÀBC", EQUALS, SPECIAL_COLLATION_IDENTICAL, false);
+
+ testStringComparison("abc", "ABC", EQUALS, SPECIAL_COLLATION_PRIMARY, true);
+ testStringComparison("abc", "ABC", EQUALS, SPECIAL_COLLATION_SECONDARY, true);
+ testStringComparison("abc", "ABC", EQUALS, SPECIAL_COLLATION_TERTIARY, false);
+ testStringComparison("abc", "ABC", EQUALS, SPECIAL_COLLATION_IDENTICAL, false);
+
+ testStringComparison("\u0001", "\u0002", EQUALS, SPECIAL_COLLATION_PRIMARY, true);
+ testStringComparison("\u0001", "\u0002", EQUALS, SPECIAL_COLLATION_SECONDARY, true);
+ testStringComparison("\u0001", "\u0002", EQUALS, SPECIAL_COLLATION_TERTIARY, true);
+ testStringComparison("\u0001", "\u0002", EQUALS, SPECIAL_COLLATION_IDENTICAL, false);
+ }
+
+ private void testStringComparison(String str1, String str2,
+ SqlOperator operator, boolean expectedResult) {
+ testStringComparison(str1, str2, operator, SPECIAL_COLLATION_TERTIARY, expectedResult);
+ }
+
+ private void testStringComparison(String str1, String str2,
+ SqlOperator operator, SqlCollation col,
+ boolean expectedResult) {
+ tester()
+ .query("?")
+ .withRel(builder -> {
+ final RexBuilder rexBuilder = builder.getRexBuilder();
+ final RelDataType varcharSpecialCollation = createVarcharSpecialCollation(builder, col);
+ return builder
+ .values(new String[]{"aux"}, false)
+ .project(
+ Collections.singletonList(
+ builder.call(
+ operator,
+ rexBuilder.makeCast(varcharSpecialCollation, builder.literal(str1)),
+ rexBuilder.makeCast(varcharSpecialCollation, builder.literal(str2)))),
+ Collections.singletonList("result"))
+ .build();
+ })
+ .returnsUnordered("result=" + expectedResult);
+ }
+
+ private CalciteAssert.AssertThat tester() {
+ return CalciteAssert.that()
+ .with(CalciteConnectionProperty.LEX, Lex.JAVA)
+ .with(CalciteConnectionProperty.FORCE_DECORRELATE, false)
+ .withSchema("s", new ReflectiveSchema(new JdbcTest.HrSchema()));
+ }
+}
diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/function/Functions.java b/linq4j/src/main/java/org/apache/calcite/linq4j/function/Functions.java
index 4406bfb..539ea50 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/function/Functions.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/function/Functions.java
@@ -438,6 +438,32 @@ public abstract class Functions {
}
/**
+ * Returns a {@link Comparator} that handles null values.
+ *
+ * @param nullsFirst Whether nulls come before all other values
+ * @param reverse Whether to reverse the usual order of {@link Comparable}s
+ * @param comparator Comparator to be used for comparison
+ */
+ @SuppressWarnings("unchecked")
+ public static <T extends Comparable<T>> Comparator<T> nullsComparator(
+ boolean nullsFirst,
+ boolean reverse,
+ Comparator<T> comparator) {
+ return (T o1, T o2) -> {
+ if (o1 == o2) {
+ return 0;
+ }
+ if (o1 == null) {
+ return nullsFirst ? -1 : 1;
+ }
+ if (o2 == null) {
+ return nullsFirst ? 1 : -1;
+ }
+ return reverse ? -comparator.compare(o1, o2) : comparator.compare(o1, o2);
+ };
+ }
+
+ /**
* Returns an {@link EqualityComparer} that uses object identity and hash
* code.
*/