This is an automated email from the ASF dual-hosted git repository.

rubenql pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 2789f5e4c3 [CALCITE-5003] MergeUnion on types with different collators 
produces wrong result
2789f5e4c3 is described below

commit 2789f5e4c361b052967f42b87447f04cc1ce7896
Author: rubenada <rube...@gmail.com>
AuthorDate: Fri Apr 29 09:41:10 2022 +0100

    [CALCITE-5003] MergeUnion on types with different collators produces wrong 
result
---
 .../enumerable/EnumerableMergeUnionRule.java       | 42 +++++++++++++++++++++-
 .../enumerable/EnumerableStringComparisonTest.java | 34 ++++++++++++++++++
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeUnionRule.java
 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeUnionRule.java
index e72d9c63c4..2976f3e6dc 100644
--- 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeUnionRule.java
+++ 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeUnionRule.java
@@ -19,18 +19,24 @@ package org.apache.calcite.adapter.enumerable;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelRule;
 import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.core.Union;
 import org.apache.calcite.rel.logical.LogicalSort;
 import org.apache.calcite.rel.logical.LogicalUnion;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.ImmutableBitSet;
 
 import org.immutables.value.Value;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 
 /**
  * Rule to convert a {@link org.apache.calcite.rel.logical.LogicalSort} on top 
of a
@@ -90,9 +96,43 @@ public class EnumerableMergeUnionRule extends 
RelRule<EnumerableMergeUnionRule.C
       }
     }
 
+    final RelBuilder builder = call.builder();
+    final List<RelDataTypeField> unionFieldList = 
union.getRowType().getFieldList();
     final List<RelNode> inputs = new ArrayList<>(unionInputsSize);
     for (RelNode input : union.getInputs()) {
-      final RelNode newInput = sort.copy(sort.getTraitSet(), input, collation, 
null, inputFetch);
+      // Check if type collations match, otherwise store it in this bitset to 
generate a cast later
+      // to guarantee that all inputs will be sorted in the same way
+      final ImmutableBitSet.Builder fieldsRequiringCastBuilder = 
ImmutableBitSet.builder();
+      for (RelFieldCollation fieldCollation : collation.getFieldCollations()) {
+        final int index = fieldCollation.getFieldIndex();
+        final RelDataType unionType = unionFieldList.get(index).getType();
+        final RelDataType inputType = 
input.getRowType().getFieldList().get(index).getType();
+        if (!Objects.equals(unionType.getCollation(), 
inputType.getCollation())) {
+          fieldsRequiringCastBuilder.set(index);
+        }
+      }
+      final ImmutableBitSet fieldsRequiringCast = 
fieldsRequiringCastBuilder.build();
+      final RelNode unsortedInput;
+      if (fieldsRequiringCast.isEmpty()) {
+        unsortedInput = input;
+      } else {
+        // At least one cast is required, generate the corresponding projection
+        builder.push(input);
+        final List<RexNode> fields = builder.fields();
+        final List<RexNode> projFields = new ArrayList<>(fields.size());
+        for (int i = 0; i < fields.size(); i++) {
+          RexNode node = fields.get(i);
+          if (fieldsRequiringCast.get(i)) {
+            final RelDataType targetType = unionFieldList.get(i).getType();
+            node = builder.getRexBuilder().makeCast(targetType, node);
+          }
+          projFields.add(node);
+        }
+        builder.project(projFields);
+        unsortedInput = builder.build();
+      }
+      final RelNode newInput =
+          sort.copy(sort.getTraitSet(), unsortedInput, collation, null, 
inputFetch);
       inputs.add(
           convert(newInput, 
newInput.getTraitSet().replace(EnumerableConvention.INSTANCE)));
     }
diff --git 
a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
 
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
index 1c591e4741..6f0fd0273a 100644
--- 
a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
+++ 
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java
@@ -166,6 +166,40 @@ class EnumerableStringComparisonTest {
             + "name=Marketing; name0=Marketing");
   }
 
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-5003";>[CALCITE-5003]
+   * MergeUnion on types with different collators produces wrong result</a>. */
+  @Test void testMergeUnionOnStringDifferentCollation() {
+    tester()
+        .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner ->
+            planner.removeRule(EnumerableRules.ENUMERABLE_UNION_RULE))
+        .withRel(b -> {
+          final RelBuilder builder = b.transform(c -> 
c.withSimplifyValues(false));
+          return builder
+              .values(builder.getTypeFactory().builder()
+                      .add("name",
+                          
builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)).build(),
+                  "facilities", "HR", "administration", "Marketing")
+              .values(createRecordVarcharSpecialCollation(builder),
+                  "Marketing", "administration", "presales", "HR")
+              .union(false)
+              .sort(0)
+              .build();
+        })
+        .explainHookMatches("" // It is important that we have MergeUnion in 
the plan
+            + "EnumerableMergeUnion(all=[false])\n"
+            + "  EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+            + "    EnumerableCalc(expr#0=[{inputs}], expr#1=[CAST($t0):VARCHAR 
COLLATE \"ISO-8859-1$en_US$tertiary$JAVA_COLLATOR\" NOT NULL], name=[$t1])\n"
+            + "      EnumerableValues(tuples=[[{ 'facilities' }, { 'HR' }, { 
'administration' }, { 'Marketing' }]])\n"
+            + "  EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+            + "    EnumerableValues(tuples=[[{ 'Marketing' }, { 
'administration' }, { 'presales' }, { 'HR' }]])\n")
+        .returnsOrdered("name=administration\n"
+            + "name=facilities\n"
+            + "name=HR\n"
+            + "name=Marketing\n"
+            + "name=presales");
+  }
+
   /** Test case for
    * <a 
href="https://issues.apache.org/jira/browse/CALCITE-4195";>[CALCITE-4195]
    * Cast between types with different collators must be evaluated as not 
monotonic</a>. */

Reply via email to