This is an automated email from the ASF dual-hosted git repository. rubenql pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new 2789f5e4c3 [CALCITE-5003] MergeUnion on types with different collators produces wrong result 2789f5e4c3 is described below commit 2789f5e4c361b052967f42b87447f04cc1ce7896 Author: rubenada <rube...@gmail.com> AuthorDate: Fri Apr 29 09:41:10 2022 +0100 [CALCITE-5003] MergeUnion on types with different collators produces wrong result --- .../enumerable/EnumerableMergeUnionRule.java | 42 +++++++++++++++++++++- .../enumerable/EnumerableStringComparisonTest.java | 34 ++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeUnionRule.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeUnionRule.java index e72d9c63c4..2976f3e6dc 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeUnionRule.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeUnionRule.java @@ -19,18 +19,24 @@ package org.apache.calcite.adapter.enumerable; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.Union; import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.logical.LogicalUnion; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.ImmutableBitSet; import org.immutables.value.Value; import java.util.ArrayList; import java.util.List; +import java.util.Objects; /** * Rule to convert a {@link org.apache.calcite.rel.logical.LogicalSort} on top of a @@ -90,9 +96,43 @@ public class EnumerableMergeUnionRule extends RelRule<EnumerableMergeUnionRule.C } } + final RelBuilder builder = call.builder(); + final List<RelDataTypeField> unionFieldList = union.getRowType().getFieldList(); final List<RelNode> inputs = new ArrayList<>(unionInputsSize); for (RelNode input : union.getInputs()) { - final RelNode newInput = sort.copy(sort.getTraitSet(), input, collation, null, inputFetch); + // Check if type collations match, otherwise store it in this bitset to generate a cast later + // to guarantee that all inputs will be sorted in the same way + final ImmutableBitSet.Builder fieldsRequiringCastBuilder = ImmutableBitSet.builder(); + for (RelFieldCollation fieldCollation : collation.getFieldCollations()) { + final int index = fieldCollation.getFieldIndex(); + final RelDataType unionType = unionFieldList.get(index).getType(); + final RelDataType inputType = input.getRowType().getFieldList().get(index).getType(); + if (!Objects.equals(unionType.getCollation(), inputType.getCollation())) { + fieldsRequiringCastBuilder.set(index); + } + } + final ImmutableBitSet fieldsRequiringCast = fieldsRequiringCastBuilder.build(); + final RelNode unsortedInput; + if (fieldsRequiringCast.isEmpty()) { + unsortedInput = input; + } else { + // At least one cast is required, generate the corresponding projection + builder.push(input); + final List<RexNode> fields = builder.fields(); + final List<RexNode> projFields = new ArrayList<>(fields.size()); + for (int i = 0; i < fields.size(); i++) { + RexNode node = fields.get(i); + if (fieldsRequiringCast.get(i)) { + final RelDataType targetType = unionFieldList.get(i).getType(); + node = builder.getRexBuilder().makeCast(targetType, node); + } + projFields.add(node); + } + builder.project(projFields); + unsortedInput = builder.build(); + } + final RelNode newInput = + sort.copy(sort.getTraitSet(), unsortedInput, collation, null, inputFetch); inputs.add( convert(newInput, newInput.getTraitSet().replace(EnumerableConvention.INSTANCE))); } diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java index 1c591e4741..6f0fd0273a 100644 --- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java +++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableStringComparisonTest.java @@ -166,6 +166,40 @@ class EnumerableStringComparisonTest { + "name=Marketing; name0=Marketing"); } + /** Test case for + * <a href="https://issues.apache.org/jira/browse/CALCITE-5003">[CALCITE-5003] + * MergeUnion on types with different collators produces wrong result</a>. */ + @Test void testMergeUnionOnStringDifferentCollation() { + tester() + .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> + planner.removeRule(EnumerableRules.ENUMERABLE_UNION_RULE)) + .withRel(b -> { + final RelBuilder builder = b.transform(c -> c.withSimplifyValues(false)); + return builder + .values(builder.getTypeFactory().builder() + .add("name", + builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)).build(), + "facilities", "HR", "administration", "Marketing") + .values(createRecordVarcharSpecialCollation(builder), + "Marketing", "administration", "presales", "HR") + .union(false) + .sort(0) + .build(); + }) + .explainHookMatches("" // It is important that we have MergeUnion in the plan + + "EnumerableMergeUnion(all=[false])\n" + + " EnumerableSort(sort0=[$0], dir0=[ASC])\n" + + " EnumerableCalc(expr#0=[{inputs}], expr#1=[CAST($t0):VARCHAR COLLATE \"ISO-8859-1$en_US$tertiary$JAVA_COLLATOR\" NOT NULL], name=[$t1])\n" + + " EnumerableValues(tuples=[[{ 'facilities' }, { 'HR' }, { 'administration' }, { 'Marketing' }]])\n" + + " EnumerableSort(sort0=[$0], dir0=[ASC])\n" + + " EnumerableValues(tuples=[[{ 'Marketing' }, { 'administration' }, { 'presales' }, { 'HR' }]])\n") + .returnsOrdered("name=administration\n" + + "name=facilities\n" + + "name=HR\n" + + "name=Marketing\n" + + "name=presales"); + } + /** Test case for * <a href="https://issues.apache.org/jira/browse/CALCITE-4195">[CALCITE-4195] * Cast between types with different collators must be evaluated as not monotonic</a>. */