[calcite] branch master updated: [CALCITE-3920] Improve ORDER BY computation in Enumerable convention by exploiting LIMIT (Thomas Rebele)

rubenql Fri, 04 Sep 2020 00:53:16 -0700

This is an automated email from the ASF dual-hosted git repository.

rubenql pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git



The following commit(s) were added to refs/heads/master by this push:
     new 103c73f  [CALCITE-3920] Improve ORDER BY computation in Enumerable 
convention by exploiting LIMIT (Thomas Rebele)
103c73f is described below

commit 103c73f639443e84fae4a600aa5ab05a8139cf91
Author: tre <[email protected]>
AuthorDate: Wed Aug 12 15:59:37 2020 +0200

    [CALCITE-3920] Improve ORDER BY computation in Enumerable convention by 
exploiting LIMIT (Thomas Rebele)
---
 .../adapter/enumerable/EnumerableLimit.java        |   8 +-
 .../adapter/enumerable/EnumerableLimitSort.java    | 155 +++++++++++++++++++
 .../enumerable/EnumerableLimitSortRule.java        |  60 ++++++++
 .../adapter/enumerable/EnumerableRules.java        |   3 +
 .../calcite/adapter/enumerable/EnumerableSort.java |   4 +
 .../org/apache/calcite/util/BuiltInMethod.java     |   2 +
 .../org/apache/calcite/test/RelOptRulesTest.java   |  48 ++++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    |  29 ++++
 .../apache/calcite/linq4j/EnumerableDefaults.java  |  94 ++++++++++++
 .../apache/calcite/linq4j/test/LimitSortTest.java  | 165 +++++++++++++++++++++
 10 files changed, 564 insertions(+), 4 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimit.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimit.java
index 9b68a52..b7eae20 100644
--- 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimit.java
+++ 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimit.java
@@ -125,16 +125,16 @@ public class EnumerableLimit extends SingleRel implements 
EnumerableRel {
     return implementor.result(physType, builder.toBlock());
   }
 
-  private static Expression getExpression(RexNode offset) {
-    if (offset instanceof RexDynamicParam) {
-      final RexDynamicParam param = (RexDynamicParam) offset;
+  static Expression getExpression(RexNode rexNode) {
+    if (rexNode instanceof RexDynamicParam) {
+      final RexDynamicParam param = (RexDynamicParam) rexNode;
       return Expressions.convert_(
           Expressions.call(DataContext.ROOT,
               BuiltInMethod.DATA_CONTEXT_GET.method,
               Expressions.constant("?" + param.getIndex())),
           Integer.class);
     } else {
-      return Expressions.constant(RexLiteral.intValue(offset));
+      return Expressions.constant(RexLiteral.intValue(rexNode));
     }
   }
 }
diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSort.java
 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSort.java
new file mode 100644
index 0000000..9531303
--- /dev/null
+++ 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSort.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.adapter.enumerable;
+
+import org.apache.calcite.linq4j.tree.BlockBuilder;
+import org.apache.calcite.linq4j.tree.Expression;
+import org.apache.calcite.linq4j.tree.Expressions;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexDynamicParam;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Pair;
+
+import static 
org.apache.calcite.adapter.enumerable.EnumerableLimit.getExpression;
+
+/**
+ * Implementation of {@link org.apache.calcite.rel.core.Sort} in
+ * {@link org.apache.calcite.adapter.enumerable.EnumerableConvention 
enumerable calling convention}.
+ * It optimizes sorts that have a limit and an optional offset.
+ */
+public class EnumerableLimitSort extends Sort implements EnumerableRel {
+
+  /**
+   * Creates an EnumerableLimitSort.
+   *
+   * <p>Use {@link #create} unless you know what you're doing.
+   */
+  public EnumerableLimitSort(
+      RelOptCluster cluster,
+      RelTraitSet traitSet,
+      RelNode input,
+      RelCollation collation,
+      RexNode offset,
+      RexNode fetch) {
+    super(cluster, traitSet, input, collation, offset, fetch);
+    assert this.getConvention() instanceof EnumerableConvention;
+    assert this.getConvention() == input.getConvention();
+  }
+
+  /** Creates an EnumerableLimitSort. */
+  public static EnumerableLimitSort create(
+      RelNode input,
+      RelCollation collation,
+      RexNode offset,
+      RexNode fetch) {
+    final RelOptCluster cluster = input.getCluster();
+    final RelTraitSet traitSet = 
cluster.traitSetOf(EnumerableConvention.INSTANCE).replace(
+        collation);
+    return new EnumerableLimitSort(cluster, traitSet, input, collation, 
offset, fetch);
+  }
+
+  @Override public EnumerableLimitSort copy(
+      RelTraitSet traitSet,
+      RelNode newInput,
+      RelCollation newCollation,
+      RexNode offset,
+      RexNode fetch) {
+    return new EnumerableLimitSort(
+        this.getCluster(),
+        traitSet,
+        newInput,
+        newCollation,
+        offset,
+        fetch);
+  }
+
+  @Override public Result implement(EnumerableRelImplementor implementor, 
Prefer pref) {
+    final BlockBuilder builder = new BlockBuilder();
+    final EnumerableRel child = (EnumerableRel) this.getInput();
+    final Result result = implementor.visitChild(this, 0, child, pref);
+    final PhysType physType = PhysTypeImpl.of(
+        implementor.getTypeFactory(),
+        this.getRowType(),
+        result.format);
+    final Expression childExp = builder.append("child", result.block);
+
+    final PhysType inputPhysType = result.physType;
+    final Pair<Expression, Expression> pair =
+        
inputPhysType.generateCollationKey(this.collation.getFieldCollations());
+
+    final Expression fetchVal;
+    if (this.fetch == null) {
+      fetchVal = Expressions.constant(Integer.valueOf(Integer.MAX_VALUE));
+    } else {
+      fetchVal = getExpression(this.fetch);
+    }
+
+    final Expression offsetVal = this.offset == null ? 
Expressions.constant(Integer.valueOf(0))
+        : getExpression(this.offset);
+
+    builder.add(
+        Expressions.return_(
+            null, Expressions.call(
+                BuiltInMethod.ORDER_BY_WITH_FETCH_AND_OFFSET.method, 
Expressions.list(
+                    childExp,
+                    builder.append("keySelector", pair.left))
+                    .appendIfNotNull(builder.appendIfNotNull("comparator", 
pair.right))
+                    .appendIfNotNull(
+                        builder.appendIfNotNull("offset",
+                            Expressions.constant(offsetVal)))
+                    .appendIfNotNull(
+                        builder.appendIfNotNull("fetch",
+                            Expressions.constant(fetchVal)))
+            )));
+    return implementor.result(physType, builder.toBlock());
+  }
+
+  @Override public RelOptCost computeSelfCost(RelOptPlanner planner, 
RelMetadataQuery mq) {
+    final double rowCount = mq.getRowCount(this.input).doubleValue();
+    double toSort = getValue(this.fetch, rowCount);
+    if (this.offset != null) {
+      toSort += getValue(this.offset, rowCount);
+    }
+    // we need to sort at most rowCount rows
+    toSort = Math.min(rowCount, toSort);
+
+    // we need to process rowCount rows, and for every row
+    // we search the key in a TreeMap with at most toSort entries
+    final double lookup = Math.max(1., Math.log(toSort));
+    final double bytesPerRow = this.getRowType().getFieldCount() * 4.;
+    final double cpu = (rowCount * lookup) * bytesPerRow;
+
+    RelOptCost cost = planner.getCostFactory().makeCost(rowCount, cpu, 0);
+    return cost;
+  }
+
+  private double getValue(RexNode r, double defaultValue) {
+    if (r == null || r instanceof RexDynamicParam) {
+      return defaultValue;
+    }
+    return RexLiteral.intValue(r);
+  }
+}
diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSortRule.java
 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSortRule.java
new file mode 100644
index 0000000..851e411
--- /dev/null
+++ 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSortRule.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.adapter.enumerable;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.logical.LogicalSort;
+
+/**
+ * Rule to convert an {@link EnumerableLimit} of on
+ * {@link EnumerableSort} into an {@link EnumerableLimitSort}.
+ */
+public class EnumerableLimitSortRule extends 
RelRule<EnumerableLimitSortRule.Config> {
+
+  /**
+   * Creates a EnumerableLimitSortRule.
+   */
+  public EnumerableLimitSortRule(Config config) {
+    super(config);
+  }
+
+  @Override public void onMatch(RelOptRuleCall call) {
+    final LogicalSort sort = call.rel(0);
+    RelNode input = sort.getInput();
+    final Sort o = EnumerableLimitSort.create(
+        convert(input, 
input.getTraitSet().replace(EnumerableConvention.INSTANCE)),
+        sort.getCollation(),
+        sort.offset, sort.fetch
+    );
+
+    call.transformTo(o);
+  }
+
+  /** Rule configuration. */
+  public interface Config extends RelRule.Config {
+    Config DEFAULT = EMPTY.withOperandSupplier(
+        b0 -> b0.operand(LogicalSort.class).predicate(sort -> sort.fetch != 
null).anyInputs())
+        .as(Config.class);
+
+    @Override default EnumerableLimitSortRule toRule() {
+      return new EnumerableLimitSortRule(this);
+    }
+  }
+}
diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableRules.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableRules.java
index 1222353..8816923 100644
--- 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableRules.java
+++ 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableRules.java
@@ -86,6 +86,9 @@ public class EnumerableRules {
   public static final EnumerableSortRule ENUMERABLE_SORT_RULE =
       EnumerableSortRule.DEFAULT_CONFIG.toRule(EnumerableSortRule.class);
 
+  public static final EnumerableLimitSortRule ENUMERABLE_LIMIT_SORT_RULE =
+      EnumerableLimitSortRule.Config.DEFAULT.toRule();
+
   public static final EnumerableLimitRule ENUMERABLE_LIMIT_RULE =
       EnumerableLimitRule.Config.DEFAULT.toRule();
 
diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableSort.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableSort.java
index 1cc8d7b..568a4ee 100644
--- 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableSort.java
+++ 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableSort.java
@@ -28,6 +28,8 @@ import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.util.BuiltInMethod;
 import org.apache.calcite.util.Pair;
 
+import com.google.common.base.Preconditions;
+
 /** Implementation of {@link org.apache.calcite.rel.core.Sort} in
  * {@link org.apache.calcite.adapter.enumerable.EnumerableConvention 
enumerable calling convention}. */
 public class EnumerableSort extends Sort implements EnumerableRel {
@@ -41,6 +43,8 @@ public class EnumerableSort extends Sort implements 
EnumerableRel {
     super(cluster, traitSet, input, collation, offset, fetch);
     assert getConvention() instanceof EnumerableConvention;
     assert getConvention() == input.getConvention();
+    Preconditions.checkArgument(fetch == null);
+    Preconditions.checkArgument(offset == null);
   }
 
   /** Creates an EnumerableSort. */
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java 
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 6dbc319..1521a1b 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -228,6 +228,8 @@ public enum BuiltInMethod {
       Function2.class, Function1.class),
   ORDER_BY(ExtendedEnumerable.class, "orderBy", Function1.class,
       Comparator.class),
+  ORDER_BY_WITH_FETCH_AND_OFFSET(EnumerableDefaults.class, "orderBy", 
Enumerable.class,
+      Function1.class, Comparator.class, int.class, int.class),
   UNION(ExtendedEnumerable.class, "union", Enumerable.class),
   CONCAT(ExtendedEnumerable.class, "concat", Enumerable.class),
   REPEAT_UNION(EnumerableDefaults.class, "repeatUnion", Enumerable.class,
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java 
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index e5106e7..46a0477 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -1043,6 +1043,54 @@ class RelOptRulesTest extends RelOptTestBase {
         .check();
   }
 
+  /**
+   * Test if limit and sort are replaced by a limit sort.
+   * Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-3920";>[CALCITE-3920]
+   * Improve ORDER BY computation in Enumerable convention by exploiting 
LIMIT</a>.
+   */
+  @Test void testLimitSort() {
+    final String sql = "select mgr from sales.emp\n"
+        + "union select mgr from sales.emp\n"
+        + "order by mgr limit 10 offset 5";
+
+    VolcanoPlanner planner = new VolcanoPlanner(null, null);
+    planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
+    RelOptUtil.registerDefaultRules(planner, false, false);
+    planner.addRule(EnumerableRules.ENUMERABLE_LIMIT_SORT_RULE);
+
+    Tester tester = createTester().withDecorrelation(true)
+        .withClusterFactory(
+            relOptCluster -> RelOptCluster.create(planner, 
relOptCluster.getRexBuilder()));
+
+    RelRoot root = tester.convertSqlToRel(sql);
+
+    String planBefore = NL + RelOptUtil.toString(root.rel);
+    getDiffRepos().assertEquals("planBefore", "${planBefore}", planBefore);
+
+    RuleSet ruleSet =
+        RuleSets.ofList(
+            EnumerableRules.ENUMERABLE_SORT_RULE,
+            EnumerableRules.ENUMERABLE_LIMIT_RULE,
+            EnumerableRules.ENUMERABLE_LIMIT_SORT_RULE,
+            EnumerableRules.ENUMERABLE_PROJECT_RULE,
+            EnumerableRules.ENUMERABLE_FILTER_RULE,
+            EnumerableRules.ENUMERABLE_UNION_RULE,
+            EnumerableRules.ENUMERABLE_TABLE_SCAN_RULE
+        );
+    Program program = Programs.of(ruleSet);
+
+    RelTraitSet toTraits =
+        root.rel.getCluster().traitSet()
+            .replace(0, EnumerableConvention.INSTANCE);
+
+    RelNode relAfter = program.run(planner, root.rel, toTraits,
+        Collections.emptyList(), Collections.emptyList());
+
+    String planAfter = NL + RelOptUtil.toString(relAfter);
+    getDiffRepos().assertEquals("planAfter", "${planAfter}", planAfter);
+  }
+
   @Test void testSemiJoinRuleExists() {
     final String sql = "select * from dept where exists (\n"
         + "  select * from emp\n"
diff --git 
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml 
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index bd5176f..7662bf8 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -3806,6 +3806,35 @@ LogicalProject(SAL=[$5])
 ]]>
         </Resource>
     </TestCase>
+    <TestCase name="testLimitSort">
+        <Resource name="sql">
+            <![CDATA[select mgr from sales.emp
+union select mgr from sales.emp
+order by mgr limit 10 offset 5]]>
+        </Resource>
+        <Resource name="planBefore">
+            <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC], offset=[5], fetch=[10])
+  LogicalProject(MGR=[$0])
+    LogicalUnion(all=[false])
+      LogicalProject(MGR=[$3])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalProject(MGR=[$3])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+EnumerableLimitSort(sort0=[$0], dir0=[ASC], offset=[5], fetch=[10])
+  EnumerableProject(MGR=[$0])
+    EnumerableUnion(all=[false])
+      EnumerableProject(MGR=[$3])
+        EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+      EnumerableProject(MGR=[$3])
+        EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+    </TestCase>
     <TestCase name="testMergeIntersect">
         <Resource name="sql">
             <![CDATA[select * from emp where deptno = 10
diff --git 
a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java 
b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
index 512130d..e1b9de8 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
@@ -2625,6 +2625,100 @@ public abstract class EnumerableDefaults {
     };
   }
 
+
+  /**
+   * A sort implementation optimized for a sort with a fetch size (LIMIT).
+   * @param offset how many rows are skipped from the sorted output.
+   *               Must be greater than or equal to 0.
+   * @param fetch how many rows are retrieved. Must be greater than or equal 
to 0.
+   */
+  public static <TSource, TKey> Enumerable<TSource> orderBy(
+      Enumerable<TSource> source,
+      Function1<TSource, TKey> keySelector,
+      Comparator<TKey> comparator,
+      int offset, int fetch) {
+    // As discussed in CALCITE-3920 and CALCITE-4157, this method avoids to 
sort the complete input,
+    // if only the first N rows are actually needed. A TreeMap implementation 
has been chosen,
+    // so that it behaves similar to the orderBy method without fetch/offset.
+    // The TreeMap has a better performance if there are few distinct sort 
keys.
+    return new AbstractEnumerable<TSource>() {
+      @Override public Enumerator<TSource> enumerator() {
+        if (fetch == 0) {
+          return Linq4j.emptyEnumerator();
+        }
+
+        TreeMap<TKey, List<TSource>> map = new TreeMap<>(comparator);
+        long size = 0;
+        long needed = fetch + offset;
+
+        // read the input into a tree map
+        try (Enumerator<TSource> os = source.enumerator()) {
+          while (os.moveNext()) {
+            TSource o = os.current();
+            TKey key = keySelector.apply(o);
+            if (needed >= 0 && size >= needed) {
+              // the current row will never appear in the output, so just skip 
it
+              if (comparator.compare(key, map.lastKey()) >= 0) {
+                continue;
+              }
+              // remove last entry from tree map, so that we keep at most 
'needed' rows
+              List<TSource> l = map.get(map.lastKey());
+              if (l.size() == 1) {
+                map.remove(map.lastKey());
+              } else {
+                l.remove(l.size() - 1);
+              }
+              size--;
+            }
+            // add the current element to the map
+            map.compute(key, (k, l) -> {
+              // for first entry, use a singleton list to save space
+              // when we go from 1 to 2 elements, switch to array list
+              if (l == null) {
+                return Collections.singletonList(o);
+              }
+              if (l.size() == 1) {
+                l = new ArrayList<>(l);
+              }
+              l.add(o);
+              return l;
+            });
+            size++;
+          }
+        }
+
+        // skip the first 'offset' rows by deleting them from the map
+        if (offset > 0) {
+          // search the key up to (but excluding) which we have to remove 
entries from the map
+          int skipped = 0;
+          TKey until = null;
+          for (Map.Entry<TKey, List<TSource>> e : map.entrySet()) {
+            skipped += e.getValue().size();
+
+            if (skipped > offset) {
+              // we might need to remove entries from the list
+              List<TSource> l = e.getValue();
+              int toKeep = skipped - offset;
+              if (toKeep < l.size()) {
+                l.subList(0, l.size() - toKeep).clear();
+              }
+
+              until = e.getKey();
+              break;
+            }
+          }
+          if (until == null) {
+            // the offset is bigger than the number of rows in the map
+            return Linq4j.emptyEnumerator();
+          }
+          map.headMap(until).clear();
+        }
+
+        return new LookupImpl<>(map).valuesEnumerable().enumerator();
+      }
+    };
+  }
+
   /**
    * Sorts the elements of a sequence in descending
    * order according to a key.
diff --git 
a/linq4j/src/test/java/org/apache/calcite/linq4j/test/LimitSortTest.java 
b/linq4j/src/test/java/org/apache/calcite/linq4j/test/LimitSortTest.java
new file mode 100644
index 0000000..6b04c61
--- /dev/null
+++ b/linq4j/src/test/java/org/apache/calcite/linq4j/test/LimitSortTest.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.linq4j.test;
+
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.EnumerableDefaults;
+import org.apache.calcite.linq4j.Linq4j;
+import org.apache.calcite.linq4j.function.Function1;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Performs a randomized test of {@link EnumerableDefaults#orderBy(Enumerable, 
Function1, Comparator, int, int)}.
+ */
+class LimitSortTest {
+
+  /** Row class. */
+  private static class Row {
+    String key;
+    int index;
+
+    @Override public String toString() {
+      return this.key + "/" + this.index;
+    }
+  }
+
+  private Stream<Row> rowStream(long seed) {
+    Random rnd = new Random(seed);
+    int n = rnd.nextInt(1_000_000);
+    return IntStream.range(0, n).mapToObj(i -> {
+      int a = n < 2 ? 0 : rnd.nextInt(n / 2);
+      String k = Integer.toString(a, Character.MAX_RADIX);
+      Row r = new Row();
+      r.key = "" + k;
+      r.index = i;
+      return r;
+    });
+  }
+
+  private Enumerable<Row> enumerable(long seed) {
+    return Linq4j.asEnumerable(() -> this.rowStream(seed).iterator());
+  }
+
+  @Test void test() {
+    for (int i = 0; i < 5; i++) {
+      long seed = System.nanoTime() ^ System.currentTimeMillis();
+      try {
+        this.randomizedTest(seed);
+      } catch (AssertionError e) {
+        // replace with AssertionFailedError
+        throw new RuntimeException("Failed for seed " + seed, e);
+      }
+    }
+  }
+
+  private void randomizedTest(final long seed) {
+    Random rnd = new Random(seed);
+    int fetch = rnd.nextInt(10_000) + 1;
+    int tmp = rnd.nextInt(10_000);
+    int offset = Math.max(0, (int) (tmp - .1 * tmp));
+
+    Comparator<String> cmp = Comparator.<String>naturalOrder()::compare;
+    Enumerable<Row> ordered = EnumerableDefaults.orderBy(
+        this.enumerable(seed),
+        s -> s.key,
+        cmp,
+        offset, fetch
+    );
+
+    List<Row> result = ordered.toList();
+    assertTrue(
+        result.size() <= fetch,
+        "Fetch " + fetch + " has not been respected, result size was " + 
result.size()
+            + ", offset " + offset);
+
+    // check result is sorted correctly
+    for (int i = 1; i < result.size(); i++) {
+      Row left = result.get(i - 1);
+      Row right = result.get(i);
+      // use left < right instead of <=, as rows might not appear twice
+      assertTrue(isSmaller(left, right),
+          "The following elements have not been ordered correctly: " + left + 
" " + right);
+    }
+
+    // check offset and fetch size have been respected
+    Row first;
+    Row last;
+    if (result.isEmpty()) {
+      // may happen if the offset is bigger than the number of items
+      first = null;
+      last = null;
+    } else {
+      first = result.get(0);
+      last = result.get(result.size() - 1);
+    }
+
+    int totalItems = 0;
+    int actOffset = 0;
+    int actFetch = 0;
+    for (Row r : (Iterable<Row>) this.rowStream(seed)::iterator) {
+      totalItems++;
+      if (isSmaller(r, first)) {
+        actOffset++;
+      } else if (isSmallerEq(r, last)) {
+        actFetch++;
+      }
+    }
+
+    // we can skip at most 'totalItems'
+    int expOffset = Math.min(offset, totalItems);
+    assertEquals(expOffset, actOffset, "Offset has not been respected.");
+    // we can only fetch items if there are enough
+    int expFetch = Math.min(totalItems - expOffset, fetch);
+    assertEquals(expFetch, actFetch, "Fetch has not been respected.");
+  }
+
+  /** A comparison function that takes the order of creation into account. */
+  private static boolean isSmaller(Row left, Row right) {
+    if (right == null) {
+      return true;
+    }
+
+    int c = left.key.compareTo(right.key);
+    if (c != 0) {
+      return c < 0;
+    }
+    return left.index < right.index;
+  }
+
+  /** See {@link #isSmaller(Row, Row)}. */
+  private static boolean isSmallerEq(Row left, Row right) {
+    if (right == null) {
+      return true;
+    }
+
+    int c = left.key.compareTo(right.key);
+    if (c != 0) {
+      return c < 0;
+    }
+    return left.index <= right.index;
+  }
+}

[calcite] branch master updated: [CALCITE-3920] Improve ORDER BY computation in Enumerable convention by exploiting LIMIT (Thomas Rebele)

Reply via email to