This is an automated email from the ASF dual-hosted git repository.
rubenql pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 103c73f [CALCITE-3920] Improve ORDER BY computation in Enumerable
convention by exploiting LIMIT (Thomas Rebele)
103c73f is described below
commit 103c73f639443e84fae4a600aa5ab05a8139cf91
Author: tre <[email protected]>
AuthorDate: Wed Aug 12 15:59:37 2020 +0200
[CALCITE-3920] Improve ORDER BY computation in Enumerable convention by
exploiting LIMIT (Thomas Rebele)
---
.../adapter/enumerable/EnumerableLimit.java | 8 +-
.../adapter/enumerable/EnumerableLimitSort.java | 155 +++++++++++++++++++
.../enumerable/EnumerableLimitSortRule.java | 60 ++++++++
.../adapter/enumerable/EnumerableRules.java | 3 +
.../calcite/adapter/enumerable/EnumerableSort.java | 4 +
.../org/apache/calcite/util/BuiltInMethod.java | 2 +
.../org/apache/calcite/test/RelOptRulesTest.java | 48 ++++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 29 ++++
.../apache/calcite/linq4j/EnumerableDefaults.java | 94 ++++++++++++
.../apache/calcite/linq4j/test/LimitSortTest.java | 165 +++++++++++++++++++++
10 files changed, 564 insertions(+), 4 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimit.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimit.java
index 9b68a52..b7eae20 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimit.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimit.java
@@ -125,16 +125,16 @@ public class EnumerableLimit extends SingleRel implements
EnumerableRel {
return implementor.result(physType, builder.toBlock());
}
- private static Expression getExpression(RexNode offset) {
- if (offset instanceof RexDynamicParam) {
- final RexDynamicParam param = (RexDynamicParam) offset;
+ static Expression getExpression(RexNode rexNode) {
+ if (rexNode instanceof RexDynamicParam) {
+ final RexDynamicParam param = (RexDynamicParam) rexNode;
return Expressions.convert_(
Expressions.call(DataContext.ROOT,
BuiltInMethod.DATA_CONTEXT_GET.method,
Expressions.constant("?" + param.getIndex())),
Integer.class);
} else {
- return Expressions.constant(RexLiteral.intValue(offset));
+ return Expressions.constant(RexLiteral.intValue(rexNode));
}
}
}
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSort.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSort.java
new file mode 100644
index 0000000..9531303
--- /dev/null
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSort.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.adapter.enumerable;
+
+import org.apache.calcite.linq4j.tree.BlockBuilder;
+import org.apache.calcite.linq4j.tree.Expression;
+import org.apache.calcite.linq4j.tree.Expressions;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexDynamicParam;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Pair;
+
+import static
org.apache.calcite.adapter.enumerable.EnumerableLimit.getExpression;
+
+/**
+ * Implementation of {@link org.apache.calcite.rel.core.Sort} in
+ * {@link org.apache.calcite.adapter.enumerable.EnumerableConvention
enumerable calling convention}.
+ * It optimizes sorts that have a limit and an optional offset.
+ */
+public class EnumerableLimitSort extends Sort implements EnumerableRel {
+
+ /**
+ * Creates an EnumerableLimitSort.
+ *
+ * <p>Use {@link #create} unless you know what you're doing.
+ */
+ public EnumerableLimitSort(
+ RelOptCluster cluster,
+ RelTraitSet traitSet,
+ RelNode input,
+ RelCollation collation,
+ RexNode offset,
+ RexNode fetch) {
+ super(cluster, traitSet, input, collation, offset, fetch);
+ assert this.getConvention() instanceof EnumerableConvention;
+ assert this.getConvention() == input.getConvention();
+ }
+
+ /** Creates an EnumerableLimitSort. */
+ public static EnumerableLimitSort create(
+ RelNode input,
+ RelCollation collation,
+ RexNode offset,
+ RexNode fetch) {
+ final RelOptCluster cluster = input.getCluster();
+ final RelTraitSet traitSet =
cluster.traitSetOf(EnumerableConvention.INSTANCE).replace(
+ collation);
+ return new EnumerableLimitSort(cluster, traitSet, input, collation,
offset, fetch);
+ }
+
+ @Override public EnumerableLimitSort copy(
+ RelTraitSet traitSet,
+ RelNode newInput,
+ RelCollation newCollation,
+ RexNode offset,
+ RexNode fetch) {
+ return new EnumerableLimitSort(
+ this.getCluster(),
+ traitSet,
+ newInput,
+ newCollation,
+ offset,
+ fetch);
+ }
+
+ @Override public Result implement(EnumerableRelImplementor implementor,
Prefer pref) {
+ final BlockBuilder builder = new BlockBuilder();
+ final EnumerableRel child = (EnumerableRel) this.getInput();
+ final Result result = implementor.visitChild(this, 0, child, pref);
+ final PhysType physType = PhysTypeImpl.of(
+ implementor.getTypeFactory(),
+ this.getRowType(),
+ result.format);
+ final Expression childExp = builder.append("child", result.block);
+
+ final PhysType inputPhysType = result.physType;
+ final Pair<Expression, Expression> pair =
+
inputPhysType.generateCollationKey(this.collation.getFieldCollations());
+
+ final Expression fetchVal;
+ if (this.fetch == null) {
+ fetchVal = Expressions.constant(Integer.valueOf(Integer.MAX_VALUE));
+ } else {
+ fetchVal = getExpression(this.fetch);
+ }
+
+ final Expression offsetVal = this.offset == null ?
Expressions.constant(Integer.valueOf(0))
+ : getExpression(this.offset);
+
+ builder.add(
+ Expressions.return_(
+ null, Expressions.call(
+ BuiltInMethod.ORDER_BY_WITH_FETCH_AND_OFFSET.method,
Expressions.list(
+ childExp,
+ builder.append("keySelector", pair.left))
+ .appendIfNotNull(builder.appendIfNotNull("comparator",
pair.right))
+ .appendIfNotNull(
+ builder.appendIfNotNull("offset",
+ Expressions.constant(offsetVal)))
+ .appendIfNotNull(
+ builder.appendIfNotNull("fetch",
+ Expressions.constant(fetchVal)))
+ )));
+ return implementor.result(physType, builder.toBlock());
+ }
+
+ @Override public RelOptCost computeSelfCost(RelOptPlanner planner,
RelMetadataQuery mq) {
+ final double rowCount = mq.getRowCount(this.input).doubleValue();
+ double toSort = getValue(this.fetch, rowCount);
+ if (this.offset != null) {
+ toSort += getValue(this.offset, rowCount);
+ }
+ // we need to sort at most rowCount rows
+ toSort = Math.min(rowCount, toSort);
+
+ // we need to process rowCount rows, and for every row
+ // we search the key in a TreeMap with at most toSort entries
+ final double lookup = Math.max(1., Math.log(toSort));
+ final double bytesPerRow = this.getRowType().getFieldCount() * 4.;
+ final double cpu = (rowCount * lookup) * bytesPerRow;
+
+ RelOptCost cost = planner.getCostFactory().makeCost(rowCount, cpu, 0);
+ return cost;
+ }
+
+ private double getValue(RexNode r, double defaultValue) {
+ if (r == null || r instanceof RexDynamicParam) {
+ return defaultValue;
+ }
+ return RexLiteral.intValue(r);
+ }
+}
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSortRule.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSortRule.java
new file mode 100644
index 0000000..851e411
--- /dev/null
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableLimitSortRule.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.adapter.enumerable;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.logical.LogicalSort;
+
+/**
+ * Rule to convert an {@link EnumerableLimit} of on
+ * {@link EnumerableSort} into an {@link EnumerableLimitSort}.
+ */
+public class EnumerableLimitSortRule extends
RelRule<EnumerableLimitSortRule.Config> {
+
+ /**
+ * Creates a EnumerableLimitSortRule.
+ */
+ public EnumerableLimitSortRule(Config config) {
+ super(config);
+ }
+
+ @Override public void onMatch(RelOptRuleCall call) {
+ final LogicalSort sort = call.rel(0);
+ RelNode input = sort.getInput();
+ final Sort o = EnumerableLimitSort.create(
+ convert(input,
input.getTraitSet().replace(EnumerableConvention.INSTANCE)),
+ sort.getCollation(),
+ sort.offset, sort.fetch
+ );
+
+ call.transformTo(o);
+ }
+
+ /** Rule configuration. */
+ public interface Config extends RelRule.Config {
+ Config DEFAULT = EMPTY.withOperandSupplier(
+ b0 -> b0.operand(LogicalSort.class).predicate(sort -> sort.fetch !=
null).anyInputs())
+ .as(Config.class);
+
+ @Override default EnumerableLimitSortRule toRule() {
+ return new EnumerableLimitSortRule(this);
+ }
+ }
+}
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableRules.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableRules.java
index 1222353..8816923 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableRules.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableRules.java
@@ -86,6 +86,9 @@ public class EnumerableRules {
public static final EnumerableSortRule ENUMERABLE_SORT_RULE =
EnumerableSortRule.DEFAULT_CONFIG.toRule(EnumerableSortRule.class);
+ public static final EnumerableLimitSortRule ENUMERABLE_LIMIT_SORT_RULE =
+ EnumerableLimitSortRule.Config.DEFAULT.toRule();
+
public static final EnumerableLimitRule ENUMERABLE_LIMIT_RULE =
EnumerableLimitRule.Config.DEFAULT.toRule();
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableSort.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableSort.java
index 1cc8d7b..568a4ee 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableSort.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableSort.java
@@ -28,6 +28,8 @@ import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.Pair;
+import com.google.common.base.Preconditions;
+
/** Implementation of {@link org.apache.calcite.rel.core.Sort} in
* {@link org.apache.calcite.adapter.enumerable.EnumerableConvention
enumerable calling convention}. */
public class EnumerableSort extends Sort implements EnumerableRel {
@@ -41,6 +43,8 @@ public class EnumerableSort extends Sort implements
EnumerableRel {
super(cluster, traitSet, input, collation, offset, fetch);
assert getConvention() instanceof EnumerableConvention;
assert getConvention() == input.getConvention();
+ Preconditions.checkArgument(fetch == null);
+ Preconditions.checkArgument(offset == null);
}
/** Creates an EnumerableSort. */
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 6dbc319..1521a1b 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -228,6 +228,8 @@ public enum BuiltInMethod {
Function2.class, Function1.class),
ORDER_BY(ExtendedEnumerable.class, "orderBy", Function1.class,
Comparator.class),
+ ORDER_BY_WITH_FETCH_AND_OFFSET(EnumerableDefaults.class, "orderBy",
Enumerable.class,
+ Function1.class, Comparator.class, int.class, int.class),
UNION(ExtendedEnumerable.class, "union", Enumerable.class),
CONCAT(ExtendedEnumerable.class, "concat", Enumerable.class),
REPEAT_UNION(EnumerableDefaults.class, "repeatUnion", Enumerable.class,
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index e5106e7..46a0477 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -1043,6 +1043,54 @@ class RelOptRulesTest extends RelOptTestBase {
.check();
}
+ /**
+ * Test if limit and sort are replaced by a limit sort.
+ * Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-3920">[CALCITE-3920]
+ * Improve ORDER BY computation in Enumerable convention by exploiting
LIMIT</a>.
+ */
+ @Test void testLimitSort() {
+ final String sql = "select mgr from sales.emp\n"
+ + "union select mgr from sales.emp\n"
+ + "order by mgr limit 10 offset 5";
+
+ VolcanoPlanner planner = new VolcanoPlanner(null, null);
+ planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
+ RelOptUtil.registerDefaultRules(planner, false, false);
+ planner.addRule(EnumerableRules.ENUMERABLE_LIMIT_SORT_RULE);
+
+ Tester tester = createTester().withDecorrelation(true)
+ .withClusterFactory(
+ relOptCluster -> RelOptCluster.create(planner,
relOptCluster.getRexBuilder()));
+
+ RelRoot root = tester.convertSqlToRel(sql);
+
+ String planBefore = NL + RelOptUtil.toString(root.rel);
+ getDiffRepos().assertEquals("planBefore", "${planBefore}", planBefore);
+
+ RuleSet ruleSet =
+ RuleSets.ofList(
+ EnumerableRules.ENUMERABLE_SORT_RULE,
+ EnumerableRules.ENUMERABLE_LIMIT_RULE,
+ EnumerableRules.ENUMERABLE_LIMIT_SORT_RULE,
+ EnumerableRules.ENUMERABLE_PROJECT_RULE,
+ EnumerableRules.ENUMERABLE_FILTER_RULE,
+ EnumerableRules.ENUMERABLE_UNION_RULE,
+ EnumerableRules.ENUMERABLE_TABLE_SCAN_RULE
+ );
+ Program program = Programs.of(ruleSet);
+
+ RelTraitSet toTraits =
+ root.rel.getCluster().traitSet()
+ .replace(0, EnumerableConvention.INSTANCE);
+
+ RelNode relAfter = program.run(planner, root.rel, toTraits,
+ Collections.emptyList(), Collections.emptyList());
+
+ String planAfter = NL + RelOptUtil.toString(relAfter);
+ getDiffRepos().assertEquals("planAfter", "${planAfter}", planAfter);
+ }
+
@Test void testSemiJoinRuleExists() {
final String sql = "select * from dept where exists (\n"
+ " select * from emp\n"
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index bd5176f..7662bf8 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -3806,6 +3806,35 @@ LogicalProject(SAL=[$5])
]]>
</Resource>
</TestCase>
+ <TestCase name="testLimitSort">
+ <Resource name="sql">
+ <![CDATA[select mgr from sales.emp
+union select mgr from sales.emp
+order by mgr limit 10 offset 5]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC], offset=[5], fetch=[10])
+ LogicalProject(MGR=[$0])
+ LogicalUnion(all=[false])
+ LogicalProject(MGR=[$3])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(MGR=[$3])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+EnumerableLimitSort(sort0=[$0], dir0=[ASC], offset=[5], fetch=[10])
+ EnumerableProject(MGR=[$0])
+ EnumerableUnion(all=[false])
+ EnumerableProject(MGR=[$3])
+ EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+ EnumerableProject(MGR=[$3])
+ EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
<TestCase name="testMergeIntersect">
<Resource name="sql">
<![CDATA[select * from emp where deptno = 10
diff --git
a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
index 512130d..e1b9de8 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
@@ -2625,6 +2625,100 @@ public abstract class EnumerableDefaults {
};
}
+
+ /**
+ * A sort implementation optimized for a sort with a fetch size (LIMIT).
+ * @param offset how many rows are skipped from the sorted output.
+ * Must be greater than or equal to 0.
+ * @param fetch how many rows are retrieved. Must be greater than or equal
to 0.
+ */
+ public static <TSource, TKey> Enumerable<TSource> orderBy(
+ Enumerable<TSource> source,
+ Function1<TSource, TKey> keySelector,
+ Comparator<TKey> comparator,
+ int offset, int fetch) {
+ // As discussed in CALCITE-3920 and CALCITE-4157, this method avoids to
sort the complete input,
+ // if only the first N rows are actually needed. A TreeMap implementation
has been chosen,
+ // so that it behaves similar to the orderBy method without fetch/offset.
+ // The TreeMap has a better performance if there are few distinct sort
keys.
+ return new AbstractEnumerable<TSource>() {
+ @Override public Enumerator<TSource> enumerator() {
+ if (fetch == 0) {
+ return Linq4j.emptyEnumerator();
+ }
+
+ TreeMap<TKey, List<TSource>> map = new TreeMap<>(comparator);
+ long size = 0;
+ long needed = fetch + offset;
+
+ // read the input into a tree map
+ try (Enumerator<TSource> os = source.enumerator()) {
+ while (os.moveNext()) {
+ TSource o = os.current();
+ TKey key = keySelector.apply(o);
+ if (needed >= 0 && size >= needed) {
+ // the current row will never appear in the output, so just skip
it
+ if (comparator.compare(key, map.lastKey()) >= 0) {
+ continue;
+ }
+ // remove last entry from tree map, so that we keep at most
'needed' rows
+ List<TSource> l = map.get(map.lastKey());
+ if (l.size() == 1) {
+ map.remove(map.lastKey());
+ } else {
+ l.remove(l.size() - 1);
+ }
+ size--;
+ }
+ // add the current element to the map
+ map.compute(key, (k, l) -> {
+ // for first entry, use a singleton list to save space
+ // when we go from 1 to 2 elements, switch to array list
+ if (l == null) {
+ return Collections.singletonList(o);
+ }
+ if (l.size() == 1) {
+ l = new ArrayList<>(l);
+ }
+ l.add(o);
+ return l;
+ });
+ size++;
+ }
+ }
+
+ // skip the first 'offset' rows by deleting them from the map
+ if (offset > 0) {
+ // search the key up to (but excluding) which we have to remove
entries from the map
+ int skipped = 0;
+ TKey until = null;
+ for (Map.Entry<TKey, List<TSource>> e : map.entrySet()) {
+ skipped += e.getValue().size();
+
+ if (skipped > offset) {
+ // we might need to remove entries from the list
+ List<TSource> l = e.getValue();
+ int toKeep = skipped - offset;
+ if (toKeep < l.size()) {
+ l.subList(0, l.size() - toKeep).clear();
+ }
+
+ until = e.getKey();
+ break;
+ }
+ }
+ if (until == null) {
+ // the offset is bigger than the number of rows in the map
+ return Linq4j.emptyEnumerator();
+ }
+ map.headMap(until).clear();
+ }
+
+ return new LookupImpl<>(map).valuesEnumerable().enumerator();
+ }
+ };
+ }
+
/**
* Sorts the elements of a sequence in descending
* order according to a key.
diff --git
a/linq4j/src/test/java/org/apache/calcite/linq4j/test/LimitSortTest.java
b/linq4j/src/test/java/org/apache/calcite/linq4j/test/LimitSortTest.java
new file mode 100644
index 0000000..6b04c61
--- /dev/null
+++ b/linq4j/src/test/java/org/apache/calcite/linq4j/test/LimitSortTest.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.linq4j.test;
+
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.EnumerableDefaults;
+import org.apache.calcite.linq4j.Linq4j;
+import org.apache.calcite.linq4j.function.Function1;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Performs a randomized test of {@link EnumerableDefaults#orderBy(Enumerable,
Function1, Comparator, int, int)}.
+ */
+class LimitSortTest {
+
+ /** Row class. */
+ private static class Row {
+ String key;
+ int index;
+
+ @Override public String toString() {
+ return this.key + "/" + this.index;
+ }
+ }
+
+ private Stream<Row> rowStream(long seed) {
+ Random rnd = new Random(seed);
+ int n = rnd.nextInt(1_000_000);
+ return IntStream.range(0, n).mapToObj(i -> {
+ int a = n < 2 ? 0 : rnd.nextInt(n / 2);
+ String k = Integer.toString(a, Character.MAX_RADIX);
+ Row r = new Row();
+ r.key = "" + k;
+ r.index = i;
+ return r;
+ });
+ }
+
+ private Enumerable<Row> enumerable(long seed) {
+ return Linq4j.asEnumerable(() -> this.rowStream(seed).iterator());
+ }
+
+ @Test void test() {
+ for (int i = 0; i < 5; i++) {
+ long seed = System.nanoTime() ^ System.currentTimeMillis();
+ try {
+ this.randomizedTest(seed);
+ } catch (AssertionError e) {
+ // replace with AssertionFailedError
+ throw new RuntimeException("Failed for seed " + seed, e);
+ }
+ }
+ }
+
+ private void randomizedTest(final long seed) {
+ Random rnd = new Random(seed);
+ int fetch = rnd.nextInt(10_000) + 1;
+ int tmp = rnd.nextInt(10_000);
+ int offset = Math.max(0, (int) (tmp - .1 * tmp));
+
+ Comparator<String> cmp = Comparator.<String>naturalOrder()::compare;
+ Enumerable<Row> ordered = EnumerableDefaults.orderBy(
+ this.enumerable(seed),
+ s -> s.key,
+ cmp,
+ offset, fetch
+ );
+
+ List<Row> result = ordered.toList();
+ assertTrue(
+ result.size() <= fetch,
+ "Fetch " + fetch + " has not been respected, result size was " +
result.size()
+ + ", offset " + offset);
+
+ // check result is sorted correctly
+ for (int i = 1; i < result.size(); i++) {
+ Row left = result.get(i - 1);
+ Row right = result.get(i);
+ // use left < right instead of <=, as rows might not appear twice
+ assertTrue(isSmaller(left, right),
+ "The following elements have not been ordered correctly: " + left +
" " + right);
+ }
+
+ // check offset and fetch size have been respected
+ Row first;
+ Row last;
+ if (result.isEmpty()) {
+ // may happen if the offset is bigger than the number of items
+ first = null;
+ last = null;
+ } else {
+ first = result.get(0);
+ last = result.get(result.size() - 1);
+ }
+
+ int totalItems = 0;
+ int actOffset = 0;
+ int actFetch = 0;
+ for (Row r : (Iterable<Row>) this.rowStream(seed)::iterator) {
+ totalItems++;
+ if (isSmaller(r, first)) {
+ actOffset++;
+ } else if (isSmallerEq(r, last)) {
+ actFetch++;
+ }
+ }
+
+ // we can skip at most 'totalItems'
+ int expOffset = Math.min(offset, totalItems);
+ assertEquals(expOffset, actOffset, "Offset has not been respected.");
+ // we can only fetch items if there are enough
+ int expFetch = Math.min(totalItems - expOffset, fetch);
+ assertEquals(expFetch, actFetch, "Fetch has not been respected.");
+ }
+
+ /** A comparison function that takes the order of creation into account. */
+ private static boolean isSmaller(Row left, Row right) {
+ if (right == null) {
+ return true;
+ }
+
+ int c = left.key.compareTo(right.key);
+ if (c != 0) {
+ return c < 0;
+ }
+ return left.index < right.index;
+ }
+
+ /** See {@link #isSmaller(Row, Row)}. */
+ private static boolean isSmallerEq(Row left, Row right) {
+ if (right == null) {
+ return true;
+ }
+
+ int c = left.key.compareTo(right.key);
+ if (c != 0) {
+ return c < 0;
+ }
+ return left.index <= right.index;
+ }
+}