This is an automated email from the ASF dual-hosted git repository. jhyde pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new 8ea4160f10 [CALCITE-5722] Sarg.isComplementedPoints fails with anti-points which are equal under `compareTo` but not `equals` 8ea4160f10 is described below commit 8ea4160f10e95aca6c3b0029d505bbc56975a873 Author: ian.bertolacci <ian.bertola...@workday.com> AuthorDate: Wed May 24 12:49:40 2023 -0700 [CALCITE-5722] Sarg.isComplementedPoints fails with anti-points which are equal under `compareTo` but not `equals` BigDecimal is a type that implements Comparable but whose natural order is not consistent with equals. For example, if x = BigDecimal("1.0") and and y = BigDecimal("1"), x.equals(y) returns false and x.compareTo(y) returns 0. Guava RangeSet is OK with such types, and Sarg should be also. Before this fix, Sarg could not deduce that z < 1 or 1.0 < z is equivalent to z <> 1 Close apache/calcite#3224 --- .../java/org/apache/calcite/util/RangeSets.java | 10 +- .../org/apache/calcite/rex/RexProgramTest.java | 54 +++++++++ .../java/org/apache/calcite/util/RangeSetTest.java | 125 ++++++++++++++++++++- .../java/org/apache/calcite/test/Matchers.java | 14 ++- 4 files changed, 192 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/util/RangeSets.java b/core/src/main/java/org/apache/calcite/util/RangeSets.java index a0967e2383..465ae83cc8 100644 --- a/core/src/main/java/org/apache/calcite/util/RangeSets.java +++ b/core/src/main/java/org/apache/calcite/util/RangeSets.java @@ -129,7 +129,7 @@ public class RangeSets { public static <C extends Comparable<C>> boolean isPoint(Range<C> range) { return range.hasLowerBound() && range.hasUpperBound() - && range.lowerEndpoint().equals(range.upperEndpoint()) + && range.lowerEndpoint().compareTo(range.upperEndpoint()) == 0 && !range.isEmpty(); } @@ -183,6 +183,10 @@ public class RangeSets { if (range.upperBoundType() == BoundType.OPEN) { return handler.closedOpen(lower, upper); } else { + // We use .equals rather than .compareTo, because if the endpoints + // are not equal the range could not have been created using + // Range.singleton. (If they are equal, we cannot distinguish + // between closed(v, v) but assume singleton(v).) if (lower.equals(upper)) { return handler.singleton(lower); } else { @@ -249,6 +253,10 @@ public class RangeSets { if (range.upperBoundType() == BoundType.OPEN) { consumer.closedOpen(lower, upper); } else { + // We use .equals rather than .compareTo, because if the endpoints + // are not equal the range could not have been created using + // Range.singleton. (If they are equal, we cannot distinguish + // between closed(v, v) but assume singleton(v).) if (lower.equals(upper)) { consumer.singleton(lower); } else { diff --git a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java index e7663ba51f..1a356e75a9 100644 --- a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java +++ b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java @@ -1713,6 +1713,60 @@ class RexProgramTest extends RexProgramTestBase { .expandedSearch(expanded); } + @Test void testSimplifyRange8() { + final RexNode aRef = input(tInt(true), 0); + // a not in (1.0, 2.0) + RexNode expr = + not( + in(aRef, + literal(new BigDecimal("1.0")), + literal(new BigDecimal("2.0")))); + final String simplified = "SEARCH($0, " + + "Sarg[(-\u221e..1.0:DECIMAL(2, 1))," + + " (1.0:DECIMAL(2, 1)..2.0:DECIMAL(2, 1))," + + " (2.0:DECIMAL(2, 1)..+\u221e)]:DECIMAL(2, 1))"; + checkSimplify(expr, simplified); + + // The following identical to previous, and simplifies to the same: + // a < 1.0 or (1.0 < a and a < 2.0) or 2.0 < a + RexNode expr2 = + or(lt(aRef, literal(new BigDecimal("1.0"))), + and(lt(literal(new BigDecimal("1.0")), aRef), + lt(aRef, literal(new BigDecimal("2.0")))), + lt(literal(new BigDecimal("2.0")), aRef)); + checkSimplify(expr2, simplified); + + // Identical to previous, except one "2.0" is "2": + // a < 1.0 or (1.0 < a and a < 2.0) or 2.00 < a + RexNode expr3 = + or(lt(aRef, literal(new BigDecimal("1.0"))), + and(lt(literal(new BigDecimal("1.0")), aRef), + lt(aRef, literal(new BigDecimal("2.0")))), + lt(literal(new BigDecimal("2")), aRef)); + final String simplified3 = "SEARCH($0, " + + "Sarg[(-\u221e..1.0:DECIMAL(11, 1))," + + " (1.0:DECIMAL(11, 1)..2.0:DECIMAL(11, 1))," + + " (2:DECIMAL(11, 1)..+\u221e)]:DECIMAL(11, 1))"; + checkSimplify(expr3, simplified3); + } + + @Test void testSimplifyRange9() { + // Contiguous ranges can be merged: + // a > 1 and <= 2 or a > 2.0 and a < 3 + // becomes + // a > 1 and a < 3 + final RexNode aRef = input(tInt(true), 0); + RexNode expr = + or( + and(gt(aRef, literal(1)), + le(aRef, literal(2))), + and(gt(aRef, literal(new BigDecimal("2.0"))), + lt(aRef, literal(3)))); + final String simplified = "SEARCH($0, " + + "Sarg[(1:DECIMAL(11, 1)..3:DECIMAL(11, 1))]:DECIMAL(11, 1))"; + checkSimplify(expr, simplified); + } + /** Unit test for * <a href="https://issues.apache.org/jira/browse/CALCITE-4352">[CALCITE-4352] * OR simplification incorrectly loses term</a>. */ diff --git a/core/src/test/java/org/apache/calcite/util/RangeSetTest.java b/core/src/test/java/org/apache/calcite/util/RangeSetTest.java index aa619ff3f1..76dab211a4 100644 --- a/core/src/test/java/org/apache/calcite/util/RangeSetTest.java +++ b/core/src/test/java/org/apache/calcite/util/RangeSetTest.java @@ -17,6 +17,7 @@ package org.apache.calcite.util; import org.apache.calcite.linq4j.Ord; import org.apache.calcite.rel.externalize.RelJson; +import org.apache.calcite.test.Matchers; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableRangeSet; @@ -29,16 +30,20 @@ import org.junit.jupiter.api.Test; import java.math.BigDecimal; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.function.BiConsumer; +import java.util.function.Function; import static org.apache.calcite.test.Matchers.isRangeSet; import static org.hamcrest.CoreMatchers.anyOf; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.hasToString; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import static java.util.Arrays.asList; /** * Unit test for {@link RangeSets} and other utilities relating to Guava @@ -155,6 +160,118 @@ class RangeSetTest { assertThat(RangeSets.isPoint(Range.atMost(0)), is(false)); assertThat(RangeSets.isPoint(Range.greaterThan(0)), is(false)); assertThat(RangeSets.isPoint(Range.atLeast(0)), is(false)); + + // Test situation where endpoints of closed range are equal under + // Comparable.compareTo but not T.equals. + final BigDecimal one = new BigDecimal("1"); + final BigDecimal onePoint = new BigDecimal("1.0"); + assertThat(RangeSets.isPoint(Range.closed(one, onePoint)), is(true)); + } + + /** Tests ranges with a data type that implements {@link Comparable} + * but is not consistent with {@code equals}. + * + * <p>Per {@link Comparable}: + * + * <blockquote> + * Virtually all Java core classes that implement Comparable have natural + * orderings that are consistent with equals. One exception is + * {@link BigDecimal}, whose natural ordering equates {@code BigDecimal} + * objects with equal numerical values and different representations + * (such as 4.0 and 4.00). For {@link BigDecimal#equals} to return true, + * the representation and numerical value of the two {@code BigDecimal} + * objects must be the same. + * </blockquote> + */ + @Test void testNotConsistentWithEquals() { + final BigDecimal one = new BigDecimal("1"); + final BigDecimal onePoint = new BigDecimal("1.0"); + final BigDecimal two = BigDecimal.valueOf(2); + assertThat(one.equals(onePoint), is(false)); + assertThat(onePoint.equals(one), is(false)); + assertThat(one.compareTo(onePoint), is(0)); + assertThat(onePoint.equals(one), is(false)); + assertThat(one.equals(BigDecimal.ONE), is(true)); + assertThat(onePoint.equals(BigDecimal.ONE), is(false)); + assertThat(RangeSets.isPoint(Range.closed(one, onePoint)), is(true)); + + // Ranges (0, 1], [1.0, 2) merges to [0, 2). + final Range<BigDecimal> range01 = Range.closed(BigDecimal.ZERO, one); + final Range<BigDecimal> range01point = Range.closed(BigDecimal.ZERO, onePoint); + final Range<BigDecimal> range1point2 = Range.closedOpen(onePoint, two); + final Range<BigDecimal> range12 = Range.closedOpen(one, two); + assertThrows(IllegalArgumentException.class, + () -> ImmutableRangeSet.<BigDecimal>builder() + .add(range01point) + .add(range12) + .build()); + + assertThat(RangeSets.compare(range01, range12), is(-1)); + assertThat(RangeSets.compare(range12, range01), is(1)); + assertThat(RangeSets.compare(range01, range01point), is(0)); + assertThat(RangeSets.compare(range12, range1point2), is(0)); + + // Ranges are merged correctly. + final ImmutableRangeSet<BigDecimal> rangeSet = + unionRangeSet(asList(range01point, range12)); + final ImmutableRangeSet<BigDecimal> rangeSet2 = + unionRangeSet(asList(range01, range12)); + final ImmutableRangeSet<BigDecimal> rangeSet3 = + unionRangeSet(asList(range01, range1point2)); + assertThat(rangeSet.asRanges(), hasSize(1)); + assertThat(rangeSet, is(rangeSet2)); + assertThat(rangeSet, is(rangeSet3)); + + // Check the Consumer mechanism; RangeSets.printer is a Consumer, + // and it gives the Consumer mechanism a pretty good workout. + final Function<RangeSet<BigDecimal>, String> f = + rs -> { + final StringBuilder buf = new StringBuilder(); + final RangeSets.Consumer<BigDecimal> printer = + RangeSets.printer(buf, StringBuilder::append); + RangeSets.forEach(rs, printer); + return Matchers.sanitizeRangeSet(buf.toString()); + }; + final Function<Range<BigDecimal>, String> f2 = + r -> f.apply(ImmutableRangeSet.of(r)); + assertThat(f.apply(rangeSet), is("[0..2)")); + + // If a closed range has bounds that are equal, Consumer should treat them + // as singletons of the lower bound; but not if the bounds compareTo 0. + assertThat(f2.apply(Range.singleton(onePoint)), is("1.0")); + assertThat(f2.apply(Range.closed(one, one)), is("1")); + assertThat(f2.apply(Range.closed(one, onePoint)), is("[1..1.0]")); + assertThat(f2.apply(Range.closed(onePoint, one)), is("[1.0..1]")); + assertThat(f2.apply(Range.closed(onePoint, onePoint)), is("1.0")); + assertThat(f2.apply(Range.closed(onePoint, two)), is("[1.0..2]")); + + // As for Consumer, now for Handler. + // RangeSets.copy tests Handler pretty thoroughly. + final Function<RangeSet<BigDecimal>, RangeSet<BigDecimal>> g = + rs -> RangeSets.copy(rs, v -> v.multiply(two)); + final Function<Range<BigDecimal>, RangeSet<BigDecimal>> g2 = + r -> g.apply(ImmutableRangeSet.of(r)); + assertThat(g.apply(rangeSet), isRangeSet("[[0..4)]")); + + assertThat(g2.apply(Range.singleton(onePoint)), isRangeSet("[[2.0..2.0]]")); + assertThat(g2.apply(Range.closed(one, one)), isRangeSet("[[2..2]]")); + assertThat(g2.apply(Range.closed(one, onePoint)), isRangeSet("[[2..2.0]]")); + assertThat(g2.apply(Range.closed(onePoint, one)), isRangeSet("[[2.0..2]]")); + assertThat(g2.apply(Range.closed(onePoint, onePoint)), + isRangeSet("[[2.0..2.0]]")); + assertThat(g2.apply(Range.closed(onePoint, two)), isRangeSet("[[2.0..4]]")); + } + + /** Equivalent to {@link ImmutableRangeSet#unionOf(Iterable)}, which was only + * added in Guava 21. */ + @SuppressWarnings("unchecked") + private static <C extends Comparable<C>> ImmutableRangeSet<C> unionRangeSet( + List<? extends Range<C>> ranges) { + final TreeRangeSet<C> treeRangeSet = TreeRangeSet.create(); + for (Range<C> range : ranges) { + treeRangeSet.add(range); + } + return ImmutableRangeSet.copyOf(treeRangeSet); } /** Tests {@link RangeSets#isOpenInterval(RangeSet)}. */ @@ -420,7 +537,7 @@ class RangeSetTest { final ImmutableRangeSet<Integer> empty = ImmutableRangeSet.of(); final List<Range<Integer>> ranges = - Arrays.asList(Range.all(), + asList(Range.all(), Range.atMost(3), Range.atLeast(4), Range.lessThan(5), @@ -442,7 +559,7 @@ class RangeSetTest { + "closedOpen(14, 15)"; final List<Range<Integer>> sortedRanges = - Arrays.asList( + asList( Range.lessThan(3), Range.atMost(3), Range.lessThan(5), @@ -460,7 +577,7 @@ class RangeSetTest { Range.closedOpen(14, 15)); final List<Range<Integer>> disjointRanges = - Arrays.asList(Range.lessThan(5), + asList(Range.lessThan(5), Range.greaterThan(16), Range.singleton(7), Range.open(8, 9), diff --git a/testkit/src/main/java/org/apache/calcite/test/Matchers.java b/testkit/src/main/java/org/apache/calcite/test/Matchers.java index 03553cecfe..3a3648be1a 100644 --- a/testkit/src/main/java/org/apache/calcite/test/Matchers.java +++ b/testkit/src/main/java/org/apache/calcite/test/Matchers.java @@ -287,13 +287,15 @@ public class Matchers { * <p>This method is necessary because {@link RangeSet#toString()} changed * behavior. Guava 19 - 28 used a unicode symbol; Guava 29 onwards uses "..". */ - @SuppressWarnings("BetaApi") + @SuppressWarnings({"BetaApi", "rawtypes"}) public static Matcher<RangeSet> isRangeSet(final String value) { - return compose(Is.is(value), input -> { - // Change all '\u2025' (a unicode symbol denoting a range) to '..', - // consistent with Guava 29+. - return input.toString().replace("\u2025", ".."); - }); + return compose(Is.is(value), input -> sanitizeRangeSet(input.toString())); + } + + /** Changes all '\u2025' (a unicode symbol denoting a range) to '..', + * consistent with Guava 29+. */ + public static String sanitizeRangeSet(String string) { + return string.replace("\u2025", ".."); } /**