[CALCITE-1658] DateRangeRules incorrectly rewrites EXTRACT calls (Nishant Bangarwa)
Fix several issues where DateRangeRules incorrectly rewrites EXTRACT calls, causing wrong results. Close apache/calcite#596 Close apache/calcite#587 (unrelated, but forgot to close it in [CALCITE-2102]) Project: http://git-wip-us.apache.org/repos/asf/calcite/repo Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/1e9fd38b Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/1e9fd38b Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/1e9fd38b Branch: refs/heads/master Commit: 1e9fd38ba7b702675a066f6bd712d03b7b07410e Parents: 2a171fb Author: Nishant <[email protected]> Authored: Mon Jan 1 16:15:32 2018 +0530 Committer: Julian Hyde <[email protected]> Committed: Tue Jan 2 14:24:51 2018 -0800 ---------------------------------------------------------------------- .../calcite/rel/rules/DateRangeRules.java | 94 +++++++++---- .../calcite/rel/rules/DateRangeRulesTest.java | 136 +++++++++++++++++-- .../calcite/test/DruidDateRangeRulesTest.java | 30 +--- 3 files changed, 197 insertions(+), 63 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/calcite/blob/1e9fd38b/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java b/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java index 5b7cf95..9288ce4 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java @@ -38,12 +38,14 @@ import org.apache.calcite.util.Bug; import org.apache.calcite.util.DateString; import org.apache.calcite.util.Util; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.BoundType; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableRangeSet; -import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; @@ -120,19 +122,41 @@ public abstract class DateRangeRules { .put(TimeUnitRange.MICROSECOND, TimeUnitRange.SECOND) .build(); - /** Returns whether an expression contains one or more calls to the - * {@code EXTRACT} function. */ - public static Set<TimeUnitRange> extractTimeUnits(RexNode e) { + /** Tests whether an expression contains one or more calls to the + * {@code EXTRACT} function, and if so, returns the time units used. + * + * <p>The result is an immutable set in natural order. This is important, + * because code relies on the collection being sorted (so YEAR comes before + * MONTH before HOUR) and unique. A predicate on MONTH is not useful if there + * is no predicate on YEAR. Then when we apply the predicate on DAY it doesn't + * generate hundreds of ranges we'll later throw away. */ + static ImmutableSortedSet<TimeUnitRange> extractTimeUnits(RexNode e) { final ExtractFinder finder = ExtractFinder.THREAD_INSTANCES.get(); try { assert finder.timeUnits.isEmpty() : "previous user did not clean up"; e.accept(finder); - return ImmutableSet.copyOf(finder.timeUnits); + return ImmutableSortedSet.copyOf(finder.timeUnits); } finally { finder.timeUnits.clear(); } } + /** Replaces calls to EXTRACT in an expression. */ + @VisibleForTesting + public static RexNode replaceTimeUnits(RexBuilder rexBuilder, RexNode e) { + final ImmutableSortedSet<TimeUnitRange> timeUnits = extractTimeUnits(e); + if (!timeUnits.contains(TimeUnitRange.YEAR)) { + // bail out if there is no year extract. + return e; + } + final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>(); + for (TimeUnitRange timeUnit : timeUnits) { + e = e.accept( + new ExtractShuttle(rexBuilder, timeUnit, operandRanges, timeUnits)); + } + return e; + } + /** Rule that converts EXTRACT in a Filter condition into a date range. */ @SuppressWarnings("WeakerAccess") public static class FilterDateRangeRule extends RelOptRule { @@ -144,17 +168,8 @@ public abstract class DateRangeRules { @Override public void onMatch(RelOptRuleCall call) { final Filter filter = call.rel(0); final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); - RexNode condition = filter.getCondition(); - final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>(); - Set<TimeUnitRange> timeUnitRangeSet = extractTimeUnits(condition); - if (!timeUnitRangeSet.contains(TimeUnitRange.YEAR)) { - // bail out if there is no year extract. - return; - } - for (TimeUnitRange timeUnit : timeUnitRangeSet) { - condition = condition.accept( - new ExtractShuttle(rexBuilder, timeUnit, operandRanges)); - } + final RexNode condition = + replaceTimeUnits(rexBuilder, filter.getCondition()); if (RexUtil.eq(condition, filter.getCondition())) { return; } @@ -194,19 +209,24 @@ public abstract class DateRangeRules { } /** Walks over an expression, replacing {@code EXTRACT} with date ranges. */ - public static class ExtractShuttle extends RexShuttle { + @VisibleForTesting + static class ExtractShuttle extends RexShuttle { private final RexBuilder rexBuilder; private final TimeUnitRange timeUnit; private final Map<String, RangeSet<Calendar>> operandRanges; private final Deque<RexCall> calls = new ArrayDeque<>(); + private final ImmutableSortedSet<TimeUnitRange> timeUnitRanges; - public ExtractShuttle(RexBuilder rexBuilder, TimeUnitRange timeUnit, - Map<String, RangeSet<Calendar>> operandRanges) { - this.rexBuilder = rexBuilder; - this.timeUnit = timeUnit; + @VisibleForTesting + ExtractShuttle(RexBuilder rexBuilder, TimeUnitRange timeUnit, + Map<String, RangeSet<Calendar>> operandRanges, + ImmutableSortedSet<TimeUnitRange> timeUnitRanges) { + this.rexBuilder = Preconditions.checkNotNull(rexBuilder); + this.timeUnit = Preconditions.checkNotNull(timeUnit); Bug.upgrade("Change type to Map<RexNode, RangeSet<Calendar>> when" + " [CALCITE-1367] is fixed"); - this.operandRanges = operandRanges; + this.operandRanges = Preconditions.checkNotNull(operandRanges); + this.timeUnitRanges = Preconditions.checkNotNull(timeUnitRanges); } @Override public RexNode visitCall(RexCall call) { @@ -220,14 +240,14 @@ public abstract class DateRangeRules { final RexNode op1 = call.operands.get(1); switch (op0.getKind()) { case LITERAL: - if (isExtractCall(op1)) { + if (isExtractCall(op1) && canRewriteExtract()) { return foo(call.getKind().reverse(), ((RexCall) op1).getOperands().get(1), (RexLiteral) op0); } } switch (op1.getKind()) { case LITERAL: - if (isExtractCall(op0)) { + if (isExtractCall(op0) && canRewriteExtract()) { return foo(call.getKind(), ((RexCall) op0).getOperands().get(1), (RexLiteral) op1); } @@ -243,6 +263,19 @@ public abstract class DateRangeRules { } } + private boolean canRewriteExtract() { + // We rely on timeUnits being sorted (so YEAR comes before MONTH + // before HOUR) and unique. If we have seen a predicate on YEAR, + // operandRanges will not be empty. This checks whether we can rewrite + // the "extract" condition. For example, in the condition + // + // extract(MONTH from time) = someValue + // OR extract(YEAR from time) = someValue + // + // we cannot rewrite extract on MONTH. + return timeUnit == TimeUnitRange.YEAR || !operandRanges.isEmpty(); + } + @Override protected List<RexNode> visitList(List<? extends RexNode> exprs, boolean[] update) { if (exprs.isEmpty()) { @@ -252,12 +285,23 @@ public abstract class DateRangeRules { case AND: return super.visitList(exprs, update); default: + if (timeUnit != TimeUnitRange.YEAR) { + // Already visited for lower TimeUnit ranges in the loop below. + // Early bail out. + //noinspection unchecked + return (List<RexNode>) exprs; + } final Map<String, RangeSet<Calendar>> save = ImmutableMap.copyOf(operandRanges); final ImmutableList.Builder<RexNode> clonedOperands = ImmutableList.builder(); for (RexNode operand : exprs) { - RexNode clonedOperand = operand.accept(this); + RexNode clonedOperand = operand; + for (TimeUnitRange timeUnit : timeUnitRanges) { + clonedOperand = clonedOperand.accept( + new ExtractShuttle(rexBuilder, timeUnit, operandRanges, + timeUnitRanges)); + } if ((clonedOperand != operand) && (update != null)) { update[0] = true; } http://git-wip-us.apache.org/repos/asf/calcite/blob/1e9fd38b/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java b/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java index 5fd746d..aec4d20 100644 --- a/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java +++ b/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java @@ -23,17 +23,15 @@ import org.apache.calcite.test.RexImplicationCheckerTest.Fixture; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Ordering; +import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.RangeSet; import org.hamcrest.CoreMatchers; import org.hamcrest.Matcher; - import org.junit.Test; import java.util.Calendar; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Set; @@ -180,6 +178,128 @@ public class DateRangeRulesTest { + " AND(>=($9, 2016-02-29), <($9, 2016-03-01))))")); } + /** Test case #1 for + * <a href="https://issues.apache.org/jira/browse/CALCITE-1658">[CALCITE-1658] + * DateRangeRules issues</a>. */ + @Test public void testExtractWithOrCondition1() { + // (EXTRACT(YEAR FROM __time) = 2000 + // AND EXTRACT(MONTH FROM __time) IN (2, 3, 5)) + // OR (EXTRACT(YEAR FROM __time) = 2001 + // AND EXTRACT(MONTH FROM __time) = 1) + final Fixture2 f = new Fixture2(); + checkDateRange(f, + f.or( + f.and(f.eq(f.exYear, f.literal(2000)), + f.or(f.eq(f.exMonth, f.literal(2)), + f.eq(f.exMonth, f.literal(3)), + f.eq(f.exMonth, f.literal(5)))), + f.and(f.eq(f.exYear, f.literal(2001)), + f.eq(f.exMonth, f.literal(1)))), + is("OR(AND(AND(>=($9, 2000-01-01), <($9, 2001-01-01))," + + " OR(AND(>=($9, 2000-02-01), <($9, 2000-03-01))," + + " AND(>=($9, 2000-03-01), <($9, 2000-04-01))," + + " AND(>=($9, 2000-05-01), <($9, 2000-06-01))))," + + " AND(AND(>=($9, 2001-01-01), <($9, 2002-01-01))," + + " AND(>=($9, 2001-01-01), <($9, 2001-02-01))))")); + } + + /** Test case #2 for + * <a href="https://issues.apache.org/jira/browse/CALCITE-1658">[CALCITE-1658] + * DateRangeRules issues</a>. */ + @Test public void testExtractWithOrCondition2() { + // EXTRACT(YEAR FROM __time) IN (2000, 2001) + // AND ((EXTRACT(YEAR FROM __time) = 2000 + // AND EXTRACT(MONTH FROM __time) IN (2, 3, 5)) + // OR (EXTRACT(YEAR FROM __time) = 2001 + // AND EXTRACT(MONTH FROM __time) = 1)) + final Fixture2 f = new Fixture2(); + checkDateRange(f, + f.and( + f.or(f.eq(f.exYear, f.literal(2000)), + f.eq(f.exYear, f.literal(2001))), + f.or( + f.and(f.eq(f.exYear, f.literal(2000)), + f.or(f.eq(f.exMonth, f.literal(2)), + f.eq(f.exMonth, f.literal(3)), + f.eq(f.exMonth, f.literal(5)))), + f.and(f.eq(f.exYear, f.literal(2001)), + f.eq(f.exMonth, f.literal(1))))), + is("AND(OR(AND(>=($9, 2000-01-01), <($9, 2001-01-01))," + + " AND(>=($9, 2001-01-01), <($9, 2002-01-01)))," + + " OR(AND(AND(>=($9, 2000-01-01), <($9, 2001-01-01))," + + " OR(AND(>=($9, 2000-02-01), <($9, 2000-03-01))," + + " AND(>=($9, 2000-03-01), <($9, 2000-04-01))," + + " AND(>=($9, 2000-05-01), <($9, 2000-06-01))))," + + " AND(AND(>=($9, 2001-01-01), <($9, 2002-01-01))," + + " AND(>=($9, 2001-01-01), <($9, 2001-02-01)))))")); + } + + /** Test case #3 for + * <a href="https://issues.apache.org/jira/browse/CALCITE-1658">[CALCITE-1658] + * DateRangeRules issues</a>. */ + @Test public void testExtractPartialRewriteForNotEqualsYear() { + // EXTRACT(YEAR FROM __time) <> 2000 + // AND ((EXTRACT(YEAR FROM __time) = 2000 + // AND EXTRACT(MONTH FROM __time) IN (2, 3, 5)) + // OR (EXTRACT(YEAR FROM __time) = 2001 + // AND EXTRACT(MONTH FROM __time) = 1)) + final Fixture2 f = new Fixture2(); + checkDateRange(f, + f.and( + f.ne(f.exYear, f.literal(2000)), + f.or( + f.and(f.eq(f.exYear, f.literal(2000)), + f.or(f.eq(f.exMonth, f.literal(2)), + f.eq(f.exMonth, f.literal(3)), + f.eq(f.exMonth, f.literal(5)))), + f.and(f.eq(f.exYear, f.literal(2001)), + f.eq(f.exMonth, f.literal(1))))), + is("AND(<>(EXTRACT(FLAG(YEAR), $9), 2000)," + + " OR(AND(AND(>=($9, 2000-01-01), <($9, 2001-01-01))," + + " OR(AND(>=($9, 2000-02-01), <($9, 2000-03-01))," + + " AND(>=($9, 2000-03-01), <($9, 2000-04-01))," + + " AND(>=($9, 2000-05-01), <($9, 2000-06-01))))," + + " AND(AND(>=($9, 2001-01-01), <($9, 2002-01-01))," + + " AND(>=($9, 2001-01-01), <($9, 2001-02-01)))))")); + } + + /** Test case #4 for + * <a href="https://issues.apache.org/jira/browse/CALCITE-1658">[CALCITE-1658] + * DateRangeRules issues</a>. */ + @Test public void testExtractPartialRewriteForInMonth() { + // EXTRACT(MONTH FROM __time) in (1, 2, 3, 4, 5) + // AND ((EXTRACT(YEAR FROM __time) = 2000 + // AND EXTRACT(MONTH FROM __time) IN (2, 3, 5)) + // OR (EXTRACT(YEAR FROM __time) = 2001 + // AND EXTRACT(MONTH FROM __time) = 1)) + final Fixture2 f = new Fixture2(); + checkDateRange(f, + f.and( + f.or(f.eq(f.exMonth, f.literal(1)), + f.eq(f.exMonth, f.literal(2)), + f.eq(f.exMonth, f.literal(3)), + f.eq(f.exMonth, f.literal(4)), + f.eq(f.exMonth, f.literal(5))), + f.or( + f.and(f.eq(f.exYear, f.literal(2000)), + f.or(f.eq(f.exMonth, f.literal(2)), + f.eq(f.exMonth, f.literal(3)), + f.eq(f.exMonth, f.literal(5)))), + f.and(f.eq(f.exYear, f.literal(2001)), + f.eq(f.exMonth, f.literal(1))))), + is("AND(OR(=(EXTRACT(FLAG(MONTH), $9), 1)," + + " =(EXTRACT(FLAG(MONTH), $9), 2)," + + " =(EXTRACT(FLAG(MONTH), $9), 3)," + + " =(EXTRACT(FLAG(MONTH), $9), 4)," + + " =(EXTRACT(FLAG(MONTH), $9), 5))," + + " OR(AND(AND(>=($9, 2000-01-01), <($9, 2001-01-01))," + + " OR(AND(>=($9, 2000-02-01), <($9, 2000-03-01))," + + " AND(>=($9, 2000-03-01), <($9, 2000-04-01))," + + " AND(>=($9, 2000-05-01), <($9, 2000-06-01))))," + + " AND(AND(>=($9, 2001-01-01), <($9, 2002-01-01))," + + " AND(>=($9, 2001-01-01), <($9, 2001-02-01)))))")); + } + private static Set<TimeUnitRange> set(TimeUnitRange... es) { return ImmutableSet.copyOf(es); } @@ -191,16 +311,12 @@ public class DateRangeRulesTest { private void checkDateRange(Fixture f, RexNode e, Matcher<String> matcher, Matcher<String> simplifyMatcher) { final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>(); - // We rely on the collection being sorted (so YEAR comes before MONTH - // before HOUR) and unique. A predicate on MONTH is not useful if there is - // no predicate on YEAR. Then when we apply the predicate on DAY it doesn't - // generate hundreds of ranges we'll later throw away. - final List<TimeUnitRange> timeUnits = - Ordering.natural().sortedCopy(DateRangeRules.extractTimeUnits(e)); + final ImmutableSortedSet<TimeUnitRange> timeUnits = + DateRangeRules.extractTimeUnits(e); for (TimeUnitRange timeUnit : timeUnits) { e = e.accept( new DateRangeRules.ExtractShuttle(f.rexBuilder, timeUnit, - operandRanges)); + operandRanges, timeUnits)); } assertThat(e.toString(), matcher); final RexNode e2 = f.simplify.simplify(e); http://git-wip-us.apache.org/repos/asf/calcite/blob/1e9fd38b/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java ---------------------------------------------------------------------- diff --git a/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java b/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java index 58f5b9e..0b5d034 100644 --- a/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java +++ b/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java @@ -26,17 +26,13 @@ import org.apache.calcite.util.TimestampString; import org.apache.calcite.util.Util; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Ordering; -import com.google.common.collect.RangeSet; import org.hamcrest.Matcher; import org.joda.time.Interval; import org.junit.Test; import java.util.Calendar; -import java.util.HashMap; import java.util.List; -import java.util.Map; import static org.hamcrest.core.Is.is; import static org.hamcrest.core.IsNull.notNullValue; @@ -145,18 +141,7 @@ public class DruidDateRangeRulesTest { // HiveRexExecutorImpl is used in Hive private void checkDateRangeNoSimplify(Fixture f, RexNode e, Matcher<String> intervalMatcher) { - final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>(); - // We rely on the collection being sorted (so YEAR comes before MONTH - // before HOUR) and unique. A predicate on MONTH is not useful if there is - // no predicate on YEAR. Then when we apply the predicate on DAY it doesn't - // generate hundreds of ranges we'll later throw away. - final List<TimeUnitRange> timeUnits = - Ordering.natural().sortedCopy(DateRangeRules.extractTimeUnits(e)); - for (TimeUnitRange timeUnit : timeUnits) { - e = e.accept( - new DateRangeRules.ExtractShuttle(f.rexBuilder, timeUnit, - operandRanges)); - } + e = DateRangeRules.replaceTimeUnits(f.rexBuilder, e); final List<Interval> intervals = DruidDateTimeUtils.createInterval(e, "UTC"); assertThat(intervals, notNullValue()); @@ -164,18 +149,7 @@ public class DruidDateRangeRulesTest { } private void checkDateRange(Fixture f, RexNode e, Matcher<String> intervalMatcher) { - final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>(); - // We rely on the collection being sorted (so YEAR comes before MONTH - // before HOUR) and unique. A predicate on MONTH is not useful if there is - // no predicate on YEAR. Then when we apply the predicate on DAY it doesn't - // generate hundreds of ranges we'll later throw away. - final List<TimeUnitRange> timeUnits = - Ordering.natural().sortedCopy(DateRangeRules.extractTimeUnits(e)); - for (TimeUnitRange timeUnit : timeUnits) { - e = e.accept( - new DateRangeRules.ExtractShuttle(f.rexBuilder, timeUnit, - operandRanges)); - } + e = DateRangeRules.replaceTimeUnits(f.rexBuilder, e); final RexNode e2 = f.simplify.simplify(e); List<Interval> intervals = DruidDateTimeUtils.createInterval(e2, "UTC");
