[CALCITE-1658] DateRangeRules incorrectly rewrites EXTRACT calls (Nishant 
Bangarwa)

Fix several issues where DateRangeRules incorrectly rewrites EXTRACT
calls, causing wrong results.

Close apache/calcite#596

Close apache/calcite#587 (unrelated, but forgot to close it in [CALCITE-2102])


Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/1e9fd38b
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/1e9fd38b
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/1e9fd38b

Branch: refs/heads/master
Commit: 1e9fd38ba7b702675a066f6bd712d03b7b07410e
Parents: 2a171fb
Author: Nishant <[email protected]>
Authored: Mon Jan 1 16:15:32 2018 +0530
Committer: Julian Hyde <[email protected]>
Committed: Tue Jan 2 14:24:51 2018 -0800

----------------------------------------------------------------------
 .../calcite/rel/rules/DateRangeRules.java       |  94 +++++++++----
 .../calcite/rel/rules/DateRangeRulesTest.java   | 136 +++++++++++++++++--
 .../calcite/test/DruidDateRangeRulesTest.java   |  30 +---
 3 files changed, 197 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/calcite/blob/1e9fd38b/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java 
b/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java
index 5b7cf95..9288ce4 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/DateRangeRules.java
@@ -38,12 +38,14 @@ import org.apache.calcite.util.Bug;
 import org.apache.calcite.util.DateString;
 import org.apache.calcite.util.Util;
 
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
 import com.google.common.collect.BoundType;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableRangeSet;
-import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.Range;
 import com.google.common.collect.RangeSet;
 import com.google.common.collect.TreeRangeSet;
@@ -120,19 +122,41 @@ public abstract class DateRangeRules {
           .put(TimeUnitRange.MICROSECOND, TimeUnitRange.SECOND)
           .build();
 
-  /** Returns whether an expression contains one or more calls to the
-   * {@code EXTRACT} function. */
-  public static Set<TimeUnitRange> extractTimeUnits(RexNode e) {
+  /** Tests whether an expression contains one or more calls to the
+   * {@code EXTRACT} function, and if so, returns the time units used.
+   *
+   * <p>The result is an immutable set in natural order. This is important,
+   * because code relies on the collection being sorted (so YEAR comes before
+   * MONTH before HOUR) and unique. A predicate on MONTH is not useful if there
+   * is no predicate on YEAR. Then when we apply the predicate on DAY it 
doesn't
+   * generate hundreds of ranges we'll later throw away. */
+  static ImmutableSortedSet<TimeUnitRange> extractTimeUnits(RexNode e) {
     final ExtractFinder finder = ExtractFinder.THREAD_INSTANCES.get();
     try {
       assert finder.timeUnits.isEmpty() : "previous user did not clean up";
       e.accept(finder);
-      return ImmutableSet.copyOf(finder.timeUnits);
+      return ImmutableSortedSet.copyOf(finder.timeUnits);
     } finally {
       finder.timeUnits.clear();
     }
   }
 
+  /** Replaces calls to EXTRACT in an expression. */
+  @VisibleForTesting
+  public static RexNode replaceTimeUnits(RexBuilder rexBuilder, RexNode e) {
+    final ImmutableSortedSet<TimeUnitRange> timeUnits = extractTimeUnits(e);
+    if (!timeUnits.contains(TimeUnitRange.YEAR)) {
+      // bail out if there is no year extract.
+      return e;
+    }
+    final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>();
+    for (TimeUnitRange timeUnit : timeUnits) {
+      e = e.accept(
+          new ExtractShuttle(rexBuilder, timeUnit, operandRanges, timeUnits));
+    }
+    return e;
+  }
+
   /** Rule that converts EXTRACT in a Filter condition into a date range. */
   @SuppressWarnings("WeakerAccess")
   public static class FilterDateRangeRule extends RelOptRule {
@@ -144,17 +168,8 @@ public abstract class DateRangeRules {
     @Override public void onMatch(RelOptRuleCall call) {
       final Filter filter = call.rel(0);
       final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
-      RexNode condition = filter.getCondition();
-      final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>();
-      Set<TimeUnitRange> timeUnitRangeSet = extractTimeUnits(condition);
-      if (!timeUnitRangeSet.contains(TimeUnitRange.YEAR)) {
-        // bail out if there is no year extract.
-        return;
-      }
-      for (TimeUnitRange timeUnit : timeUnitRangeSet) {
-        condition = condition.accept(
-            new ExtractShuttle(rexBuilder, timeUnit, operandRanges));
-      }
+      final RexNode condition =
+          replaceTimeUnits(rexBuilder, filter.getCondition());
       if (RexUtil.eq(condition, filter.getCondition())) {
         return;
       }
@@ -194,19 +209,24 @@ public abstract class DateRangeRules {
   }
 
   /** Walks over an expression, replacing {@code EXTRACT} with date ranges. */
-  public static class ExtractShuttle extends RexShuttle {
+  @VisibleForTesting
+  static class ExtractShuttle extends RexShuttle {
     private final RexBuilder rexBuilder;
     private final TimeUnitRange timeUnit;
     private final Map<String, RangeSet<Calendar>> operandRanges;
     private final Deque<RexCall> calls = new ArrayDeque<>();
+    private final ImmutableSortedSet<TimeUnitRange> timeUnitRanges;
 
-    public ExtractShuttle(RexBuilder rexBuilder, TimeUnitRange timeUnit,
-        Map<String, RangeSet<Calendar>> operandRanges) {
-      this.rexBuilder = rexBuilder;
-      this.timeUnit = timeUnit;
+    @VisibleForTesting
+    ExtractShuttle(RexBuilder rexBuilder, TimeUnitRange timeUnit,
+        Map<String, RangeSet<Calendar>> operandRanges,
+        ImmutableSortedSet<TimeUnitRange> timeUnitRanges) {
+      this.rexBuilder = Preconditions.checkNotNull(rexBuilder);
+      this.timeUnit = Preconditions.checkNotNull(timeUnit);
       Bug.upgrade("Change type to Map<RexNode, RangeSet<Calendar>> when"
           + " [CALCITE-1367] is fixed");
-      this.operandRanges = operandRanges;
+      this.operandRanges = Preconditions.checkNotNull(operandRanges);
+      this.timeUnitRanges = Preconditions.checkNotNull(timeUnitRanges);
     }
 
     @Override public RexNode visitCall(RexCall call) {
@@ -220,14 +240,14 @@ public abstract class DateRangeRules {
         final RexNode op1 = call.operands.get(1);
         switch (op0.getKind()) {
         case LITERAL:
-          if (isExtractCall(op1)) {
+          if (isExtractCall(op1) && canRewriteExtract()) {
             return foo(call.getKind().reverse(),
                 ((RexCall) op1).getOperands().get(1), (RexLiteral) op0);
           }
         }
         switch (op1.getKind()) {
         case LITERAL:
-          if (isExtractCall(op0)) {
+          if (isExtractCall(op0) && canRewriteExtract()) {
             return foo(call.getKind(), ((RexCall) op0).getOperands().get(1),
                 (RexLiteral) op1);
           }
@@ -243,6 +263,19 @@ public abstract class DateRangeRules {
       }
     }
 
+    private boolean canRewriteExtract() {
+      // We rely on timeUnits being sorted (so YEAR comes before MONTH
+      // before HOUR) and unique. If we have seen a predicate on YEAR,
+      // operandRanges will not be empty. This checks whether we can rewrite
+      // the "extract" condition. For example, in the condition
+      //
+      //   extract(MONTH from time) = someValue
+      //   OR extract(YEAR from time) = someValue
+      //
+      // we cannot rewrite extract on MONTH.
+      return timeUnit == TimeUnitRange.YEAR || !operandRanges.isEmpty();
+    }
+
     @Override protected List<RexNode> visitList(List<? extends RexNode> exprs,
         boolean[] update) {
       if (exprs.isEmpty()) {
@@ -252,12 +285,23 @@ public abstract class DateRangeRules {
       case AND:
         return super.visitList(exprs, update);
       default:
+        if (timeUnit != TimeUnitRange.YEAR) {
+          // Already visited for lower TimeUnit ranges in the loop below.
+          // Early bail out.
+          //noinspection unchecked
+          return (List<RexNode>) exprs;
+        }
         final Map<String, RangeSet<Calendar>> save =
             ImmutableMap.copyOf(operandRanges);
         final ImmutableList.Builder<RexNode> clonedOperands =
             ImmutableList.builder();
         for (RexNode operand : exprs) {
-          RexNode clonedOperand = operand.accept(this);
+          RexNode clonedOperand = operand;
+          for (TimeUnitRange timeUnit : timeUnitRanges) {
+            clonedOperand = clonedOperand.accept(
+                new ExtractShuttle(rexBuilder, timeUnit, operandRanges,
+                    timeUnitRanges));
+          }
           if ((clonedOperand != operand) && (update != null)) {
             update[0] = true;
           }

http://git-wip-us.apache.org/repos/asf/calcite/blob/1e9fd38b/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java 
b/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java
index 5fd746d..aec4d20 100644
--- a/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/rel/rules/DateRangeRulesTest.java
@@ -23,17 +23,15 @@ import 
org.apache.calcite.test.RexImplicationCheckerTest.Fixture;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Ordering;
+import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.RangeSet;
 
 import org.hamcrest.CoreMatchers;
 import org.hamcrest.Matcher;
-
 import org.junit.Test;
 
 import java.util.Calendar;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
@@ -180,6 +178,128 @@ public class DateRangeRulesTest {
             + " AND(>=($9, 2016-02-29), <($9, 2016-03-01))))"));
   }
 
+  /** Test case #1 for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-1658";>[CALCITE-1658]
+   * DateRangeRules issues</a>. */
+  @Test public void testExtractWithOrCondition1() {
+    // (EXTRACT(YEAR FROM __time) = 2000
+    //    AND EXTRACT(MONTH FROM __time) IN (2, 3, 5))
+    // OR (EXTRACT(YEAR FROM __time) = 2001
+    //    AND EXTRACT(MONTH FROM __time) = 1)
+    final Fixture2 f = new Fixture2();
+    checkDateRange(f,
+        f.or(
+            f.and(f.eq(f.exYear, f.literal(2000)),
+                f.or(f.eq(f.exMonth, f.literal(2)),
+                    f.eq(f.exMonth, f.literal(3)),
+                    f.eq(f.exMonth, f.literal(5)))),
+            f.and(f.eq(f.exYear, f.literal(2001)),
+                f.eq(f.exMonth, f.literal(1)))),
+        is("OR(AND(AND(>=($9, 2000-01-01), <($9, 2001-01-01)),"
+            + " OR(AND(>=($9, 2000-02-01), <($9, 2000-03-01)),"
+            + " AND(>=($9, 2000-03-01), <($9, 2000-04-01)),"
+            + " AND(>=($9, 2000-05-01), <($9, 2000-06-01)))),"
+            + " AND(AND(>=($9, 2001-01-01), <($9, 2002-01-01)),"
+            + " AND(>=($9, 2001-01-01), <($9, 2001-02-01))))"));
+  }
+
+  /** Test case #2 for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-1658";>[CALCITE-1658]
+   * DateRangeRules issues</a>. */
+  @Test public void testExtractWithOrCondition2() {
+    // EXTRACT(YEAR FROM __time) IN (2000, 2001)
+    //   AND ((EXTRACT(YEAR FROM __time) = 2000
+    //         AND EXTRACT(MONTH FROM __time) IN (2, 3, 5))
+    //     OR (EXTRACT(YEAR FROM __time) = 2001
+    //       AND EXTRACT(MONTH FROM __time) = 1))
+    final Fixture2 f = new Fixture2();
+    checkDateRange(f,
+        f.and(
+            f.or(f.eq(f.exYear, f.literal(2000)),
+                f.eq(f.exYear, f.literal(2001))),
+            f.or(
+                f.and(f.eq(f.exYear, f.literal(2000)),
+                    f.or(f.eq(f.exMonth, f.literal(2)),
+                        f.eq(f.exMonth, f.literal(3)),
+                        f.eq(f.exMonth, f.literal(5)))),
+                f.and(f.eq(f.exYear, f.literal(2001)),
+                    f.eq(f.exMonth, f.literal(1))))),
+        is("AND(OR(AND(>=($9, 2000-01-01), <($9, 2001-01-01)),"
+            + " AND(>=($9, 2001-01-01), <($9, 2002-01-01))),"
+            + " OR(AND(AND(>=($9, 2000-01-01), <($9, 2001-01-01)),"
+            + " OR(AND(>=($9, 2000-02-01), <($9, 2000-03-01)),"
+            + " AND(>=($9, 2000-03-01), <($9, 2000-04-01)),"
+            + " AND(>=($9, 2000-05-01), <($9, 2000-06-01)))),"
+            + " AND(AND(>=($9, 2001-01-01), <($9, 2002-01-01)),"
+            + " AND(>=($9, 2001-01-01), <($9, 2001-02-01)))))"));
+  }
+
+  /** Test case #3 for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-1658";>[CALCITE-1658]
+   * DateRangeRules issues</a>. */
+  @Test public void testExtractPartialRewriteForNotEqualsYear() {
+    // EXTRACT(YEAR FROM __time) <> 2000
+    // AND ((EXTRACT(YEAR FROM __time) = 2000
+    //     AND EXTRACT(MONTH FROM __time) IN (2, 3, 5))
+    //   OR (EXTRACT(YEAR FROM __time) = 2001
+    //     AND EXTRACT(MONTH FROM __time) = 1))
+    final Fixture2 f = new Fixture2();
+    checkDateRange(f,
+        f.and(
+            f.ne(f.exYear, f.literal(2000)),
+            f.or(
+                f.and(f.eq(f.exYear, f.literal(2000)),
+                    f.or(f.eq(f.exMonth, f.literal(2)),
+                        f.eq(f.exMonth, f.literal(3)),
+                        f.eq(f.exMonth, f.literal(5)))),
+                f.and(f.eq(f.exYear, f.literal(2001)),
+                    f.eq(f.exMonth, f.literal(1))))),
+        is("AND(<>(EXTRACT(FLAG(YEAR), $9), 2000),"
+            + " OR(AND(AND(>=($9, 2000-01-01), <($9, 2001-01-01)),"
+            + " OR(AND(>=($9, 2000-02-01), <($9, 2000-03-01)),"
+            + " AND(>=($9, 2000-03-01), <($9, 2000-04-01)),"
+            + " AND(>=($9, 2000-05-01), <($9, 2000-06-01)))),"
+            + " AND(AND(>=($9, 2001-01-01), <($9, 2002-01-01)),"
+            + " AND(>=($9, 2001-01-01), <($9, 2001-02-01)))))"));
+  }
+
+  /** Test case #4 for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-1658";>[CALCITE-1658]
+   * DateRangeRules issues</a>. */
+  @Test public void testExtractPartialRewriteForInMonth() {
+    // EXTRACT(MONTH FROM __time) in (1, 2, 3, 4, 5)
+    // AND ((EXTRACT(YEAR FROM __time) = 2000
+    //     AND EXTRACT(MONTH FROM __time) IN (2, 3, 5))
+    //   OR (EXTRACT(YEAR FROM __time) = 2001
+    //     AND EXTRACT(MONTH FROM __time) = 1))
+    final Fixture2 f = new Fixture2();
+    checkDateRange(f,
+        f.and(
+            f.or(f.eq(f.exMonth, f.literal(1)),
+                f.eq(f.exMonth, f.literal(2)),
+                f.eq(f.exMonth, f.literal(3)),
+                f.eq(f.exMonth, f.literal(4)),
+                f.eq(f.exMonth, f.literal(5))),
+            f.or(
+                f.and(f.eq(f.exYear, f.literal(2000)),
+                    f.or(f.eq(f.exMonth, f.literal(2)),
+                        f.eq(f.exMonth, f.literal(3)),
+                        f.eq(f.exMonth, f.literal(5)))),
+                f.and(f.eq(f.exYear, f.literal(2001)),
+                    f.eq(f.exMonth, f.literal(1))))),
+        is("AND(OR(=(EXTRACT(FLAG(MONTH), $9), 1),"
+            + " =(EXTRACT(FLAG(MONTH), $9), 2),"
+            + " =(EXTRACT(FLAG(MONTH), $9), 3),"
+            + " =(EXTRACT(FLAG(MONTH), $9), 4),"
+            + " =(EXTRACT(FLAG(MONTH), $9), 5)),"
+            + " OR(AND(AND(>=($9, 2000-01-01), <($9, 2001-01-01)),"
+            + " OR(AND(>=($9, 2000-02-01), <($9, 2000-03-01)),"
+            + " AND(>=($9, 2000-03-01), <($9, 2000-04-01)),"
+            + " AND(>=($9, 2000-05-01), <($9, 2000-06-01)))),"
+            + " AND(AND(>=($9, 2001-01-01), <($9, 2002-01-01)),"
+            + " AND(>=($9, 2001-01-01), <($9, 2001-02-01)))))"));
+  }
+
   private static Set<TimeUnitRange> set(TimeUnitRange... es) {
     return ImmutableSet.copyOf(es);
   }
@@ -191,16 +311,12 @@ public class DateRangeRulesTest {
   private void checkDateRange(Fixture f, RexNode e, Matcher<String> matcher,
       Matcher<String> simplifyMatcher) {
     final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>();
-    // We rely on the collection being sorted (so YEAR comes before MONTH
-    // before HOUR) and unique. A predicate on MONTH is not useful if there is
-    // no predicate on YEAR. Then when we apply the predicate on DAY it doesn't
-    // generate hundreds of ranges we'll later throw away.
-    final List<TimeUnitRange> timeUnits =
-        Ordering.natural().sortedCopy(DateRangeRules.extractTimeUnits(e));
+    final ImmutableSortedSet<TimeUnitRange> timeUnits =
+        DateRangeRules.extractTimeUnits(e);
     for (TimeUnitRange timeUnit : timeUnits) {
       e = e.accept(
           new DateRangeRules.ExtractShuttle(f.rexBuilder, timeUnit,
-              operandRanges));
+              operandRanges, timeUnits));
     }
     assertThat(e.toString(), matcher);
     final RexNode e2 = f.simplify.simplify(e);

http://git-wip-us.apache.org/repos/asf/calcite/blob/1e9fd38b/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java
----------------------------------------------------------------------
diff --git 
a/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java 
b/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java
index 58f5b9e..0b5d034 100644
--- a/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java
+++ b/druid/src/test/java/org/apache/calcite/test/DruidDateRangeRulesTest.java
@@ -26,17 +26,13 @@ import org.apache.calcite.util.TimestampString;
 import org.apache.calcite.util.Util;
 
 import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Ordering;
-import com.google.common.collect.RangeSet;
 
 import org.hamcrest.Matcher;
 import org.joda.time.Interval;
 import org.junit.Test;
 
 import java.util.Calendar;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 import static org.hamcrest.core.Is.is;
 import static org.hamcrest.core.IsNull.notNullValue;
@@ -145,18 +141,7 @@ public class DruidDateRangeRulesTest {
   // HiveRexExecutorImpl is used in Hive
   private void checkDateRangeNoSimplify(Fixture f, RexNode e,
       Matcher<String> intervalMatcher) {
-    final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>();
-    // We rely on the collection being sorted (so YEAR comes before MONTH
-    // before HOUR) and unique. A predicate on MONTH is not useful if there is
-    // no predicate on YEAR. Then when we apply the predicate on DAY it doesn't
-    // generate hundreds of ranges we'll later throw away.
-    final List<TimeUnitRange> timeUnits =
-        Ordering.natural().sortedCopy(DateRangeRules.extractTimeUnits(e));
-    for (TimeUnitRange timeUnit : timeUnits) {
-      e = e.accept(
-          new DateRangeRules.ExtractShuttle(f.rexBuilder, timeUnit,
-              operandRanges));
-    }
+    e = DateRangeRules.replaceTimeUnits(f.rexBuilder, e);
     final List<Interval> intervals =
         DruidDateTimeUtils.createInterval(e, "UTC");
     assertThat(intervals, notNullValue());
@@ -164,18 +149,7 @@ public class DruidDateRangeRulesTest {
   }
 
   private void checkDateRange(Fixture f, RexNode e, Matcher<String> 
intervalMatcher) {
-    final Map<String, RangeSet<Calendar>> operandRanges = new HashMap<>();
-    // We rely on the collection being sorted (so YEAR comes before MONTH
-    // before HOUR) and unique. A predicate on MONTH is not useful if there is
-    // no predicate on YEAR. Then when we apply the predicate on DAY it doesn't
-    // generate hundreds of ranges we'll later throw away.
-    final List<TimeUnitRange> timeUnits =
-        Ordering.natural().sortedCopy(DateRangeRules.extractTimeUnits(e));
-    for (TimeUnitRange timeUnit : timeUnits) {
-      e = e.accept(
-          new DateRangeRules.ExtractShuttle(f.rexBuilder, timeUnit,
-              operandRanges));
-    }
+    e = DateRangeRules.replaceTimeUnits(f.rexBuilder, e);
     final RexNode e2 = f.simplify.simplify(e);
     List<Interval> intervals =
         DruidDateTimeUtils.createInterval(e2, "UTC");

Reply via email to