This is an automated email from the ASF dual-hosted git repository.

libenchao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new a921eb26a2 [CALCITE-5971] Add SampleToFilterRule to rewrite bernoulli Sample to Filter
a921eb26a2 is described below

commit a921eb26a23435b05fdee1d404e9b4ee2f65421d
Author: shenlang <[email protected]>
AuthorDate: Thu Sep 7 20:16:30 2023 +0800

    [CALCITE-5971] Add SampleToFilterRule to rewrite bernoulli Sample to Filter
    
    Close apache/calcite#3410
---
 .../java/org/apache/calcite/plan/RelOptRules.java  |  3 +-
 .../org/apache/calcite/rel/rules/CoreRules.java    |  5 ++
 .../calcite/rel/rules/SampleToFilterRule.java      | 97 ++++++++++++++++++++++
 .../org/apache/calcite/test/RelOptRulesTest.java   | 20 +++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 38 +++++++++
 5 files changed, 162 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/calcite/plan/RelOptRules.java b/core/src/main/java/org/apache/calcite/plan/RelOptRules.java
index 20956f9fe6..91b023bca7 100644
--- a/core/src/main/java/org/apache/calcite/plan/RelOptRules.java
+++ b/core/src/main/java/org/apache/calcite/plan/RelOptRules.java
@@ -87,7 +87,8 @@ public class RelOptRules {
           CoreRules.SORT_REMOVE_CONSTANT_KEYS,
           CoreRules.SORT_UNION_TRANSPOSE,
           CoreRules.EXCHANGE_REMOVE_CONSTANT_KEYS,
-          CoreRules.SORT_EXCHANGE_REMOVE_CONSTANT_KEYS);
+          CoreRules.SORT_EXCHANGE_REMOVE_CONSTANT_KEYS,
+          CoreRules.SAMPLE_TO_FILTER);
 
   static final List<RelOptRule> ABSTRACT_RULES =
       ImmutableList.of(CoreRules.AGGREGATE_ANY_PULL_UP_CONSTANTS,
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
index 50411d2462..43b1eb830a 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
@@ -25,6 +25,7 @@ import org.apache.calcite.rel.core.Intersect;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.Minus;
 import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sample;
 import org.apache.calcite.rel.core.SetOp;
 import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.core.TableScan;
@@ -756,6 +757,10 @@ public class CoreRules {
   public static final UnionToDistinctRule UNION_TO_DISTINCT =
       UnionToDistinctRule.Config.DEFAULT.toRule();
 
+  /** Rule that rewrite {@link Sample} which is bernoulli to the {@link Filter}. */
+  public static final SampleToFilterRule SAMPLE_TO_FILTER =
+      SampleToFilterRule.Config.DEFAULT.toRule();
+
   /** Rule that applies an {@link Aggregate} to a {@link Values} (currently just
    * an empty {@code Values}). */
   public static final AggregateValuesRule AGGREGATE_VALUES =
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SampleToFilterRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SampleToFilterRule.java
new file mode 100644
index 0000000000..6d3d952317
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SampleToFilterRule.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Sample;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.tools.RelBuilder;
+
+import org.immutables.value.Value;
+
+/**
+ * This rule rewrite {@link Sample} which is bernoulli to the {@link Filter}.
+ *
+ * <p> For example:
+ * <blockquote><pre>{@code
+ *    select deptno from "scott".dept tablesample bernoulli(50);
+ * }</pre></blockquote>
+ *
+ * <p> will convert to:
+ * <blockquote><pre>{@code
+ *    select deptno from "scott".dept where rand() < 0.5;
+ * }</pre></blockquote>
+ *
+ * <p> The sql:
+ * <blockquote><pre>{@code
+ *    select deptno from "scott".dept tablesample bernoulli(50) REPEATABLE(10);
+ * }</pre></blockquote>
+ *
+ * <p> will convert to:
+ * <blockquote><pre>{@code
+ *    select deptno from "scott".dept where rand(10) < 0.5;
+ * }</pre></blockquote>
+ *
+ * @see CoreRules#SAMPLE_TO_FILTER
+ */
+@Value.Enclosing
+public class SampleToFilterRule
+    extends RelRule<SampleToFilterRule.Config>
+    implements TransformationRule {
+
+  protected SampleToFilterRule(final SampleToFilterRule.Config config) {
+    super(config);
+  }
+
+  @Override public void onMatch(final RelOptRuleCall call) {
+    final Sample sample = call.rel(0);
+    final RelBuilder relBuilder = call.builder();
+    relBuilder.push(sample.getInput());
+
+    RexNode randFunc = sample.getSamplingParameters().isRepeatable()
+        ? relBuilder.call(SqlStdOperatorTable.RAND,
+        relBuilder.literal(sample.getSamplingParameters().getRepeatableSeed()))
+        : relBuilder.call(SqlStdOperatorTable.RAND);
+
+    relBuilder.filter(
+        relBuilder.lessThan(randFunc,
+            relBuilder.literal(sample.getSamplingParameters().sampleRate)));
+    call.transformTo(relBuilder.build());
+  }
+
+  /** Rule configuration. */
+  @Value.Immutable
+  public interface Config extends RelRule.Config {
+    SampleToFilterRule.Config DEFAULT = ImmutableSampleToFilterRule.Config.of()
+        .withOperandFor(Sample.class);
+
+    @Override default SampleToFilterRule toRule() {
+      return new SampleToFilterRule(this);
+    }
+
+    /** Defines an operand tree for the given classes. */
+    default SampleToFilterRule.Config withOperandFor(Class<? extends Sample> sampleClass) {
+      return withOperandSupplier(b ->
+          b.operand(sampleClass)
+              .predicate(sample -> sample.getSamplingParameters().isBernoulli()).anyInputs())
+          .as(SampleToFilterRule.Config.class);
+    }
+  }
+}
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index eb46822eb3..dbdde0e7b4 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -262,6 +262,26 @@ class RelOptRulesTest extends RelOptTestBase {
         .check();
   }
 
+  /**
+   * Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-5971">[CALCITE-5971]
+   * Add the RelRule to rewrite the bernoulli sample as Filter</a>. */
+  @Test void testSampleToFilter() {
+    final String sql = "select deptno from emp tablesample bernoulli(50)";
+    sql(sql)
+        .withRule(CoreRules.SAMPLE_TO_FILTER)
+        .check();
+  }
+
+  /**
+   * Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-5971">[CALCITE-5971]
+   * Add the RelRule to rewrite the bernoulli sample as Filter</a>. */
+  @Test void testSampleToFilterWithSeed() {
+    final String sql = "select deptno from emp tablesample bernoulli(50) REPEATABLE(10)";
+    sql(sql)
+        .withRule(CoreRules.SAMPLE_TO_FILTER)
+        .check();
+  }
+
   /**
    * Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-5813">[CALCITE-5813]
    * Type inference for sql functions REPEAT, SPACE, XML_TRANSFORM,
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 86edca1ab2..8ab7d8bed0 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -12706,6 +12706,44 @@ LogicalProject(EXPR$0=[1])
       LogicalFilter(condition=[=($1, 'Charlie')])
         LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
       LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testSampleToFilter">
+    <Resource name="sql">
+      <![CDATA[select deptno from emp tablesample bernoulli(50)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$7])
+  Sample(mode=[bernoulli], rate=[0.5], repeatableSeed=[-])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$7])
+  LogicalFilter(condition=[<(RAND(), 0.5:DECIMAL(2, 1))])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testSampleToFilterWithSeed">
+    <Resource name="sql">
+      <![CDATA[select deptno from emp tablesample bernoulli(50) REPEATABLE(10)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$7])
+  Sample(mode=[bernoulli], rate=[0.5], repeatableSeed=[10])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$7])
+  LogicalFilter(condition=[<(RAND(10), 0.5:DECIMAL(2, 1))])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
     </Resource>
   </TestCase>

Reply via email to