This is an automated email from the ASF dual-hosted git repository.
libenchao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new a921eb26a2 [CALCITE-5971] Add SampleToFilterRule to rewrite bernoulli
Sample to Filter
a921eb26a2 is described below
commit a921eb26a23435b05fdee1d404e9b4ee2f65421d
Author: shenlang <[email protected]>
AuthorDate: Thu Sep 7 20:16:30 2023 +0800
[CALCITE-5971] Add SampleToFilterRule to rewrite bernoulli Sample to Filter
Close apache/calcite#3410
---
.../java/org/apache/calcite/plan/RelOptRules.java | 3 +-
.../org/apache/calcite/rel/rules/CoreRules.java | 5 ++
.../calcite/rel/rules/SampleToFilterRule.java | 97 ++++++++++++++++++++++
.../org/apache/calcite/test/RelOptRulesTest.java | 20 +++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 38 +++++++++
5 files changed, 162 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/calcite/plan/RelOptRules.java b/core/src/main/java/org/apache/calcite/plan/RelOptRules.java
index 20956f9fe6..91b023bca7 100644
--- a/core/src/main/java/org/apache/calcite/plan/RelOptRules.java
+++ b/core/src/main/java/org/apache/calcite/plan/RelOptRules.java
@@ -87,7 +87,8 @@ public class RelOptRules {
CoreRules.SORT_REMOVE_CONSTANT_KEYS,
CoreRules.SORT_UNION_TRANSPOSE,
CoreRules.EXCHANGE_REMOVE_CONSTANT_KEYS,
- CoreRules.SORT_EXCHANGE_REMOVE_CONSTANT_KEYS);
+ CoreRules.SORT_EXCHANGE_REMOVE_CONSTANT_KEYS,
+ CoreRules.SAMPLE_TO_FILTER);
static final List<RelOptRule> ABSTRACT_RULES =
ImmutableList.of(CoreRules.AGGREGATE_ANY_PULL_UP_CONSTANTS,
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
index 50411d2462..43b1eb830a 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
@@ -25,6 +25,7 @@ import org.apache.calcite.rel.core.Intersect;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.Minus;
import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sample;
import org.apache.calcite.rel.core.SetOp;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableScan;
@@ -756,6 +757,10 @@ public class CoreRules {
public static final UnionToDistinctRule UNION_TO_DISTINCT =
UnionToDistinctRule.Config.DEFAULT.toRule();
+ /** Rule that rewrites a {@link Sample} that is Bernoulli into a {@link Filter}. */
+ public static final SampleToFilterRule SAMPLE_TO_FILTER =
+ SampleToFilterRule.Config.DEFAULT.toRule();
+
/** Rule that applies an {@link Aggregate} to a {@link Values} (currently just
* an empty {@code Values}). */
public static final AggregateValuesRule AGGREGATE_VALUES =
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SampleToFilterRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SampleToFilterRule.java
new file mode 100644
index 0000000000..6d3d952317
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SampleToFilterRule.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Sample;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.tools.RelBuilder;
+
+import org.immutables.value.Value;
+
+/**
+ * This rule rewrites a {@link Sample} that is Bernoulli into a {@link Filter}.
+ *
+ * <p> For example:
+ * <blockquote><pre>{@code
+ * select deptno from "scott".dept tablesample bernoulli(50);
+ * }</pre></blockquote>
+ *
+ * <p> will convert to:
+ * <blockquote><pre>{@code
+ * select deptno from "scott".dept where rand() < 0.5;
+ * }</pre></blockquote>
+ *
+ * <p> The sql:
+ * <blockquote><pre>{@code
+ * select deptno from "scott".dept tablesample bernoulli(50) REPEATABLE(10);
+ * }</pre></blockquote>
+ *
+ * <p> will convert to:
+ * <blockquote><pre>{@code
+ * select deptno from "scott".dept where rand(10) < 0.5;
+ * }</pre></blockquote>
+ *
+ * @see CoreRules#SAMPLE_TO_FILTER
+ */
+@Value.Enclosing
+public class SampleToFilterRule
+ extends RelRule<SampleToFilterRule.Config>
+ implements TransformationRule {
+
+ protected SampleToFilterRule(final SampleToFilterRule.Config config) {
+ super(config);
+ }
+
+ @Override public void onMatch(final RelOptRuleCall call) {
+ final Sample sample = call.rel(0);
+ final RelBuilder relBuilder = call.builder();
+ relBuilder.push(sample.getInput());
+
+ RexNode randFunc = sample.getSamplingParameters().isRepeatable()
+ ? relBuilder.call(SqlStdOperatorTable.RAND,
+ relBuilder.literal(sample.getSamplingParameters().getRepeatableSeed()))
+ : relBuilder.call(SqlStdOperatorTable.RAND);
+
+ relBuilder.filter(
+ relBuilder.lessThan(randFunc,
+ relBuilder.literal(sample.getSamplingParameters().sampleRate)));
+ call.transformTo(relBuilder.build());
+ }
+
+ /** Rule configuration. */
+ @Value.Immutable
+ public interface Config extends RelRule.Config {
+ SampleToFilterRule.Config DEFAULT = ImmutableSampleToFilterRule.Config.of()
+ .withOperandFor(Sample.class);
+
+ @Override default SampleToFilterRule toRule() {
+ return new SampleToFilterRule(this);
+ }
+
+ /** Defines an operand tree for the given classes. */
+ default SampleToFilterRule.Config withOperandFor(Class<? extends Sample> sampleClass) {
+ return withOperandSupplier(b ->
+ b.operand(sampleClass)
+ .predicate(sample -> sample.getSamplingParameters().isBernoulli()).anyInputs())
+ .as(SampleToFilterRule.Config.class);
+ }
+ }
+}
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index eb46822eb3..dbdde0e7b4 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -262,6 +262,26 @@ class RelOptRulesTest extends RelOptTestBase {
.check();
}
+ /**
+ * Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-5971">[CALCITE-5971]
+ * Add the RelRule to rewrite the bernoulli sample as Filter</a>. */
+ @Test void testSampleToFilter() {
+ final String sql = "select deptno from emp tablesample bernoulli(50)";
+ sql(sql)
+ .withRule(CoreRules.SAMPLE_TO_FILTER)
+ .check();
+ }
+
+ /**
+ * Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-5971">[CALCITE-5971]
+ * Add the RelRule to rewrite the bernoulli sample as Filter</a>. */
+ @Test void testSampleToFilterWithSeed() {
+ final String sql = "select deptno from emp tablesample bernoulli(50) REPEATABLE(10)";
+ sql(sql)
+ .withRule(CoreRules.SAMPLE_TO_FILTER)
+ .check();
+ }
+
/**
* Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-5813">[CALCITE-5813]
* Type inference for sql functions REPEAT, SPACE, XML_TRANSFORM,
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 86edca1ab2..8ab7d8bed0 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -12706,6 +12706,44 @@ LogicalProject(EXPR$0=[1])
LogicalFilter(condition=[=($1, 'Charlie')])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSampleToFilter">
+ <Resource name="sql">
+ <![CDATA[select deptno from emp tablesample bernoulli(50)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DEPTNO=[$7])
+ Sample(mode=[bernoulli], rate=[0.5], repeatableSeed=[-])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[<(RAND(), 0.5:DECIMAL(2, 1))])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSampleToFilterWithSeed">
+ <Resource name="sql">
+ <![CDATA[select deptno from emp tablesample bernoulli(50) REPEATABLE(10)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DEPTNO=[$7])
+ Sample(mode=[bernoulli], rate=[0.5], repeatableSeed=[10])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[<(RAND(10), 0.5:DECIMAL(2, 1))])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>