This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 85b33ad2d2e [Fix](variant) filter with variant access may lead to
partition/tablet prune fall through (#32560)
85b33ad2d2e is described below
commit 85b33ad2d2e659d8f07168bd70d1b6f02a5033cf
Author: lihangyu <[email protected]>
AuthorDate: Fri Mar 22 14:33:13 2024 +0800
[Fix](variant) filter with variant access may lead to partition/tablet
prune fall through (#32560)
A query like `select * from ut_p partitions(p2) where cast(var['a'] as int)
> 0` will fall through partition/tablet pruning, since its plan looks like
```
mysql> explain analyzed plan select * from ut_p where id = 3 and
cast(var['a'] as int) = 789;
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Explain String(Nereids Planner)
|
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| LogicalResultSink[26] ( outputExprs=[id#0, var#1] )
|
| +--LogicalProject[25] ( distinct=false, projects=[id#0, var#1],
excepts=[] )
|
| +--LogicalFilter[24] ( predicates=((cast(var#4 as INT) = 789) AND
(id#0 = 3)) )
|
| +--LogicalFilter[23] ( predicates=(0 = __DORIS_DELETE_SIGN__#2) )
|
| +--LogicalProject[22] ( distinct=false, projects=[id#0, var#1,
__DORIS_DELETE_SIGN__#2, __DORIS_VERSION_COL__#3, element_at(var#1, 'a') AS
`var`#4], excepts=[] ) |
| +--LogicalOlapScan (
qualified=regression_test_variant_p0.ut_p, indexName=<index_not_selected>,
selectedIndexId=10145, preAgg=ON ) |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
6 rows in set (0.01 sec)
```
with an extra LogicalProject on top of LogicalOlapScan, so we should handle
this case as well in order to prune partitions/tablets
---
.../org/apache/doris/nereids/rules/RuleType.java | 3 +
.../rules/rewrite/PruneOlapScanPartition.java | 114 +++++++++-------
.../nereids/rules/rewrite/PruneOlapScanTablet.java | 73 ++++++----
.../data/variant_p0/select_partition.out | 29 ++++
.../suites/variant_p0/select_partition.groovy | 152 +++++++++++++++++++++
5 files changed, 297 insertions(+), 74 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index cc66c27fbcc..aa38af04ec5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -241,11 +241,14 @@ public enum RuleType {
MATERIALIZED_INDEX_FILTER_PROJECT_SCAN(RuleTypeClass.REWRITE),
OLAP_SCAN_PARTITION_PRUNE(RuleTypeClass.REWRITE),
+
+ OLAP_SCAN_WITH_PROJECT_PARTITION_PRUNE(RuleTypeClass.REWRITE),
FILE_SCAN_PARTITION_PRUNE(RuleTypeClass.REWRITE),
PUSH_CONJUNCTS_INTO_JDBC_SCAN(RuleTypeClass.REWRITE),
PUSH_CONJUNCTS_INTO_ODBC_SCAN(RuleTypeClass.REWRITE),
PUSH_CONJUNCTS_INTO_ES_SCAN(RuleTypeClass.REWRITE),
OLAP_SCAN_TABLET_PRUNE(RuleTypeClass.REWRITE),
+ OLAP_SCAN_WITH_PROJECT_TABLET_PRUNE(RuleTypeClass.REWRITE),
PUSH_AGGREGATE_TO_OLAP_SCAN(RuleTypeClass.REWRITE),
EXTRACT_SINGLE_TABLE_EXPRESSION_FROM_DISJUNCTION(RuleTypeClass.REWRITE),
HIDE_ONE_ROW_RELATION_UNDER_UNION(RuleTypeClass.REWRITE),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java
index 816dc4b645f..60df874f2a1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java
@@ -21,17 +21,21 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.rules.expression.rules.PartitionPruner;
import
org.apache.doris.nereids.rules.expression.rules.PartitionPruner.PartitionTableType;
import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.qe.ConnectContext;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import java.util.ArrayList;
@@ -45,59 +49,75 @@ import java.util.stream.Collectors;
* Used to prune partition of olap scan, should execute after
SwapProjectAndFilter, MergeConsecutiveFilters,
* MergeConsecutiveProjects and all predicate push down related rules.
*/
-public class PruneOlapScanPartition extends OneRewriteRuleFactory {
+public class PruneOlapScanPartition implements RewriteRuleFactory {
+ private <T extends Plan> Plan prunePartitions(CascadesContext ctx,
+ LogicalOlapScan scan, LogicalFilter<T> originalFilter) {
+ OlapTable table = scan.getTable();
+ Set<String> partitionColumnNameSet =
Utils.execWithReturnVal(table::getPartitionColumnNames);
+ if (partitionColumnNameSet.isEmpty()) {
+ return originalFilter;
+ }
- @Override
- public Rule build() {
- return logicalFilter(logicalOlapScan()).when(p ->
!p.child().isPartitionPruned()).thenApply(ctx -> {
- LogicalFilter<LogicalOlapScan> filter = ctx.root;
- LogicalOlapScan scan = filter.child();
- OlapTable table = scan.getTable();
- Set<String> partitionColumnNameSet =
Utils.execWithReturnVal(table::getPartitionColumnNames);
- if (partitionColumnNameSet.isEmpty()) {
- return filter;
- }
+ List<Slot> output = scan.getOutput();
+ Map<String, Slot> scanOutput =
Maps.newHashMapWithExpectedSize(output.size() * 2);
+ for (Slot slot : output) {
+ scanOutput.put(slot.getName().toLowerCase(), slot);
+ }
- List<Slot> output = scan.getOutput();
- Map<String, Slot> scanOutput =
Maps.newHashMapWithExpectedSize(output.size() * 2);
- for (Slot slot : output) {
- scanOutput.put(slot.getName().toLowerCase(), slot);
+ PartitionInfo partitionInfo = table.getPartitionInfo();
+ List<Column> partitionColumns = partitionInfo.getPartitionColumns();
+ List<Slot> partitionSlots = new ArrayList<>(partitionColumns.size());
+ for (Column column : partitionColumns) {
+ Slot slot = scanOutput.get(column.getName().toLowerCase());
+ if (slot == null) {
+ return originalFilter;
+ } else {
+ partitionSlots.add(slot);
}
+ }
- PartitionInfo partitionInfo = table.getPartitionInfo();
- List<Column> partitionColumns =
partitionInfo.getPartitionColumns();
- List<Slot> partitionSlots = new
ArrayList<>(partitionColumns.size());
- for (Column column : partitionColumns) {
- Slot slot = scanOutput.get(column.getName().toLowerCase());
- if (slot == null) {
- return filter;
- } else {
- partitionSlots.add(slot);
- }
- }
+ List<Long> manuallySpecifiedPartitions =
scan.getManuallySpecifiedPartitions();
- List<Long> manuallySpecifiedPartitions =
scan.getManuallySpecifiedPartitions();
+ Map<Long, PartitionItem> idToPartitions;
+ if (manuallySpecifiedPartitions.isEmpty()) {
+ idToPartitions = partitionInfo.getIdToItem(false);
+ } else {
+ Map<Long, PartitionItem> allPartitions =
partitionInfo.getAllPartitions();
+ idToPartitions = allPartitions.keySet().stream()
+ .filter(id -> manuallySpecifiedPartitions.contains(id))
+ .collect(Collectors.toMap(Function.identity(), id ->
allPartitions.get(id)));
+ }
+ List<Long> prunedPartitions = PartitionPruner.prune(
+ partitionSlots, originalFilter.getPredicate(), idToPartitions,
ctx,
+ PartitionTableType.OLAP);
+ if (prunedPartitions.isEmpty()) {
+ return new LogicalEmptyRelation(
+
ConnectContext.get().getStatementContext().getNextRelationId(),
+ originalFilter.getOutput());
+ }
- Map<Long, PartitionItem> idToPartitions;
- if (manuallySpecifiedPartitions.isEmpty()) {
- idToPartitions = partitionInfo.getIdToItem(false);
- } else {
- Map<Long, PartitionItem> allPartitions =
partitionInfo.getAllPartitions();
- idToPartitions = allPartitions.keySet().stream()
- .filter(id -> manuallySpecifiedPartitions.contains(id))
- .collect(Collectors.toMap(Function.identity(), id ->
allPartitions.get(id)));
- }
- List<Long> prunedPartitions = PartitionPruner.prune(
- partitionSlots, filter.getPredicate(), idToPartitions,
ctx.cascadesContext,
- PartitionTableType.OLAP);
+ LogicalOlapScan rewrittenScan =
scan.withSelectedPartitionIds(prunedPartitions);
+ if (originalFilter.child() instanceof LogicalProject) {
+ LogicalProject<LogicalOlapScan> rewrittenProject
+ = (LogicalProject<LogicalOlapScan>)
originalFilter.child()
+
.withChildren(ImmutableList.of(rewrittenScan));
+ return new LogicalFilter<>(originalFilter.getConjuncts(),
rewrittenProject);
+ }
+ return originalFilter.withChildren(ImmutableList.of(rewrittenScan));
+ }
- if (prunedPartitions.isEmpty()) {
- return new LogicalEmptyRelation(
-
ConnectContext.get().getStatementContext().getNextRelationId(),
- filter.getOutput());
- }
- LogicalOlapScan rewrittenScan =
scan.withSelectedPartitionIds(prunedPartitions);
- return new LogicalFilter<>(filter.getConjuncts(), rewrittenScan);
- }).toRule(RuleType.OLAP_SCAN_PARTITION_PRUNE);
+ @Override
+ public List<Rule> buildRules() {
+ return ImmutableList.of(
+ logicalFilter(logicalOlapScan()).when(p ->
!p.child().isPartitionPruned()).thenApply(ctx -> {
+ return prunePartitions(ctx.cascadesContext,
ctx.root.child(), ctx.root);
+ }).toRule(RuleType.OLAP_SCAN_PARTITION_PRUNE),
+
+ logicalFilter(logicalProject(logicalOlapScan()))
+ .when(p -> !p.child().child().isPartitionPruned())
+ .when(p ->
p.child().hasPushedDownToProjectionFunctions()).thenApply(ctx -> {
+ return prunePartitions(ctx.cascadesContext,
ctx.root.child().child(), ctx.root);
+
}).toRule(RuleType.OLAP_SCAN_WITH_PROJECT_PARTITION_PRUNE)
+ );
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java
index e2981635605..c4468ca8120 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java
@@ -27,7 +27,10 @@ import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import
org.apache.doris.nereids.trees.expressions.visitor.ExpressionColumnFilterConverter;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.planner.HashDistributionPruner;
import org.apache.doris.planner.PartitionColumnFilter;
@@ -46,34 +49,50 @@ import java.util.Set;
/**
* prune bucket
*/
-public class PruneOlapScanTablet extends OneRewriteRuleFactory {
-
+public class PruneOlapScanTablet implements RewriteRuleFactory {
@Override
- public Rule build() {
- return logicalFilter(logicalOlapScan())
- .then(filter -> {
- LogicalOlapScan olapScan = filter.child();
- OlapTable table = olapScan.getTable();
- Builder<Long> selectedTabletIdsBuilder =
ImmutableList.builder();
- if (olapScan.getSelectedTabletIds().isEmpty()) {
- for (Long id : olapScan.getSelectedPartitionIds()) {
- Partition partition = table.getPartition(id);
- MaterializedIndex index =
partition.getIndex(olapScan.getSelectedIndexId());
- selectedTabletIdsBuilder
-
.addAll(getSelectedTabletIds(filter.getConjuncts(), index,
- olapScan.getSelectedIndexId() ==
olapScan.getTable()
- .getBaseIndexId(),
- partition.getDistributionInfo()));
- }
- } else {
-
selectedTabletIdsBuilder.addAll(olapScan.getSelectedTabletIds());
- }
- List<Long> selectedTabletIds =
selectedTabletIdsBuilder.build();
- if (new HashSet(selectedTabletIds).equals(new
HashSet(olapScan.getSelectedTabletIds()))) {
- return null;
- }
- return
filter.withChildren(olapScan.withSelectedTabletIds(selectedTabletIds));
- }).toRule(RuleType.OLAP_SCAN_TABLET_PRUNE);
+ public List<Rule> buildRules() {
+ return ImmutableList.of(
+ logicalFilter(logicalOlapScan())
+ .then(filter -> {
+ return pruneTablets(filter.child(), filter);
+ }).toRule(RuleType.OLAP_SCAN_TABLET_PRUNE),
+
+ logicalFilter(logicalProject(logicalOlapScan()))
+ .when(p ->
p.child().hasPushedDownToProjectionFunctions()).then(filter -> {
+ return pruneTablets(filter.child().child(),
filter);
+ }).toRule(RuleType.OLAP_SCAN_WITH_PROJECT_TABLET_PRUNE)
+ );
+ }
+
+ private <T extends Plan> Plan pruneTablets(LogicalOlapScan olapScan,
LogicalFilter<T> originalFilter) {
+ OlapTable table = olapScan.getTable();
+ Builder<Long> selectedTabletIdsBuilder = ImmutableList.builder();
+ if (olapScan.getSelectedTabletIds().isEmpty()) {
+ for (Long id : olapScan.getSelectedPartitionIds()) {
+ Partition partition = table.getPartition(id);
+ MaterializedIndex index =
partition.getIndex(olapScan.getSelectedIndexId());
+ selectedTabletIdsBuilder
+
.addAll(getSelectedTabletIds(originalFilter.getConjuncts(), index,
+ olapScan.getSelectedIndexId() ==
olapScan.getTable()
+ .getBaseIndexId(),
+ partition.getDistributionInfo()));
+ }
+ } else {
+ selectedTabletIdsBuilder.addAll(olapScan.getSelectedTabletIds());
+ }
+ List<Long> selectedTabletIds = selectedTabletIdsBuilder.build();
+ if (new HashSet(selectedTabletIds).equals(new
HashSet(olapScan.getSelectedTabletIds()))) {
+ return null;
+ }
+ LogicalOlapScan rewrittenScan =
olapScan.withSelectedTabletIds(selectedTabletIds);
+ if (originalFilter.child() instanceof LogicalProject) {
+ LogicalProject<LogicalOlapScan> rewrittenProject
+ = (LogicalProject<LogicalOlapScan>) originalFilter.child()
+ .withChildren(ImmutableList.of(rewrittenScan));
+ return new LogicalFilter<>(originalFilter.getConjuncts(),
rewrittenProject);
+ }
+ return originalFilter.withChildren(rewrittenScan);
}
private Collection<Long> getSelectedTabletIds(Set<Expression> expressions,
diff --git a/regression-test/data/variant_p0/select_partition.out
b/regression-test/data/variant_p0/select_partition.out
new file mode 100644
index 00000000000..939cddc82c4
--- /dev/null
+++ b/regression-test/data/variant_p0/select_partition.out
@@ -0,0 +1,29 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 {"a":1}
+
+-- !sql --
+7 {"a":2}
+
+-- !sql --
+1 {"a":1}
+7 {"a":2}
+
+-- !sql --
+7 {"a":2}
+
+-- !sql --
+16 {"a":3}
+
+-- !sql --
+16 {"a":3}
+
+-- !sql --
+16 {"a":3}
+
+-- !sql --
+1 {"a":1}
+
+-- !sql --
+6 {"x":"123"}
+
diff --git a/regression-test/suites/variant_p0/select_partition.groovy
b/regression-test/suites/variant_p0/select_partition.groovy
new file mode 100644
index 00000000000..a057e3b9a1d
--- /dev/null
+++ b/regression-test/suites/variant_p0/select_partition.groovy
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("query_on_specific_partition") {
+ sql "SET enable_nereids_planner=true"
+
+ sql """
+ DROP TABLE IF EXISTS t_p;
+ """
+
+ sql """
+ CREATE TABLE t_p (
+ id BIGINT,
+ var VARIANT
+ ) DUPLICATE KEY(`id`)
+ PARTITION BY RANGE(`id`)
+ (
+ PARTITION `p1` VALUES LESS THAN ('5'),
+ PARTITION `p2` VALUES LESS THAN ('10')
+ )
+ DISTRIBUTED BY HASH(`id`) BUCKETS 3
+ PROPERTIES (
+ "replication_num"="1"
+ );
+ """
+
+ sql """ALTER TABLE t_p ADD TEMPORARY PARTITION tp1 VALUES [("15"),
("20"));"""
+
+ sql """INSERT INTO t_p VALUES(1, '{"a" : 1}')"""
+ sql """INSERT INTO t_p VALUES(7, '{"a" : 2}')"""
+ sql """INSERT INTO t_p TEMPORARY PARTITION(tp1) values(16, '{"a" : 3}');"""
+
+ sql "SET enable_fallback_to_original_planner=false"
+
+ qt_sql "SELECT * FROM t_p PARTITION p1"
+
+ qt_sql "SELECT * FROM t_p PARTITION p2"
+
+ order_qt_sql "SELECT * FROM t_p PARTITIONS (p2, p1)"
+
+ order_qt_sql "SELECT * FROM t_p PARTITIONS (p2, p1) WHERE id > 1 and
cast(var['a'] as int) > 0"
+
+ qt_sql """select * from t_p temporary partition(tp1);"""
+
+ qt_sql """select * from t_p temporary partitions(tp1);"""
+
+ qt_sql """select * from t_p temporary partition tp1;"""
+
+ sql """
+ CREATE TABLE IF NOT EXISTS test_iot (
+ `test_int` int NOT NULL,
+ `test_var` variant NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`test_int`)
+ PARTITION BY LIST (`test_int`)
+ (
+ PARTITION p1 VALUES IN ("1","2","3"),
+ PARTITION p2 VALUES IN ("4","5","6")
+ )
+ DISTRIBUTED BY HASH(`test_int`) BUCKETS 3
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2"
+ )
+ """
+
+ sql """
+ INSERT INTO test_iot VALUES(1,'{"a" : 1}'),(4, '{"a" : 2}');
+ """
+
+ qt_sql """
+ SELECT * FROM test_iot PARTITION p1;
+ """
+
+// temporary partition test
+ sql """
+ DROP TABLE IF EXISTS ut_p;
+ """
+
+ sql """
+ CREATE TABLE ut_p (
+ id BIGINT,
+ var VARIANT
+ ) unique KEY(`id`)
+ PARTITION BY RANGE(`id`)
+ (
+ PARTITION `p1` VALUES LESS THAN ('5'),
+ PARTITION `p2` VALUES LESS THAN ('10')
+ )
+ DISTRIBUTED BY HASH(`id`) BUCKETS 3
+ PROPERTIES (
+ "replication_num"="1"
+ );
+ """
+
+ sql """ALTER TABLE ut_p ADD TEMPORARY PARTITION tp1 VALUES [("5"),
("7"));"""
+
+ sql """INSERT INTO ut_p TEMPORARY PARTITION(tp1) values(6, '{"x" :
"123"}');"""
+ sql """INSERT INTO ut_p values(6, '{"x" : 456}');"""
+ sql """INSERT INTO ut_p values(3, '{"x" : 789}');"""
+
+ sql "set enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+
+ qt_sql """select * from ut_p temporary partitions(tp1);"""
+
+ explain {
+ sql "select * from ut_p temporary partitions(tp1);"
+ contains "partitions=1/2 (tp1)"
+ }
+
+ explain {
+ sql """select * from ut_p temporary partitions(tp1) where
cast(var['a'] as int) > 0"""
+ contains "partitions=1/2 (tp1)"
+ }
+
+ explain {
+ sql "select * from ut_p temporary partitions(tp1) where cast(var['a']
as int) > 0"
+ contains "partitions=1/2 (tp1)"
+ }
+
+ explain {
+ sql "select * from ut_p partitions(p2) where cast(var['a'] as int) >
0"
+ contains "partitions=1/2 (p2)"
+ }
+
+ explain {
+ sql "select * from ut_p temporary partitions(tp1) where id = 8"
+ contains "VEMPTYSET"
+ }
+
+ explain {
+ sql "select * from ut_p where id = 3 and cast(var['a'] as int) = 789"
+ contains "partitions=1/2 (p1)"
+ contains "tablets=1/3"
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]