This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 44627042260 [fix](nereids) Use correct PREAGGREGATION in
agg(filter(scan)) (#33454)
44627042260 is described below
commit 44627042260573b9a3cb58ca930aae94d3a7e8e9
Author: Lijia Liu <[email protected]>
AuthorDate: Mon Apr 15 22:26:29 2024 +0800
[fix](nereids) Use correct PREAGGREGATION in agg(filter(scan)) (#33454)
1. set `PreAggStatus` to `ON` when agg key column by max or min;
2. #28747 may change `PreAggStatus` of scan, inherit it from the previous
one.
---
.../mv/SelectMaterializedIndexWithAggregate.java | 16 ++++---
.../doris/nereids/trees/plans/PreAggStatus.java | 1 +
.../trees/plans/logical/LogicalOlapScan.java | 12 ++++++
.../org/apache/doris/planner/OlapScanNode.java | 7 +++-
.../rules/rewrite/mv/SelectRollupIndexTest.java | 34 +++++++++++++--
.../explain/test_scan_preaggregation_explain.out | 7 ++++
.../test_scan_preaggregation_explain.groovy | 49 ++++++++++++++++++++++
7 files changed, 114 insertions(+), 12 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java
index 24c04937bbe..c84c5212a5b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java
@@ -262,6 +262,9 @@ public class SelectMaterializedIndexWithAggregate extends
AbstractSelectMaterial
filter.getExpressions(),
project.getExpressions()
))
);
+ if (mvPlanWithoutAgg.getSelectedIndexId() ==
result.indexId) {
+ mvPlanWithoutAgg =
mvPlanWithoutAgg.withPreAggStatus(result.preAggStatus);
+ }
SlotContext slotContextWithoutAgg =
generateBaseScanExprToMvExpr(mvPlanWithoutAgg);
return agg.withChildren(new LogicalProject(
@@ -745,22 +748,22 @@ public class SelectMaterializedIndexWithAggregate extends
AbstractSelectMaterial
@Override
public PreAggStatus visitAggregateFunction(AggregateFunction
aggregateFunction, CheckContext context) {
- return checkAggFunc(aggregateFunction, AggregateType.NONE,
context);
+ return checkAggFunc(aggregateFunction, AggregateType.NONE,
context, false);
}
@Override
public PreAggStatus visitMax(Max max, CheckContext context) {
- return checkAggFunc(max, AggregateType.MAX, context);
+ return checkAggFunc(max, AggregateType.MAX, context, true);
}
@Override
public PreAggStatus visitMin(Min min, CheckContext context) {
- return checkAggFunc(min, AggregateType.MIN, context);
+ return checkAggFunc(min, AggregateType.MIN, context, true);
}
@Override
public PreAggStatus visitSum(Sum sum, CheckContext context) {
- return checkAggFunc(sum, AggregateType.SUM, context);
+ return checkAggFunc(sum, AggregateType.SUM, context, false);
}
@Override
@@ -829,7 +832,8 @@ public class SelectMaterializedIndexWithAggregate extends
AbstractSelectMaterial
private PreAggStatus checkAggFunc(
AggregateFunction aggFunc,
AggregateType matchingAggType,
- CheckContext ctx) {
+ CheckContext ctx,
+ boolean canUseKeyColumn) {
String childNameWithFuncName = ctx.isBaseIndex()
? normalizeName(aggFunc.child(0).toSql())
: normalizeName(CreateMaterializedViewStmt.mvColumnBuilder(
@@ -837,7 +841,7 @@ public class SelectMaterializedIndexWithAggregate extends
AbstractSelectMaterial
boolean contains = containsAllColumn(aggFunc.child(0),
ctx.keyNameToColumn.keySet());
if (contains ||
ctx.keyNameToColumn.containsKey(childNameWithFuncName)) {
- if (ctx.isDupKeysOrMergeOnWrite || (!ctx.isBaseIndex() &&
contains)) {
+ if (canUseKeyColumn || ctx.isDupKeysOrMergeOnWrite ||
(!ctx.isBaseIndex() && contains)) {
return PreAggStatus.on();
} else {
Column column =
ctx.keyNameToColumn.get(childNameWithFuncName);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PreAggStatus.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PreAggStatus.java
index 70d2a90db9b..7affac49b2b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PreAggStatus.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PreAggStatus.java
@@ -21,6 +21,7 @@ import java.util.function.Supplier;
/**
* Pre-aggregate status for OLAP scan table.
+ * @see org.apache.doris.planner.OlapScanNode#isPreAggregation
*/
public class PreAggStatus {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
index d83a2f59f7f..c69a0b4dcf8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
@@ -42,6 +42,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.tuple.Pair;
+import org.json.JSONObject;
import java.util.Arrays;
import java.util.List;
@@ -429,4 +430,15 @@ public class LogicalOlapScan extends
LogicalCatalogRelation implements OlapScan
}
return (List) Arrays.asList(slots);
}
+
+ @Override
+ public JSONObject toJson() {
+ JSONObject olapScan = super.toJson();
+ JSONObject properties = new JSONObject();
+ properties.put("OlapTable", table.getName());
+ properties.put("SelectedIndexId", Long.toString(selectedIndexId));
+ properties.put("PreAggStatus", preAggStatus.toString());
+ olapScan.put("Properties", properties);
+ return olapScan;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index 61ffc770ae3..bccd8a4962d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -121,7 +121,8 @@ public class OlapScanNode extends ScanNode {
* When the field value is ON, the storage engine can return the data
directly
* without pre-aggregation.
* When the field value is OFF, the storage engine needs to aggregate the
data
- * before returning to scan node.
+ * before returning to scan node. And if the table is an aggregation table,
+ * all key columns need to be read an participate in aggregation.
* For example:
* Aggregate table: k1, k2, v1 sum
* Field value is ON
@@ -135,7 +136,9 @@ public class OlapScanNode extends ScanNode {
* Query2: select k1, min(v1) from table group by k1
* This aggregation function in query is min which different from the
schema.
* So the data stored in storage engine need to be merged firstly before
- * returning to scan node.
+ * returning to scan node. Although we only queried key column k1, key
column
+ * k2 still needs to be detected and participate in aggregation to ensure
the
+ * results are correct.
*
* There are currently two places to modify this variable:
* 1. The turnOffPreAgg() method of SingleNodePlanner.
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/mv/SelectRollupIndexTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/mv/SelectRollupIndexTest.java
index 706be618f98..0686edba64e 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/mv/SelectRollupIndexTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/mv/SelectRollupIndexTest.java
@@ -263,11 +263,10 @@ class SelectRollupIndexTest extends
BaseMaterializedIndexSelectTest implements M
}));
}
- @Disabled("reopen it if we fix rollup select bugs")
@Test
public void testMaxCanUseKeyColumn() {
PlanChecker.from(connectContext)
- .analyze("select k2, max(k3) from t group by k3")
+ .analyze("select k2, max(k3) from t group by k2")
.applyTopDown(new SelectMaterializedIndexWithAggregate())
.applyTopDown(new SelectMaterializedIndexWithoutAggregate())
.matches(logicalOlapScan().when(scan -> {
@@ -278,11 +277,10 @@ class SelectRollupIndexTest extends
BaseMaterializedIndexSelectTest implements M
}));
}
- @Disabled("reopen it if we fix rollup select bugs")
@Test
public void testMinCanUseKeyColumn() {
PlanChecker.from(connectContext)
- .analyze("select k2, min(k3) from t group by k3")
+ .analyze("select k2, min(k3) from t group by k2")
.applyTopDown(new SelectMaterializedIndexWithAggregate())
.applyTopDown(new SelectMaterializedIndexWithoutAggregate())
.matches(logicalOlapScan().when(scan -> {
@@ -293,6 +291,34 @@ class SelectRollupIndexTest extends
BaseMaterializedIndexSelectTest implements M
}));
}
+ @Test
+ public void testMinMaxCanUseKeyColumnWithBaseTable() {
+ PlanChecker.from(connectContext)
+ .analyze("select k1, min(k2), max(k2) from t group by k1")
+ .applyTopDown(new SelectMaterializedIndexWithAggregate())
+ .applyTopDown(new SelectMaterializedIndexWithoutAggregate())
+ .matches(logicalOlapScan().when(scan -> {
+ PreAggStatus preAgg = scan.getPreAggStatus();
+ Assertions.assertTrue(preAgg.isOn());
+ Assertions.assertEquals("t",
scan.getSelectedMaterializedIndexName().get());
+ return true;
+ }));
+ }
+
+ @Test
+ public void testFilterAggWithBaseTable() {
+ PlanChecker.from(connectContext)
+ .analyze("select k1 from t where k1 = 0 group by k1")
+ .applyTopDown(new SelectMaterializedIndexWithAggregate())
+ .applyTopDown(new SelectMaterializedIndexWithoutAggregate())
+ .matches(logicalOlapScan().when(scan -> {
+ PreAggStatus preAgg = scan.getPreAggStatus();
+ Assertions.assertTrue(preAgg.isOn());
+ Assertions.assertEquals("t",
scan.getSelectedMaterializedIndexName().get());
+ return true;
+ }));
+ }
+
@Test
public void testDuplicatePreAggOn() {
PlanChecker.from(connectContext)
diff --git
a/regression-test/data/nereids_p0/explain/test_scan_preaggregation_explain.out
b/regression-test/data/nereids_p0/explain/test_scan_preaggregation_explain.out
new file mode 100644
index 00000000000..bbabaedc69e
--- /dev/null
+++
b/regression-test/data/nereids_p0/explain/test_scan_preaggregation_explain.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !right_when_preagg_on --
+1 2 6
+
+-- !right_when_preagg_off --
+1 7
+
diff --git
a/regression-test/suites/nereids_p0/explain/test_scan_preaggregation_explain.groovy
b/regression-test/suites/nereids_p0/explain/test_scan_preaggregation_explain.groovy
new file mode 100644
index 00000000000..9754d08f55b
--- /dev/null
+++
b/regression-test/suites/nereids_p0/explain/test_scan_preaggregation_explain.groovy
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_scan_preaggregation_explain") {
+ sql "SET enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+ sql "use nereids_test_query_db"
+
+ sql "DROP TABLE IF EXISTS test_scan_preaggregation"
+ sql """ CREATE TABLE `test_scan_preaggregation` (
+ `k1` int(11) NULL,
+ `k2` int(11) NULL,
+ `k3` int(11) NULL,
+ `v1` int(11) SUM NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`k1`, `k2`, `k3`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 3
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2",
+ "disable_auto_compaction" = "false"
+ ); """
+ sql """ insert into test_scan_preaggregation values
+ (1, 2, 3, 4),
+ (1, 5, 6, 7); """
+ explain {
+ sql("select k1, min(k2), max(k3) from test_scan_preaggregation where
k1 = 0 group by k1;")
+ contains "PREAGGREGATION: ON"
+ }
+
+ qt_right_when_preagg_on "select k1, min(k2), max(k3) from
test_scan_preaggregation where k1 = 1 group by k1;"
+ qt_right_when_preagg_off "select k1, sum(k2) from test_scan_preaggregation
group by k1;"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]