This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 948861b4828 [feat](nereids) adjust min/max for partition key (#41729)
948861b4828 is described below
commit 948861b4828c36adc9082cebf6072dbff4db06fd
Author: minghong <[email protected]>
AuthorDate: Mon Oct 21 11:51:48 2024 +0800
[feat](nereids) adjust min/max for partition key (#41729)
## Proposed changes
After partition pruning, adjust the min/max statistics of the partition key column based on the selected partitions.
Issue Number: close #xxx
<!--Describe your changes.-->
---
.../doris/nereids/stats/StatsCalculator.java | 131 +++++++++++++++++++++
.../doris/regression/action/ExplainAction.groovy | 17 +++
.../nereids_p0/stats/partition_key_minmax.groovy | 67 +++++++++++
3 files changed, 215 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 3c70d4cd518..e9d2d0a51d9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -18,11 +18,18 @@
package org.apache.doris.nereids.stats;
import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.ListPartitionItem;
import org.apache.doris.catalog.MTMV;
import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.PartitionKey;
+import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.RangePartitionItem;
import org.apache.doris.catalog.TableIf;
+import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.CascadesContext;
@@ -141,11 +148,13 @@ import org.apache.doris.statistics.StatisticRange;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import org.apache.doris.statistics.TableStatsMeta;
+import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.common.collect.Range;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -483,6 +492,9 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
});
for (SlotReference slot : visibleOutputSlots) {
ColumnStatistic cache =
getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames);
+ if (slot.getColumn().isPresent()) {
+ cache = updateMinMaxForPartitionKey(olapTable,
selectedPartitionNames, slot, cache);
+ }
ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(cache,
selectedPartitionsRowCount);
colStatsBuilder.normalizeAvgSizeByte(slot);
@@ -508,6 +520,125 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
return builder.build();
}
+    /**
+     * Picks the partition-type specific routine that adjusts the min/max of a column statistic.
+     *
+     * <p>LIST tables are handled by {@code updateMinMaxForListPartitionKey}, RANGE tables by
+     * {@code updateMinMaxForTheFirstRangePartitionKey}; for any other partition type the
+     * statistic is handed back untouched.
+     *
+     * @param olapTable table whose partition type decides the routine
+     * @param selectedPartitionNames names of the partitions to take into account
+     * @param slot slot whose column statistic is being adjusted
+     * @param cache the column statistic to adjust
+     * @return the adjusted statistic, or {@code cache} itself when no routine applies
+     */
+    private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        if (olapTable.getPartitionType() == PartitionType.LIST) {
+            return updateMinMaxForListPartitionKey(olapTable, selectedPartitionNames, slot, cache);
+        }
+        if (olapTable.getPartitionType() == PartitionType.RANGE) {
+            return updateMinMaxForTheFirstRangePartitionKey(olapTable, selectedPartitionNames, slot, cache);
+        }
+        return cache;
+    }
+
+ /**
+ * Converts a legacy {@link LiteralExpr} to a double by handing its type and string
+ * value to {@code StatisticsUtil.convertToDouble}.
+ *
+ * @param literal the literal to convert
+ * @return the double produced by {@code StatisticsUtil.convertToDouble}
+ * @throws AnalysisException declared by this method; presumably raised when the
+ * conversion fails (confirm against StatisticsUtil)
+ */
+ private double convertLegacyLiteralToDouble(LiteralExpr literal) throws
AnalysisException {
+ return StatisticsUtil.convertToDouble(literal.getType(),
literal.getStringValue());
+ }
+
+    /**
+     * Replaces the min/max of a LIST partition column with the smallest and largest literal
+     * that the selected partitions declare for that column.
+     *
+     * <p>Literals are compared through their double representation (see
+     * {@code convertLegacyLiteralToDouble}). {@code cache} is returned unchanged when the slot's
+     * column is not a partition column, when no selected partition is a
+     * {@link ListPartitionItem}, or when an {@link AnalysisException} is raised while looking up
+     * a partition or converting a literal.
+     *
+     * @param olapTable table that owns the partitions
+     * @param selectedPartitionNames names of the partitions to inspect
+     * @param slot slot of the column; the caller must ensure {@code slot.getColumn()} is present
+     * @param cache the column statistic to adjust
+     * @return a copy of {@code cache} with min/max overridden, or {@code cache} itself
+     */
+    private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+        if (partitionColumnIdx == -1) {
+            // the column is not a partition key, nothing to adjust
+            return cache;
+        }
+        try {
+            LiteralExpr minExpr = null;
+            LiteralExpr maxExpr = null;
+            double minValue = 0;
+            double maxValue = 0;
+            for (String selectedPartitionName : selectedPartitionNames) {
+                PartitionItem item = olapTable.getPartitionItemOrAnalysisException(selectedPartitionName);
+                if (!(item instanceof ListPartitionItem)) {
+                    continue;
+                }
+                for (PartitionKey key : ((ListPartitionItem) item).getItems()) {
+                    // look the literal up and convert it once per key
+                    LiteralExpr literal = key.getKeys().get(partitionColumnIdx);
+                    double current = convertLegacyLiteralToDouble(literal);
+                    if (minExpr == null) {
+                        // first literal seen: it is both the min and the max so far
+                        minExpr = literal;
+                        minValue = current;
+                        maxExpr = literal;
+                        maxValue = current;
+                    } else if (current > maxValue) {
+                        maxValue = current;
+                        maxExpr = literal;
+                    } else if (current < minValue) {
+                        minValue = current;
+                        minExpr = literal;
+                    }
+                }
+            }
+            if (minExpr == null) {
+                // no list partition item contributed a literal
+                return cache;
+            }
+            return new ColumnStatisticBuilder(cache)
+                    .setMinExpr(minExpr)
+                    .setMinValue(minValue)
+                    .setMaxExpr(maxExpr)
+                    .setMaxValue(maxValue)
+                    .build();
+        } catch (AnalysisException e) {
+            // best effort: keep the original statistic, but log the context and the cause
+            LOG.debug("failed to adjust min/max of list partition key for slot {}", slot, e);
+            return cache;
+        }
+    }
+
+    /**
+     * Replaces the min/max of the first RANGE partition column with the lowest lower endpoint
+     * and the highest upper endpoint among the selected partitions.
+     *
+     * <p>Only the column at index 0 of the partition columns is adjusted; for any other column
+     * {@code cache} is returned unchanged. Endpoints are compared through their double
+     * representation (see {@code convertLegacyLiteralToDouble}). {@code cache} is also returned
+     * unchanged when no selected partition is a {@link RangePartitionItem}, or when an
+     * {@link AnalysisException} is raised while looking up a partition or converting a literal.
+     *
+     * <p>NOTE(review): the upper endpoint is used as the max as-is; if range upper bounds are
+     * exclusive the max may be a slight over-estimate - confirm this is intended.
+     *
+     * @param olapTable table that owns the partitions
+     * @param selectedPartitionNames names of the partitions to inspect
+     * @param slot slot of the column; the caller must ensure {@code slot.getColumn()} is present
+     * @param cache the column statistic to adjust
+     * @return a copy of {@code cache} with min/max overridden, or {@code cache} itself
+     */
+    private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+        // with multiple partition keys, only the first key needs its min/max adjusted
+        if (partitionColumnIdx != 0) {
+            return cache;
+        }
+        // update partition column min/max by partition info
+        try {
+            LiteralExpr minExpr = null;
+            LiteralExpr maxExpr = null;
+            double minValue = 0;
+            double maxValue = 0;
+            for (String selectedPartitionName : selectedPartitionNames) {
+                PartitionItem item = olapTable.getPartitionItemOrAnalysisException(selectedPartitionName);
+                if (!(item instanceof RangePartitionItem)) {
+                    continue;
+                }
+                Range<PartitionKey> range = ((RangePartitionItem) item).getItems();
+                LiteralExpr upperExpr = range.upperEndpoint().getKeys().get(partitionColumnIdx);
+                LiteralExpr lowerExpr = range.lowerEndpoint().getKeys().get(partitionColumnIdx);
+                double upperValue = convertLegacyLiteralToDouble(upperExpr);
+                double lowerValue = convertLegacyLiteralToDouble(lowerExpr);
+                // a null expr means this is the first range seen, so it seeds both bounds
+                if (maxExpr == null || upperValue > maxValue) {
+                    maxValue = upperValue;
+                    maxExpr = upperExpr;
+                }
+                if (minExpr == null || lowerValue < minValue) {
+                    minValue = lowerValue;
+                    minExpr = lowerExpr;
+                }
+            }
+            if (minExpr == null) {
+                // no range partition item contributed an endpoint
+                return cache;
+            }
+            return new ColumnStatisticBuilder(cache)
+                    .setMinExpr(minExpr)
+                    .setMinValue(minValue)
+                    .setMaxExpr(maxExpr)
+                    .setMaxValue(maxValue)
+                    .build();
+        } catch (AnalysisException e) {
+            // best effort: keep the original statistic, but log the context and the cause
+            LOG.debug("failed to adjust min/max of range partition key for slot {}", slot, e);
+            return cache;
+        }
+    }
+
@Override
public Statistics visitLogicalOlapScan(LogicalOlapScan olapScan, Void
context) {
return computeOlapScan(olapScan);
diff --git
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/ExplainAction.groovy
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/ExplainAction.groovy
index f7a28c85acd..9fc19c91a4a 100644
---
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/ExplainAction.groovy
+++
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/ExplainAction.groovy
@@ -31,6 +31,7 @@ class ExplainAction implements SuiteAction {
private boolean verbose = false
private SuiteContext context
private Set<String> containsStrings = new LinkedHashSet<>()
+ private Set<String> containsAnyStrings = new LinkedHashSet<>()
private Set<String> notContainsStrings = new LinkedHashSet<>()
private Map<String, Integer> multiContainsStrings = new HashMap<>()
private String coonType
@@ -57,6 +58,10 @@ class ExplainAction implements SuiteAction {
containsStrings.add(subString)
}
+    /**
+     * Registers a substring for the "contains any" check: the explain output is accepted
+     * when at least one registered substring occurs in it.
+     */
+    void containsAny(String subString) {
+        containsAnyStrings << subString
+    }
+
void multiContains(String subString, int n) {
multiContainsStrings.put(subString, n);
}
@@ -124,6 +129,18 @@ class ExplainAction implements SuiteAction {
throw t
}
}
+ boolean any = false;
+ for (String string : containsAnyStrings) {
+ if (explainString.contains(string)) {
+ any = true;
+ }
+ }
+ if (!containsAnyStrings.isEmpty() && !any) {
+ String msg = ("Explain and check failed, expect contains
any '${containsAnyStrings}',"
+ + " but actual explain string
is:\n${explainString}").toString()
+ def t = new IllegalStateException(msg)
+ throw t
+ }
}
}
diff --git
a/regression-test/suites/nereids_p0/stats/partition_key_minmax.groovy
b/regression-test/suites/nereids_p0/stats/partition_key_minmax.groovy
new file mode 100644
index 00000000000..936df92102e
--- /dev/null
+++ b/regression-test/suites/nereids_p0/stats/partition_key_minmax.groovy
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("partition_key_minmax") {
+ // Case 1: table range-partitioned on (a, b) with three partitions; four rows are
+ // inserted and the table is analyzed synchronously before the plan is inspected.
+ sql """
+ drop table if exists rangetable;
+ create table rangetable (a int,
+ b int,
+ c int)
+ partition by range (a, b)
+ (partition p1 values [("1", "2"), ("10", "20")),
+ partition p2 values [("20", "100"), ("30", "200")),
+ partition p3 values [("300", "-1"), ("400", "1000"))
+ )
+ distributed by hash(a) properties("replication_num"="1");
+
+ insert into rangetable values (5, 3, 0), (22, 150, 1), (333, 1, 2),(6,
1, 3);
+
+ analyze table rangetable with sync;
+ """
+ // The predicate a < 250 excludes p3 (its lower bound on a is 300), leaving p1 and p2,
+ // whose bounds on a span 1..30.
+ explain {
+ sql """memo plan
+ select * from rangetable where a < 250;
+ """
+ // Either line is accepted: min/max taken from the p1/p2 partition bounds (1..30),
+ // or the stats over all four inserted rows (a between 5 and 333).
+ containsAny("a#0 -> ndv=3.0000, min=1.000000(1), max=30.000000(30),
count=3.0000")
+ containsAny("a#0 -> ndv=4.0000, min=5.000000(5), max=333.000000(333),
count=4.0000")
+ }
+
+ // Case 2: table list-partitioned on (id, city); every partition lists a single id value.
+ sql """
+ drop table if exists listtable;
+ create table listtable(id int, city varchar(20), value int)
+ PARTITION BY LIST(id, city)
+ (
+ PARTITION p1_city VALUES IN (("1", "Beijing"), ("1", "Shanghai")),
+ PARTITION p2_city VALUES IN (("2", "Beijing"), ("2", "Shanghai")),
+ PARTITION p3_city VALUES IN (("3", "Beijing"), ("3", "Shanghai"))
+ )
+ distributed by hash(id) properties("replication_num"="1");
+
+ insert into listtable values (1, "Beijing", 0), (2, "Beijing", 0), (3,
"Beijing", 0);
+
+ analyze table listtable with sync;
+ """
+
+ // The predicate id >= 3 matches only the values listed by p3_city (id = 3).
+ explain {
+ sql """
+ memo plan select * from listtable where id >=3;
+ """
+ // Either line is accepted: min/max narrowed to the p3_city value (3..3),
+ // or the stats over all three inserted rows (id between 1 and 3).
+ containsAny("id#0 -> ndv=1.0000, min=3.000000(3), max=3.000000(3),
count=1.0000,")
+ containsAny("id#0 -> ndv=3.0000, min=1.000000(1), max=3.000000(3),
count=3.0000,")
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]