This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 7d9a7512971 [feat](nereids) adjust min/max for partition key #41729
branch-2.1 (#45713)
7d9a7512971 is described below
commit 7d9a7512971b3ec9dc0dee68835999b123c8650a
Author: minghong <[email protected]>
AuthorDate: Sun Dec 22 11:20:01 2024 +0800
[feat](nereids) adjust min/max for partition key #41729 branch-2.1 (#45713)
### What problem does this PR solve?
pick #41729
---
.../doris/nereids/stats/StatsCalculator.java | 174 +++++++++++++++++++++
1 file changed, 174 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index d4b4022fbe7..bfe5ec4be48 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -18,9 +18,15 @@
package org.apache.doris.nereids.stats;
import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.ListPartitionItem;
import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.PartitionKey;
+import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.RangePartitionItem;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
@@ -136,11 +142,13 @@ import org.apache.doris.statistics.StatisticRange;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import org.apache.doris.statistics.TableStatsMeta;
+import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.common.collect.Range;
import org.apache.commons.collections.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -899,6 +907,155 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
}
}
+ /**
+ * Determine whether it is a partition key inside the function.
+ */
+ private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
+ List<String> selectedPartitionNames,
+ SlotReference slot, ColumnStatistic cache) {
+ if (olapTable.getPartitionType() == PartitionType.LIST) {
+ cache = updateMinMaxForListPartitionKey(olapTable,
selectedPartitionNames, slot, cache);
+ } else if (olapTable.getPartitionType() == PartitionType.RANGE) {
+ cache = updateMinMaxForTheFirstRangePartitionKey(olapTable,
selectedPartitionNames, slot, cache);
+ }
+ return cache;
+ }
+
+ private double convertLegacyLiteralToDouble(LiteralExpr literal) throws
org.apache.doris.common.AnalysisException {
+ return StatisticsUtil.convertToDouble(literal.getType(),
literal.getStringValue());
+ }
+
+ private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable
olapTable,
+ List<String> selectedPartitionNames,
+ SlotReference slot, ColumnStatistic cache) {
+ int partitionColumnIdx =
olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+ if (partitionColumnIdx != -1) {
+ try {
+ LiteralExpr minExpr = null;
+ LiteralExpr maxExpr = null;
+ double minValue = 0;
+ double maxValue = 0;
+ for (String selectedPartitionName : selectedPartitionNames) {
+ PartitionItem item =
olapTable.getPartitionItemOrAnalysisException(
+ selectedPartitionName);
+ if (item instanceof ListPartitionItem) {
+ ListPartitionItem lp = (ListPartitionItem) item;
+ for (PartitionKey key : lp.getItems()) {
+ if (minExpr == null) {
+ minExpr =
key.getKeys().get(partitionColumnIdx);
+ minValue =
convertLegacyLiteralToDouble(minExpr);
+ maxExpr =
key.getKeys().get(partitionColumnIdx);
+ maxValue =
convertLegacyLiteralToDouble(maxExpr);
+ } else {
+ double current =
convertLegacyLiteralToDouble(key.getKeys().get(partitionColumnIdx));
+ if (current > maxValue) {
+ maxValue = current;
+ maxExpr =
key.getKeys().get(partitionColumnIdx);
+ } else if (current < minValue) {
+ minValue = current;
+ minExpr =
key.getKeys().get(partitionColumnIdx);
+ }
+ }
+ }
+ }
+ }
+ if (minExpr != null) {
+ cache = updateMinMax(cache, minValue, minExpr, maxValue,
maxExpr);
+ }
+ } catch (org.apache.doris.common.AnalysisException e) {
+ LOG.debug(e.getMessage());
+ }
+ }
+ return cache;
+ }
+
+ private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable
olapTable,
+ List<String> selectedPartitionNames,
+ SlotReference slot, ColumnStatistic cache) {
+ int partitionColumnIdx =
olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+ // for multi partition keys, only the first partition key need to
adjust min/max
+ if (partitionColumnIdx == 0) {
+ // update partition column min/max by partition info
+ try {
+ LiteralExpr minExpr = null;
+ LiteralExpr maxExpr = null;
+ double minValue = 0;
+ double maxValue = 0;
+ for (String selectedPartitionName : selectedPartitionNames) {
+ PartitionItem item =
olapTable.getPartitionItemOrAnalysisException(
+ selectedPartitionName);
+ if (item instanceof RangePartitionItem) {
+ RangePartitionItem ri = (RangePartitionItem) item;
+ Range<PartitionKey> range = ri.getItems();
+ PartitionKey upper = range.upperEndpoint();
+ PartitionKey lower = range.lowerEndpoint();
+ if (maxExpr == null) {
+ maxExpr = upper.getKeys().get(partitionColumnIdx);
+ maxValue = convertLegacyLiteralToDouble(maxExpr);
+ minExpr = lower.getKeys().get(partitionColumnIdx);
+ minValue = convertLegacyLiteralToDouble(minExpr);
+ } else {
+ double currentValue =
convertLegacyLiteralToDouble(upper.getKeys()
+ .get(partitionColumnIdx));
+ if (currentValue > maxValue) {
+ maxValue = currentValue;
+ maxExpr =
upper.getKeys().get(partitionColumnIdx);
+ }
+ currentValue =
convertLegacyLiteralToDouble(lower.getKeys().get(partitionColumnIdx));
+ if (currentValue < minValue) {
+ minValue = currentValue;
+ minExpr =
lower.getKeys().get(partitionColumnIdx);
+ }
+ }
+ }
+ }
+ if (minExpr != null) {
+ cache = updateMinMax(cache, minValue, minExpr, maxValue,
maxExpr);
+ }
+ } catch (org.apache.doris.common.AnalysisException e) {
+ LOG.debug(e.getMessage());
+ }
+ }
+ return cache;
+ }
+
+ private ColumnStatistic updateMinMax(ColumnStatistic cache, double
minValue, LiteralExpr minExpr,
+ double maxValue, LiteralExpr maxExpr) {
+ boolean shouldUpdateCache = false;
+ if (!cache.isUnKnown) {
+ // merge the min/max with cache.
+ // example: min/max range in cache is [10-20]
+ // range from partition def is [15-30]
+ // the final range is [15-20]
+ if (cache.minValue > minValue) {
+ minValue = cache.minValue;
+ minExpr = cache.minExpr;
+ } else {
+ shouldUpdateCache = true;
+ }
+ if (cache.maxValue < maxValue) {
+ maxValue = cache.maxValue;
+ maxExpr = cache.maxExpr;
+ } else {
+ shouldUpdateCache = true;
+ }
+ // if min/max is invalid, do not update cache
+ if (minValue > maxValue) {
+ shouldUpdateCache = false;
+ }
+ }
+
+ if (shouldUpdateCache) {
+ cache = new ColumnStatisticBuilder(cache)
+ .setMinExpr(minExpr)
+ .setMinValue(minValue)
+ .setMaxExpr(maxExpr)
+ .setMaxValue(maxValue)
+ .build();
+ }
+ return cache;
+ }
+
// TODO: 1. Subtract the pruned partition
// 2. Consider the influence of runtime filter
// 3. Get NDV and column data size from StatisticManger,
StatisticManager doesn't support it now.
@@ -944,6 +1101,18 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
LOG.debug("{} is partially analyzed, clear min/max values in
column stats",
catalogRelation.getTable().getName());
}
+ List<String> selectedPartitionNames = null;
+ if (catalogRelation instanceof OlapScan) {
+ OlapScan olapScan = (OlapScan) catalogRelation;
+ if (olapScan.getSelectedPartitionIds().size() <
olapScan.getTable().getPartitionNum()) {
+ // partition pruned
+ // try to use selected partition stats, if failed, fall back
to table stats
+ selectedPartitionNames = new
ArrayList<>(olapScan.getSelectedPartitionIds().size());
+ for (Long id : olapScan.getSelectedPartitionIds()) {
+
selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName());
+ }
+ }
+ }
for (SlotReference slotReference : slotSet) {
String colName = slotReference.getColumn().isPresent()
? slotReference.getColumn().get().getName()
@@ -959,6 +1128,11 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
cache = ColumnStatistic.UNKNOWN;
} else {
cache = getColumnStatistic(table, colName, idxId);
+ if (table instanceof OlapTable &&
slotReference.getColumn().isPresent()
+ && selectedPartitionNames != null) {
+ cache = updateMinMaxForPartitionKey((OlapTable) table,
+ selectedPartitionNames, slotReference, cache);
+ }
}
ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(cache);
colStatsBuilder.normalizeAvgSizeByte(slotReference);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]