This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 948861b4828 [feat](nereids) adjust min/max for partition key (#41729)
948861b4828 is described below

commit 948861b4828c36adc9082cebf6072dbff4db06fd
Author: minghong <[email protected]>
AuthorDate: Mon Oct 21 11:51:48 2024 +0800

    [feat](nereids) adjust min/max for partition key (#41729)
    
    ## Proposed changes
    After partition pruning, adjust the min/max statistics of the partition
    key column to the bounds of the selected partitions.
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../doris/nereids/stats/StatsCalculator.java       | 131 +++++++++++++++++++++
 .../doris/regression/action/ExplainAction.groovy   |  17 +++
 .../nereids_p0/stats/partition_key_minmax.groovy   |  67 +++++++++++
 3 files changed, 215 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 3c70d4cd518..e9d2d0a51d9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -18,11 +18,18 @@
 package org.apache.doris.nereids.stats;
 
 import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.analysis.LiteralExpr;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.ListPartitionItem;
 import org.apache.doris.catalog.MTMV;
 import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.PartitionKey;
+import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.RangePartitionItem;
 import org.apache.doris.catalog.TableIf;
+import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.CascadesContext;
@@ -141,11 +148,13 @@ import org.apache.doris.statistics.StatisticRange;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
 import org.apache.doris.statistics.TableStatsMeta;
+import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Range;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -483,6 +492,9 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 });
                 for (SlotReference slot : visibleOutputSlots) {
                     ColumnStatistic cache = 
getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames);
+                    if (slot.getColumn().isPresent()) {
+                        cache = updateMinMaxForPartitionKey(olapTable, 
selectedPartitionNames, slot, cache);
+                    }
                     ColumnStatisticBuilder colStatsBuilder = new 
ColumnStatisticBuilder(cache,
                             selectedPartitionsRowCount);
                     colStatsBuilder.normalizeAvgSizeByte(slot);
@@ -508,6 +520,125 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         return builder.build();
     }
 
+    private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        if (olapTable.getPartitionType() == PartitionType.LIST) {
+            cache = updateMinMaxForListPartitionKey(olapTable, 
selectedPartitionNames, slot, cache);
+        } else if (olapTable.getPartitionType() == PartitionType.RANGE) {
+            cache = updateMinMaxForTheFirstRangePartitionKey(olapTable, 
selectedPartitionNames, slot, cache);
+        }
+        return cache;
+    }
+
+    private double convertLegacyLiteralToDouble(LiteralExpr literal) throws 
AnalysisException {
+        return StatisticsUtil.convertToDouble(literal.getType(), 
literal.getStringValue());
+    }
+
+    private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable 
olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        int partitionColumnIdx = 
olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+        if (partitionColumnIdx != -1) {
+            try {
+                LiteralExpr minExpr = null;
+                LiteralExpr maxExpr = null;
+                double minValue = 0;
+                double maxValue = 0;
+                for (String selectedPartitionName : selectedPartitionNames) {
+                    PartitionItem item = 
olapTable.getPartitionItemOrAnalysisException(
+                            selectedPartitionName);
+                    if (item instanceof ListPartitionItem) {
+                        ListPartitionItem lp = (ListPartitionItem) item;
+                        for (PartitionKey key : lp.getItems()) {
+                            if (minExpr == null) {
+                                minExpr = 
key.getKeys().get(partitionColumnIdx);
+                                minValue = 
convertLegacyLiteralToDouble(minExpr);
+                                maxExpr = 
key.getKeys().get(partitionColumnIdx);
+                                maxValue = 
convertLegacyLiteralToDouble(maxExpr);
+                            } else {
+                                double current = 
convertLegacyLiteralToDouble(key.getKeys().get(partitionColumnIdx));
+                                if (current > maxValue) {
+                                    maxValue = current;
+                                    maxExpr = 
key.getKeys().get(partitionColumnIdx);
+                                } else if (current < minValue) {
+                                    minValue = current;
+                                    minExpr = 
key.getKeys().get(partitionColumnIdx);
+                                }
+                            }
+                        }
+                    }
+                }
+                if (minExpr != null) {
+                    cache = new ColumnStatisticBuilder(cache)
+                            .setMinExpr(minExpr)
+                            .setMinValue(minValue)
+                            .setMaxExpr(maxExpr)
+                            .setMaxValue(maxValue)
+                            .build();
+                }
+            } catch (AnalysisException e) {
+                LOG.debug(e.getMessage());
+            }
+        }
+        return cache;
+    }
+
+    private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable 
olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        int partitionColumnIdx = 
olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+        // for multi partition keys, only the first partition key needs to 
adjust min/max
+        if (partitionColumnIdx == 0) {
+            // update partition column min/max by partition info
+            try {
+                LiteralExpr minExpr = null;
+                LiteralExpr maxExpr = null;
+                double minValue = 0;
+                double maxValue = 0;
+                for (String selectedPartitionName : selectedPartitionNames) {
+                    PartitionItem item = 
olapTable.getPartitionItemOrAnalysisException(
+                            selectedPartitionName);
+                    if (item instanceof RangePartitionItem) {
+                        RangePartitionItem ri = (RangePartitionItem) item;
+                        Range<PartitionKey> range = ri.getItems();
+                        PartitionKey upper = range.upperEndpoint();
+                        PartitionKey lower = range.lowerEndpoint();
+                        if (maxExpr == null) {
+                            maxExpr = upper.getKeys().get(partitionColumnIdx);
+                            maxValue = convertLegacyLiteralToDouble(maxExpr);
+                            minExpr = lower.getKeys().get(partitionColumnIdx);
+                            minValue = convertLegacyLiteralToDouble(minExpr);
+                        } else {
+                            double currentValue = 
convertLegacyLiteralToDouble(upper.getKeys()
+                                    .get(partitionColumnIdx));
+                            if (currentValue > maxValue) {
+                                maxValue = currentValue;
+                                maxExpr = 
upper.getKeys().get(partitionColumnIdx);
+                            }
+                            currentValue = 
convertLegacyLiteralToDouble(lower.getKeys().get(partitionColumnIdx));
+                            if (currentValue < minValue) {
+                                minValue = currentValue;
+                                minExpr = 
lower.getKeys().get(partitionColumnIdx);
+                            }
+                        }
+                    }
+                }
+                if (minExpr != null) {
+                    cache = new ColumnStatisticBuilder(cache)
+                            .setMinExpr(minExpr)
+                            .setMinValue(minValue)
+                            .setMaxExpr(maxExpr)
+                            .setMaxValue(maxValue)
+                            .build();
+                }
+            } catch (AnalysisException e) {
+                LOG.debug(e.getMessage());
+            }
+        }
+        return cache;
+    }
+
     @Override
     public Statistics visitLogicalOlapScan(LogicalOlapScan olapScan, Void 
context) {
         return computeOlapScan(olapScan);
diff --git 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/ExplainAction.groovy
 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/ExplainAction.groovy
index f7a28c85acd..9fc19c91a4a 100644
--- 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/ExplainAction.groovy
+++ 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/ExplainAction.groovy
@@ -31,6 +31,7 @@ class ExplainAction implements SuiteAction {
     private boolean verbose = false
     private SuiteContext context
     private Set<String> containsStrings = new LinkedHashSet<>()
+    private Set<String> containsAnyStrings = new LinkedHashSet<>()
     private Set<String> notContainsStrings = new LinkedHashSet<>()
     private Map<String, Integer> multiContainsStrings = new HashMap<>()
     private String coonType
@@ -57,6 +58,10 @@ class ExplainAction implements SuiteAction {
         containsStrings.add(subString)
     }
 
+    void containsAny(String subString) {
+        containsAnyStrings.add(subString)
+    }
+
     void multiContains(String subString, int n) {
         multiContainsStrings.put(subString, n);
     }
@@ -124,6 +129,18 @@ class ExplainAction implements SuiteAction {
                     throw t
                 }
             }
+            boolean any = false;
+            for (String string : containsAnyStrings) {
+                if (explainString.contains(string)) {
+                    any = true;
+                }
+            }
+            if (!containsAnyStrings.isEmpty() && !any) {
+                    String msg = ("Explain and check failed, expect contains 
any '${containsAnyStrings}',"
+                            + " but actual explain string 
is:\n${explainString}").toString()
+                    def t = new IllegalStateException(msg)
+                    throw t
+            }
         }
     }
 
diff --git 
a/regression-test/suites/nereids_p0/stats/partition_key_minmax.groovy 
b/regression-test/suites/nereids_p0/stats/partition_key_minmax.groovy
new file mode 100644
index 00000000000..936df92102e
--- /dev/null
+++ b/regression-test/suites/nereids_p0/stats/partition_key_minmax.groovy
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("partition_key_minmax") {
+    sql """
+        drop table if exists rangetable;
+        create table rangetable (a int,
+        b int,
+        c int)
+        partition by range (a, b)
+        (partition p1 values [("1", "2"), ("10", "20")),
+        partition p2 values [("20", "100"), ("30", "200")),
+        partition p3 values [("300", "-1"), ("400", "1000")) 
+        )
+        distributed by hash(a) properties("replication_num"="1");
+
+        insert into rangetable values (5, 3, 0), (22, 150, 1), (333, 1, 2),(6, 
1, 3);
+
+        analyze table rangetable with sync;
+    """
+    explain {
+        sql """memo plan
+            select * from rangetable where a < 250;
+            """
+        containsAny("a#0 -> ndv=3.0000, min=1.000000(1), max=30.000000(30), 
count=3.0000")
+        containsAny("a#0 -> ndv=4.0000, min=5.000000(5), max=333.000000(333), 
count=4.0000")
+    }
+
+    sql """
+    drop table if exists listtable;
+    create table listtable(id int, city varchar(20), value int)
+    PARTITION BY LIST(id, city)
+    (
+        PARTITION p1_city VALUES IN (("1", "Beijing"), ("1", "Shanghai")),
+        PARTITION p2_city VALUES IN (("2", "Beijing"), ("2", "Shanghai")),
+        PARTITION p3_city VALUES IN (("3", "Beijing"), ("3", "Shanghai"))
+    )
+    distributed by hash(id) properties("replication_num"="1");
+
+    insert into listtable values (1, "Beijing", 0), (2, "Beijing", 0), (3, 
"Beijing", 0);
+
+    analyze table listtable with sync;
+    """
+
+    explain {
+        sql """
+         memo plan select * from listtable where id >=3;
+        """
+        containsAny("id#0 -> ndv=1.0000, min=3.000000(3), max=3.000000(3), 
count=1.0000,")
+        containsAny("id#0 -> ndv=3.0000, min=1.000000(1), max=3.000000(3), 
count=3.0000,")
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to