This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d69cb2ec5a [fix](statistics) Use column update rows to decide min/max 
stats are valid or not (#34263)
4d69cb2ec5a is described below

commit 4d69cb2ec5a1a879cb2f86a40c141dbdcc6efee6
Author: Jibing-Li <[email protected]>
AuthorDate: Tue Apr 30 16:18:06 2024 +0800

    [fix](statistics) Use column update rows to decide min/max stats are valid 
or not (#34263)
    
    This is a follow-up PR to #33685.
    After #33703 was merged, the updated rows need to be checked at the column
    level instead of the table level.
---
 .../doris/nereids/stats/StatsCalculator.java       | 26 ++++++++++++++++------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 7e00b3680a0..c8deba349c2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -19,6 +19,7 @@ package org.apache.doris.nereids.stats;
 
 import org.apache.doris.analysis.IntLiteral;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
@@ -124,6 +125,7 @@ import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.util.PlanUtils;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.statistics.AnalysisManager;
+import org.apache.doris.statistics.ColStatsMeta;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.Histogram;
@@ -764,10 +766,10 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         Set<SlotReference> slotSet = slotSetBuilder.build();
         Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap = 
new HashMap<>();
         TableIf table = catalogRelation.getTable();
+        boolean isOlapTable = table instanceof OlapTable;
         AnalysisManager analysisManager = 
Env.getCurrentEnv().getAnalysisManager();
         TableStatsMeta tableMeta = 
analysisManager.findTableStatsStatus(table.getId());
-        // rows newly updated after last analyze
-        long deltaRowCount = tableMeta == null ? 0 : 
tableMeta.updatedRows.get();
+        long tableUpdatedRows = tableMeta == null ? 0 : 
tableMeta.updatedRows.get();
         double rowCount = catalogRelation.getTable().getRowCountForNereids();
         boolean hasUnknownKeyCol = false;
         long idxId = -1;
@@ -777,10 +779,6 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 idxId = olapScan.getSelectedIndexId();
             }
         }
-        if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
-            LOG.debug("{} is partially analyzed, clear min/max values in 
column stats",
-                    catalogRelation.getTable().getName());
-        }
         for (SlotReference slotReference : slotSet) {
             boolean usedAsKey = false;
             if (ConnectContext.get() != null && 
slotReference.getColumn().isPresent()
@@ -795,6 +793,13 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
             if (colName == null) {
                 throw new RuntimeException(String.format("Invalid slot: %s", 
slotReference.getExprId()));
             }
+            long deltaRowCount = 0;
+            if (isOlapTable) {
+                OlapTable olapTable = (OlapTable) table;
+                ColStatsMeta colMeta = tableMeta == null ? null : 
tableMeta.findColumnStatsMeta(
+                        olapTable.getIndexNameById(idxId == -1 ? 
olapTable.getBaseIndexId() : idxId), colName);
+                deltaRowCount = colMeta == null ? 0 : tableUpdatedRows - 
colMeta.updatedRows;
+            }
             ColumnStatistic cache;
             if (!FeConstants.enableInternalSchemaDb
                     || shouldIgnoreThisCol) {
@@ -814,13 +819,20 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 }
             }
             if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().enableStats) {
+                // deltaRowCount > 0 indicates that
+                // new data is loaded to the table after this column was 
analyzed last time.
+                // In this case, need to eliminate min/max value for this 
column.
                 if (deltaRowCount > 0) {
                     // clear min-max to avoid error estimation
                     // for example, after yesterday data loaded, user send 
query about yesterday immediately.
                     // since yesterday data are not analyzed, the max date is 
before yesterday, and hence optimizer
                     // estimates the filter result is zero
                     
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
-                            
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
+                        
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("{}.{} is partially analyzed, clear min/max 
values in column stats",
+                                table.getName(), colName);
+                    }
                 }
                 columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
             } else {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to