This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 4d69cb2ec5a [fix](statistics) Use column update rows to decide min/max
stats are valid or not (#34263)
4d69cb2ec5a is described below
commit 4d69cb2ec5a1a879cb2f86a40c141dbdcc6efee6
Author: Jibing-Li <[email protected]>
AuthorDate: Tue Apr 30 16:18:06 2024 +0800
[fix](statistics) Use column update rows to decide min/max stats are valid
or not (#34263)
This is a follow-up PR to #33685.
After #33703 was merged, updated rows need to be checked at the column level
instead of the table level.
---
.../doris/nereids/stats/StatsCalculator.java | 26 ++++++++++++++++------
1 file changed, 19 insertions(+), 7 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 7e00b3680a0..c8deba349c2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -19,6 +19,7 @@ package org.apache.doris.nereids.stats;
import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
@@ -124,6 +125,7 @@ import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.util.PlanUtils;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.AnalysisManager;
+import org.apache.doris.statistics.ColStatsMeta;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.Histogram;
@@ -764,10 +766,10 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
Set<SlotReference> slotSet = slotSetBuilder.build();
Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap =
new HashMap<>();
TableIf table = catalogRelation.getTable();
+ boolean isOlapTable = table instanceof OlapTable;
AnalysisManager analysisManager =
Env.getCurrentEnv().getAnalysisManager();
TableStatsMeta tableMeta =
analysisManager.findTableStatsStatus(table.getId());
- // rows newly updated after last analyze
- long deltaRowCount = tableMeta == null ? 0 :
tableMeta.updatedRows.get();
+ long tableUpdatedRows = tableMeta == null ? 0 :
tableMeta.updatedRows.get();
double rowCount = catalogRelation.getTable().getRowCountForNereids();
boolean hasUnknownKeyCol = false;
long idxId = -1;
@@ -777,10 +779,6 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
idxId = olapScan.getSelectedIndexId();
}
}
- if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
- LOG.debug("{} is partially analyzed, clear min/max values in
column stats",
- catalogRelation.getTable().getName());
- }
for (SlotReference slotReference : slotSet) {
boolean usedAsKey = false;
if (ConnectContext.get() != null &&
slotReference.getColumn().isPresent()
@@ -795,6 +793,13 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
if (colName == null) {
throw new RuntimeException(String.format("Invalid slot: %s",
slotReference.getExprId()));
}
+ long deltaRowCount = 0;
+ if (isOlapTable) {
+ OlapTable olapTable = (OlapTable) table;
+ ColStatsMeta colMeta = tableMeta == null ? null :
tableMeta.findColumnStatsMeta(
+ olapTable.getIndexNameById(idxId == -1 ?
olapTable.getBaseIndexId() : idxId), colName);
+ deltaRowCount = colMeta == null ? 0 : tableUpdatedRows -
colMeta.updatedRows;
+ }
ColumnStatistic cache;
if (!FeConstants.enableInternalSchemaDb
|| shouldIgnoreThisCol) {
@@ -814,13 +819,20 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
}
}
if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().enableStats) {
+ // deltaRowCount > 0 indicates that
+ // new data is loaded to the table after this column was
analyzed last time.
+ // In this case, need to eliminate min/max value for this
column.
if (deltaRowCount > 0) {
// clear min-max to avoid error estimation
// for example, after yesterday data loaded, user send
query about yesterday immediately.
// since yesterday data are not analyzed, the max date is
before yesterday, and hence optimizer
// estimates the filter result is zero
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
-
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
+
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("{}.{} is partially analyzed, clear min/max
values in column stats",
+ table.getName(), colName);
+ }
}
columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
} else {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]