This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new c9813c81a68 [feat](nereids) adjust stats derive by delta row (#39185)
c9813c81a68 is described below
commit c9813c81a683e498c9114c2476f4697dd0163e22
Author: minghong <[email protected]>
AuthorDate: Wed Aug 14 09:18:09 2024 +0800
[feat](nereids) adjust stats derive by delta row (#39185)
#39222
After a table has been analyzed, the user may insert new rows.
analyzed rows: the rows that were covered by the analyze job
delta rows: the rows inserted after the analyze job
If all analyzed rows are filtered out (the estimated row count is 0), we
re-estimate the filter result using the delta rows with unknown column stats.
## Proposed changes
Issue Number: close #xxx
<!--Describe your changes.-->
---
.../doris/nereids/stats/FilterEstimation.java | 17 +++++--
.../doris/nereids/stats/StatsCalculator.java | 31 ++++++------
.../org/apache/doris/statistics/Statistics.java | 24 ++++++++-
.../apache/doris/statistics/StatisticsBuilder.java | 10 +++-
.../suites/nereids_p0/delta_row/delta_row.groovy | 57 ++++++++++++++++++++++
5 files changed, 117 insertions(+), 22 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index b4b4fa5e3f8..6f6e768caae 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -87,12 +87,21 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
/**
* This method will update the stats according to the selectivity.
*/
- public Statistics estimate(Expression expression, Statistics statistics) {
+ public Statistics estimate(Expression expression, Statistics inputStats) {
// For a comparison predicate, only when it's left side is a slot and
right side is a literal, we would
// consider is a valid predicate.
- Statistics stats = expression.accept(this, new
EstimationContext(statistics));
- stats.enforceValid();
- return stats;
+ Statistics outputStats = expression.accept(this, new
EstimationContext(inputStats));
+ if (outputStats.getRowCount() == 0 && inputStats.getDeltaRowCount() >
0) {
+ StatisticsBuilder deltaStats = new StatisticsBuilder();
+ deltaStats.setDeltaRowCount(0);
+ deltaStats.setRowCount(inputStats.getDeltaRowCount());
+ for (Expression expr : inputStats.columnStatistics().keySet()) {
+ deltaStats.putColumnStatistics(expr, ColumnStatistic.UNKNOWN);
+ }
+ outputStats = expression.accept(this, new
EstimationContext(deltaStats.build()));
+ }
+ outputStats.enforceValid();
+ return outputStats;
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index d711b99655b..2c954f9ee43 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -646,10 +646,10 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
idxId = olapScan.getSelectedIndexId();
}
}
- if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
- LOG.debug("{} is partially analyzed, clear min/max values in
column stats",
- catalogRelation.getTable().getName());
- }
+ // if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
+ // LOG.debug("{} is partially analyzed, clear min/max values in
column stats",
+ // catalogRelation.getTable().getName());
+ // }
for (SlotReference slotReference : slotSet) {
String colName = slotReference.getColumn().isPresent()
? slotReference.getColumn().get().getName()
@@ -676,14 +676,14 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
hasUnknownCol = true;
}
if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().enableStats) {
- if (deltaRowCount > 0) {
- // clear min-max to avoid error estimation
- // for example, after yesterday data loaded, user send
query about yesterday immediately.
- // since yesterday data are not analyzed, the max date is
before yesterday, and hence optimizer
- // estimates the filter result is zero
-
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
-
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
- }
+ // if (deltaRowCount > 0) {
+ // // clear min-max to avoid error estimation
+ // // for example, after yesterday data loaded, user send
query about yesterday immediately.
+ // // since yesterday data are not analyzed, the max date
is before yesterday, and hence optimizer
+ // // estimates the filter result is zero
+ //
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
+ //
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
+ // }
columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
} else {
columnStatisticBuilderMap.put(slotReference, new
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN));
@@ -693,17 +693,18 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
if (hasUnknownCol && ConnectContext.get() != null &&
ConnectContext.get().getStatementContext() != null) {
ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
}
- return normalizeCatalogRelationColumnStatsRowCount(rowCount,
columnStatisticBuilderMap);
+ return normalizeCatalogRelationColumnStatsRowCount(rowCount,
columnStatisticBuilderMap, deltaRowCount);
}
private Statistics normalizeCatalogRelationColumnStatsRowCount(double
rowCount,
- Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap)
{
+ Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap,
+ double deltaRowCount) {
Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
for (Expression slot : columnStatisticBuilderMap.keySet()) {
columnStatisticMap.put(slot,
columnStatisticBuilderMap.get(slot).setCount(rowCount).build());
}
- return new Statistics(rowCount, columnStatisticMap);
+ return new Statistics(rowCount, columnStatisticMap, deltaRowCount);
}
private Statistics computeTopN(TopN topN) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index da6bf937593..9cec4cc18d4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -37,15 +37,23 @@ public class Statistics {
// the byte size of one tuple
private double tupleSize;
+ private double deltaRowCount = 0.0;
+
public Statistics(Statistics another) {
this.rowCount = another.rowCount;
this.expressionToColumnStats = new
HashMap<>(another.expressionToColumnStats);
this.tupleSize = another.tupleSize;
+ this.deltaRowCount = another.getDeltaRowCount();
}
- public Statistics(double rowCount, Map<Expression, ColumnStatistic>
expressionToColumnStats) {
+ public Statistics(double rowCount, Map<Expression, ColumnStatistic>
expressionToColumnStats, double deltaRowCount) {
this.rowCount = rowCount;
this.expressionToColumnStats = expressionToColumnStats;
+ this.deltaRowCount = deltaRowCount;
+ }
+
+ public Statistics(double rowCount, Map<Expression, ColumnStatistic>
expressionToColumnStats) {
+ this(rowCount, expressionToColumnStats, 0);
}
public ColumnStatistic findColumnStatistics(Expression expression) {
@@ -150,7 +158,11 @@ public class Statistics {
return "-Infinite";
}
DecimalFormat format = new DecimalFormat("#,###.##");
- return format.format(rowCount);
+ String rows = format.format(rowCount);
+ if (deltaRowCount > 0) {
+ rows = rows + "(" + format.format(deltaRowCount) + ")";
+ }
+ return rows;
}
public int getBENumber() {
@@ -209,4 +221,12 @@ public class Statistics {
}
return builder.build();
}
+
+ public double getDeltaRowCount() {
+ return deltaRowCount;
+ }
+
+ public void setDeltaRowCount(double deltaRowCount) {
+ this.deltaRowCount = deltaRowCount;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
index a0e75f7df38..1e399573546 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
@@ -28,6 +28,8 @@ public class StatisticsBuilder {
private final Map<Expression, ColumnStatistic> expressionToColumnStats;
+ private double deltaRowCount = 0.0;
+
public StatisticsBuilder() {
expressionToColumnStats = new HashMap<>();
}
@@ -36,6 +38,7 @@ public class StatisticsBuilder {
this.rowCount = statistics.getRowCount();
expressionToColumnStats = new HashMap<>();
expressionToColumnStats.putAll(statistics.columnStatistics());
+ deltaRowCount = statistics.getDeltaRowCount();
}
public StatisticsBuilder setRowCount(double rowCount) {
@@ -54,7 +57,12 @@ public class StatisticsBuilder {
return this;
}
+ public StatisticsBuilder setDeltaRowCount(double deltaRowCount) {
+ this.deltaRowCount = deltaRowCount;
+ return this;
+ }
+
public Statistics build() {
- return new Statistics(rowCount, expressionToColumnStats);
+ return new Statistics(rowCount, expressionToColumnStats,
deltaRowCount);
}
}
diff --git a/regression-test/suites/nereids_p0/delta_row/delta_row.groovy
b/regression-test/suites/nereids_p0/delta_row/delta_row.groovy
new file mode 100644
index 00000000000..1550d3f6b15
--- /dev/null
+++ b/regression-test/suites/nereids_p0/delta_row/delta_row.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("delta_row") {
+ String database = context.config.getDbNameByFile(context.file)
+ sql """
+ drop database if exists ${database};
+ create database ${database};
+ use ${database};
+ CREATE TABLE IF NOT EXISTS t (
+ k int(11) null comment "",
+ v string replace null comment "",
+ ) engine=olap
+ DISTRIBUTED BY HASH(k) BUCKETS 5 properties("replication_num" = "1");
+
+ insert into t values (1, "a"),(2, "b"),(3, 'c'),(4,'d');
+ analyze table t with sync;
+ """
+ explain {
+ sql "physical plan select * from t where k > 6"
+ contains("stats=0 ")
+ contains("stats=4,")
+ // PhysicalResultSink[120] ( outputExprs=[k#0, v#1] )
+ // +--PhysicalDistribute[117]@@1 (
distributionSpec=DistributionSpecGather, stats=0 )
+ // +--PhysicalFilter[114]@1 ( predicates=(k#0 > 6), stats=0 )
+ // +--PhysicalOlapScan[111]@0 (
qualified=internal.default_cluster:regression_test_nereids_p0_delta_row.t,
stats=4, fr=Optional[1] )
+ }
+
+ sql "set global enable_auto_analyze=false;"
+
+ sql "insert into t values (10, 'c');"
+ explain {
+ sql "physical plan select * from t where k > 6"
+ contains("stats=0.5 ")
+ contains("stats=5(1),")
+ notContains("stats=0 ")
+ notContains("stats=4,")
+ // PhysicalResultSink[120] ( outputExprs=[k#0, v#1] )
+ // +--PhysicalDistribute[117]@@1 (
distributionSpec=DistributionSpecGather, stats=0.5 )
+ // +--PhysicalFilter[114]@1 ( predicates=(k#0 > 6), stats=0.5 )
+ // +--PhysicalOlapScan[111]@0 (
qualified=internal.default_cluster:regression_test_nereids_p0_delta_row.t,
stats=5(1), fr=Optional[1] )
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]