This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new f0017616ce0 [fix](nereids) derive column stats for 'expr and A is not
null' #37235 (#37236)
f0017616ce0 is described below
commit f0017616ce0959e86ff08aa5c32a4d30ea6c739e
Author: minghong <[email protected]>
AuthorDate: Fri Jul 5 16:59:22 2024 +0800
[fix](nereids) derive column stats for 'expr and A is not null' #37235
(#37236)
## Proposed changes
Cherry-pick of #37235 to branch-2.0.
Issue Number: close #xxx
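<!--Describe your changes.-->
When estimating a compound predicate, scale each column's null count and NDV in the left operand's statistics to the filtered row count (new `Statistics.normalizeByRatio`) before estimating the right operand, so that a right operand such as `A is not null` sees null counts consistent with the already-filtered rows.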
---
.../doris/nereids/stats/FilterEstimation.java | 1 +
.../org/apache/doris/statistics/Statistics.java | 18 +++++++++++
.../doris/nereids/stats/FilterEstimationTest.java | 35 +++++++++++++++++++++-
3 files changed, 53 insertions(+), 1 deletion(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 33f6318808b..0ce10ec0c3c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -104,6 +104,7 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
Expression leftExpr = predicate.child(0);
Expression rightExpr = predicate.child(1);
Statistics leftStats = leftExpr.accept(this, context);
+ leftStats =
leftStats.normalizeByRatio(context.statistics.getRowCount());
Statistics andStats = rightExpr.accept(this,
new EstimationContext(leftStats));
if (predicate instanceof And) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 1f35d4fb033..da6bf937593 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -191,4 +191,22 @@ public class Statistics {
}
return builder.toString();
}
+
+ /**
+ * Scales per-column null counts and NDVs to this object's row count, relative
+ * to {@code originRowCount}.
+ *
+ * Returns {@code this} unchanged when {@code rowCount >= originRowCount} or
+ * {@code rowCount <= 0}. Otherwise, for every column whose {@code numNulls} is
+ * non-zero or whose {@code ndv} exceeds {@code rowCount}, {@code numNulls} is
+ * multiplied by {@code rowCount / originRowCount} and {@code ndv} is capped at
+ * {@code rowCount - scaledNumNulls}. Columns that match neither condition are
+ * not touched by this method.
+ *
+ * @param originRowCount row count that this object's {@code rowCount} is compared against
+ * @return {@code this}, or a new Statistics built from a StatisticsBuilder seeded with this object
+ */
+ public Statistics normalizeByRatio(double originRowCount) {
+ // Nothing to scale when the row count did not shrink, or when it is not positive.
+ if (rowCount >= originRowCount || rowCount <= 0) {
+ return this;
+ }
+ StatisticsBuilder builder = new StatisticsBuilder(this);
+ // 0 < ratio < 1 given the guard above (assuming originRowCount is not NaN).
+ double ratio = rowCount / originRowCount;
+ for (Entry<Expression, ColumnStatistic> entry :
expressionToColumnStats.entrySet()) {
+ ColumnStatistic colStats = entry.getValue();
+ // Only rewrite columns that have nulls to scale or an NDV above the row count.
+ if (colStats.numNulls != 0 || colStats.ndv > rowCount) {
+ ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(colStats);
+ colStatsBuilder.setNumNulls(colStats.numNulls * ratio);
+ // NDV is capped using the already-scaled null count read back from the builder.
+ colStatsBuilder.setNdv(Math.min(rowCount -
colStatsBuilder.getNumNulls(), colStats.ndv));
+ builder.putColumnStatistics(entry.getKey(),
colStatsBuilder.build());
+ }
+ }
+ return builder.build();
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index da8159ef6b9..de5e4bba877 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -206,7 +206,7 @@ class FilterEstimationTest {
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(or, stat);
- Assertions.assertEquals(51.9, expected.getRowCount(), 0.1);
+ Assertions.assertEquals(51, expected.getRowCount(), 1);
}
// a > 500 and b < 100 or a > c
@@ -1059,6 +1059,39 @@ class FilterEstimationTest {
Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
}
+ /**
+ * Filter under test: a = 1 and b is not null.
+ *
+ * Column a has no nulls; column b has 8 nulls out of 10 rows. The estimated
+ * row count for the conjunction is expected to be 1 (tolerance 0.01).
+ * NOTE(review): presumably a = 1 keeps about half of the rows (ndv = 2) and b's
+ * null count is scaled by the same ratio before "b is not null" is applied --
+ * confirm against FilterEstimation.
+ */
+ @Test
+ public void testNumNullsAndTwoCol() {
+ // Column a: two distinct values in [1, 2], no nulls.
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
+ .setNdv(2)
+ .setAvgSizeByte(4)
+ .setNumNulls(0)
+ .setMaxValue(2)
+ .setMinValue(1)
+ .setCount(10);
+ IntegerLiteral int1 = new IntegerLiteral(1);
+ EqualTo equalTo = new EqualTo(a, int1);
+ // Column b: same value range, but 8 nulls. It gets its own name so the two
+ // slots can be told apart in plans and failure output.
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
+ .setNdv(2)
+ .setAvgSizeByte(4)
+ .setNumNulls(8)
+ .setMaxValue(2)
+ .setMinValue(1)
+ .setCount(10);
+ Not isNotNull = new Not(new IsNull(b));
+ And and = new And(equalTo, isNotNull);
+ Statistics stats = new Statistics(10, new HashMap<>());
+ stats.addColumnStats(a, builderA.build());
+ stats.addColumnStats(b, builderB.build());
+ FilterEstimation filterEstimation = new FilterEstimation();
+ Statistics result = filterEstimation.estimate(and, stats);
+ // JUnit argument order is (expected, actual, delta).
+ Assertions.assertEquals(1.0, result.getRowCount(), 0.01);
+ }
+
/**
* a >= 1 or a <= 2
*/
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]