This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b12e627fb2d [fix](nereids) avoid normalize ColumnStatistics.numNull
twice (#49891)
b12e627fb2d is described below
commit b12e627fb2df701b2400b293b74df691708c7dec
Author: minghong <[email protected]>
AuthorDate: Wed Apr 23 07:21:01 2025 +0800
[fix](nereids) avoid normalize ColumnStatistics.numNull twice (#49891)
### What problem does this PR solve?
Take the conjunction "expr1 and expr2" as an example:
after deriving stats for expr1, the stats are normalized, and num_null is
adjusted according to rowCount.
Hence it is not appropriate to normalize num_null again after deriving "and".
---
.../doris/nereids/stats/FilterEstimation.java | 2 +-
.../doris/nereids/stats/FilterEstimationTest.java | 34 ++++++++++++++++++++--
2 files changed, 33 insertions(+), 3 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 9c7fbaaa2f6..61f4bc5f203 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -119,7 +119,7 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
Preconditions.checkArgument(children.size() > 1, "and expression
abnormal: " + and);
for (Expression child : children) {
outputStats = child.accept(this, new
EstimationContext(inputStats));
- outputStats.normalizeColumnStatistics(inputStats.getRowCount(),
true);
+ outputStats.normalizeColumnStatistics(inputStats.getRowCount(),
false);
inputStats = outputStats;
}
return outputStats;
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 2009a98fbc1..b2adb5846f9 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -1146,7 +1146,7 @@ class FilterEstimationTest {
Statistics result = filterEstimation.estimate(and, stats);
// result 1.0->2.0 bc happens because the calculation from
normalization of
// "Math.min(columnStatistic.numNulls * factor, rowCount - ndv);"
- Assertions.assertEquals(result.getRowCount(), 3.5, 0.01);
+ Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
}
/**
@@ -1214,7 +1214,7 @@ class FilterEstimationTest {
FilterEstimation filterEstimation = new FilterEstimation();
Statistics result = filterEstimation.estimate(allAnd, stats);
- Assertions.assertEquals(result.getRowCount(), 2109.0, 10);
+ Assertions.assertEquals(result.getRowCount(), 1809, 10);
}
/**
@@ -1520,4 +1520,34 @@ class FilterEstimationTest {
Statistics stats = new FilterEstimation().estimate(predicate,
statsBuilder.build());
Assertions.assertEquals(250, stats.getRowCount());
}
+
+ @Test
+ void testEqualAndIsNull() {
+ // avoid to normalize num-nulls twice
+ Double row = 1000.0;
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ ColumnStatisticBuilder columnStatisticBuilderA = new
ColumnStatisticBuilder(row)
+ .setNdv(10)
+ .setAvgSizeByte(4)
+ .setNumNulls(0);
+
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ ColumnStatisticBuilder columnStatisticBuilderB = new
ColumnStatisticBuilder(row)
+ .setNdv(10)
+ .setAvgSizeByte(4)
+ .setNumNulls(90);
+
+ StatisticsBuilder statsBuilder = new StatisticsBuilder();
+ statsBuilder.setRowCount(row);
+ statsBuilder.putColumnStatistics(a, columnStatisticBuilderA.build());
+ statsBuilder.putColumnStatistics(b, columnStatisticBuilderB.build());
+
+ Expression expr = new And(
+ new EqualTo(a, new IntegerLiteral(1)),
+ new IsNull(b)
+ );
+
+ Statistics result = new FilterEstimation().estimate(expr,
statsBuilder.build());
+ System.out.println(result);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]