This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b12e627fb2d [fix](nereids) avoid normalize ColumnStatistics.numNull 
twice (#49891)
b12e627fb2d is described below

commit b12e627fb2df701b2400b293b74df691708c7dec
Author: minghong <[email protected]>
AuthorDate: Wed Apr 23 07:21:01 2025 +0800

    [fix](nereids) avoid normalize ColumnStatistics.numNull twice (#49891)
    
    ### What problem does this PR solve?
    Take the predicate "expr1 AND expr2" as an example.
    After deriving stats for expr1, the stats are already normalized, and
    num_null has been adjusted according to rowCount.
    Hence num_null should not be normalized a second time after deriving the
    "and".
---
 .../doris/nereids/stats/FilterEstimation.java      |  2 +-
 .../doris/nereids/stats/FilterEstimationTest.java  | 34 ++++++++++++++++++++--
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 9c7fbaaa2f6..61f4bc5f203 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -119,7 +119,7 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
         Preconditions.checkArgument(children.size() > 1, "and expression 
abnormal: " + and);
         for (Expression child : children) {
             outputStats = child.accept(this, new 
EstimationContext(inputStats));
-            outputStats.normalizeColumnStatistics(inputStats.getRowCount(), 
true);
+            outputStats.normalizeColumnStatistics(inputStats.getRowCount(), 
false);
             inputStats = outputStats;
         }
         return outputStats;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 2009a98fbc1..b2adb5846f9 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -1146,7 +1146,7 @@ class FilterEstimationTest {
         Statistics result = filterEstimation.estimate(and, stats);
         // result 1.0->2.0 bc happens because the calculation from 
normalization of
         // "Math.min(columnStatistic.numNulls * factor, rowCount - ndv);"
-        Assertions.assertEquals(result.getRowCount(), 3.5, 0.01);
+        Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
     }
 
     /**
@@ -1214,7 +1214,7 @@ class FilterEstimationTest {
 
         FilterEstimation filterEstimation = new FilterEstimation();
         Statistics result = filterEstimation.estimate(allAnd, stats);
-        Assertions.assertEquals(result.getRowCount(), 2109.0, 10);
+        Assertions.assertEquals(result.getRowCount(), 1809, 10);
     }
 
     /**
@@ -1520,4 +1520,34 @@ class FilterEstimationTest {
         Statistics stats = new FilterEstimation().estimate(predicate, 
statsBuilder.build());
         Assertions.assertEquals(250, stats.getRowCount());
     }
+
+    @Test
+    void testEqualAndIsNull() {
+        // avoid to normalize num-nulls twice
+        Double row = 1000.0;
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder columnStatisticBuilderA = new 
ColumnStatisticBuilder(row)
+                .setNdv(10)
+                .setAvgSizeByte(4)
+                .setNumNulls(0);
+
+        SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+        ColumnStatisticBuilder columnStatisticBuilderB = new 
ColumnStatisticBuilder(row)
+                .setNdv(10)
+                .setAvgSizeByte(4)
+                .setNumNulls(90);
+
+        StatisticsBuilder statsBuilder = new StatisticsBuilder();
+        statsBuilder.setRowCount(row);
+        statsBuilder.putColumnStatistics(a, columnStatisticBuilderA.build());
+        statsBuilder.putColumnStatistics(b, columnStatisticBuilderB.build());
+
+        Expression expr = new And(
+                new EqualTo(a, new IntegerLiteral(1)),
+                new IsNull(b)
+        );
+
+        Statistics result = new FilterEstimation().estimate(expr, 
statsBuilder.build());
+        System.out.println(result);
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to