This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new f0017616ce0 [fix](nereids) derive column stats for 'expr and A is not null' #37235 (#37236)
f0017616ce0 is described below

commit f0017616ce0959e86ff08aa5c32a4d30ea6c739e
Author: minghong <[email protected]>
AuthorDate: Fri Jul 5 16:59:22 2024 +0800

    [fix](nereids) derive column stats for 'expr and A is not null' #37235 (#37236)
    
    ## Proposed changes
    pick #37235
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../doris/nereids/stats/FilterEstimation.java      |  1 +
 .../org/apache/doris/statistics/Statistics.java    | 18 +++++++++++
 .../doris/nereids/stats/FilterEstimationTest.java  | 35 +++++++++++++++++++++-
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 33f6318808b..0ce10ec0c3c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -104,6 +104,7 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
         Expression leftExpr = predicate.child(0);
         Expression rightExpr = predicate.child(1);
         Statistics leftStats = leftExpr.accept(this, context);
+        leftStats = 
leftStats.normalizeByRatio(context.statistics.getRowCount());
         Statistics andStats = rightExpr.accept(this,
                 new EstimationContext(leftStats));
         if (predicate instanceof And) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 1f35d4fb033..da6bf937593 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -191,4 +191,22 @@ public class Statistics {
         }
         return builder.toString();
     }
+
+    public Statistics normalizeByRatio(double originRowCount) {
+        if (rowCount >= originRowCount || rowCount <= 0) {
+            return this;
+        }
+        StatisticsBuilder builder = new StatisticsBuilder(this);
+        double ratio = rowCount / originRowCount;
+        for (Entry<Expression, ColumnStatistic> entry : 
expressionToColumnStats.entrySet()) {
+            ColumnStatistic colStats = entry.getValue();
+            if (colStats.numNulls != 0 || colStats.ndv > rowCount) {
+                ColumnStatisticBuilder colStatsBuilder = new 
ColumnStatisticBuilder(colStats);
+                colStatsBuilder.setNumNulls(colStats.numNulls * ratio);
+                colStatsBuilder.setNdv(Math.min(rowCount - 
colStatsBuilder.getNumNulls(), colStats.ndv));
+                builder.putColumnStatistics(entry.getKey(), 
colStatsBuilder.build());
+            }
+        }
+        return builder.build();
+    }
 }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index da8159ef6b9..de5e4bba877 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -206,7 +206,7 @@ class FilterEstimationTest {
         Statistics stat = new Statistics(1000, slotToColumnStat);
         FilterEstimation filterEstimation = new FilterEstimation();
         Statistics expected = filterEstimation.estimate(or, stat);
-        Assertions.assertEquals(51.9, expected.getRowCount(), 0.1);
+        Assertions.assertEquals(51, expected.getRowCount(), 1);
     }
 
     // a > 500 and b < 100 or a > c
@@ -1059,6 +1059,39 @@ class FilterEstimationTest {
         Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
     }
 
+    /**
+     * a = 1 and b is not null
+     */
+    @Test
+    public void testNumNullsAndTwoCol() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(0)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        EqualTo equalTo = new EqualTo(a, int1);
+        SlotReference b = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        Not isNotNull = new Not(new IsNull(b));
+        And and = new And(equalTo, isNotNull);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builderA.build());
+        stats.addColumnStats(b, builderB.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(and, stats);
+        Assertions.assertEquals(result.getRowCount(), 1.0, 0.01);
+    }
+
     /**
      * a >= 1 or a <= 2
      */


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to