This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 3dc6ada26a1 [feat](nereids) support nullSafeEqual estimation #31616 (#31924)
3dc6ada26a1 is described below

commit 3dc6ada26a15e3e3a8859a07db40e5442807e7ac
Author: minghong <[email protected]>
AuthorDate: Thu Mar 7 20:02:09 2024 +0800

    [feat](nereids) support nullSafeEqual estimation #31616 (#31924)
---
 .../doris/nereids/stats/FilterEstimation.java      | 15 ++++++---
 .../org/apache/doris/statistics/Statistics.java    |  6 +++-
 .../doris/nereids/stats/FilterEstimationTest.java  | 36 ++++++++++++++++++++++
 3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 86f955eba3f..c56a7d9f5bc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -33,6 +33,7 @@ import org.apache.doris.nereids.trees.expressions.LessThan;
 import org.apache.doris.nereids.trees.expressions.LessThanEqual;
 import org.apache.doris.nereids.trees.expressions.Like;
 import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
 import org.apache.doris.nereids.trees.expressions.Or;
 import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
@@ -239,7 +240,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
         Expression left = cp.left();
         Expression right = cp.right();
         if (cp instanceof EqualPredicate) {
-            return estimateColumnEqualToColumn(left, statsForLeft, right, 
statsForRight, context);
+            return estimateColumnEqualToColumn(left, statsForLeft, right, 
statsForRight,
+                    cp instanceof NullSafeEqual, context);
         }
         if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
             return estimateColumnLessThanColumn(right, statsForRight, left, 
statsForLeft, context);
@@ -488,7 +490,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
     }
 
     private Statistics estimateColumnEqualToColumn(Expression leftExpr, 
ColumnStatistic leftStats,
-            Expression rightExpr, ColumnStatistic rightStats, 
EstimationContext context) {
+            Expression rightExpr, ColumnStatistic rightStats, boolean 
keepNull, EstimationContext context) {
         StatisticRange leftRange = StatisticRange.from(leftStats, 
leftExpr.getDataType());
         StatisticRange rightRange = StatisticRange.from(rightStats, 
rightExpr.getDataType());
         StatisticRange leftIntersectRight = leftRange.intersect(rightRange);
@@ -497,11 +499,16 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
         intersectBuilder.setNdv(intersect.getDistinctValues());
         intersectBuilder.setMinValue(intersect.getLow());
         intersectBuilder.setMaxValue(intersect.getHigh());
-        intersectBuilder.setNumNulls(0);
+        double numNull = 0;
+        if (keepNull) {
+            numNull = Math.min(leftStats.numNulls, rightStats.numNulls);
+        }
+        intersectBuilder.setNumNulls(numNull);
         double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, 
rightStats.ndv));
-        Statistics updatedStatistics = context.statistics.withSel(sel);
+        Statistics updatedStatistics = context.statistics.withSel(sel, 
numNull);
         updatedStatistics.addColumnStats(leftExpr, intersectBuilder.build());
         updatedStatistics.addColumnStats(rightExpr, intersectBuilder.build());
+
         context.addKeyIfSlot(leftExpr);
         context.addKeyIfSlot(rightExpr);
         return updatedStatistics;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 5ed440f1bc6..1f35d4fb033 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -94,11 +94,15 @@ public class Statistics {
     }
 
     public Statistics withSel(double sel) {
+        return withSel(sel, 0);
+    }
+
+    public Statistics withSel(double sel, double numNull) {
         sel = StatsMathUtil.minNonNaN(sel, 1);
         if (Double.isNaN(rowCount)) {
             return this;
         }
-        double newCount = rowCount * sel;
+        double newCount = rowCount * sel + numNull;
         return new Statistics(newCount, new 
HashMap<>(expressionToColumnStats));
     }
 
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 9b37f1119a4..eca15eea364 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -29,6 +29,7 @@ import org.apache.doris.nereids.trees.expressions.IsNull;
 import org.apache.doris.nereids.trees.expressions.LessThan;
 import org.apache.doris.nereids.trees.expressions.LessThanEqual;
 import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
 import org.apache.doris.nereids.trees.expressions.Or;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
@@ -40,6 +41,7 @@ import org.apache.doris.nereids.types.IntegerType;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.Statistics;
+import org.apache.doris.statistics.StatisticsBuilder;
 
 import com.google.common.collect.Lists;
 import org.apache.commons.math3.util.Precision;
@@ -1103,4 +1105,38 @@ class FilterEstimationTest {
         Assertions.assertEquals(result.getRowCount(), 10.0, 0.01);
     }
 
+    @Test
+    public void testNullSafeEqual() {
+        ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        ColumnStatistic aStats = columnStatisticBuilder.build();
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+
+        columnStatisticBuilder.setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(7)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        ColumnStatistic bStats = columnStatisticBuilder.build();
+        SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+
+        StatisticsBuilder statsBuilder = new StatisticsBuilder();
+        statsBuilder.setRowCount(100);
+        statsBuilder.putColumnStatistics(a, aStats);
+        statsBuilder.putColumnStatistics(b, bStats);
+
+        NullSafeEqual nse = new NullSafeEqual(a, b);
+        FilterEstimation estimator = new FilterEstimation();
+        Statistics resultNse = estimator.estimate(nse, statsBuilder.build());
+
+        EqualTo eq = new EqualTo(a, b);
+        Statistics resultEq = estimator.estimate(eq, statsBuilder.build());
+        Assertions.assertEquals(7, resultNse.getRowCount() - 
resultEq.getRowCount());
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to