This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 3dc6ada26a1 [feat](nereids) support nullSafeEqual estimation #31616 (#31924)
3dc6ada26a1 is described below
commit 3dc6ada26a15e3e3a8859a07db40e5442807e7ac
Author: minghong <[email protected]>
AuthorDate: Thu Mar 7 20:02:09 2024 +0800
[feat](nereids) support nullSafeEqual estimation #31616 (#31924)
---
.../doris/nereids/stats/FilterEstimation.java | 15 ++++++---
.../org/apache/doris/statistics/Statistics.java | 6 +++-
.../doris/nereids/stats/FilterEstimationTest.java | 36 ++++++++++++++++++++++
3 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 86f955eba3f..c56a7d9f5bc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -33,6 +33,7 @@ import org.apache.doris.nereids.trees.expressions.LessThan;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Like;
import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
@@ -239,7 +240,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
Expression left = cp.left();
Expression right = cp.right();
if (cp instanceof EqualPredicate) {
- return estimateColumnEqualToColumn(left, statsForLeft, right, statsForRight, context);
+ return estimateColumnEqualToColumn(left, statsForLeft, right, statsForRight,
+ cp instanceof NullSafeEqual, context);
}
if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
return estimateColumnLessThanColumn(right, statsForRight, left, statsForLeft, context);
@@ -488,7 +490,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
}
private Statistics estimateColumnEqualToColumn(Expression leftExpr, ColumnStatistic leftStats,
- Expression rightExpr, ColumnStatistic rightStats, EstimationContext context) {
+ Expression rightExpr, ColumnStatistic rightStats, boolean keepNull, EstimationContext context) {
StatisticRange leftRange = StatisticRange.from(leftStats, leftExpr.getDataType());
StatisticRange rightRange = StatisticRange.from(rightStats, rightExpr.getDataType());
StatisticRange leftIntersectRight = leftRange.intersect(rightRange);
@@ -497,11 +499,16 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
intersectBuilder.setNdv(intersect.getDistinctValues());
intersectBuilder.setMinValue(intersect.getLow());
intersectBuilder.setMaxValue(intersect.getHigh());
- intersectBuilder.setNumNulls(0);
+ double numNull = 0;
+ if (keepNull) {
+ numNull = Math.min(leftStats.numNulls, rightStats.numNulls);
+ }
+ intersectBuilder.setNumNulls(numNull);
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
- Statistics updatedStatistics = context.statistics.withSel(sel);
+ Statistics updatedStatistics = context.statistics.withSel(sel, numNull);
updatedStatistics.addColumnStats(leftExpr, intersectBuilder.build());
updatedStatistics.addColumnStats(rightExpr, intersectBuilder.build());
+
context.addKeyIfSlot(leftExpr);
context.addKeyIfSlot(rightExpr);
return updatedStatistics;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 5ed440f1bc6..1f35d4fb033 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -94,11 +94,15 @@ public class Statistics {
}
public Statistics withSel(double sel) {
+ return withSel(sel, 0);
+ }
+
+ public Statistics withSel(double sel, double numNull) {
sel = StatsMathUtil.minNonNaN(sel, 1);
if (Double.isNaN(rowCount)) {
return this;
}
- double newCount = rowCount * sel;
+ double newCount = rowCount * sel + numNull;
return new Statistics(newCount, new HashMap<>(expressionToColumnStats));
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 9b37f1119a4..eca15eea364 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -29,6 +29,7 @@ import org.apache.doris.nereids.trees.expressions.IsNull;
import org.apache.doris.nereids.trees.expressions.LessThan;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
@@ -40,6 +41,7 @@ import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.Statistics;
+import org.apache.doris.statistics.StatisticsBuilder;
import com.google.common.collect.Lists;
import org.apache.commons.math3.util.Precision;
@@ -1103,4 +1105,38 @@ class FilterEstimationTest {
Assertions.assertEquals(result.getRowCount(), 10.0, 0.01);
}
+ @Test
+ public void testNullSafeEqual() {
+ ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder()
+ .setNdv(2)
+ .setAvgSizeByte(4)
+ .setNumNulls(8)
+ .setMaxValue(2)
+ .setMinValue(1)
+ .setCount(10);
+ ColumnStatistic aStats = columnStatisticBuilder.build();
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+
+ columnStatisticBuilder.setNdv(2)
+ .setAvgSizeByte(4)
+ .setNumNulls(7)
+ .setMaxValue(2)
+ .setMinValue(1)
+ .setCount(10);
+ ColumnStatistic bStats = columnStatisticBuilder.build();
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+
+ StatisticsBuilder statsBuilder = new StatisticsBuilder();
+ statsBuilder.setRowCount(100);
+ statsBuilder.putColumnStatistics(a, aStats);
+ statsBuilder.putColumnStatistics(b, bStats);
+
+ NullSafeEqual nse = new NullSafeEqual(a, b);
+ FilterEstimation estimator = new FilterEstimation();
+ Statistics resultNse = estimator.estimate(nse, statsBuilder.build());
+
+ EqualTo eq = new EqualTo(a, b);
+ Statistics resultEq = estimator.estimate(eq, statsBuilder.build());
+ Assertions.assertEquals(7, resultNse.getRowCount() - resultEq.getRowCount());
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]