This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6f6054d37eb [fix](nereids)filter estimation for slot=unknown (#39592)
6f6054d37eb is described below
commit 6f6054d37eb7f7d3123b69c96761e6610325a215
Author: minghong <[email protected]>
AuthorDate: Mon Aug 26 11:39:45 2024 +0800
[fix](nereids)filter estimation for slot=unknown (#39592)
## Proposed changes
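Detect a new pattern: slot = unknown, i.e. an equality predicate whose right-hand side has unknown column statistics.
For example, suppose slot.ndv = 5 and the input row count is 100.
The expected filter result is 20 rows (100 / 5), but on master the estimated result is 0.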
Issue Number: close #xxx
<!--Describe your changes.-->
---
.../doris/nereids/stats/FilterEstimation.java | 26 ++++++--
.../doris/nereids/stats/FilterEstimationTest.java | 75 ++++++++++++++++++++++
2 files changed, 95 insertions(+), 6 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index d44e6198170..5569580d29b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -331,14 +331,28 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
ColumnStatistic statsForRight,
EstimationContext context) {
double selectivity;
- double ndv = statsForLeft.ndv;
- double val = statsForRight.maxValue;
- if (val > statsForLeft.maxValue || val < statsForLeft.minValue) {
- selectivity = 0.0;
+ if (statsForLeft.isUnKnown) {
+ selectivity = DEFAULT_INEQUALITY_COEFFICIENT;
} else {
- selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv);
+ double ndv = statsForLeft.ndv;
+ if (statsForRight.isUnKnown) {
+ if (ndv >= 1.0) {
+ selectivity = 1.0 / ndv;
+ } else {
+ selectivity = DEFAULT_INEQUALITY_COEFFICIENT;
+ }
+ } else {
+ double val = statsForRight.maxValue;
+ if (val > statsForLeft.maxValue || val <
statsForLeft.minValue) {
+ selectivity = 0.0;
+ } else if (ndv >= 1.0) {
+ selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv);
+ } else {
+ selectivity = DEFAULT_INEQUALITY_COEFFICIENT;
+ }
+ selectivity = getNotNullSelectivity(statsForLeft, selectivity);
+ }
}
- selectivity = getNotNullSelectivity(statsForLeft, selectivity);
Statistics equalStats = context.statistics.withSel(selectivity);
Expression left = cp.left();
equalStats.addColumnStats(left, statsForRight);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index dd5a38a4a62..d7c44e082cf 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -33,6 +33,7 @@ import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
@@ -177,6 +178,80 @@ class FilterEstimationTest {
Assertions.assertEquals(10, aStatsEst.ndv);
}
+ @Test
+ public void knownEqualToUnknown() {
+ SlotReference ym = new SlotReference("a", new VarcharType(7));
+ double rowCount = 404962.0;
+ double ndv = 14.0;
+ ColumnStatistic ymStats = new ColumnStatisticBuilder()
+ .setCount(rowCount)
+ .setNdv(ndv)
+ .setMinExpr(new StringLiteral("2023-07"))
+ .setMinValue(14126741000630328.000000)
+ .setMaxExpr(new StringLiteral("2024-08"))
+ .setMaxValue(14126741017407544.000000)
+ .setAvgSizeByte(7)
+ .build();
+ Statistics stats = new StatisticsBuilder()
+ .setRowCount(404962).putColumnStatistics(ym, ymStats)
+ .build();
+
+ EqualTo predicate = new EqualTo(ym,
+ new Left(new
org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"),
+ new IntegerLiteral(7))
+ );
+ FilterEstimation filterEstimation = new FilterEstimation();
+ Statistics outStats = filterEstimation.estimate(predicate, stats);
+ Assertions.assertEquals(rowCount / ndv, outStats.getRowCount());
+ }
+
+ @Test
+ public void knownEqualToUnknownWithLittleNdv() {
+ SlotReference ym = new SlotReference("a", new VarcharType(7));
+ double rowCount = 404962.0;
+ double ndv = 0.5;
+ ColumnStatistic ymStats = new ColumnStatisticBuilder()
+ .setCount(rowCount)
+ .setNdv(ndv)
+ .setMinExpr(new StringLiteral("2023-07"))
+ .setMinValue(14126741000630328.000000)
+ .setMaxExpr(new StringLiteral("2024-08"))
+ .setMaxValue(14126741017407544.000000)
+ .setAvgSizeByte(7)
+ .build();
+ Statistics stats = new StatisticsBuilder()
+ .setRowCount(404962).putColumnStatistics(ym, ymStats)
+ .build();
+
+ EqualTo predicate = new EqualTo(ym,
+ new Left(new
org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"),
+ new IntegerLiteral(7))
+ );
+ FilterEstimation filterEstimation = new FilterEstimation();
+ Statistics outStats = filterEstimation.estimate(predicate, stats);
+ Assertions.assertEquals(rowCount *
FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT,
+ outStats.getRowCount());
+ }
+
+ @Test
+ public void unknownEqualToUnknown() {
+ SlotReference ym = new SlotReference("a", new VarcharType(7));
+ ColumnStatistic ymStats = ColumnStatistic.UNKNOWN;
+ double rowCount = 404962.0;
+ Statistics stats = new StatisticsBuilder()
+ .setRowCount(rowCount).putColumnStatistics(ym, ymStats)
+ .build();
+
+ EqualTo predicate = new EqualTo(ym,
+ new Left(new
org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"),
+ new IntegerLiteral(7))
+ );
+ FilterEstimation filterEstimation = new FilterEstimation();
+ Statistics outStats = filterEstimation.estimate(predicate, stats);
+ Assertions.assertEquals(rowCount *
FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT,
+ outStats.getRowCount());
+ }
+
// a > 500 and b < 100 or a = c
@Test
public void test1() {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]