This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 9915862bf7d40f24a683c8c956c51499f1b1a9fb
Author: minghong <[email protected]>
AuthorDate: Fri May 10 11:20:02 2024 +0800

    [opt](nereids)estimate rowcount for is-null filter when column stats are not available (#34519)
    
    * estimate rowcount for is-null filter when column stats are not available
---
 .../doris/nereids/stats/FilterEstimation.java      |  6 +++---
 .../noStatsRfPrune/query76.out                     | 22 +++++++++++-----------
 .../no_stats_shape/query76.out                     | 22 +++++++++++-----------
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 45e6dcd2abc..2286daaa448 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -64,9 +64,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
 
     public static final double DEFAULT_HAVING_COEFFICIENT = 0.01;
 
-    public static final double DEFAULT_EQUALITY_COMPARISON_SELECTIVITY = 0.1;
     public static final double DEFAULT_LIKE_COMPARISON_SELECTIVITY = 0.2;
-    public static final double DEFAULT_ISNULL_SELECTIVITY = 0.001;
+    public static final double DEFAULT_ISNULL_SELECTIVITY = 0.005;
     private Set<Slot> aggSlots;
 
     private boolean isOnBaseTable = false;
@@ -421,7 +420,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
     public Statistics visitIsNull(IsNull isNull, EstimationContext context) {
         ColumnStatistic childColStats = ExpressionEstimation.estimate(isNull.child(), context.statistics);
         if (childColStats.isUnKnown()) {
-            return new StatisticsBuilder(context.statistics).build();
+            double row = context.statistics.getRowCount() * DEFAULT_ISNULL_SELECTIVITY;
+            return new StatisticsBuilder(context.statistics).setRowCount(row).build();
         }
         double outputRowCount = childColStats.numNulls;
         if (!isOnBaseTable) {
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query76.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query76.out
index 4943199ee55..5b7706f2e52 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query76.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query76.out
@@ -22,20 +22,20 @@ PhysicalResultSink
 ----------------------PhysicalDistribute[DistributionSpecReplicated]
 ------------------------PhysicalProject
 --------------------------PhysicalOlapScan[date_dim]
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=()
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
+----------------PhysicalProject
+------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=()
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
+------------------------PhysicalDistribute[DistributionSpecHash]
 --------------------------PhysicalProject
 ----------------------------filter(ws_bill_addr_sk IS NULL)
 ------------------------------PhysicalOlapScan[web_sales]
---------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[item]
-----------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[date_dim]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[item]
+--------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[date_dim]
 ----------------PhysicalDistribute[DistributionSpecExecutionAny]
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=()
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query76.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query76.out
index 3279ad95df5..671f7b0cbb1 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query76.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query76.out
@@ -22,20 +22,20 @@ PhysicalResultSink
 ----------------------PhysicalDistribute[DistributionSpecReplicated]
 ------------------------PhysicalProject
 --------------------------PhysicalOlapScan[date_dim]
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ws_item_sk]
+----------------PhysicalProject
+------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ws_item_sk]
+------------------------PhysicalDistribute[DistributionSpecHash]
 --------------------------PhysicalProject
 ----------------------------filter(ws_bill_addr_sk IS NULL)
 ------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3
---------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[item]
-----------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[date_dim]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[item]
+--------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[date_dim]
 ----------------PhysicalDistribute[DistributionSpecExecutionAny]
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to