This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 9dc55f90ebb [opt](nereids) set lower bound for range-selectivity(2.1)
(#41061)
9dc55f90ebb is described below
commit 9dc55f90ebb1bda9531275e5c40d403296f12f6b
Author: minghong <[email protected]>
AuthorDate: Sun Sep 22 07:32:22 2024 +0800
[opt](nereids) set lower bound for range-selectivity(2.1) (#41061)
## Proposed changes
Cherry-pick of #40089 onto branch-2.1.
Issue Number: close #xxx
<!--Describe your changes.-->
---
.../doris/nereids/stats/FilterEstimation.java | 5 +++
.../data/nereids_hint_tpcds_p0/shape/query12.out | 17 ++++----
.../data/nereids_hint_tpcds_p0/shape/query80.out | 49 ++++++++++------------
.../shape/query12.out | 15 ++++---
.../shape/query80.out | 49 ++++++++++------------
5 files changed, 66 insertions(+), 69 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 33b7e02b332..65db271e394 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -71,6 +71,9 @@ import java.util.function.Predicate;
*/
public class FilterEstimation extends ExpressionVisitor<Statistics,
EstimationContext> {
public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5;
+ // Range selectivity is prone to producing outliers, so we clamp it to this lower bound.
+ // The bound approximates selecting one month out of fifty years (1/600, about 0.0016).
+ public static final double RANGE_SELECTIVITY_THRESHOLD = 0.0016;
public static final double DEFAULT_IN_COEFFICIENT = 1.0 / 3.0;
public static final double DEFAULT_HAVING_COEFFICIENT = 0.01;
@@ -627,6 +630,8 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
: intersectRange.getDistinctValues() /
leftRange.getDistinctValues();
if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel !=
1.0)) {
sel = DEFAULT_INEQUALITY_COEFFICIENT;
+ } else if (sel < RANGE_SELECTIVITY_THRESHOLD) {
+ sel = RANGE_SELECTIVITY_THRESHOLD;
}
sel = getNotNullSelectivity(leftStats, sel);
updatedStatistics = context.statistics.withSel(sel);
diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out
b/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out
index 40646f2dda1..03274a28fef 100644
--- a/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out
+++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out
@@ -13,16 +13,15 @@ PhysicalResultSink
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF1 i_item_sk->[ws_item_sk]
---------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
+------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
RF1
---------------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_date <= '2001-07-15')
and (date_dim.d_date >= '2001-06-15'))
---------------------------------------PhysicalOlapScan[date_dim]
---------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------filter((date_dim.d_date <= '2001-07-15') and
(date_dim.d_date >= '2001-06-15'))
+------------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------PhysicalProject
------------------------------filter(i_category IN ('Books', 'Electronics',
'Men'))
--------------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out
b/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out
index 88976f6717b..9981b31b55d 100644
--- a/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out
+++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out
@@ -16,34 +16,31 @@ PhysicalResultSink
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------hashJoin[RIGHT_OUTER_JOIN]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5
ss_ticket_number->[sr_ticket_number]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_returns] apply RFs:
RF4 RF5
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=()
build RFs:RF3 s_store_sk->[ss_store_sk]
---------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
-----------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk))
otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
-----------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1 RF2 RF3
-------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------PhysicalProject
-----------------------------------------------------filter((date_dim.d_date <=
'2002-09-13') and (date_dim.d_date >= '2002-08-14'))
-------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------------------------------PhysicalProject
---------------------------------------------------filter((promotion.p_channel_tv
= 'N'))
-----------------------------------------------------PhysicalOlapScan[promotion]
-----------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------PhysicalProject
---------------------------------------------filter((item.i_current_price >
50.00))
-----------------------------------------------PhysicalOlapScan[item]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_returns] apply RFs:
RF4 RF5
+--------------------------------PhysicalProject
+----------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=()
build RFs:RF3 s_store_sk->[ss_store_sk]
+------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
+--------------------------------------PhysicalProject
+----------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk))
otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
+------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------PhysicalProject
+----------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1 RF2 RF3
+--------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------PhysicalProject
+------------------------------------------------filter((date_dim.d_date <=
'2002-09-13') and (date_dim.d_date >= '2002-08-14'))
+--------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------------PhysicalProject
+----------------------------------------------filter((promotion.p_channel_tv =
'N'))
+------------------------------------------------PhysicalOlapScan[promotion]
--------------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[store]
+------------------------------------------filter((item.i_current_price >
50.00))
+--------------------------------------------PhysicalOlapScan[item]
+------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[store]
--------------------PhysicalProject
----------------------hashAgg[GLOBAL]
------------------------PhysicalDistribute[DistributionSpecHash]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out
index 03682c1c406..837bd33960d 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out
@@ -13,15 +13,14 @@ PhysicalResultSink
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF1 i_item_sk->[ws_item_sk]
---------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
+----------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
-------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_date <= '2001-07-15') and
(date_dim.d_date >= '2001-06-15'))
-------------------------------------PhysicalOlapScan[date_dim]
---------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------filter((date_dim.d_date <= '2001-07-15') and
(date_dim.d_date >= '2001-06-15'))
+----------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------PhysicalProject
------------------------------filter(i_category IN ('Books', 'Electronics',
'Men'))
--------------------------------PhysicalOlapScan[item]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out
index 88976f6717b..9981b31b55d 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out
@@ -16,34 +16,31 @@ PhysicalResultSink
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------hashJoin[RIGHT_OUTER_JOIN]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5
ss_ticket_number->[sr_ticket_number]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_returns] apply RFs:
RF4 RF5
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=()
build RFs:RF3 s_store_sk->[ss_store_sk]
---------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
-----------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk))
otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
-----------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1 RF2 RF3
-------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------PhysicalProject
-----------------------------------------------------filter((date_dim.d_date <=
'2002-09-13') and (date_dim.d_date >= '2002-08-14'))
-------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------------------------------PhysicalProject
---------------------------------------------------filter((promotion.p_channel_tv
= 'N'))
-----------------------------------------------------PhysicalOlapScan[promotion]
-----------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------PhysicalProject
---------------------------------------------filter((item.i_current_price >
50.00))
-----------------------------------------------PhysicalOlapScan[item]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_returns] apply RFs:
RF4 RF5
+--------------------------------PhysicalProject
+----------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=()
build RFs:RF3 s_store_sk->[ss_store_sk]
+------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
+--------------------------------------PhysicalProject
+----------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk))
otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
+------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------PhysicalProject
+----------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1 RF2 RF3
+--------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------PhysicalProject
+------------------------------------------------filter((date_dim.d_date <=
'2002-09-13') and (date_dim.d_date >= '2002-08-14'))
+--------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------------PhysicalProject
+----------------------------------------------filter((promotion.p_channel_tv =
'N'))
+------------------------------------------------PhysicalOlapScan[promotion]
--------------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[store]
+------------------------------------------filter((item.i_current_price >
50.00))
+--------------------------------------------PhysicalOlapScan[item]
+------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[store]
--------------------PhysicalProject
----------------------hashAgg[GLOBAL]
------------------------PhysicalDistribute[DistributionSpecHash]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]