This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 9915862bf7d40f24a683c8c956c51499f1b1a9fb Author: minghong <[email protected]> AuthorDate: Fri May 10 11:20:02 2024 +0800 [opt](nereids)estimate rowcount for is-null filter when column stats are not available (#34519) * estimate rowcount for is-null filter when column stats are not available --- .../doris/nereids/stats/FilterEstimation.java | 6 +++--- .../noStatsRfPrune/query76.out | 22 +++++++++++----------- .../no_stats_shape/query76.out | 22 +++++++++++----------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 45e6dcd2abc..2286daaa448 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -64,9 +64,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo public static final double DEFAULT_HAVING_COEFFICIENT = 0.01; - public static final double DEFAULT_EQUALITY_COMPARISON_SELECTIVITY = 0.1; public static final double DEFAULT_LIKE_COMPARISON_SELECTIVITY = 0.2; - public static final double DEFAULT_ISNULL_SELECTIVITY = 0.001; + public static final double DEFAULT_ISNULL_SELECTIVITY = 0.005; private Set<Slot> aggSlots; private boolean isOnBaseTable = false; @@ -421,7 +420,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo public Statistics visitIsNull(IsNull isNull, EstimationContext context) { ColumnStatistic childColStats = ExpressionEstimation.estimate(isNull.child(), context.statistics); if (childColStats.isUnKnown()) { - return new StatisticsBuilder(context.statistics).build(); + double row = context.statistics.getRowCount() * DEFAULT_ISNULL_SELECTIVITY; + return new StatisticsBuilder(context.statistics).setRowCount(row).build(); } double outputRowCount = childColStats.numNulls; if (!isOnBaseTable) { diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query76.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query76.out index 4943199ee55..5b7706f2e52 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query76.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query76.out @@ -22,20 +22,20 @@ PhysicalResultSink ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject --------------------------PhysicalOlapScan[date_dim] -----------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() +------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject ----------------------------filter(ws_bill_addr_sk IS NULL) ------------------------------PhysicalOlapScan[web_sales] ---------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[item] -----------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[item] +--------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------PhysicalProject +------------------------PhysicalOlapScan[date_dim] ----------------PhysicalDistribute[DistributionSpecExecutionAny] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query76.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query76.out index 3279ad95df5..671f7b0cbb1 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query76.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query76.out @@ -22,20 +22,20 @@ PhysicalResultSink ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject --------------------------PhysicalOlapScan[date_dim] -----------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ws_item_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ws_item_sk] +------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject ----------------------------filter(ws_bill_addr_sk IS NULL) ------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 ---------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[item] -----------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[item] +--------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------PhysicalProject +------------------------PhysicalOlapScan[date_dim] ----------------PhysicalDistribute[DistributionSpecExecutionAny] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
