This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2827bc1a39 [Fix](nereids) fix a bug in ColumnStatistics.numNulls
update #21220
2827bc1a39 is described below
commit 2827bc1a3968f3c9bcda3ebd485a90e0d0a17e31
Author: minghong <[email protected]>
AuthorDate: Mon Jul 3 10:51:23 2023 +0800
[Fix](nereids) fix a bug in ColumnStatistics.numNulls update #21220
No impact on TPC-H plans.
Changes the plan of TPC-DS query 95, improving its run time:
before 1.63 sec, after 1.30 sec.
---
.../org/apache/doris/statistics/Statistics.java | 4 +-
.../nereids_tpcds_shape_sf100_p0/shape/query21.out | 3 +-
.../nereids_tpcds_shape_sf100_p0/shape/query95.out | 54 +++++++++++-----------
3 files changed, 31 insertions(+), 30 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index dd8d470e24..ff889d3edc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -111,8 +111,8 @@ public class Statistics {
ColumnStatistic columnStatistic = entry.getValue();
ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(columnStatistic);
columnStatisticBuilder.setNdv(Math.min(columnStatistic.ndv,
rowCount));
- double nullFactor = (rowCount - columnStatistic.numNulls) /
rowCount;
- columnStatisticBuilder.setNumNulls(nullFactor * rowCount);
+ double numNulls = Math.min(columnStatistic.numNulls, rowCount -
columnStatistic.ndv);
+ columnStatisticBuilder.setNumNulls(numNulls);
columnStatisticBuilder.setCount(rowCount);
statistics.addColumnStats(entry.getKey(),
columnStatisticBuilder.build());
}
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query21.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query21.out
index 8361e28dac..32df595226 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query21.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query21.out
@@ -21,4 +21,5 @@ PhysicalTopN
------------------------PhysicalOlapScan[date_dim]
----------------PhysicalDistribute
------------------PhysicalProject
---------------------PhysicalOlapScan[warehouse]
\ No newline at end of file
+--------------------PhysicalOlapScan[warehouse]
+
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out
index ccb9b6ff50..d6ec3488d9 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out
@@ -14,38 +14,38 @@ CteAnchor[cteId= ( CTEId#3=] )
----PhysicalTopN
------PhysicalProject
--------hashAgg[GLOBAL]
-----------hashAgg[LOCAL]
-------------PhysicalDistribute
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](ws1.ws_ship_date_sk = date_dim.d_date_sk)
-------------------PhysicalProject
---------------------filter((date_dim.d_date >= 1999-02-01)(cast(d_date as
DATETIMEV2(0)) <= cast(days_add(cast('1999-2-01' as DATEV2), INTERVAL 60 DAY)
as DATETIMEV2(0))))
-----------------------PhysicalOlapScan[date_dim]
-------------------PhysicalDistribute
---------------------hashJoin[RIGHT_SEMI_JOIN](ws1.ws_order_number =
web_returns.wr_order_number)
+------------------hashJoin[RIGHT_SEMI_JOIN](ws1.ws_order_number =
web_returns.wr_order_number)
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN](web_returns.wr_order_number =
ws_wh.ws_order_number)
+--------------------------PhysicalProject
+----------------------------CteConsumer[cteId= ( CTEId#3=] )
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[web_returns]
+--------------------hashJoin[RIGHT_SEMI_JOIN](ws1.ws_order_number =
ws_wh.ws_order_number)
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN](web_returns.wr_order_number =
ws_wh.ws_order_number)
+--------------------------CteConsumer[cteId= ( CTEId#3=] )
+----------------------PhysicalDistribute
+------------------------hashJoin[INNER_JOIN](ws1.ws_web_site_sk =
web_site.web_site_sk)
+--------------------------hashJoin[INNER_JOIN](ws1.ws_ship_addr_sk =
customer_address.ca_address_sk)
----------------------------PhysicalProject
-------------------------------CteConsumer[cteId= ( CTEId#3=] )
-----------------------------PhysicalDistribute
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_returns]
-----------------------hashJoin[RIGHT_SEMI_JOIN](ws1.ws_order_number =
ws_wh.ws_order_number)
-------------------------PhysicalDistribute
---------------------------PhysicalProject
-----------------------------CteConsumer[cteId= ( CTEId#3=] )
-------------------------PhysicalDistribute
---------------------------hashJoin[INNER_JOIN](ws1.ws_web_site_sk =
web_site.web_site_sk)
-----------------------------hashJoin[INNER_JOIN](ws1.ws_ship_addr_sk =
customer_address.ca_address_sk)
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((cast(ca_state as VARCHAR(*)) = 'NC'))
-------------------------------------PhysicalOlapScan[customer_address]
+------------------------------PhysicalOlapScan[web_sales]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
---------------------------------filter((cast(web_company_name as VARCHAR(*)) =
'pri'))
-----------------------------------PhysicalOlapScan[web_site]
+--------------------------------filter((cast(ca_state as VARCHAR(*)) = 'NC'))
+----------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((cast(web_company_name as VARCHAR(*)) =
'pri'))
+--------------------------------PhysicalOlapScan[web_site]
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------filter((date_dim.d_date >= 1999-02-01)(cast(d_date as
DATETIMEV2(0)) <= cast(days_add(cast('1999-2-01' as DATEV2), INTERVAL 60 DAY)
as DATETIMEV2(0))))
+------------------------PhysicalOlapScan[date_dim]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org