This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e238a8705f2 [feat](nereids)disable join reorder if column stats is
invalid (#41790)
e238a8705f2 is described below
commit e238a8705f25e2b5bc5a9e95b2025712f98dd682
Author: minghong <[email protected]>
AuthorDate: Mon Oct 28 09:48:30 2024 +0800
[feat](nereids)disable join reorder if column stats is invalid (#41790)
## Proposed changes
disable join reorder if any condition is matched:
1. any table row count is -1
2. any column, whose ndv is 0, but MinExpr or MaxExpr is not null
3. ndv > 10* rowCount
Issue Number: close #xxx
<!--Describe your changes.-->
---
.../org/apache/doris/nereids/NereidsPlanner.java | 4 +-
.../org/apache/doris/nereids/StatementContext.java | 10 ++
.../doris/nereids/stats/StatsCalculator.java | 46 +++++++--
.../data/nereids_hint_tpcds_p0/shape/query64.out | 101 +++++++++----------
.../infer_predicate/infer_intersect_except.out | 11 --
.../suites/nereids_hint_tpcds_p0/load.groovy | 2 +-
.../suites/nereids_p0/stats/invalid_stats.groovy | 111 +++++++++++++++++++++
.../nereids_rules_p0/eager_aggregate/basic.groovy | 4 +-
.../eager_aggregate/basic_one_side.groovy | 3 +-
.../infer_predicate/infer_intersect_except.groovy | 26 ++++-
.../nereids_tpcds_shape_sf1000_p0/load.groovy | 2 +-
.../constraints/load.groovy | 2 +-
.../nereids_tpcds_shape_sf100_p0/load.groovy | 6 +-
.../tpcds_sf100/constraints/load.groovy | 2 +-
.../suites/new_shapes_p0/tpcds_sf100/load.groovy | 6 +-
.../suites/new_shapes_p0/tpcds_sf1000/load.groovy | 2 +-
16 files changed, 247 insertions(+), 91 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 4acae7164f8..7ea92ee73b3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -274,7 +274,9 @@ public class NereidsPlanner extends Planner {
&& !cascadesContext.isLeadingDisableJoinReorder()) {
List<LogicalOlapScan> scans = cascadesContext.getRewritePlan()
.collectToList(LogicalOlapScan.class::isInstance);
-
StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans,
cascadesContext);
+ Optional<String> disableJoinReorderReason = StatsCalculator
+ .disableJoinReorderIfStatsInvalid(scans, cascadesContext);
+
disableJoinReorderReason.ifPresent(statementContext::setDisableJoinReorderReason);
}
optimize();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index 08e1e3fa815..ed64864da50 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -169,6 +169,8 @@ public class StatementContext implements Closeable {
private List<PlannerHook> plannerHooks = new ArrayList<>();
+ private String disableJoinReorderReason;
+
public StatementContext() {
this(ConnectContext.get(), null, 0);
}
@@ -558,4 +560,12 @@ public class StatementContext implements Closeable {
this.tableIdMapping.put(tableIdentifier, tableId);
return tableId;
}
+
+ public Optional<String> getDisableJoinReorderReason() {
+ return Optional.ofNullable(disableJoinReorderReason);
+ }
+
+ public void setDisableJoinReorderReason(String disableJoinReorderReason) {
+ this.disableJoinReorderReason = disableJoinReorderReason;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 2e159ff8b68..d4a9a81fc07 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -221,24 +221,40 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
}
/**
- * disable join reorder if any table row count is not available.
+ * disable join reorder if
+ * 1. any table rowCount is not available, or
+ * 2. col stats ndv=0 but minExpr or maxExpr is not null
+ * 3. ndv > 10 * rowCount
*/
- public static void disableJoinReorderIfTableRowCountNotAvailable(
- List<LogicalOlapScan> scans, CascadesContext context) {
+ public static Optional<String>
disableJoinReorderIfStatsInvalid(List<LogicalOlapScan> scans,
+ CascadesContext context) {
StatsCalculator calculator = new StatsCalculator(context);
+ if (ConnectContext.get() == null) {
+ // ut case
+ return Optional.empty();
+ }
for (LogicalOlapScan scan : scans) {
double rowCount = calculator.getOlapTableRowCount(scan);
- if (rowCount == -1 && ConnectContext.get() != null) {
+ // row count not available
+ if (rowCount == -1) {
+ LOG.info("disable join reorder since row count not available: "
+ + scan.getTable().getNameWithFullQualifiers());
+ return Optional.of("table[" + scan.getTable().getName() + "]
row count is invalid");
+ }
+ // ndv abnormal
+ Optional<String> reason = calculator.checkNdvValidation(scan,
rowCount);
+ if (reason.isPresent()) {
try {
ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce();
- LOG.info("disable join reorder since row count not
available: "
- + scan.getTable().getNameWithFullQualifiers());
+ LOG.info("disable join reorder since col stats invalid: "
+ + reason.get());
} catch (Exception e) {
LOG.info("disableNereidsJoinReorderOnce failed");
}
- return;
+ return reason;
}
}
+ return Optional.empty();
}
/**
@@ -412,6 +428,22 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
return rowCount;
}
+ // check validation of ndv.
+ private Optional<String> checkNdvValidation(OlapScan olapScan, double
rowCount) {
+ for (Slot slot : ((Plan) olapScan).getOutput()) {
+ if (isVisibleSlotReference(slot)) {
+ ColumnStatistic cache =
getColumnStatsFromTableCache((CatalogRelation) olapScan, (SlotReference) slot);
+ if (!cache.isUnKnown) {
+ if ((cache.ndv == 0 && (cache.minExpr != null ||
cache.maxExpr != null))
+ || cache.ndv > rowCount * 10) {
+ return Optional.of("slot " + slot.getName() + " has
invalid column stats: " + cache);
+ }
+ }
+ }
+ }
+ return Optional.empty();
+ }
+
private Statistics computeOlapScan(OlapScan olapScan) {
OlapTable olapTable = olapScan.getTable();
double tableRowCount = getOlapTableRowCount(olapScan);
diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out
b/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out
index 1338812312e..26a67aa0d6e 100644
--- a/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out
+++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out
@@ -7,84 +7,85 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
--------PhysicalDistribute[DistributionSpecHash]
----------hashAgg[LOCAL]
------------PhysicalProject
---------------hashJoin[INNER_JOIN colocated]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=() build RFs:RF18
sr_item_sk->[cr_item_sk,cs_item_sk,i_item_sk,ss_item_sk];RF19
sr_ticket_number->[ss_ticket_number]
+--------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=()
build RFs:RF19 cs_item_sk->[i_item_sk,sr_item_sk,ss_item_sk]
----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk))
otherCondition=() build RFs:RF17 d_date_sk->[c_first_shipto_date_sk]
+------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk))
otherCondition=() build RFs:RF18 p_promo_sk->[ss_promo_sk]
--------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=()
build RFs:RF16 hd_demo_sk->[ss_hdemo_sk]
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=()
build RFs:RF17 s_store_sk->[ss_store_sk]
------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=()
build RFs:RF15 s_store_sk->[ss_store_sk]
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=()
build RFs:RF16 d_date_sk->[ss_sold_date_sk]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk))
otherCondition=() build RFs:RF14 p_promo_sk->[ss_promo_sk]
+------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk))
otherCondition=() build RFs:RF15 ib_income_band_sk->[hd_income_band_sk]
--------------------------------PhysicalProject
-----------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk))
otherCondition=() build RFs:RF13 d_date_sk->[c_first_sales_date_sk]
+----------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk))
otherCondition=() build RFs:RF14 ca_address_sk->[c_current_addr_sk]
------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk))
otherCondition=() build RFs:RF12 ca_address_sk->[c_current_addr_sk]
+--------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk))
otherCondition=() build RFs:RF13 hd_demo_sk->[c_current_hdemo_sk]
----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=()
build RFs:RF11 ca_address_sk->[ss_addr_sk]
+------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=()
build RFs:RF12 ca_address_sk->[ss_addr_sk]
--------------------------------------------PhysicalProject
-----------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk))
otherCondition=() build RFs:RF10 hd_demo_sk->[c_current_hdemo_sk]
+----------------------------------------------hashJoin[INNER_JOIN colocated]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=() build RFs:RF10 sr_item_sk->[i_item_sk,ss_item_sk];RF11
sr_ticket_number->[ss_ticket_number]
------------------------------------------------PhysicalProject
---------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk))
otherCondition=() build RFs:RF9 d_date_sk->[ss_sold_date_sk]
-----------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk))
otherCondition=() build RFs:RF8 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk]
+--------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk))
otherCondition=() build RFs:RF9 hd_demo_sk->[ss_hdemo_sk]
+----------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk))
otherCondition=() build RFs:RF8 i_item_sk->[ss_item_sk]
------------------------------------------------------PhysicalProject
---------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk))
otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF7
cd_demo_sk->[c_current_cdemo_sk]
+--------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk))
otherCondition=() build RFs:RF7 d_date_sk->[c_first_shipto_date_sk]
----------------------------------------------------------PhysicalProject
-------------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_customer_sk =
customer.c_customer_sk)) otherCondition=() build RFs:RF6
c_customer_sk->[ss_customer_sk]
+------------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk))
otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF6
cd_demo_sk->[ss_cdemo_sk]
--------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk))
otherCondition=() build RFs:RF5 cd_demo_sk->[ss_cdemo_sk]
-------------------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk))
otherCondition=() build RFs:RF4 cs_item_sk->[ss_item_sk]
---------------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF4 RF5 RF6 RF8 RF9 RF11 RF14 RF15 RF16 RF18 RF19
---------------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------------filter((sale
> (2 * refund)))
-------------------------------------------------------------------------hashAgg[GLOBAL]
---------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------------------------------------------------hashAgg[LOCAL]
-------------------------------------------------------------------------------PhysicalProject
---------------------------------------------------------------------------------hashJoin[INNER_JOIN
colocated] hashCondition=((catalog_sales.cs_item_sk =
catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number =
catalog_returns.cr_order_number)) otherCondition=() build RFs:RF2
cr_item_sk->[cs_item_sk];RF3 cr_order_number->[cs_order_number]
-----------------------------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales]
apply RFs: RF2 RF3 RF8 RF18
-----------------------------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns]
apply RFs: RF8 RF18
+----------------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((store_sales.ss_customer_sk =
customer.c_customer_sk)) otherCondition=() build RFs:RF5
c_customer_sk->[ss_customer_sk]
------------------------------------------------------------------PhysicalProject
---------------------------------------------------------------------PhysicalOlapScan[customer_demographics]
+--------------------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF5 RF6 RF8 RF9 RF10 RF11 RF12 RF16 RF17 RF18 RF19
+------------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk))
otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk]
+----------------------------------------------------------------------PhysicalProject
+------------------------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[c_first_sales_date_sk]
+--------------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------------PhysicalOlapScan[customer]
apply RFs: RF3 RF4 RF7 RF13 RF14
+--------------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------------------------------------------PhysicalProject
+------------------------------------------------------------------------PhysicalOlapScan[customer_demographics]
--------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[customer]
apply RFs: RF7 RF10 RF12 RF13 RF17
+----------------------------------------------------------------PhysicalOlapScan[customer_demographics]
----------------------------------------------------------PhysicalProject
-------------------------------------------------------------PhysicalOlapScan[customer_demographics]
+------------------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------------------PhysicalProject
--------------------------------------------------------filter((item.i_current_price
<= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace',
'lawn', 'misty', 'orange', 'pink'))
-----------------------------------------------------------PhysicalOlapScan[item]
apply RFs: RF18
+----------------------------------------------------------PhysicalOlapScan[item]
apply RFs: RF10 RF19
----------------------------------------------------PhysicalProject
-------------------------------------------------------filter(d_year IN (1999,
2000))
---------------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk))
otherCondition=() build RFs:RF2 ib_income_band_sk->[hd_income_band_sk]
+--------------------------------------------------------PhysicalProject
+----------------------------------------------------------PhysicalOlapScan[household_demographics]
apply RFs: RF2
+--------------------------------------------------------PhysicalProject
+----------------------------------------------------------PhysicalOlapScan[income_band]
------------------------------------------------PhysicalProject
---------------------------------------------------hashJoin[INNER_JOIN
broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk))
otherCondition=() build RFs:RF1 ib_income_band_sk->[hd_income_band_sk]
-----------------------------------------------------PhysicalProject
-------------------------------------------------------PhysicalOlapScan[household_demographics]
apply RFs: RF1
-----------------------------------------------------PhysicalProject
-------------------------------------------------------PhysicalOlapScan[income_band]
+--------------------------------------------------PhysicalOlapScan[store_returns]
apply RFs: RF19
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[customer_address]
----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[customer_address]
+------------------------------------------PhysicalOlapScan[household_demographics]
apply RFs: RF15
------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------PhysicalOlapScan[customer_address]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[promotion]
+----------------------------------PhysicalOlapScan[income_band]
----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[store]
+------------------------------filter(d_year IN (1999, 2000))
+--------------------------------PhysicalOlapScan[date_dim]
------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk))
otherCondition=() build RFs:RF0 ib_income_band_sk->[hd_income_band_sk]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[household_demographics] apply
RFs: RF0
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[income_band]
+--------------------------PhysicalOlapScan[store]
--------------------PhysicalProject
-----------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalOlapScan[promotion]
----------------PhysicalProject
-------------------PhysicalOlapScan[store_returns]
+------------------filter((sale > (2 * refund)))
+--------------------hashAgg[GLOBAL]
+----------------------PhysicalDistribute[DistributionSpecHash]
+------------------------hashAgg[LOCAL]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN colocated]
hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and
(catalog_sales.cs_order_number = catalog_returns.cr_order_number))
otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1
cr_order_number->[cs_order_number]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
RF1
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_returns]
--PhysicalResultSink
----PhysicalQuickSort[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
diff --git
a/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out
b/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out
index 2609ca5f4c9..d897514ee49 100644
---
a/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out
+++
b/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out
@@ -52,17 +52,6 @@ PhysicalResultSink
----filter((cast(a as BIGINT) < -1))
------PhysicalOlapScan[infer_intersect_except2]
--- !except_and_intersect --
-PhysicalResultSink
---PhysicalExcept
-----filter((infer_intersect_except1.a > 0))
-------PhysicalOlapScan[infer_intersect_except1]
-----PhysicalIntersect
-------filter((infer_intersect_except2.b > 'ab'))
---------PhysicalOlapScan[infer_intersect_except2]
-------filter((infer_intersect_except3.a = 1) and (infer_intersect_except3.b =
'abc'))
---------PhysicalOlapScan[infer_intersect_except3]
-
-- !except_and_intersect_except_predicate_to_right --
PhysicalResultSink
--PhysicalExcept
diff --git a/regression-test/suites/nereids_hint_tpcds_p0/load.groovy
b/regression-test/suites/nereids_hint_tpcds_p0/load.groovy
index 77b37a9c1b4..f4cf57113a6 100644
--- a/regression-test/suites/nereids_hint_tpcds_p0/load.groovy
+++ b/regression-test/suites/nereids_hint_tpcds_p0/load.groovy
@@ -2336,7 +2336,7 @@ suite("load") {
"""
sql """
- alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0',
'max_value'='179769313', 'data_size'='168')
+ alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
"""
sql """
diff --git a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy
b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy
new file mode 100644
index 00000000000..5304cd8c2c1
--- /dev/null
+++ b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("invalid_stats") {
+ // multi_sql """
+ // set global enable_auto_analyze=false;
+ // SET enable_nereids_planner=true;
+ // SET enable_fallback_to_original_planner=false;
+ // set disable_nereids_rules=PRUNE_EMPTY_PARTITION;
+
+
+ // drop table if exists region;
+ // CREATE TABLE region (
+ // r_regionkey int NOT NULL,
+ // r_name VARCHAR(25) NOT NULL,
+ // r_comment VARCHAR(152)
+ // )ENGINE=OLAP
+ // DUPLICATE KEY(`r_regionkey`)
+ // COMMENT "OLAP"
+ // DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1
+ // PROPERTIES (
+ // "replication_num" = "1"
+ // );
+
+ // drop table if exists nation;
+ // CREATE TABLE `nation` (
+ // `n_nationkey` int(11) NOT NULL,
+ // `n_name` varchar(25) NOT NULL,
+ // `n_regionkey` int(11) NOT NULL,
+ // `n_comment` varchar(152) NULL
+ // ) ENGINE=OLAP
+ // DUPLICATE KEY(`N_NATIONKEY`)
+ // COMMENT "OLAP"
+ // DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1
+ // PROPERTIES (
+ // "replication_num" = "1"
+ // );
+ // alter table nation modify column n_nationkey set stats ('ndv'='25',
'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25');
+
+ // alter table nation modify column n_regionkey set stats ('ndv'='5',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='25');
+
+ // """
+
+ // explain {
+ // sql "select * from region"
+ // notContains("join reorder with unknown column statistics")
+ // }
+
+ // explain {
+ // sql "select * from region where r_regionkey=1"
+ // contains("join reorder with unknown column statistics")
+ // }
+
+ // explain {
+ // sql "select r_regionkey from region group by r_regionkey"
+ // contains("join reorder with unknown column statistics")
+ // }
+
+ // explain {
+ // sql "select r_regionkey from region join nation on
r_regionkey=n_regionkey"
+ // contains("join reorder with unknown column statistics")
+ // }
+
+ // sql "alter table region modify column r_regionkey set stats ('ndv'='5',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');"
+
+ // explain {
+ // sql "select * from region where r_regionkey=1"
+ // notContains("join reorder with unknown column statistics")
+ // }
+
+ // explain {
+ // sql "select r_regionkey from region group by r_regionkey"
+ // notContains("join reorder with unknown column statistics")
+ // }
+
+ // explain {
+ // sql "select r_regionkey from region join nation on
r_regionkey=n_regionkey"
+ // notContains("join reorder with unknown column statistics")
+ // }
+
+ // explain {
+ // sql "select r_name from region join nation on
r_regionkey=n_regionkey"
+ // notContains("join reorder with unknown column statistics")
+ // }
+
+ // explain {
+ // sql """
+ // select r_name
+ // from (select r_name, r_regionkey + 1 x from region) T join
nation on T.x=n_regionkey
+ // """
+ // notContains("join reorder with unknown column statistics")
+ // }
+}
+// disable jo: alter table region modify column r_regionkey set stats
('ndv'='0', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0');
+// disable jo: alter table region modify column r_regionkey set stats
('ndv'='11', 'num_nulls'='0', 'min_value'='0', 'max_value'='4',
'row_count'='1');
+
+// alter table region modify column r_regionkey set stats ('ndv'='10',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');
diff --git
a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy
b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy
index 117d0c01f24..e37e9a65da1 100644
--- a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy
+++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy
@@ -15,14 +15,14 @@
// specific language governing permissions and limitations
// under the License.
-suite("eager_aggregate_basic") {
+suite("basic") {
sql "SET enable_nereids_planner=true"
sql "set runtime_filter_mode=OFF"
sql "SET enable_fallback_to_original_planner=false"
sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION"
-
+ sql "set disable_join_reorder=true;"
sql """
DROP TABLE IF EXISTS shunt_log_com_dd_library;
"""
diff --git
a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy
b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy
index f3e6f593aa9..78503039d22 100644
---
a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy
+++
b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy
@@ -15,13 +15,14 @@
// specific language governing permissions and limitations
// under the License.
-suite("eager_aggregate_basic_one_side") {
+suite("basic_one_side") {
sql "SET enable_nereids_planner=true"
sql "set runtime_filter_mode=OFF"
sql "SET enable_fallback_to_original_planner=false"
sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION"
+ sql "set disable_join_reorder=true"
sql """
DROP TABLE IF EXISTS shunt_log_com_dd_library_one_side;
diff --git
a/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy
b/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy
index fb8ef0a75f9..aec3f3384e4 100644
---
a/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy
+++
b/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy
@@ -115,10 +115,28 @@ suite("infer_intersect_except") {
select a+1,b from infer_intersect_except1 where a>0 intersect select a+1,b
from infer_intersect_except2 where a+1<0;
"""
- qt_except_and_intersect """
- explain shape plan
- select a,b from infer_intersect_except1 where a>0 except select 1,'abc'
from infer_intersect_except2 where b>'ab' intersect select a,b from
infer_intersect_except3 where a<10;
- """
+ explain {
+ sql """
+ shape plan
+ select a,b from infer_intersect_except1 where a > 0
+ except
+ select 1, 'abc' from infer_intersect_except2 where b > 'ab'
+ intersect
+ select a, b from infer_intersect_except3 where a < 10;
+ """
+ notContains("a < 10")
+ contains("(infer_intersect_except3.a = 1) and
(infer_intersect_except3.b = 'abc')")
+// PhysicalResultSink
+// --PhysicalExcept
+// ----filter((infer_intersect_except1.a > 0))
+// ------PhysicalOlapScan[infer_intersect_except1]
+// ----PhysicalIntersect
+// ------filter((infer_intersect_except3.a = 1) and (infer_intersect_except3.b
= 'abc'))
+// --------PhysicalOlapScan[infer_intersect_except3]
+// ------filter((infer_intersect_except2.b > 'ab'))
+// --------PhysicalOlapScan[infer_intersect_except2]
+ }
+
qt_except_and_intersect_except_predicate_to_right """
explain shape plan
diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy
b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy
index b868aab6130..1a5c3980e84 100644
--- a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy
+++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy
@@ -2336,7 +2336,7 @@ suite("load") {
"""
sql """
- alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0',
'max_value'='179769313', 'data_size'='168')
+ alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
"""
sql """
diff --git
a/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy
b/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy
index 3020a285e63..b2dca961e4d 100644
---
a/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy
+++
b/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy
@@ -2340,7 +2340,7 @@ suite("load") {
"""
sql """
- alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0',
'max_value'='179769313', 'data_size'='168')
+ alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
"""
sql """
diff --git a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
index 120ebf97298..48ad6bda53d 100644
--- a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
+++ b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
@@ -1299,7 +1299,7 @@ alter table web_page modify column wp_max_ad_count set
stats ('row_count'='2040'
"""
sql """
-alter table call_center modify column cc_closed_date_sk set stats
('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070',
'avg_size'='120', 'max_size'='120' )
+alter table call_center modify column cc_closed_date_sk set stats
('row_count'='30', 'ndv'='0', 'num_nulls'='42', 'avg_size'='120',
'max_size'='120' )
"""
sql """
@@ -2018,10 +2018,6 @@ sql """
alter table ship_mode modify column sm_contract set stats ('row_count'='20',
'ndv'='20', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn',
'avg_size'='252', 'max_size'='252' )
"""
-sql """
-alter table call_center modify column cc_closed_date_sk set stats
('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0',
'avg_size'='120', 'max_size'='120' )
-"""
-
sql """
alter table customer_address modify column ca_zip set stats
('row_count'='1000000', 'ndv'='7733', 'min_value'='', 'max_value'='99981',
'avg_size'='4848150', 'max_size'='4848150' )
"""
diff --git
a/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy
b/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy
index 8b4fdeee69a..1ed3ebba10e 100644
--- a/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy
+++ b/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy
@@ -2343,7 +2343,7 @@ suite("load") {
"""
sql """
- alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0',
'max_value'='179769313', 'data_size'='168')
+ alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
"""
sql """
diff --git a/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy
b/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy
index 23ce70d931e..4b99eafdea9 100644
--- a/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy
+++ b/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy
@@ -1302,7 +1302,7 @@ alter table web_page modify column wp_max_ad_count set
stats ('row_count'='2040'
"""
sql """
-alter table call_center modify column cc_closed_date_sk set stats
('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070',
'avg_size'='120', 'max_size'='120' )
+alter table call_center modify column cc_closed_date_sk set stats
('row_count'='30', 'ndv'='0', 'num_nulls'='42', 'avg_size'='120',
'max_size'='120' )
"""
sql """
@@ -2021,10 +2021,6 @@ sql """
alter table ship_mode modify column sm_contract set stats ('row_count'='20',
'ndv'='20', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn',
'avg_size'='252', 'max_size'='252' )
"""
-sql """
-alter table call_center modify column cc_closed_date_sk set stats
('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0',
'avg_size'='120', 'max_size'='120' )
-"""
-
sql """
alter table customer_address modify column ca_zip set stats
('row_count'='1000000', 'ndv'='7733', 'min_value'='', 'max_value'='99981',
'avg_size'='4848150', 'max_size'='4848150' )
"""
diff --git a/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy
b/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy
index bc28fdde8c0..14c11d3ea84 100644
--- a/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy
+++ b/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy
@@ -2339,7 +2339,7 @@ suite("load") {
"""
sql """
- alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0',
'max_value'='179769313', 'data_size'='168')
+ alter table call_center modify column cc_closed_date_sk set stats
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
"""
sql """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]