This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ee5691b9dd4 [test](nereids)add regression case: disable join reorder
if there are invalid stats (#43501)
ee5691b9dd4 is described below
commit ee5691b9dd4c7a1ed4ee9364c2261ebd3be52881
Author: minghong <[email protected]>
AuthorDate: Thu Nov 28 11:18:28 2024 +0800
[test](nereids)add regression case: disable join reorder if there are
invalid stats (#43501)
### What problem does this PR solve?
add test case for pr #41790.
this case is blocked by #43279
---
.../data/nereids_p0/stats/invalid_stats.out | 31 ++++++
.../stats/invalid_stats/invalid_stats.out | 31 ++++++
.../stats/{ => col_stats}/column_stats.groovy | 0
.../suites/nereids_p0/stats/invalid_stats.groovy | 111 ---------------------
.../invalid_stats.groovy} | 71 ++++---------
5 files changed, 80 insertions(+), 164 deletions(-)
diff --git a/regression-test/data/nereids_p0/stats/invalid_stats.out
b/regression-test/data/nereids_p0/stats/invalid_stats.out
new file mode 100644
index 00000000000..9b1b2e2aa97
--- /dev/null
+++ b/regression-test/data/nereids_p0/stats/invalid_stats.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !reorder_1 --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[nation]
+----PhysicalOlapScan[region]
+
+-- !ndv_min_max_invalid --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[region]
+----PhysicalOlapScan[nation]
+
+-- !reorder_2 --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[nation]
+----PhysicalOlapScan[region]
+
+-- !order_3 --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[nation]
+----PhysicalOlapScan[region]
+
+-- !ndv_row_invalid --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[region]
+----PhysicalOlapScan[nation]
+
diff --git
a/regression-test/data/nereids_p0/stats/invalid_stats/invalid_stats.out
b/regression-test/data/nereids_p0/stats/invalid_stats/invalid_stats.out
new file mode 100644
index 00000000000..9b1b2e2aa97
--- /dev/null
+++ b/regression-test/data/nereids_p0/stats/invalid_stats/invalid_stats.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !reorder_1 --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[nation]
+----PhysicalOlapScan[region]
+
+-- !ndv_min_max_invalid --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[region]
+----PhysicalOlapScan[nation]
+
+-- !reorder_2 --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[nation]
+----PhysicalOlapScan[region]
+
+-- !order_3 --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[nation]
+----PhysicalOlapScan[region]
+
+-- !ndv_row_invalid --
+PhysicalResultSink
+--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey =
nation.n_regionkey)) otherCondition=()
+----PhysicalOlapScan[region]
+----PhysicalOlapScan[nation]
+
diff --git a/regression-test/suites/nereids_p0/stats/column_stats.groovy
b/regression-test/suites/nereids_p0/stats/col_stats/column_stats.groovy
similarity index 100%
copy from regression-test/suites/nereids_p0/stats/column_stats.groovy
copy to regression-test/suites/nereids_p0/stats/col_stats/column_stats.groovy
diff --git a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy
b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy
deleted file mode 100644
index 5304cd8c2c1..00000000000
--- a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy
+++ /dev/null
@@ -1,111 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("invalid_stats") {
- // multi_sql """
- // set global enable_auto_analyze=false;
- // SET enable_nereids_planner=true;
- // SET enable_fallback_to_original_planner=false;
- // set disable_nereids_rules=PRUNE_EMPTY_PARTITION;
-
-
- // drop table if exists region;
- // CREATE TABLE region (
- // r_regionkey int NOT NULL,
- // r_name VARCHAR(25) NOT NULL,
- // r_comment VARCHAR(152)
- // )ENGINE=OLAP
- // DUPLICATE KEY(`r_regionkey`)
- // COMMENT "OLAP"
- // DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1
- // PROPERTIES (
- // "replication_num" = "1"
- // );
-
- // drop table if exists nation;
- // CREATE TABLE `nation` (
- // `n_nationkey` int(11) NOT NULL,
- // `n_name` varchar(25) NOT NULL,
- // `n_regionkey` int(11) NOT NULL,
- // `n_comment` varchar(152) NULL
- // ) ENGINE=OLAP
- // DUPLICATE KEY(`N_NATIONKEY`)
- // COMMENT "OLAP"
- // DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1
- // PROPERTIES (
- // "replication_num" = "1"
- // );
- // alter table nation modify column n_nationkey set stats ('ndv'='25',
'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25');
-
- // alter table nation modify column n_regionkey set stats ('ndv'='5',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='25');
-
- // """
-
- // explain {
- // sql "select * from region"
- // notContains("join reorder with unknown column statistics")
- // }
-
- // explain {
- // sql "select * from region where r_regionkey=1"
- // contains("join reorder with unknown column statistics")
- // }
-
- // explain {
- // sql "select r_regionkey from region group by r_regionkey"
- // contains("join reorder with unknown column statistics")
- // }
-
- // explain {
- // sql "select r_regionkey from region join nation on
r_regionkey=n_regionkey"
- // contains("join reorder with unknown column statistics")
- // }
-
- // sql "alter table region modify column r_regionkey set stats ('ndv'='5',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');"
-
- // explain {
- // sql "select * from region where r_regionkey=1"
- // notContains("join reorder with unknown column statistics")
- // }
-
- // explain {
- // sql "select r_regionkey from region group by r_regionkey"
- // notContains("join reorder with unknown column statistics")
- // }
-
- // explain {
- // sql "select r_regionkey from region join nation on
r_regionkey=n_regionkey"
- // notContains("join reorder with unknown column statistics")
- // }
-
- // explain {
- // sql "select r_name from region join nation on
r_regionkey=n_regionkey"
- // notContains("join reorder with unknown column statistics")
- // }
-
- // explain {
- // sql """
- // select r_name
- // from (select r_name, r_regionkey + 1 x from region) T join
nation on T.x=n_regionkey
- // """
- // notContains("join reorder with unknown column statistics")
- // }
-}
-// disable jo: alter table region modify column r_regionkey set stats
('ndv'='0', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0');
-// disable jo: alter table region modify column r_regionkey set stats
('ndv'='11', 'num_nulls'='0', 'min_value'='0', 'max_value'='4',
'row_count'='1');
-
-// alter table region modify column r_regionkey set stats ('ndv'='10',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');
diff --git a/regression-test/suites/nereids_p0/stats/column_stats.groovy
b/regression-test/suites/nereids_p0/stats/invalid_stats/invalid_stats.groovy
similarity index 52%
rename from regression-test/suites/nereids_p0/stats/column_stats.groovy
rename to
regression-test/suites/nereids_p0/stats/invalid_stats/invalid_stats.groovy
index d69601ec8f4..51f1a37f118 100644
--- a/regression-test/suites/nereids_p0/stats/column_stats.groovy
+++ b/regression-test/suites/nereids_p0/stats/invalid_stats/invalid_stats.groovy
@@ -15,14 +15,13 @@
// specific language governing permissions and limitations
// under the License.
-suite("column_stats") {
+suite("invalid_stats") {
multi_sql """
set global enable_auto_analyze=false;
SET enable_nereids_planner=true;
-
SET enable_fallback_to_original_planner=false;
set disable_nereids_rules=PRUNE_EMPTY_PARTITION;
-
+ set ignore_shape_nodes=PhysicalProject;
drop table if exists region;
CREATE TABLE region (
@@ -52,61 +51,27 @@ suite("column_stats") {
);
alter table nation modify column n_nationkey set stats ('ndv'='25',
'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25');
- alter table nation modify column n_name set stats ('ndv'='25',
'num_nulls'='0', 'min_value'='ALGERIA', 'max_value'='VIETNAM',
'row_count'='25');
-
- alter table nation modify column n_regionkey set stats ('ndv'='5',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='25');
-
- alter table nation modify column n_comment set stats ('ndv'='25',
'num_nulls'='0', 'min_value'=' haggle. carefully final deposits detect slyly
agai', 'max_value'='y final packages. slow foxes cajole quickly. quickly silent
platelets breach ironic accounts. unusual pinto be', 'row_count'='25');
+ alter table region modify column r_regionkey set stats ('ndv'='5',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');
"""
- explain {
- sql "select * from region"
- notContains("planed with unknown column statistics")
- }
-
- explain {
- sql "select * from region where r_regionkey=1"
- contains("planed with unknown column statistics")
- }
-
- explain {
- sql "select r_regionkey from region group by r_regionkey"
- contains("planed with unknown column statistics")
- }
-
- explain {
- sql "select r_regionkey from region join nation on
r_regionkey=n_regionkey"
- contains("planed with unknown column statistics")
- }
-
+ qt_reorder_1 "explain shape plan select r_regionkey from region join
nation on r_regionkey=n_regionkey"
+
+ sql "alter table region modify column r_regionkey set stats ('ndv'='0',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0');"
+
+ // r_regionkey stats invalid: ndv=0, but min or max is not null
+ qt_ndv_min_max_invalid "explain shape plan select r_regionkey from region
join nation on r_regionkey=n_regionkey"
+
+ // inject normal stats and check join order is nation-region
sql "alter table region modify column r_regionkey set stats ('ndv'='5',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');"
- explain {
- sql "select * from region where r_regionkey=1"
- notContains("planed with unknown column statistics")
- }
+ qt_reorder_2 "explain shape plan select r_regionkey from region join
nation on r_regionkey=n_regionkey"
- explain {
- sql "select r_regionkey from region group by r_regionkey"
- notContains("planed with unknown column statistics")
- }
-
- explain {
- sql "select r_regionkey from region join nation on
r_regionkey=n_regionkey"
- notContains("planed with unknown column statistics")
- }
-
- explain {
- sql "select r_name from region join nation on r_regionkey=n_regionkey"
- notContains("planed with unknown column statistics")
- }
+ // r_regionkey stats invalid: ndv > 10*row
+ sql "alter table region modify column r_regionkey set stats ('ndv'='10',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');"
+ qt_order_3 "explain shape plan select r_regionkey from region join nation
on r_regionkey=n_regionkey"
+
+ sql "alter table region modify column r_regionkey set stats ('ndv'='11',
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');"
+ qt_ndv_row_invalid "explain shape plan select r_regionkey from region join
nation on r_regionkey=n_regionkey"
- explain {
- sql """
- select r_name
- from (select r_name, r_regionkey + 1 x from region) T join nation
on T.x=n_regionkey
- """
- notContains("planed with unknown column statistics")
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]