This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 9610c7e38f2 branch-4.0: [fix](test) fix unstable cases (#63500)
9610c7e38f2 is described below
commit 9610c7e38f25d68fdb0c7ea7533a1757aa6858fb
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Thu May 21 18:26:59 2026 -0700
branch-4.0: [fix](test) fix unstable cases (#63500)
external_table_p0/hive/test_hive_compress_type.groovy
external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
export_p0/outfile/csv/test_outfile_empty_data.groovy
external_table_p0/broker_load/test_broker_load_func.groovy
bp #61073 #62360 #62423
---------
Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
Co-authored-by: Chenjunwei <[email protected]>
Co-authored-by: Qi Chen <[email protected]>
---
.../outfile/csv/test_outfile_empty_data.groovy | 33 +++++++--
.../broker_load/test_broker_load_func.groovy | 15 ++++
.../hive/test_hive_compress_type.groovy | 41 -----------
.../jdbc/test_mysql_jdbc_catalog.groovy | 2 +-
.../hive/test_hive_compress_type_large_data.groovy | 85 ++++++++++++++++++++++
5 files changed, 126 insertions(+), 50 deletions(-)
diff --git
a/regression-test/suites/export_p0/outfile/csv/test_outfile_empty_data.groovy
b/regression-test/suites/export_p0/outfile/csv/test_outfile_empty_data.groovy
index d14dc119ddb..a2e01ff9dec 100644
---
a/regression-test/suites/export_p0/outfile/csv/test_outfile_empty_data.groovy
+++
b/regression-test/suites/export_p0/outfile/csv/test_outfile_empty_data.groovy
@@ -51,6 +51,21 @@ suite("test_outfile_empty_data",
"external,hive,tvf,external_docker") {
// broker
String broker_name = "hdfs"
+ // check whether the broker named `hdfs` exists. If not, skip broker-related cases.
+ def brokerExists = {String name ->
+ def brokers = sql """ SHOW BROKER """
+ for (def row : brokers) {
+ if (row[0] == name) {
+ return true
+ }
+ }
+ return false
+ }
+ def has_broker = brokerExists(broker_name)
+ if (!has_broker) {
+ logger.info("broker `${broker_name}` does not exist, will skip
broker-related cases.")
+ }
+
def export_table_name = "outfile_empty_data_test"
def create_table = {table_name, column_define ->
@@ -133,11 +148,9 @@ suite("test_outfile_empty_data",
"external,hive,tvf,external_docker") {
create_table(export_table_name, doris_column_define);
// test outfile empty data to hdfs directly
def outfile_to_hdfs_directly_url = outfile_to_HDFS_directly()
- // test outfile empty data to hdfs with broker
- def outfile_to_hdfs_with_broker_url= outfile_to_HDFS_with_broker()
// test outfile empty data to s3 directly
def outfile_to_s3_directly_url = outfile_to_S3_directly()
- qt_select_base1 """ SELECT * FROM ${export_table_name} ORDER BY
user_id; """
+ qt_select_base1 """ SELECT * FROM ${export_table_name} ORDER BY
user_id; """
qt_select_tvf1 """ select * from HDFS(
"uri" = "${outfile_to_hdfs_directly_url}0.csv",
@@ -145,11 +158,15 @@ suite("test_outfile_empty_data",
"external,hive,tvf,external_docker") {
"format" = "${format}");
"""
- qt_select_tvf2 """ select * from HDFS(
- "uri" = "${outfile_to_hdfs_with_broker_url}0.csv",
- "hadoop.username" = "${hdfsUserName}",
- "format" = "${format}");
- """
+ if (has_broker) {
+ // test outfile empty data to hdfs with broker
+ def outfile_to_hdfs_with_broker_url= outfile_to_HDFS_with_broker()
+ qt_select_tvf2 """ select * from HDFS(
+ "uri" = "${outfile_to_hdfs_with_broker_url}0.csv",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}");
+ """
+ }
qt_select_tvf3 """ SELECT * FROM S3 (
"uri" =
"http://${bucket}.${s3_endpoint}${outfile_to_s3_directly_url.substring(5 +
bucket.length(), outfile_to_s3_directly_url.length())}0.csv",
diff --git
a/regression-test/suites/external_table_p0/broker_load/test_broker_load_func.groovy
b/regression-test/suites/external_table_p0/broker_load/test_broker_load_func.groovy
index ed8ff2161cf..afce6ab8455 100644
---
a/regression-test/suites/external_table_p0/broker_load/test_broker_load_func.groovy
+++
b/regression-test/suites/external_table_p0/broker_load/test_broker_load_func.groovy
@@ -25,6 +25,21 @@ suite("test_broker_load_func",
"p0,external,hive,external_docker,external_docker
String database_name = "test_broker_load_func"
String broker_name = "hdfs"
+
+ // check whether the broker named `hdfs` exists. If not, skip this case.
+ def brokers = sql """ SHOW BROKER """
+ def has_broker = false
+ for (def row : brokers) {
+ if (row[0] == broker_name) {
+ has_broker = true
+ break
+ }
+ }
+ if (!has_broker) {
+ logger.info("broker `${broker_name}` does not exist, skip
test_broker_load_func.")
+ return
+ }
+
def uuid = UUID.randomUUID().toString().replaceAll("-", "")
def test_load_label="label_test_broker_load_func_${uuid}"
String table_name="simple"
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
index 8668e148a63..a29cc711077 100644
---
a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
@@ -23,16 +23,6 @@ suite("test_hive_compress_type",
"p0,external,hive,external_docker,external_dock
return;
}
- def backends = sql """show backends"""
- def backendNum = backends.size()
- logger.info("get backendNum: ${backendNum}")
- // `parallel_fragment_exec_instance_num` may be displayed as
- // `deprecated_parallel_fragment_exec_instance_num` in newer branches.
- def parallelExecInstanceRows = sql("show variables like
'%parallel_fragment_exec_instance_num%'")
- assertTrue(parallelExecInstanceRows.size() > 0)
- def parallelExecInstanceNum = (parallelExecInstanceRows[0][1] as
String).toInteger()
- logger.info("get ${parallelExecInstanceRows[0][0]}:
${parallelExecInstanceNum}")
-
for (String hivePrefix : ["hive3"]) {
String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
String catalog_name = "${hivePrefix}_test_hive_compress_type"
@@ -45,37 +35,6 @@ suite("test_hive_compress_type",
"p0,external,hive,external_docker,external_dock
);"""
sql """use `${catalog_name}`.`multi_catalog`"""
- // table test_compress_partitioned has 6 partitions with different compressed file: plain, gzip, bzip2, deflate
- sql """set file_split_size=0"""
- // COUNT pushdown split behavior depends on:
- // totalFileNum < parallel_fragment_exec_instance_num * backendNum
- // test_compress_partitioned currently has 16 files.
- def expectedSplitNum = 16
- if (backendNum > 1) {
- expectedSplitNum = (16 < parallelExecInstanceNum * backendNum) ?
28 : 16
- }
- explain {
- sql("select count(*) from test_compress_partitioned")
- contains "inputSplitNum=${expectedSplitNum},
totalFileSize=734675596, scanRanges=${expectedSplitNum}"
- contains "partition=8/8"
- }
- qt_q21 """select count(*) from test_compress_partitioned where
dt="gzip" or dt="mix""""
- qt_q22 """select count(*) from test_compress_partitioned"""
- order_qt_q23 """select * from test_compress_partitioned where
watchid=4611870011201662970"""
-
- sql """set file_split_size=8388608"""
- explain {
- sql("select count(*) from test_compress_partitioned")
- contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16"
- contains "partition=8/8"
- }
-
- qt_q31 """select count(*) from test_compress_partitioned where
dt="gzip" or dt="mix""""
- qt_q32 """select count(*) from test_compress_partitioned"""
- order_qt_q33 """select * from test_compress_partitioned where
watchid=4611870011201662970"""
- sql """set file_split_size=0"""
-
-
order_qt_q42 """ select count(*) from parquet_lz4_compression ;
"""
order_qt_q43 """ select * from parquet_lz4_compression
order by
col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
diff --git
a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
index 4adc5c9e280..c89c7595f78 100644
---
a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
+++
b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
@@ -405,7 +405,7 @@ suite("test_mysql_jdbc_catalog",
"p0,external,mysql,external_docker,external_doc
}
sql """ set enable_ext_func_pred_pushdown = "false"; """
explain {
- sql ("select k6, k8 from test1 where nvl(k6, null) = 1 and k8 =
1;")
+ sql ("select k6, k8 from test1 where nvl(k6, 1) = k6 and k8 = 1;")
contains "QUERY: SELECT `k6`, `k8` FROM `doris_test`.`test1` WHERE
((`k8` = 1))"
}
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
new file mode 100644
index 00000000000..3f9e45117ad
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_compress_type_large_data", "p2,external") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test.")
+ return;
+ }
+
+ def backends = sql """show backends"""
+ def backendNum = backends.size()
+ logger.info("get backendNum: ${backendNum}")
+
+ // Set parallel_pipeline_task_num explicitly so getParallelExecInstanceNum() returns
+ // a known value, regardless of fuzzy mode randomization or BE core count.
+ def parallelExecInstanceNum = 8
+ sql """set parallel_pipeline_task_num = ${parallelExecInstanceNum}"""
+
+ for (String hivePrefix : ["hive3"]) {
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String catalog_name =
"${hivePrefix}_test_hive_compress_type_large_data"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """create catalog if not exists ${catalog_name} properties (
+ "type"="hms",
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+ );"""
+ sql """use `${catalog_name}`.`multi_catalog`"""
+
+ // table test_compress_partitioned has 16 files across 8 partitions (734MB total).
+ // With count pushdown, needSplit depends on totalFileNum vs parallelNum * backendNum.
+ // When needSplit=false: each file = 1 split = 16 splits.
+ // When needSplit=true and file_split_size=0: splits by dynamic size = 28 splits.
+ // When needSplit=true and file_split_size=8MB: splits by 8MB = 82 splits.
+ def needSplit = (backendNum > 1) && (16 < parallelExecInstanceNum *
backendNum)
+
+ sql """set file_split_size=0"""
+ def expectedSplitNum1 = needSplit ? 28 : 16
+ explain {
+ sql("select count(*) from test_compress_partitioned")
+ contains "inputSplitNum=${expectedSplitNum1},
totalFileSize=734675596, scanRanges=${expectedSplitNum1}"
+ contains "partition=8/8"
+ }
+
+ def countMix1 = sql """select count(*) from test_compress_partitioned
where dt="gzip" or dt="mix""""
+ assertEquals(600005, countMix1[0][0])
+ def countAll1 = sql """select count(*) from
test_compress_partitioned"""
+ assertEquals(1510010, countAll1[0][0])
+ def countWatchId1 = sql """select count(*) from
test_compress_partitioned where watchid=4611870011201662970"""
+ assertEquals(15, countWatchId1[0][0])
+
+ sql """set file_split_size=8388608"""
+ def expectedSplitNum2 = needSplit ? 82 : 16
+ explain {
+ sql("select count(*) from test_compress_partitioned")
+ contains "inputSplitNum=${expectedSplitNum2},
totalFileSize=734675596, scanRanges=${expectedSplitNum2}"
+ contains "partition=8/8"
+ }
+
+ def countMix2 = sql """select count(*) from test_compress_partitioned
where dt="gzip" or dt="mix""""
+ assertEquals(600005, countMix2[0][0])
+ def countAll2 = sql """select count(*) from
test_compress_partitioned"""
+ assertEquals(1510010, countAll2[0][0])
+ def countWatchId2 = sql """select count(*) from
test_compress_partitioned where watchid=4611870011201662970"""
+ assertEquals(15, countWatchId2[0][0])
+
+ sql """set file_split_size=0"""
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]