This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 9610c7e38f2 branch-4.0: [fix](test) fix unstable cases (#63500)
9610c7e38f2 is described below

commit 9610c7e38f25d68fdb0c7ea7533a1757aa6858fb
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Thu May 21 18:26:59 2026 -0700

    branch-4.0: [fix](test) fix unstable cases (#63500)
    
    external_table_p0/hive/test_hive_compress_type.groovy
    external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
    export_p0/outfile/csv/test_outfile_empty_data.groovy
    external_table_p0/broker_load/test_broker_load_func.groovy
    
    bp #61073 #62360 #62423
    
    ---------
    
    Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
    Co-authored-by: Chenjunwei <[email protected]>
    Co-authored-by: Qi Chen <[email protected]>
---
 .../outfile/csv/test_outfile_empty_data.groovy     | 33 +++++++--
 .../broker_load/test_broker_load_func.groovy       | 15 ++++
 .../hive/test_hive_compress_type.groovy            | 41 -----------
 .../jdbc/test_mysql_jdbc_catalog.groovy            |  2 +-
 .../hive/test_hive_compress_type_large_data.groovy | 85 ++++++++++++++++++++++
 5 files changed, 126 insertions(+), 50 deletions(-)

diff --git a/regression-test/suites/export_p0/outfile/csv/test_outfile_empty_data.groovy b/regression-test/suites/export_p0/outfile/csv/test_outfile_empty_data.groovy
index d14dc119ddb..a2e01ff9dec 100644
--- a/regression-test/suites/export_p0/outfile/csv/test_outfile_empty_data.groovy
+++ b/regression-test/suites/export_p0/outfile/csv/test_outfile_empty_data.groovy
@@ -51,6 +51,21 @@ suite("test_outfile_empty_data", "external,hive,tvf,external_docker") {
     // broker
     String broker_name = "hdfs"
 
+    // check whether the broker named `hdfs` exists. If not, skip broker-related cases.
+    def brokerExists = {String name ->
+        def brokers = sql """ SHOW BROKER """
+        for (def row : brokers) {
+            if (row[0] == name) {
+                return true
+            }
+        }
+        return false
+    }
+    def has_broker = brokerExists(broker_name)
+    if (!has_broker) {
+        logger.info("broker `${broker_name}` does not exist, will skip broker-related cases.")
+    }
+
     def export_table_name = "outfile_empty_data_test"
 
     def create_table = {table_name, column_define ->
@@ -133,11 +148,9 @@ suite("test_outfile_empty_data", "external,hive,tvf,external_docker") {
         create_table(export_table_name, doris_column_define);
         // test outfile empty data to hdfs directly
         def outfile_to_hdfs_directly_url = outfile_to_HDFS_directly()
-        // test outfile empty data to hdfs with broker
-        def outfile_to_hdfs_with_broker_url= outfile_to_HDFS_with_broker()
         // test outfile empty data to s3 directly
         def outfile_to_s3_directly_url = outfile_to_S3_directly()
-        qt_select_base1 """ SELECT * FROM ${export_table_name} ORDER BY 
user_id; """ 
+        qt_select_base1 """ SELECT * FROM ${export_table_name} ORDER BY 
user_id; """
 
         qt_select_tvf1 """ select * from HDFS(
                     "uri" = "${outfile_to_hdfs_directly_url}0.csv",
@@ -145,11 +158,15 @@ suite("test_outfile_empty_data", "external,hive,tvf,external_docker") {
                     "format" = "${format}");
                     """
 
-        qt_select_tvf2 """ select * from HDFS(
-                    "uri" = "${outfile_to_hdfs_with_broker_url}0.csv",
-                    "hadoop.username" = "${hdfsUserName}",
-                    "format" = "${format}");
-                    """
+        if (has_broker) {
+            // test outfile empty data to hdfs with broker
+            def outfile_to_hdfs_with_broker_url= outfile_to_HDFS_with_broker()
+            qt_select_tvf2 """ select * from HDFS(
+                        "uri" = "${outfile_to_hdfs_with_broker_url}0.csv",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "${format}");
+                        """
+        }
         
         qt_select_tvf3 """ SELECT * FROM S3 (
                 "uri" = "http://${bucket}.${s3_endpoint}${outfile_to_s3_directly_url.substring(5 + bucket.length(), outfile_to_s3_directly_url.length())}0.csv",
diff --git a/regression-test/suites/external_table_p0/broker_load/test_broker_load_func.groovy b/regression-test/suites/external_table_p0/broker_load/test_broker_load_func.groovy
index ed8ff2161cf..afce6ab8455 100644
--- a/regression-test/suites/external_table_p0/broker_load/test_broker_load_func.groovy
+++ b/regression-test/suites/external_table_p0/broker_load/test_broker_load_func.groovy
@@ -25,6 +25,21 @@ suite("test_broker_load_func", "p0,external,hive,external_docker,external_docker
 
             String database_name = "test_broker_load_func"
             String broker_name = "hdfs"
+
+            // check whether the broker named `hdfs` exists. If not, skip this case.
+            def brokers = sql """ SHOW BROKER """
+            def has_broker = false
+            for (def row : brokers) {
+                if (row[0] == broker_name) {
+                    has_broker = true
+                    break
+                }
+            }
+            if (!has_broker) {
+                logger.info("broker `${broker_name}` does not exist, skip test_broker_load_func.")
+                return
+            }
+
             def uuid = UUID.randomUUID().toString().replaceAll("-", "")
             def test_load_label="label_test_broker_load_func_${uuid}"
             String table_name="simple"
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
index 8668e148a63..a29cc711077 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
@@ -23,16 +23,6 @@ suite("test_hive_compress_type", "p0,external,hive,external_docker,external_dock
         return;
     }
 
-    def backends = sql """show backends"""
-    def backendNum = backends.size()
-    logger.info("get backendNum: ${backendNum}")
-    // `parallel_fragment_exec_instance_num` may be displayed as
-    // `deprecated_parallel_fragment_exec_instance_num` in newer branches.
-    def parallelExecInstanceRows = sql("show variables like '%parallel_fragment_exec_instance_num%'")
-    assertTrue(parallelExecInstanceRows.size() > 0)
-    def parallelExecInstanceNum = (parallelExecInstanceRows[0][1] as String).toInteger()
-    logger.info("get ${parallelExecInstanceRows[0][0]}: ${parallelExecInstanceNum}")
-
     for (String hivePrefix : ["hive3"]) {
         String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
         String catalog_name = "${hivePrefix}_test_hive_compress_type"
@@ -45,37 +35,6 @@ suite("test_hive_compress_type", "p0,external,hive,external_docker,external_dock
         );"""
         sql """use `${catalog_name}`.`multi_catalog`"""
 
-        // table test_compress_partitioned has 6 partitions with different compressed file: plain, gzip, bzip2, deflate
-        sql """set file_split_size=0"""
-        // COUNT pushdown split behavior depends on:
-        // totalFileNum < parallel_fragment_exec_instance_num * backendNum
-        // test_compress_partitioned currently has 16 files.
-        def expectedSplitNum = 16
-        if (backendNum > 1) {
-            expectedSplitNum = (16 < parallelExecInstanceNum * backendNum) ? 28 : 16
-        }
-        explain {
-            sql("select count(*) from test_compress_partitioned")
-            contains "inputSplitNum=${expectedSplitNum}, totalFileSize=734675596, scanRanges=${expectedSplitNum}"
-            contains "partition=8/8"
-        }
-        qt_q21 """select count(*) from test_compress_partitioned where dt="gzip" or dt="mix""""
-        qt_q22 """select count(*) from test_compress_partitioned"""
-        order_qt_q23 """select * from test_compress_partitioned where watchid=4611870011201662970"""
-
-        sql """set file_split_size=8388608"""
-        explain {
-            sql("select count(*) from test_compress_partitioned")
-            contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16"
-            contains "partition=8/8"
-        }
-
-        qt_q31 """select count(*) from test_compress_partitioned where dt="gzip" or dt="mix""""
-        qt_q32 """select count(*) from test_compress_partitioned"""
-        order_qt_q33 """select * from test_compress_partitioned where watchid=4611870011201662970"""
-        sql """set file_split_size=0"""
-
-
         order_qt_q42 """ select count(*) from parquet_lz4_compression ;       
"""
         order_qt_q43 """ select * from parquet_lz4_compression 
             order by 
col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
diff --git a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
index 4adc5c9e280..c89c7595f78 100644
--- a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
+++ b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy
@@ -405,7 +405,7 @@ suite("test_mysql_jdbc_catalog", "p0,external,mysql,external_docker,external_doc
         }
         sql """ set enable_ext_func_pred_pushdown = "false"; """
         explain {
-            sql ("select k6, k8 from test1 where nvl(k6, null) = 1 and k8 = 1;")
+            sql ("select k6, k8 from test1 where nvl(k6, 1) = k6 and k8 = 1;")
 
             contains "QUERY: SELECT `k6`, `k8` FROM `doris_test`.`test1` WHERE ((`k8` = 1))"
         }
diff --git a/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
new file mode 100644
index 00000000000..3f9e45117ad
--- /dev/null
+++ b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_compress_type_large_data", "p2,external") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("diable Hive test.")
+        return;
+    }
+
+    def backends = sql """show backends"""
+    def backendNum = backends.size()
+    logger.info("get backendNum: ${backendNum}")
+
+    // Set parallel_pipeline_task_num explicitly so getParallelExecInstanceNum() returns
+    // a known value, regardless of fuzzy mode randomization or BE core count.
+    def parallelExecInstanceNum = 8
+    sql """set parallel_pipeline_task_num = ${parallelExecInstanceNum}"""
+
+    for (String hivePrefix : ["hive3"]) {
+        String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+        String catalog_name = "${hivePrefix}_test_hive_compress_type_large_data"
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """create catalog if not exists ${catalog_name} properties (
+            "type"="hms",
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+        );"""
+        sql """use `${catalog_name}`.`multi_catalog`"""
+
+        // table test_compress_partitioned has 16 files across 8 partitions (734MB total).
+        // With count pushdown, needSplit depends on totalFileNum vs parallelNum * backendNum.
+        // When needSplit=false: each file = 1 split = 16 splits.
+        // When needSplit=true and file_split_size=0: splits by dynamic size = 28 splits.
+        // When needSplit=true and file_split_size=8MB: splits by 8MB = 82 splits.
+        def needSplit = (backendNum > 1) && (16 < parallelExecInstanceNum * backendNum)
+
+        sql """set file_split_size=0"""
+        def expectedSplitNum1 = needSplit ? 28 : 16
+        explain {
+            sql("select count(*) from test_compress_partitioned")
+            contains "inputSplitNum=${expectedSplitNum1}, totalFileSize=734675596, scanRanges=${expectedSplitNum1}"
+            contains "partition=8/8"
+        }
+
+        def countMix1 = sql """select count(*) from test_compress_partitioned where dt="gzip" or dt="mix""""
+        assertEquals(600005, countMix1[0][0])
+        def countAll1 = sql """select count(*) from test_compress_partitioned"""
+        assertEquals(1510010, countAll1[0][0])
+        def countWatchId1 = sql """select count(*) from test_compress_partitioned where watchid=4611870011201662970"""
+        assertEquals(15, countWatchId1[0][0])
+
+        sql """set file_split_size=8388608"""
+        def expectedSplitNum2 = needSplit ? 82 : 16
+        explain {
+            sql("select count(*) from test_compress_partitioned")
+            contains "inputSplitNum=${expectedSplitNum2}, totalFileSize=734675596, scanRanges=${expectedSplitNum2}"
+            contains "partition=8/8"
+        }
+
+        def countMix2 = sql """select count(*) from test_compress_partitioned where dt="gzip" or dt="mix""""
+        assertEquals(600005, countMix2[0][0])
+        def countAll2 = sql """select count(*) from test_compress_partitioned"""
+        assertEquals(1510010, countAll2[0][0])
+        def countWatchId2 = sql """select count(*) from test_compress_partitioned where watchid=4611870011201662970"""
+        assertEquals(15, countWatchId2[0][0])
+
+        sql """set file_split_size=0"""
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to