This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 04060e1bf94 [fix](regression) split hive compress-type heavy scan into 
p2 (#61073)
04060e1bf94 is described below

commit 04060e1bf946f2baf4a970e6a7f2ced5370f596a
Author: Chenjunwei <[email protected]>
AuthorDate: Thu Mar 12 14:47:00 2026 +0800

    [fix](regression) split hive compress-type heavy scan into p2 (#61073)
    
    ## Summary
    - slim down `test_hive_compress_type` in `external_table_p0` by removing
    heavy `test_compress_partitioned` scans
    - add new `test_hive_compress_type_large_data` in `external_table_p2` to
    cover the moved large-data checks
    
    ## Why
    - reduce external regression p0 runtime for this case:
    `test_hive_compress_type` drops from **~5 min → ~3 sec** (removes 6
    large-table scans + 2 EXPLAINs on ~734 MB `test_compress_partitioned`)
    - keep heavy-data/file-split behavior validation in p2 instead of
    dropping coverage
    
    ## Details
    - p0 keeps lightweight parquet LZ4/LZO compression query checks
    - p2 keeps large-table checks with `file_split_size=0` and
    `file_split_size=8388608` and validates row counts/scan split
    expectations
    
    ## Test
    - `test_hive_compress_type` (p0): measured runtime ~3s after this change
    (was ~5 min)
    - `test_hive_compress_type_large_data` (p2): covers the moved
    large-data/split assertions
---
 .../hive/test_hive_compress_type.groovy            | 41 -----------
 .../hive/test_hive_compress_type_large_data.groovy | 82 ++++++++++++++++++++++
 2 files changed, 82 insertions(+), 41 deletions(-)

diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
index 40d5b0a4a73..bd5f4efc28b 100644
--- 
a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
+++ 
b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
@@ -23,16 +23,6 @@ suite("test_hive_compress_type", "p0,external") {
         return;
     }
 
-    def backends = sql """show backends"""
-    def backendNum = backends.size()
-    logger.info("get backendNum: ${backendNum}")
-    // `parallel_fragment_exec_instance_num` may be displayed as
-    // `deprecated_parallel_fragment_exec_instance_num` in newer branches.
-    def parallelExecInstanceRows = sql("show variables like 
'%parallel_fragment_exec_instance_num%'")
-    assertTrue(parallelExecInstanceRows.size() > 0)
-    def parallelExecInstanceNum = (parallelExecInstanceRows[0][1] as 
String).toInteger()
-    logger.info("get ${parallelExecInstanceRows[0][0]}: 
${parallelExecInstanceNum}")
-
     for (String hivePrefix : ["hive3"]) {
         String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
         String catalog_name = "${hivePrefix}_test_hive_compress_type"
@@ -45,37 +35,6 @@ suite("test_hive_compress_type", "p0,external") {
         );"""
         sql """use `${catalog_name}`.`multi_catalog`"""
 
-        // table test_compress_partitioned has 6 partitions with different 
compressed file: plain, gzip, bzip2, deflate
-        sql """set file_split_size=0"""
-        // COUNT pushdown split behavior depends on:
-        // totalFileNum < parallel_fragment_exec_instance_num * backendNum
-        // test_compress_partitioned currently has 16 files.
-        def expectedSplitNum = 16
-        if (backendNum > 1) {
-            expectedSplitNum = (16 < parallelExecInstanceNum * backendNum) ? 
28 : 16
-        }
-        explain {
-            sql("select count(*) from test_compress_partitioned")
-            contains "inputSplitNum=${expectedSplitNum}, 
totalFileSize=734675596, scanRanges=${expectedSplitNum}"
-            contains "partition=8/8"
-        }
-        qt_q21 """select count(*) from test_compress_partitioned where 
dt="gzip" or dt="mix""""
-        qt_q22 """select count(*) from test_compress_partitioned"""
-        order_qt_q23 """select * from test_compress_partitioned where 
watchid=4611870011201662970"""
-
-        sql """set file_split_size=8388608"""
-        explain {
-            sql("select count(*) from test_compress_partitioned")
-            contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16"
-            contains "partition=8/8"
-        }
-
-        qt_q31 """select count(*) from test_compress_partitioned where 
dt="gzip" or dt="mix""""
-        qt_q32 """select count(*) from test_compress_partitioned"""
-        order_qt_q33 """select * from test_compress_partitioned where 
watchid=4611870011201662970"""
-        sql """set file_split_size=0"""
-
-
         order_qt_q42 """ select count(*) from parquet_lz4_compression ;       
"""
         order_qt_q43 """ select * from parquet_lz4_compression 
             order by 
col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
diff --git 
a/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
 
b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
new file mode 100644
index 00000000000..943b81d30b9
--- /dev/null
+++ 
b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_compress_type_large_data", "p2,external") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("diable Hive test.")
+        return;
+    }
+
+    def backends = sql """show backends"""
+    def backendNum = backends.size()
+    logger.info("get backendNum: ${backendNum}")
+    // `parallel_fragment_exec_instance_num` may be displayed as
+    // `deprecated_parallel_fragment_exec_instance_num` in newer branches.
+    def parallelExecInstanceRows = sql("show variables like 
'%parallel_fragment_exec_instance_num%'")
+    assertTrue(parallelExecInstanceRows.size() > 0)
+    def parallelExecInstanceNum = (parallelExecInstanceRows[0][1] as 
String).toInteger()
+    logger.info("get ${parallelExecInstanceRows[0][0]}: 
${parallelExecInstanceNum}")
+
+    for (String hivePrefix : ["hive3"]) {
+        String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+        String catalog_name = 
"${hivePrefix}_test_hive_compress_type_large_data"
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """create catalog if not exists ${catalog_name} properties (
+            "type"="hms",
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+        );"""
+        sql """use `${catalog_name}`.`multi_catalog`"""
+
+        // table test_compress_partitioned has mixed compressed files and 
larger data volume.
+        sql """set file_split_size=0"""
+        def expectedSplitNum = 16
+        if (backendNum > 1) {
+            expectedSplitNum = (16 < parallelExecInstanceNum * backendNum) ? 
28 : 16
+        }
+        explain {
+            sql("select count(*) from test_compress_partitioned")
+            contains "inputSplitNum=${expectedSplitNum}, 
totalFileSize=734675596, scanRanges=${expectedSplitNum}"
+            contains "partition=8/8"
+        }
+
+        def countMix1 = sql """select count(*) from test_compress_partitioned 
where dt="gzip" or dt="mix""""
+        assertEquals(600005, countMix1[0][0])
+        def countAll1 = sql """select count(*) from 
test_compress_partitioned"""
+        assertEquals(1510010, countAll1[0][0])
+        def countWatchId1 = sql """select count(*) from 
test_compress_partitioned where watchid=4611870011201662970"""
+        assertEquals(15, countWatchId1[0][0])
+
+        sql """set file_split_size=8388608"""
+        explain {
+            sql("select count(*) from test_compress_partitioned")
+            contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16"
+            contains "partition=8/8"
+        }
+
+        def countMix2 = sql """select count(*) from test_compress_partitioned 
where dt="gzip" or dt="mix""""
+        assertEquals(600005, countMix2[0][0])
+        def countAll2 = sql """select count(*) from 
test_compress_partitioned"""
+        assertEquals(1510010, countAll2[0][0])
+        def countWatchId2 = sql """select count(*) from 
test_compress_partitioned where watchid=4611870011201662970"""
+        assertEquals(15, countWatchId2[0][0])
+
+        sql """set file_split_size=0"""
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to