This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new cfc067c7e58 branch-4.1: [fix](regression) split hive compress-type
heavy scan into p2 #61073 (#61763)
cfc067c7e58 is described below
commit cfc067c7e5858a9fbd80eb4542d2cba4521902ee
Author: Socrates <[email protected]>
AuthorDate: Thu Mar 26 18:44:39 2026 +0800
branch-4.1: [fix](regression) split hive compress-type heavy scan into p2
#61073 (#61763)
Cherry-pick #61073 to branch-4.1
### What problem does this PR solve?
- Related PR: #61073
Split the heavy Hive compress-type regression scan out of the p0 suite
into p2 to keep the p0 case lighter while preserving the large-data
coverage in a dedicated p2 case.
### Cherry-pick commit
- `04060e1bf94` - [fix](regression) split hive compress-type heavy scan
into p2 (#61073)
Co-authored-by: Chenjunwei <[email protected]>
---
.../hive/test_hive_compress_type.groovy | 41 -----------
.../hive/test_hive_compress_type_large_data.groovy | 82 ++++++++++++++++++++++
2 files changed, 82 insertions(+), 41 deletions(-)
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
index 8668e148a63..a29cc711077 100644
---
a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
@@ -23,16 +23,6 @@ suite("test_hive_compress_type",
"p0,external,hive,external_docker,external_dock
return;
}
- def backends = sql """show backends"""
- def backendNum = backends.size()
- logger.info("get backendNum: ${backendNum}")
- // `parallel_fragment_exec_instance_num` may be displayed as
- // `deprecated_parallel_fragment_exec_instance_num` in newer branches.
- def parallelExecInstanceRows = sql("show variables like
'%parallel_fragment_exec_instance_num%'")
- assertTrue(parallelExecInstanceRows.size() > 0)
- def parallelExecInstanceNum = (parallelExecInstanceRows[0][1] as
String).toInteger()
- logger.info("get ${parallelExecInstanceRows[0][0]}:
${parallelExecInstanceNum}")
-
for (String hivePrefix : ["hive3"]) {
String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
String catalog_name = "${hivePrefix}_test_hive_compress_type"
@@ -45,37 +35,6 @@ suite("test_hive_compress_type",
"p0,external,hive,external_docker,external_dock
);"""
sql """use `${catalog_name}`.`multi_catalog`"""
- // table test_compress_partitioned has 6 partitions with different
compressed file: plain, gzip, bzip2, deflate
- sql """set file_split_size=0"""
- // COUNT pushdown split behavior depends on:
- // totalFileNum < parallel_fragment_exec_instance_num * backendNum
- // test_compress_partitioned currently has 16 files.
- def expectedSplitNum = 16
- if (backendNum > 1) {
- expectedSplitNum = (16 < parallelExecInstanceNum * backendNum) ?
28 : 16
- }
- explain {
- sql("select count(*) from test_compress_partitioned")
- contains "inputSplitNum=${expectedSplitNum},
totalFileSize=734675596, scanRanges=${expectedSplitNum}"
- contains "partition=8/8"
- }
- qt_q21 """select count(*) from test_compress_partitioned where
dt="gzip" or dt="mix""""
- qt_q22 """select count(*) from test_compress_partitioned"""
- order_qt_q23 """select * from test_compress_partitioned where
watchid=4611870011201662970"""
-
- sql """set file_split_size=8388608"""
- explain {
- sql("select count(*) from test_compress_partitioned")
- contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16"
- contains "partition=8/8"
- }
-
- qt_q31 """select count(*) from test_compress_partitioned where
dt="gzip" or dt="mix""""
- qt_q32 """select count(*) from test_compress_partitioned"""
- order_qt_q33 """select * from test_compress_partitioned where
watchid=4611870011201662970"""
- sql """set file_split_size=0"""
-
-
order_qt_q42 """ select count(*) from parquet_lz4_compression ;
"""
order_qt_q43 """ select * from parquet_lz4_compression
order by
col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
new file mode 100644
index 00000000000..943b81d30b9
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_compress_type_large_data", "p2,external") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test.")
+ return;
+ }
+
+ def backends = sql """show backends"""
+ def backendNum = backends.size()
+ logger.info("get backendNum: ${backendNum}")
+ // `parallel_fragment_exec_instance_num` may be displayed as
+ // `deprecated_parallel_fragment_exec_instance_num` in newer branches.
+ def parallelExecInstanceRows = sql("show variables like
'%parallel_fragment_exec_instance_num%'")
+ assertTrue(parallelExecInstanceRows.size() > 0)
+ def parallelExecInstanceNum = (parallelExecInstanceRows[0][1] as
String).toInteger()
+ logger.info("get ${parallelExecInstanceRows[0][0]}:
${parallelExecInstanceNum}")
+
+ for (String hivePrefix : ["hive3"]) {
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String catalog_name =
"${hivePrefix}_test_hive_compress_type_large_data"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """create catalog if not exists ${catalog_name} properties (
+ "type"="hms",
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+ );"""
+ sql """use `${catalog_name}`.`multi_catalog`"""
+
+ // table test_compress_partitioned has mixed compressed files and
larger data volume.
+ sql """set file_split_size=0"""
+ def expectedSplitNum = 16
+ if (backendNum > 1) {
+ expectedSplitNum = (16 < parallelExecInstanceNum * backendNum) ?
28 : 16
+ }
+ explain {
+ sql("select count(*) from test_compress_partitioned")
+ contains "inputSplitNum=${expectedSplitNum},
totalFileSize=734675596, scanRanges=${expectedSplitNum}"
+ contains "partition=8/8"
+ }
+
+ def countMix1 = sql """select count(*) from test_compress_partitioned
where dt="gzip" or dt="mix""""
+ assertEquals(600005, countMix1[0][0])
+ def countAll1 = sql """select count(*) from
test_compress_partitioned"""
+ assertEquals(1510010, countAll1[0][0])
+ def countWatchId1 = sql """select count(*) from
test_compress_partitioned where watchid=4611870011201662970"""
+ assertEquals(15, countWatchId1[0][0])
+
+ sql """set file_split_size=8388608"""
+ explain {
+ sql("select count(*) from test_compress_partitioned")
+ contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16"
+ contains "partition=8/8"
+ }
+
+ def countMix2 = sql """select count(*) from test_compress_partitioned
where dt="gzip" or dt="mix""""
+ assertEquals(600005, countMix2[0][0])
+ def countAll2 = sql """select count(*) from
test_compress_partitioned"""
+ assertEquals(1510010, countAll2[0][0])
+ def countWatchId2 = sql """select count(*) from
test_compress_partitioned where watchid=4611870011201662970"""
+ assertEquals(15, countWatchId2[0][0])
+
+ sql """set file_split_size=0"""
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]