This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 04060e1bf94 [fix](regression) split hive compress-type heavy scan into
p2 (#61073)
04060e1bf94 is described below
commit 04060e1bf946f2baf4a970e6a7f2ced5370f596a
Author: Chenjunwei <[email protected]>
AuthorDate: Thu Mar 12 14:47:00 2026 +0800
[fix](regression) split hive compress-type heavy scan into p2 (#61073)
## Summary
- slim down `test_hive_compress_type` in `external_table_p0` by removing
heavy `test_compress_partitioned` scans
- add new `test_hive_compress_type_large_data` in `external_table_p2` to
cover the moved large-data checks
## Why
- reduce external regression p0 runtime for this case:
`test_hive_compress_type` drops from **~5 min → ~3 sec** (removes 6
large-table scans + 2 EXPLAINs on ~734 MB `test_compress_partitioned`)
- keep heavy-data/file-split behavior validation in p2 instead of
dropping coverage
## Details
- p0 keeps lightweight parquet LZ4/LZO compression query checks
- p2 keeps large-table checks with `file_split_size=0` and
`file_split_size=8388608` and validates row counts/scan split
expectations
## Test
- `test_hive_compress_type` (p0): measured runtime ~3s after this change
(was ~5 min)
- `test_hive_compress_type_large_data` (p2): covers the moved
large-data/split assertions
---
.../hive/test_hive_compress_type.groovy | 41 -----------
.../hive/test_hive_compress_type_large_data.groovy | 82 ++++++++++++++++++++++
2 files changed, 82 insertions(+), 41 deletions(-)
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
index 40d5b0a4a73..bd5f4efc28b 100644
---
a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
@@ -23,16 +23,6 @@ suite("test_hive_compress_type", "p0,external") {
return;
}
- def backends = sql """show backends"""
- def backendNum = backends.size()
- logger.info("get backendNum: ${backendNum}")
- // `parallel_fragment_exec_instance_num` may be displayed as
- // `deprecated_parallel_fragment_exec_instance_num` in newer branches.
- def parallelExecInstanceRows = sql("show variables like
'%parallel_fragment_exec_instance_num%'")
- assertTrue(parallelExecInstanceRows.size() > 0)
- def parallelExecInstanceNum = (parallelExecInstanceRows[0][1] as
String).toInteger()
- logger.info("get ${parallelExecInstanceRows[0][0]}:
${parallelExecInstanceNum}")
-
for (String hivePrefix : ["hive3"]) {
String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
String catalog_name = "${hivePrefix}_test_hive_compress_type"
@@ -45,37 +35,6 @@ suite("test_hive_compress_type", "p0,external") {
);"""
sql """use `${catalog_name}`.`multi_catalog`"""
- // table test_compress_partitioned has 6 partitions with different
compressed file: plain, gzip, bzip2, deflate
- sql """set file_split_size=0"""
- // COUNT pushdown split behavior depends on:
- // totalFileNum < parallel_fragment_exec_instance_num * backendNum
- // test_compress_partitioned currently has 16 files.
- def expectedSplitNum = 16
- if (backendNum > 1) {
- expectedSplitNum = (16 < parallelExecInstanceNum * backendNum) ?
28 : 16
- }
- explain {
- sql("select count(*) from test_compress_partitioned")
- contains "inputSplitNum=${expectedSplitNum},
totalFileSize=734675596, scanRanges=${expectedSplitNum}"
- contains "partition=8/8"
- }
- qt_q21 """select count(*) from test_compress_partitioned where
dt="gzip" or dt="mix""""
- qt_q22 """select count(*) from test_compress_partitioned"""
- order_qt_q23 """select * from test_compress_partitioned where
watchid=4611870011201662970"""
-
- sql """set file_split_size=8388608"""
- explain {
- sql("select count(*) from test_compress_partitioned")
- contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16"
- contains "partition=8/8"
- }
-
- qt_q31 """select count(*) from test_compress_partitioned where
dt="gzip" or dt="mix""""
- qt_q32 """select count(*) from test_compress_partitioned"""
- order_qt_q33 """select * from test_compress_partitioned where
watchid=4611870011201662970"""
- sql """set file_split_size=0"""
-
-
order_qt_q42 """ select count(*) from parquet_lz4_compression ;
"""
order_qt_q43 """ select * from parquet_lz4_compression
order by
col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
new file mode 100644
index 00000000000..943b81d30b9
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_hive_compress_type_large_data.groovy
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_compress_type_large_data", "p2,external") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test.")
+ return;
+ }
+
+ def backends = sql """show backends"""
+ def backendNum = backends.size()
+ logger.info("get backendNum: ${backendNum}")
+ // `parallel_fragment_exec_instance_num` may be displayed as
+ // `deprecated_parallel_fragment_exec_instance_num` in newer branches.
+ def parallelExecInstanceRows = sql("show variables like
'%parallel_fragment_exec_instance_num%'")
+ assertTrue(parallelExecInstanceRows.size() > 0)
+ def parallelExecInstanceNum = (parallelExecInstanceRows[0][1] as
String).toInteger()
+ logger.info("get ${parallelExecInstanceRows[0][0]}:
${parallelExecInstanceNum}")
+
+ for (String hivePrefix : ["hive3"]) {
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String catalog_name =
"${hivePrefix}_test_hive_compress_type_large_data"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """create catalog if not exists ${catalog_name} properties (
+ "type"="hms",
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+ );"""
+ sql """use `${catalog_name}`.`multi_catalog`"""
+
+ // table test_compress_partitioned has mixed compressed files and
larger data volume.
+ sql """set file_split_size=0"""
+ def expectedSplitNum = 16
+ if (backendNum > 1) {
+ expectedSplitNum = (16 < parallelExecInstanceNum * backendNum) ?
28 : 16
+ }
+ explain {
+ sql("select count(*) from test_compress_partitioned")
+ contains "inputSplitNum=${expectedSplitNum},
totalFileSize=734675596, scanRanges=${expectedSplitNum}"
+ contains "partition=8/8"
+ }
+
+ def countMix1 = sql """select count(*) from test_compress_partitioned
where dt="gzip" or dt="mix""""
+ assertEquals(600005, countMix1[0][0])
+ def countAll1 = sql """select count(*) from
test_compress_partitioned"""
+ assertEquals(1510010, countAll1[0][0])
+ def countWatchId1 = sql """select count(*) from
test_compress_partitioned where watchid=4611870011201662970"""
+ assertEquals(15, countWatchId1[0][0])
+
+ sql """set file_split_size=8388608"""
+ explain {
+ sql("select count(*) from test_compress_partitioned")
+ contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16"
+ contains "partition=8/8"
+ }
+
+ def countMix2 = sql """select count(*) from test_compress_partitioned
where dt="gzip" or dt="mix""""
+ assertEquals(600005, countMix2[0][0])
+ def countAll2 = sql """select count(*) from
test_compress_partitioned"""
+ assertEquals(1510010, countAll2[0][0])
+ def countWatchId2 = sql """select count(*) from
test_compress_partitioned where watchid=4611870011201662970"""
+ assertEquals(15, countWatchId2[0][0])
+
+ sql """set file_split_size=0"""
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]