This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new a3c7a07afac branch-3.1: [fix](hive) Fix StackOverflowError in insert overwrite on S3-compatible storage #58504 (#58533)
a3c7a07afac is described below
commit a3c7a07afacadf1a43e5b76bbc6c447151eb3086
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Dec 2 11:06:53 2025 +0800
branch-3.1: [fix](hive) Fix StackOverflowError in insert overwrite on
S3-compatible storage #58504 (#58533)
Cherry-picked from #58504
Co-authored-by: zy-kkk <[email protected]>
---
.../java/org/apache/doris/fs/obj/S3ObjStorage.java | 7 +-
.../hive_on_hms_and_dlf.groovy | 116 ++++++++++++-
.../iceberg_and_hive_on_glue.groovy | 179 +++++++++++++++++++++
3 files changed, 298 insertions(+), 4 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 1ae1f32319f..f09eaffa445 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -211,7 +211,10 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
try {
S3URI s3Uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri);
String bucket = s3Uri.getBucket();
- String prefix = s3Uri.getKey();
+ String key = s3Uri.getKey();
+ String schemeAndBucket = remotePath.substring(0, remotePath.length() - key.length());
+
+ String prefix = key.endsWith("/") ? key : key + "/";
ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder()
.bucket(bucket)
.prefix(prefix)
@@ -226,7 +229,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
ListObjectsV2Response response = getClient().listObjectsV2(requestBuilder.build());
for (CommonPrefix dir : response.commonPrefixes()) {
- result.add("s3://" + bucket + "/" + dir.prefix());
+ result.add(schemeAndBucket + dir.prefix());
}
continuationToken = response.nextContinuationToken();
} while (continuationToken != null);
diff --git
a/regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy
b/regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy
index 6eb71276e85..f0d711c7a8c 100644
---
a/regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy
+++
b/regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy
@@ -183,6 +183,103 @@ suite("hive_on_hms_and_dlf",
"p2,external,new_catalog_property") {
assert dropResult.size() == 0
}
+ /*--------test insert overwrite---------*/
+ def testInsertOverwrite = { String catalogProperties, String prefix,
String dbLocation ->
+ def catalog_name = "${prefix}_catalog"
+ sql """
+ DROP CATALOG IF EXISTS ${catalog_name};
+ """
+ sql """
+ CREATE CATALOG IF NOT EXISTS ${catalog_name} PROPERTIES (
+ ${catalogProperties}
+ );
+ """
+ sql """
+ switch ${catalog_name};
+ """
+
+ def db_name = prefix + "_db" + System.currentTimeMillis() +
ThreadLocalRandom.current().nextInt(1000)
+ sql """
+ DROP DATABASE IF EXISTS ${db_name} FORCE;
+ """
+ sql """
+ CREATE DATABASE IF NOT EXISTS ${db_name}
+ PROPERTIES ('location'='${dbLocation}');
+ """
+
+ def dbResult = sql """
+ show databases like "${db_name}";
+ """
+ assert dbResult.size() == 1
+
+ sql """
+ use ${db_name};
+ """
+
+ def table_name = prefix + ThreadLocalRandom.current().nextInt(1000) +
"_overwrite_table"
+
+ // Create non-partitioned table for insert overwrite test
+ sql """
+ CREATE TABLE ${table_name} (
+ id INT COMMENT 'id',
+ name VARCHAR(20) COMMENT 'name',
+ age INT COMMENT 'age'
+ ) ENGINE=hive
+ PROPERTIES (
+ 'file_format'='parquet'
+ );
+ """
+
+ // Test 1: Initial insert
+ sql """
+ insert into ${table_name} values (1, 'alice', 20), (2, 'bob', 25);
+ """
+ def result1 = sql """
+ SELECT COUNT(*) FROM ${table_name};
+ """
+ assert result1[0][0] == 2
+
+ // Test 2: Insert overwrite - should replace all data
+ sql """
+ insert overwrite table ${table_name} values (3, 'charlie', 30);
+ """
+ def result2 = sql """
+ SELECT * FROM ${table_name};
+ """
+ assert result2.size() == 1
+ assert result2[0][0] == 3
+
+ // Test 3: Another insert overwrite with multiple rows
+ sql """
+ insert overwrite table ${table_name} values (4, 'david', 35), (5,
'eve', 28), (6, 'frank', 40);
+ """
+ def result3 = sql """
+ SELECT COUNT(*) FROM ${table_name};
+ """
+ assert result3[0][0] == 3
+
+ // Test 4: Verify data integrity after overwrite
+ def result4 = sql """
+ SELECT * FROM ${table_name} ORDER BY id;
+ """
+ assert result4.size() == 3
+ assert result4[0][0] == 4
+ assert result4[1][0] == 5
+ assert result4[2][0] == 6
+
+ sql """
+ DROP TABLE ${table_name};
+ """
+ sql """
+ DROP DATABASE ${db_name} FORCE;
+ """
+
+ def dropResult = sql """
+ show databases like "${db_name}";
+ """
+ assert dropResult.size() == 0
+ }
+
/*--------only execute query---------*/
def testQuery = { String catalog_properties, String prefix, String
db_name, String table_name, int data_count ->
@@ -371,6 +468,9 @@ suite("hive_on_hms_and_dlf",
"p2,external,new_catalog_property") {
//OBS - Partition table tests
db_location = "obs://${obs_parent_path}/hive/hms/partition/" +
System.currentTimeMillis()
testPartitionTableInsert(hms_properties + obs_storage_properties,
"hive_hms_obs_partition_test", db_location)
+ //OBS - Insert overwrite tests (verifies scheme preservation in
listDirectories)
+ db_location = "obs://${obs_parent_path}/hive/hms/overwrite/" +
System.currentTimeMillis()
+ testInsertOverwrite(hms_properties + obs_storage_properties,
"hive_hms_obs_overwrite_test", db_location)
//GCS
if(context.config.otherConfigs.get("enableGCS")){
db_location = "gs://${gcs_parent_path}/hive/hms/" +
System.currentTimeMillis()
@@ -378,8 +478,11 @@ suite("hive_on_hms_and_dlf",
"p2,external,new_catalog_property") {
testQueryAndInsert(hms_properties + gcs_storage_new_properties,
"hive_hms_gcs_test_new", db_location)
testQueryAndInsert(hms_type_properties + hms_kerberos_old_prop +
gcs_storage_old_properties, "hive_hms_on_gcs_kerberos_old", db_location)
testQueryAndInsert(hms_type_properties + hms_kerberos_new_prop +
gcs_storage_new_properties, "hive_hms_on_gcs_kerberos_new", db_location)
+ //GCS - Insert overwrite tests
+ db_location = "gs://${gcs_parent_path}/hive/hms/overwrite/" +
System.currentTimeMillis()
+ testInsertOverwrite(hms_properties + gcs_storage_new_properties,
"hive_hms_gcs_overwrite_test", db_location)
}
-
+
//COS
db_location = "cosn://${cos_parent_path}/hive/hms/" +
System.currentTimeMillis()
testQueryAndInsert(hms_properties + cos_storage_properties,
"hive_hms_cos_test", db_location)
@@ -390,6 +493,9 @@ suite("hive_on_hms_and_dlf",
"p2,external,new_catalog_property") {
//COS - Partition table tests
db_location = "cosn://${cos_parent_path}/hive/hms/partition/" +
System.currentTimeMillis()
testPartitionTableInsert(hms_properties + cos_storage_properties,
"hive_hms_cos_partition_test", db_location)
+ //COS - Insert overwrite tests
+ db_location = "cosn://${cos_parent_path}/hive/hms/overwrite/" +
System.currentTimeMillis()
+ testInsertOverwrite(hms_properties + cos_storage_properties,
"hive_hms_cos_overwrite_test", db_location)
db_location = "cos://${cos_parent_path}/hive/hms/" +
System.currentTimeMillis()
testQueryAndInsert(hms_properties + cos_storage_properties,
"hive_hms_cos_test", db_location)
@@ -405,9 +511,12 @@ suite("hive_on_hms_and_dlf",
"p2,external,new_catalog_property") {
db_location = "oss://${oss_parent_path}/hive/hms/partition/" +
System.currentTimeMillis()
testPartitionTableInsert(hms_properties + oss_storage_properties,
"hive_hms_oss_partition_test", db_location)
testPartitionTableInsert(hms_properties + oss_region_param +
oss_storage_properties, "hive_hms_oss_partition_test_region", db_location)
+ //OSS - Insert overwrite tests
+ db_location = "oss://${oss_parent_path}/hive/hms/overwrite/" +
System.currentTimeMillis()
+ testInsertOverwrite(hms_properties + oss_storage_properties,
"hive_hms_oss_overwrite_test", db_location)
//s3
- db_location = "s3a://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
+ db_location =
"s3a://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
testQueryAndInsert(hms_properties + s3_storage_properties,
"hive_hms_s3_test", db_location)
db_location =
"s3a://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
testQueryAndInsert(hms_properties + s3_region_param +
s3_storage_properties, "hive_hms_s3_test_region", db_location)
@@ -419,6 +528,9 @@ suite("hive_on_hms_and_dlf",
"p2,external,new_catalog_property") {
//testQueryAndInsert(hms_type_properties + hms_kerberos_new_prop +
s3_storage_properties, "hive_hms_on_s3_kerberos_new",db_location)
db_location = "s3://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
testQueryAndInsert(hms_properties + s3_storage_properties,
"hive_hms_s3_test", db_location)
+ //S3 - Insert overwrite tests
+ db_location =
"s3://${s3_parent_path}/hive/hms/overwrite/"+System.currentTimeMillis()
+ testInsertOverwrite(hms_properties + s3_storage_properties,
"hive_hms_s3_overwrite_test", db_location)
//HDFS
db_location = "${hdfs_parent_path}/hive/hms/" + System.currentTimeMillis()
testQueryAndInsert(hms_properties + hdfs_properties, "hive_hms_hdfs_test",
db_location)
diff --git
a/regression-test/suites/external_table_p2/refactor_catalog_param/iceberg_and_hive_on_glue.groovy
b/regression-test/suites/external_table_p2/refactor_catalog_param/iceberg_and_hive_on_glue.groovy
index b1b1718c460..e69fa012617 100644
---
a/regression-test/suites/external_table_p2/refactor_catalog_param/iceberg_and_hive_on_glue.groovy
+++
b/regression-test/suites/external_table_p2/refactor_catalog_param/iceberg_and_hive_on_glue.groovy
@@ -143,6 +143,178 @@ suite("iceberg_and_hive_on_glue",
"p2,external,hive,new_catalog_property") {
assert dropResult.size() == 0
}
+ /*--------test insert overwrite for hive---------*/
+ def testInsertOverwrite = { String catalogProperties, String prefix,
String dbLocation ->
+ def catalog_name = "${prefix}_catalog"
+ sql """
+ DROP CATALOG IF EXISTS ${catalog_name};
+ """
+ sql """
+ CREATE CATALOG IF NOT EXISTS ${catalog_name} PROPERTIES (
+ ${catalogProperties}
+ );
+ """
+ sql """
+ switch ${catalog_name};
+ """
+
+ def db_name = prefix + "_db" + System.currentTimeMillis()
+ sql """
+ DROP DATABASE IF EXISTS ${db_name} FORCE;
+ """
+ sql """
+ CREATE DATABASE IF NOT EXISTS ${db_name}
+ PROPERTIES ('location'='${dbLocation}');
+ """
+
+ def dbResult = sql """
+ show databases like "${db_name}";
+ """
+ assert dbResult.size() == 1
+
+ sql """
+ use ${db_name};
+ """
+
+ def table_name = prefix + "_overwrite_table"
+
+ // Create non-partitioned table for insert overwrite test
+ sql """
+ CREATE TABLE ${table_name} (
+ id INT COMMENT 'id',
+ name VARCHAR(20) COMMENT 'name',
+ age INT COMMENT 'age'
+ ) ENGINE=hive
+ PROPERTIES (
+ 'file_format'='parquet'
+ );
+ """
+
+ // Test 1: Initial insert
+ sql """
+ insert into ${table_name} values (1, 'alice', 20), (2, 'bob', 25);
+ """
+ def result1 = sql """
+ SELECT COUNT(*) FROM ${table_name};
+ """
+ assert result1[0][0] == 2
+
+ // Test 2: Insert overwrite - should replace all data
+ sql """
+ insert overwrite table ${table_name} values (3, 'charlie', 30);
+ """
+ def result2 = sql """
+ SELECT * FROM ${table_name};
+ """
+ assert result2.size() == 1
+ assert result2[0][0] == 3
+
+ // Test 3: Another insert overwrite with multiple rows
+ sql """
+ insert overwrite table ${table_name} values (4, 'david', 35), (5,
'eve', 28), (6, 'frank', 40);
+ """
+ def result3 = sql """
+ SELECT COUNT(*) FROM ${table_name};
+ """
+ assert result3[0][0] == 3
+
+ sql """
+ DROP TABLE ${table_name};
+ """
+ sql """
+ DROP DATABASE ${db_name} FORCE;
+ """
+
+ def dropResult = sql """
+ show databases like "${db_name}";
+ """
+ assert dropResult.size() == 0
+ }
+
+ /*--------test insert overwrite for iceberg---------*/
+ def testInsertOverwriteIceberg = { String catalogProperties, String prefix
->
+ def catalog_name = "${prefix}_catalog"
+ sql """
+ DROP CATALOG IF EXISTS ${catalog_name};
+ """
+ sql """
+ CREATE CATALOG IF NOT EXISTS ${catalog_name} PROPERTIES (
+ ${catalogProperties}
+ );
+ """
+ sql """
+ switch ${catalog_name};
+ """
+
+ def db_name = prefix + "_db"
+ sql """
+ DROP DATABASE IF EXISTS ${db_name} FORCE;
+ """
+ sql """
+ CREATE DATABASE IF NOT EXISTS ${db_name};
+ """
+
+ def dbResult = sql """
+ show databases like "${db_name}";
+ """
+ assert dbResult.size() == 1
+
+ sql """
+ use ${db_name};
+ """
+
+ def table_name = prefix + "_overwrite_table"
+
+ // Create table for insert overwrite test
+ sql """
+ CREATE TABLE ${table_name} (
+ id INT NOT NULL COMMENT 'id',
+ name VARCHAR(20) COMMENT 'name',
+ age INT COMMENT 'age'
+ );
+ """
+
+ // Test 1: Initial insert
+ sql """
+ insert into ${table_name} values (1, 'alice', 20), (2, 'bob', 25);
+ """
+ def result1 = sql """
+ SELECT COUNT(*) FROM ${table_name};
+ """
+ assert result1[0][0] == 2
+
+ // Test 2: Insert overwrite - should replace all data
+ sql """
+ insert overwrite table ${table_name} values (3, 'charlie', 30);
+ """
+ def result2 = sql """
+ SELECT * FROM ${table_name};
+ """
+ assert result2.size() == 1
+ assert result2[0][0] == 3
+
+ // Test 3: Another insert overwrite with multiple rows
+ sql """
+ insert overwrite table ${table_name} values (4, 'david', 35), (5,
'eve', 28), (6, 'frank', 40);
+ """
+ def result3 = sql """
+ SELECT COUNT(*) FROM ${table_name};
+ """
+ assert result3[0][0] == 3
+
+ sql """
+ DROP TABLE ${table_name};
+ """
+ sql """
+ DROP DATABASE ${db_name} FORCE;
+ """
+
+ def dropResult = sql """
+ show databases like "${db_name}";
+ """
+ assert dropResult.size() == 0
+ }
+
/*--------only execute query---------*/
def testQuery = { String catalog_properties, String prefix, String
db_name, String table_name, int data_count ->
@@ -223,4 +395,11 @@ suite("iceberg_and_hive_on_glue",
"p2,external,hive,new_catalog_property") {
testQueryAndInsertIcerberg(warehouse_location +
iceberg_glue_catalog_base_properties + glue_properties_1, "iceberg_glue_on_s3")
testQueryAndInsertIcerberg(warehouse_location +
iceberg_glue_catalog_base_properties + glue_properties_2, "iceberg_glue_on_s3")
testQueryAndInsertIcerberg(warehouse_location +
iceberg_glue_catalog_base_properties + glue_properties_3, "iceberg_glue_on_s3")
+
+ // Iceberg - Insert overwrite tests
+ testInsertOverwriteIceberg(warehouse_location +
iceberg_glue_catalog_base_properties + glue_properties_3,
"iceberg_glue_overwrite_on_s3")
+
+ // Hive on Glue - Insert overwrite tests
+ def db_location = "${s3_warehouse}hive-glue-s3-warehouse/hive-overwrite/"
+ System.currentTimeMillis()
+ testInsertOverwrite(hms_glue_catalog_base_properties + glue_properties_3,
"hive_glue_overwrite_on_s3", db_location)
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]