This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 1d838332481 branch-4.0: [fix](hive) Fix StackOverflowError in insert overwrite on S3-compatible storage #58504 (#58534)
1d838332481 is described below

commit 1d838332481bf70fd9d8c1d277d54d021bb5a1e7
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Dec 2 09:31:24 2025 +0800

    branch-4.0: [fix](hive) Fix StackOverflowError in insert overwrite on S3-compatible storage #58504 (#58534)
    
    Cherry-picked from #58504
    
    Co-authored-by: zy-kkk <[email protected]>
---
 .../java/org/apache/doris/fs/obj/S3ObjStorage.java |   7 +-
 .../hive_on_hms_and_dlf.groovy                     | 116 ++++++++++++-
 .../iceberg_and_hive_on_glue.groovy                | 179 +++++++++++++++++++++
 3 files changed, 298 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 411bcf40c5e..cb499d78765 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -213,7 +213,10 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
         try {
             S3URI s3Uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri);
             String bucket = s3Uri.getBucket();
-            String prefix = s3Uri.getKey();
+            String key = s3Uri.getKey();
+            String schemeAndBucket = remotePath.substring(0, remotePath.length() - key.length());
+
+            String prefix = key.endsWith("/") ? key : key + "/";
             ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder()
                     .bucket(bucket)
                     .prefix(prefix)
@@ -228,7 +231,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
                 ListObjectsV2Response response = getClient().listObjectsV2(requestBuilder.build());
 
                 for (CommonPrefix dir : response.commonPrefixes()) {
-                    result.add("s3://" + bucket + "/" + dir.prefix());
+                    result.add(schemeAndBucket + dir.prefix());
                 }
                 continuationToken = response.nextContinuationToken();
             } while (continuationToken != null);
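The functional change is confined to S3ObjStorage above: the listing prefix is normalized to always end with "/", and the directory paths returned to the caller reuse the scheme and bucket taken from the original remotePath instead of a hardcoded "s3://" prefix, so oss://, obs://, cosn://, gs:// and s3a:// locations keep their own scheme. A minimal sketch of that path construction, with hypothetical values (only the schemeAndBucket and prefix expressions come from the patch; the rest is illustrative):

    public class SchemePreservingListingSketch {
        public static void main(String[] args) {
            String remotePath = "oss://my-bucket/warehouse/db/table"; // caller-supplied location (hypothetical)
            String key = "warehouse/db/table";                        // what S3URI.getKey() would yield here

            // Keep whatever scheme and bucket the caller used (oss://, obs://, cosn://, s3a://, ...).
            String schemeAndBucket = remotePath.substring(0, remotePath.length() - key.length());
            // -> "oss://my-bucket/"

            // Force a trailing slash so ListObjectsV2 only lists entries under the directory.
            String prefix = key.endsWith("/") ? key : key + "/";
            // -> "warehouse/db/table/"

            // A common prefix such as "warehouse/db/table/dt=2025-12-01/" is then reported as
            // schemeAndBucket + commonPrefix, keeping the original scheme rather than the old
            // hardcoded "s3://" + bucket + "/" + commonPrefix form.
            System.out.println(schemeAndBucket + prefix + "dt=2025-12-01/");
        }
    }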
diff --git a/regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy b/regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy
index 6eb71276e85..f0d711c7a8c 100644
--- a/regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy
+++ b/regression-test/suites/external_table_p2/refactor_catalog_param/hive_on_hms_and_dlf.groovy
@@ -183,6 +183,103 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
         assert dropResult.size() == 0
     }
 
+    /*--------test insert overwrite---------*/
+    def testInsertOverwrite = { String catalogProperties, String prefix, String dbLocation ->
+        def catalog_name = "${prefix}_catalog"
+        sql """
+            DROP CATALOG IF EXISTS ${catalog_name};
+        """
+        sql """
+            CREATE CATALOG IF NOT EXISTS ${catalog_name} PROPERTIES (
+                ${catalogProperties}
+            );
+        """
+        sql """
+            switch ${catalog_name};
+        """
+
+        def db_name = prefix + "_db" + System.currentTimeMillis() + ThreadLocalRandom.current().nextInt(1000)
+        sql """
+            DROP DATABASE IF EXISTS ${db_name} FORCE;
+        """
+        sql """
+            CREATE DATABASE IF NOT EXISTS ${db_name}
+            PROPERTIES ('location'='${dbLocation}');
+        """
+
+        def dbResult = sql """
+            show databases  like "${db_name}";
+        """
+        assert dbResult.size() == 1
+
+        sql """
+            use ${db_name};
+        """
+
+        def table_name = prefix + ThreadLocalRandom.current().nextInt(1000) + "_overwrite_table"
+
+        // Create non-partitioned table for insert overwrite test
+        sql """
+            CREATE TABLE ${table_name} (
+                id INT COMMENT 'id',
+                name VARCHAR(20) COMMENT 'name',
+                age INT COMMENT 'age'
+            ) ENGINE=hive
+            PROPERTIES (
+                'file_format'='parquet'
+            );
+        """
+
+        // Test 1: Initial insert
+        sql """
+            insert into ${table_name} values (1, 'alice', 20), (2, 'bob', 25);
+        """
+        def result1 = sql """
+            SELECT COUNT(*) FROM ${table_name};
+        """
+        assert result1[0][0] == 2
+
+        // Test 2: Insert overwrite - should replace all data
+        sql """
+            insert overwrite table ${table_name} values (3, 'charlie', 30);
+        """
+        def result2 = sql """
+            SELECT * FROM ${table_name};
+        """
+        assert result2.size() == 1
+        assert result2[0][0] == 3
+
+        // Test 3: Another insert overwrite with multiple rows
+        sql """
+            insert overwrite table ${table_name} values (4, 'david', 35), (5, 'eve', 28), (6, 'frank', 40);
+        """
+        def result3 = sql """
+            SELECT COUNT(*) FROM ${table_name};
+        """
+        assert result3[0][0] == 3
+
+        // Test 4: Verify data integrity after overwrite
+        def result4 = sql """
+            SELECT * FROM ${table_name} ORDER BY id;
+        """
+        assert result4.size() == 3
+        assert result4[0][0] == 4
+        assert result4[1][0] == 5
+        assert result4[2][0] == 6
+
+        sql """
+            DROP TABLE ${table_name};
+        """
+        sql """
+            DROP DATABASE ${db_name} FORCE;
+        """
+
+        def dropResult = sql """
+            show databases  like "${db_name}";
+        """
+        assert dropResult.size() == 0
+    }
+
     /*--------only execute query---------*/
     def testQuery = { String catalog_properties, String prefix, String db_name, String table_name, int data_count ->
 
@@ -371,6 +468,9 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
     //OBS - Partition table tests
     db_location = "obs://${obs_parent_path}/hive/hms/partition/" + System.currentTimeMillis()
     testPartitionTableInsert(hms_properties + obs_storage_properties, "hive_hms_obs_partition_test", db_location)
+    //OBS - Insert overwrite tests (verifies scheme preservation in listDirectories)
+    db_location = "obs://${obs_parent_path}/hive/hms/overwrite/" + System.currentTimeMillis()
+    testInsertOverwrite(hms_properties + obs_storage_properties, "hive_hms_obs_overwrite_test", db_location)
     //GCS
     if(context.config.otherConfigs.get("enableGCS")){
         db_location = "gs://${gcs_parent_path}/hive/hms/" + System.currentTimeMillis()
@@ -378,8 +478,11 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
         testQueryAndInsert(hms_properties + gcs_storage_new_properties, "hive_hms_gcs_test_new", db_location)
         testQueryAndInsert(hms_type_properties + hms_kerberos_old_prop + gcs_storage_old_properties, "hive_hms_on_gcs_kerberos_old", db_location)
         testQueryAndInsert(hms_type_properties + hms_kerberos_new_prop + gcs_storage_new_properties, "hive_hms_on_gcs_kerberos_new", db_location)
+        //GCS - Insert overwrite tests
+        db_location = "gs://${gcs_parent_path}/hive/hms/overwrite/" + System.currentTimeMillis()
+        testInsertOverwrite(hms_properties + gcs_storage_new_properties, "hive_hms_gcs_overwrite_test", db_location)
     }
-       
+
     //COS
     db_location = "cosn://${cos_parent_path}/hive/hms/" + System.currentTimeMillis()
     testQueryAndInsert(hms_properties + cos_storage_properties, "hive_hms_cos_test", db_location)
@@ -390,6 +493,9 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
     //COS - Partition table tests
     db_location = "cosn://${cos_parent_path}/hive/hms/partition/" + System.currentTimeMillis()
     testPartitionTableInsert(hms_properties + cos_storage_properties, "hive_hms_cos_partition_test", db_location)
+    //COS - Insert overwrite tests
+    db_location = "cosn://${cos_parent_path}/hive/hms/overwrite/" + System.currentTimeMillis()
+    testInsertOverwrite(hms_properties + cos_storage_properties, "hive_hms_cos_overwrite_test", db_location)
 
     db_location = "cos://${cos_parent_path}/hive/hms/" + System.currentTimeMillis()
     testQueryAndInsert(hms_properties + cos_storage_properties, "hive_hms_cos_test", db_location)
@@ -405,9 +511,12 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
     db_location = "oss://${oss_parent_path}/hive/hms/partition/" + System.currentTimeMillis()
     testPartitionTableInsert(hms_properties + oss_storage_properties, "hive_hms_oss_partition_test", db_location)
     testPartitionTableInsert(hms_properties + oss_region_param + oss_storage_properties, "hive_hms_oss_partition_test_region", db_location)
+    //OSS - Insert overwrite tests
+    db_location = "oss://${oss_parent_path}/hive/hms/overwrite/" + System.currentTimeMillis()
+    testInsertOverwrite(hms_properties + oss_storage_properties, "hive_hms_oss_overwrite_test", db_location)
 
     //s3
-   db_location = "s3a://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
+    db_location = "s3a://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
     testQueryAndInsert(hms_properties + s3_storage_properties, "hive_hms_s3_test", db_location)
     db_location = "s3a://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
     testQueryAndInsert(hms_properties + s3_region_param + s3_storage_properties, "hive_hms_s3_test_region", db_location)
@@ -419,6 +528,9 @@ suite("hive_on_hms_and_dlf", "p2,external,new_catalog_property") {
     //testQueryAndInsert(hms_type_properties + hms_kerberos_new_prop + s3_storage_properties, "hive_hms_on_s3_kerberos_new",db_location)
     db_location = "s3://${s3_parent_path}/hive/hms/"+System.currentTimeMillis()
     testQueryAndInsert(hms_properties + s3_storage_properties, "hive_hms_s3_test", db_location)
+    //S3 - Insert overwrite tests
+    db_location = "s3://${s3_parent_path}/hive/hms/overwrite/"+System.currentTimeMillis()
+    testInsertOverwrite(hms_properties + s3_storage_properties, "hive_hms_s3_overwrite_test", db_location)
     //HDFS
     db_location = "${hdfs_parent_path}/hive/hms/" + System.currentTimeMillis()
     testQueryAndInsert(hms_properties + hdfs_properties, "hive_hms_hdfs_test", db_location)
diff --git a/regression-test/suites/external_table_p2/refactor_catalog_param/iceberg_and_hive_on_glue.groovy b/regression-test/suites/external_table_p2/refactor_catalog_param/iceberg_and_hive_on_glue.groovy
index b1b1718c460..e69fa012617 100644
--- a/regression-test/suites/external_table_p2/refactor_catalog_param/iceberg_and_hive_on_glue.groovy
+++ b/regression-test/suites/external_table_p2/refactor_catalog_param/iceberg_and_hive_on_glue.groovy
@@ -143,6 +143,178 @@ suite("iceberg_and_hive_on_glue", "p2,external,hive,new_catalog_property") {
         assert dropResult.size() == 0
     }
 
+    /*--------test insert overwrite for hive---------*/
+    def testInsertOverwrite = { String catalogProperties, String prefix, String dbLocation ->
+        def catalog_name = "${prefix}_catalog"
+        sql """
+            DROP CATALOG IF EXISTS ${catalog_name};
+        """
+        sql """
+            CREATE CATALOG IF NOT EXISTS ${catalog_name} PROPERTIES (
+                ${catalogProperties}
+            );
+        """
+        sql """
+            switch ${catalog_name};
+        """
+
+        def db_name = prefix + "_db" + System.currentTimeMillis()
+        sql """
+            DROP DATABASE IF EXISTS ${db_name} FORCE;
+        """
+        sql """
+            CREATE DATABASE IF NOT EXISTS ${db_name}
+            PROPERTIES ('location'='${dbLocation}');
+        """
+
+        def dbResult = sql """
+            show databases  like "${db_name}";
+        """
+        assert dbResult.size() == 1
+
+        sql """
+            use ${db_name};
+        """
+
+        def table_name = prefix + "_overwrite_table"
+
+        // Create non-partitioned table for insert overwrite test
+        sql """
+            CREATE TABLE ${table_name} (
+                id INT COMMENT 'id',
+                name VARCHAR(20) COMMENT 'name',
+                age INT COMMENT 'age'
+            ) ENGINE=hive
+            PROPERTIES (
+                'file_format'='parquet'
+            );
+        """
+
+        // Test 1: Initial insert
+        sql """
+            insert into ${table_name} values (1, 'alice', 20), (2, 'bob', 25);
+        """
+        def result1 = sql """
+            SELECT COUNT(*) FROM ${table_name};
+        """
+        assert result1[0][0] == 2
+
+        // Test 2: Insert overwrite - should replace all data
+        sql """
+            insert overwrite table ${table_name} values (3, 'charlie', 30);
+        """
+        def result2 = sql """
+            SELECT * FROM ${table_name};
+        """
+        assert result2.size() == 1
+        assert result2[0][0] == 3
+
+        // Test 3: Another insert overwrite with multiple rows
+        sql """
+            insert overwrite table ${table_name} values (4, 'david', 35), (5, 'eve', 28), (6, 'frank', 40);
+        """
+        def result3 = sql """
+            SELECT COUNT(*) FROM ${table_name};
+        """
+        assert result3[0][0] == 3
+
+        sql """
+            DROP TABLE ${table_name};
+        """
+        sql """
+            DROP DATABASE ${db_name} FORCE;
+        """
+
+        def dropResult = sql """
+            show databases  like "${db_name}";
+        """
+        assert dropResult.size() == 0
+    }
+
+    /*--------test insert overwrite for iceberg---------*/
+    def testInsertOverwriteIceberg = { String catalogProperties, String prefix ->
+        def catalog_name = "${prefix}_catalog"
+        sql """
+            DROP CATALOG IF EXISTS ${catalog_name};
+        """
+        sql """
+            CREATE CATALOG IF NOT EXISTS ${catalog_name} PROPERTIES (
+                ${catalogProperties}
+            );
+        """
+        sql """
+            switch ${catalog_name};
+        """
+
+        def db_name = prefix + "_db"
+        sql """
+            DROP DATABASE IF EXISTS ${db_name} FORCE;
+        """
+        sql """
+            CREATE DATABASE IF NOT EXISTS ${db_name};
+        """
+
+        def dbResult = sql """
+            show databases  like "${db_name}";
+        """
+        assert dbResult.size() == 1
+
+        sql """
+            use ${db_name};
+        """
+
+        def table_name = prefix + "_overwrite_table"
+
+        // Create table for insert overwrite test
+        sql """
+            CREATE TABLE ${table_name} (
+                id INT NOT NULL COMMENT 'id',
+                name VARCHAR(20) COMMENT 'name',
+                age INT COMMENT 'age'
+            );
+        """
+
+        // Test 1: Initial insert
+        sql """
+            insert into ${table_name} values (1, 'alice', 20), (2, 'bob', 25);
+        """
+        def result1 = sql """
+            SELECT COUNT(*) FROM ${table_name};
+        """
+        assert result1[0][0] == 2
+
+        // Test 2: Insert overwrite - should replace all data
+        sql """
+            insert overwrite table ${table_name} values (3, 'charlie', 30);
+        """
+        def result2 = sql """
+            SELECT * FROM ${table_name};
+        """
+        assert result2.size() == 1
+        assert result2[0][0] == 3
+
+        // Test 3: Another insert overwrite with multiple rows
+        sql """
+            insert overwrite table ${table_name} values (4, 'david', 35), (5, 'eve', 28), (6, 'frank', 40);
+        """
+        def result3 = sql """
+            SELECT COUNT(*) FROM ${table_name};
+        """
+        assert result3[0][0] == 3
+
+        sql """
+            DROP TABLE ${table_name};
+        """
+        sql """
+            DROP DATABASE ${db_name} FORCE;
+        """
+
+        def dropResult = sql """
+            show databases  like "${db_name}";
+        """
+        assert dropResult.size() == 0
+    }
+
     /*--------only execute query---------*/
     def testQuery = { String catalog_properties, String prefix, String db_name, String table_name, int data_count ->
 
@@ -223,4 +395,11 @@ suite("iceberg_and_hive_on_glue", "p2,external,hive,new_catalog_property") {
     testQueryAndInsertIcerberg(warehouse_location + iceberg_glue_catalog_base_properties + glue_properties_1, "iceberg_glue_on_s3")
     testQueryAndInsertIcerberg(warehouse_location + iceberg_glue_catalog_base_properties + glue_properties_2, "iceberg_glue_on_s3")
     testQueryAndInsertIcerberg(warehouse_location + iceberg_glue_catalog_base_properties + glue_properties_3, "iceberg_glue_on_s3")
+
+    // Iceberg - Insert overwrite tests
+    testInsertOverwriteIceberg(warehouse_location + iceberg_glue_catalog_base_properties + glue_properties_3, "iceberg_glue_overwrite_on_s3")
+
+    // Hive on Glue - Insert overwrite tests
+    def db_location = "${s3_warehouse}hive-glue-s3-warehouse/hive-overwrite/" + System.currentTimeMillis()
+    testInsertOverwrite(hms_glue_catalog_base_properties + glue_properties_3, "hive_glue_overwrite_on_s3", db_location)
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
