This is an automated email from the ASF dual-hosted git repository.

Gabriel39 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 44de6fd7a93 [fix](regression) Use in-network MinIO endpoint for paimon JDBC seed (#64113)
44de6fd7a93 is described below

commit 44de6fd7a930ff162227bd6777ac4ddb8f12b779
Author: Chenjunwei <[email protected]>
AuthorDate: Thu Jul 2 10:35:13 2026 +0800

    [fix](regression) Use in-network MinIO endpoint for paimon JDBC seed (#64113)
    
    ### What problem does this PR solve?
    
    Issue Number: None
    
    Related PR: #61932
    
    Problem Summary:
    The master external regression suite can hang in
    `test_paimon_jdbc_catalog` while running `docker exec ... spark-sql`.
    
    Root cause:
    - the Spark seed path used `http://${externalEnvIp}:${minioPort}` as the
    MinIO endpoint
    - that host-mapped endpoint may be reachable from the host but not from
    inside the `spark-iceberg` container
    - Spark can then block in repeated S3A metadata retries
    - the local `ProcessBuilder` helper waited without consuming
    stdout/stderr or enforcing a timeout, so the suite could stay stuck
    until the CI job timeout
    
    This change makes only the Spark seed command use the paired MinIO
    container endpoint when it exists in the Docker network. The Doris
    catalog configuration still uses the configured external endpoint. The
    command helper now consumes stdout/stderr and applies bounded timeouts
    so future command failures surface instead of silently hanging.
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test: Manual test
    - Source guard for the Paimon JDBC seed command failed before the change
    and passed after the change
    - `git diff HEAD~1..HEAD --check -- regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy`
    - Attempted `timeout 180 ./run-regression-test.sh --run -d
    external_table_p0/paimon -s test_paimon_jdbc_catalog`; it timed out
    during regression framework shade/package before running the suite
    - Behavior changed: Yes (the regression Spark seed step uses the
    Docker-network MinIO endpoint when the paired MinIO container exists;
    Doris catalog access still uses the configured external endpoint)
    - Does this need documentation: No
---
 .../paimon/test_paimon_jdbc_catalog.groovy         | 50 +++++++++++++++-------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy
index 82d8d5b0dfa..3974051f9f6 100644
--- a/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy
+++ b/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy
@@ -66,38 +66,58 @@ suite("test_paimon_jdbc_catalog", "p0,external") {
 
     assertTrue(jdbcDriversDir != null && !jdbcDriversDir.isEmpty(), 
"jdbc_drivers_dir must be configured")
 
-    def executeCommand = { String cmd, Boolean mustSuc ->
+    def executeCommand = { String cmd, Boolean mustSuc, int timeoutSeconds = 
300 ->
+        StringBuilder stdout = new StringBuilder()
+        StringBuilder stderr = new StringBuilder()
         try {
             logger.info("execute ${cmd}")
-            def proc = new ProcessBuilder("/bin/bash", "-c", 
cmd).redirectErrorStream(true).start()
-            int exitcode = proc.waitFor()
-            String output = proc.text
+            def proc = new ProcessBuilder("/bin/bash", "-c", cmd).start()
+            proc.consumeProcessOutput(stdout, stderr)
+            proc.waitForOrKill(timeoutSeconds * 1000)
+            int exitcode = proc.exitValue()
+            String output = stdout.toString()
+            String error = stderr.toString()
             if (exitcode != 0) {
-                logger.info("exit code: ${exitcode}, output\n: ${output}")
+                logger.info("exit code: ${exitcode}, stdout\n: 
${output}\nstderr\n: ${error}")
                 if (mustSuc) {
-                    assertTrue(false, "Execute failed: ${cmd}")
+                    assertTrue(false, "Execute failed: 
${cmd}\nstdout:\n${output}\nstderr:\n${error}")
                 }
             }
             return output
         } catch (IOException e) {
-            assertTrue(false, "Execute timeout: ${cmd}")
+            assertTrue(false, "Execute failed: ${cmd}, err: ${e.message}")
         }
     }
 
-    executeCommand("mkdir -p ${localDriverDir}", false)
-    executeCommand("mkdir -p ${jdbcDriversDir}", true)
+    executeCommand("mkdir -p ${localDriverDir}", false, 60)
+    executeCommand("mkdir -p ${jdbcDriversDir}", true, 60)
     if (!new File(localDriverPath).exists()) {
-        executeCommand("/usr/bin/curl --max-time 600 ${driverDownloadUrl} 
--output ${localDriverPath}", true)
+        executeCommand("/usr/bin/curl --max-time 600 ${driverDownloadUrl} 
--output ${localDriverPath}", true, 660)
     }
-    executeCommand("cp -f ${localDriverPath} ${jdbcDriversDir}/${driverName}", 
true)
+    executeCommand("cp -f ${localDriverPath} ${jdbcDriversDir}/${driverName}", 
true, 60)
 
-    String sparkContainerName = executeCommand("docker ps --filter 
name=spark-iceberg --format {{.Names}}", false)
+    String sparkContainerName = executeCommand("docker ps --filter 
name=spark-iceberg --format {{.Names}}", false, 30)
             ?.trim()
     if (sparkContainerName == null || sparkContainerName.isEmpty()) {
         logger.info("spark-iceberg container not found, skip this test")
         return
     }
-    executeCommand("docker cp ${localDriverPath} 
${sparkContainerName}:${sparkDriverPath}", true)
+    executeCommand("docker cp ${localDriverPath} 
${sparkContainerName}:${sparkDriverPath}", true, 60)
+
+    String sparkMinioEndpoint = "http://${externalEnvIp}:${minioPort}";
+    if (sparkContainerName.contains("spark-iceberg")) {
+        String sparkMinioContainerName = 
sparkContainerName.replaceFirst("spark-iceberg", "minio")
+        String resolvedSparkMinioContainer = executeCommand(
+                "docker ps --filter name=${sparkMinioContainerName} --format 
{{.Names}}",
+                false,
+                30
+        )?.trim()
+        if (resolvedSparkMinioContainer == sparkMinioContainerName) {
+            // Spark runs inside the docker network and may not be able to 
reach the host-mapped MinIO port.
+            sparkMinioEndpoint = "http://${resolvedSparkMinioContainer}:9000";
+        }
+    }
+    logger.info("spark seed minio endpoint: ${sparkMinioEndpoint}")
 
     def sparkPaimonJdbc = { String sqlText ->
         String escapedSql = sqlText.replaceAll('"', '\\\\"')
@@ -115,13 +135,13 @@ suite("test_paimon_jdbc_catalog", "p0,external") {
 --conf spark.sql.catalog.${sparkSeedCatalogName}.jdbc.user=postgres \
 --conf spark.sql.catalog.${sparkSeedCatalogName}.jdbc.password=123456 \
 --conf spark.sql.catalog.${sparkSeedCatalogName}.lock.enabled=false \
---conf 
spark.sql.catalog.${sparkSeedCatalogName}.s3.endpoint=http://${externalEnvIp}:${minioPort}
 \
+--conf 
spark.sql.catalog.${sparkSeedCatalogName}.s3.endpoint=${sparkMinioEndpoint} \
 --conf spark.sql.catalog.${sparkSeedCatalogName}.s3.access-key=${minioAk} \
 --conf spark.sql.catalog.${sparkSeedCatalogName}.s3.secret-key=${minioSk} \
 --conf spark.sql.catalog.${sparkSeedCatalogName}.s3.region=us-east-1 \
 --conf spark.sql.catalog.${sparkSeedCatalogName}.s3.path.style.access=true \
 -e "${escapedSql}" """
-        executeCommand(command, true)
+        executeCommand(command, true, 300)
     }
 
     def assertSystemTableReadable = { String tableExpr, List<String> 
expectedColumns = [], Integer minCount = null ->


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to