This is an automated email from the ASF dual-hosted git repository.
ethanfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new 464a3842e [CELEBORN-1899] Fix configuration bug in shuffle s3
464a3842e is described below
commit 464a3842e32696a816688f2e5a85a608c1a717eb
Author: veli.yang <[email protected]>
AuthorDate: Wed Mar 12 10:40:52 2025 +0800
[CELEBORN-1899] Fix configuration bug in shuffle s3
### What changes were proposed in this pull request?
Closes [issue #3145](https://github.com/apache/celeborn/issues/3145).
### Why are the changes needed?
1. Support the `s3://` URI scheme (in addition to `s3a://`).
2. Fix diskFileInfo judging the S3 type incorrectly: `s3://` paths previously matched `COMPATIBLE_HDFS_REGEX`, which excluded only `s3a://`.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Deployment integration testing has been completed in the local environment.
Closes #3146 from shouwangyw/bugfix/resolve_bugs_3145.
Authored-by: veli.yang <[email protected]>
Signed-off-by: mingji <[email protected]>
---
.../scala/org/apache/celeborn/common/util/CelebornHadoopUtils.scala | 1 +
common/src/main/scala/org/apache/celeborn/common/util/Utils.scala | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/common/src/main/scala/org/apache/celeborn/common/util/CelebornHadoopUtils.scala b/common/src/main/scala/org/apache/celeborn/common/util/CelebornHadoopUtils.scala
index b703da07c..abd80a081 100644
--- a/common/src/main/scala/org/apache/celeborn/common/util/CelebornHadoopUtils.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/util/CelebornHadoopUtils.scala
@@ -54,6 +54,7 @@ object CelebornHadoopUtils extends Logging {
"S3 storage is enabled but s3AccessKey, s3SecretKey, or
s3EndpointRegion is not set")
}
hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
+ hadoopConf.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
hadoopConf.set(
"fs.s3a.aws.credentials.provider",
"org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
diff --git a/common/src/main/scala/org/apache/celeborn/common/util/Utils.scala b/common/src/main/scala/org/apache/celeborn/common/util/Utils.scala
index aa6f747e6..2de6a13ec 100644
--- a/common/src/main/scala/org/apache/celeborn/common/util/Utils.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/util/Utils.scala
@@ -1216,7 +1216,7 @@ object Utils extends Logging {
val SORTED_SUFFIX = ".sorted"
val INDEX_SUFFIX = ".index"
val SUFFIX_HDFS_WRITE_SUCCESS = ".success"
- val COMPATIBLE_HDFS_REGEX = "^(?!s3a://)[a-zA-Z0-9]+://.*"
+ val COMPATIBLE_HDFS_REGEX = "^(?!s3://)(?!s3a://)[a-zA-Z0-9]+://.*"
val S3_REGEX = "^s3[a]?://([a-z0-9][a-z0-9-]{1,61}[a-z0-9])(/.*)?$"
val UNKNOWN_APP_SHUFFLE_ID = -1