This is an automated email from the ASF dual-hosted git repository.

ethanfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new b204a2601 [CELEBORN-1755] Update doc to include S3 as one of storage 
layers
b204a2601 is described below

commit b204a26010cf068099822f927584cd24338a8c3b
Author: zhaohehuhu <[email protected]>
AuthorDate: Mon Dec 2 11:00:18 2024 +0800

    [CELEBORN-1755] Update doc to include S3 as one of storage layers
    
    ### What changes were proposed in this pull request?
    
    As title.
    
    ### Why are the changes needed?
    
    The docs fail to mention S3 as one of the storage layers.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Closes #2963 from zhaohehuhu/dev-1128.
    
    Authored-by: zhaohehuhu <[email protected]>
    Signed-off-by: mingji <[email protected]>
---
 .../org/apache/celeborn/common/CelebornConf.scala  |  2 +-
 .../apache/celeborn/common/CelebornConfSuite.scala | 23 ++++++++++++++++++++++
 docs/configuration/client.md                       |  2 +-
 docs/configuration/master.md                       |  2 +-
 docs/configuration/worker.md                       |  2 +-
 5 files changed, 27 insertions(+), 4 deletions(-)

diff --git 
a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala 
b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
index c914e404b..32cf2ebfa 100644
--- a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
@@ -5544,7 +5544,7 @@ object CelebornConf extends Logging {
       .categories("master", "worker", "client")
       .version("0.3.0")
       .doc(
-        "Enabled storages. Available options: MEMORY,HDD,SSD,HDFS. Note: HDD 
and SSD would be treated as identical.")
+        "Enabled storages. Available options: MEMORY,HDD,SSD,HDFS,S3. Note: 
HDD and SSD would be treated as identical.")
       .stringConf
       .transform(_.toUpperCase(Locale.ROOT))
       .checkValue(p => p.split(",").map(StorageInfo.validate).reduce(_ && _), 
"")
diff --git 
a/common/src/test/scala/org/apache/celeborn/common/CelebornConfSuite.scala 
b/common/src/test/scala/org/apache/celeborn/common/CelebornConfSuite.scala
index 62d8d42ca..37f225eb7 100644
--- a/common/src/test/scala/org/apache/celeborn/common/CelebornConfSuite.scala
+++ b/common/src/test/scala/org/apache/celeborn/common/CelebornConfSuite.scala
@@ -204,6 +204,10 @@ class CelebornConfSuite extends CelebornFunSuite {
 
     conf.set("celeborn.storage.availableTypes", "SSD,HDD")
     assert(!conf.workerBaseDirs.isEmpty)
+
+    conf.set("celeborn.storage.availableTypes", "S3")
+    conf.set("celeborn.storage.s3.dir", "s3a:///xxx")
+    assert(conf.workerBaseDirs.isEmpty)
   }
 
   test("Test commit file threads") {
@@ -214,6 +218,17 @@ class CelebornConfSuite extends CelebornFunSuite {
 
     conf.set("celeborn.storage.availableTypes", "SSD,HDD")
     assert(conf.workerCommitThreads === 32)
+
+    conf.set("celeborn.storage.availableTypes", "S3")
+    conf.set("celeborn.storage.s3.dir", "s3a:///xxx")
+    assert(conf.workerCommitThreads === 32)
+  }
+
+  test("Test commit buffer size") {
+    val conf = new CelebornConf()
+    conf.set("celeborn.storage.availableTypes", "S3")
+    conf.set("celeborn.storage.s3.dir", "s3a:///xxx")
+    assert(conf.workerS3FlusherBufferSize === 6291456)
   }
 
   test("Test available storage types") {
@@ -229,6 +244,9 @@ class CelebornConfSuite extends CelebornFunSuite {
 
     conf.set("celeborn.storage.availableTypes", "HDFS")
     assert(conf.availableStorageTypes == StorageInfo.HDFS_MASK)
+
+    conf.set("celeborn.storage.availableTypes", "S3")
+    assert(conf.availableStorageTypes == StorageInfo.S3_MASK)
   }
 
   test("Test role rpcDispatcherNumThreads") {
@@ -396,6 +414,11 @@ class CelebornConfSuite extends CelebornFunSuite {
 
   test("Test storage policy case 1") {
     val conf = new CelebornConf()
+
+    conf.set("celeborn.worker.storage.storagePolicy.createFilePolicy", 
"MEMORY,S3")
+    val createFilePolicy = conf.workerStoragePolicyCreateFilePolicy
+    assert(List("MEMORY", "S3") == createFilePolicy.get)
+
     conf.set("celeborn.worker.storage.storagePolicy.createFilePolicy", 
"MEMORY,SSD")
     val createFilePolicy1 = conf.workerStoragePolicyCreateFilePolicy
     assert(List("MEMORY", "SSD") == createFilePolicy1.get)
diff --git a/docs/configuration/client.md b/docs/configuration/client.md
index 50f5e64c1..606c216b0 100644
--- a/docs/configuration/client.md
+++ b/docs/configuration/client.md
@@ -128,7 +128,7 @@ license: |
 | celeborn.quota.identity.user-specific.tenant | default | false | Tenant id 
if celeborn.quota.identity.provider is 
org.apache.celeborn.common.identity.DefaultIdentityProvider. | 0.3.0 |  | 
 | celeborn.quota.identity.user-specific.userName | default | false | User name 
if celeborn.quota.identity.provider is 
org.apache.celeborn.common.identity.DefaultIdentityProvider. | 0.3.0 |  | 
 | celeborn.quota.interruptShuffle.enabled | false | false | Whether to enable 
interrupt shuffle when quota exceeds. | 0.6.0 |  | 
-| celeborn.storage.availableTypes | HDD | false | Enabled storages. Available 
options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. 
| 0.3.0 | celeborn.storage.activeTypes | 
+| celeborn.storage.availableTypes | HDD | false | Enabled storages. Available 
options: MEMORY,HDD,SSD,HDFS,S3. Note: HDD and SSD would be treated as 
identical. | 0.3.0 | celeborn.storage.activeTypes | 
 | celeborn.storage.hdfs.dir | &lt;undefined&gt; | false | HDFS base directory 
for Celeborn to store shuffle data. | 0.2.0 |  | 
 | celeborn.storage.s3.access.key | &lt;undefined&gt; | false | S3 access key 
for Celeborn to store shuffle data. | 0.6.0 |  | 
 | celeborn.storage.s3.dir | &lt;undefined&gt; | false | S3 base directory for 
Celeborn to store shuffle data. | 0.6.0 |  | 
diff --git a/docs/configuration/master.md b/docs/configuration/master.md
index bdb9a791a..5368296a7 100644
--- a/docs/configuration/master.md
+++ b/docs/configuration/master.md
@@ -83,7 +83,7 @@ license: |
 | celeborn.master.workerUnavailableInfo.expireTimeout | 1800s | false | Worker 
unavailable info would be cleared when the retention period is expired. Set -1 
to disable the expiration. | 0.3.1 |  | 
 | celeborn.quota.enabled | true | false | When Master side sets to true, the 
master will enable to check the quota via QuotaManager. When Client side sets 
to true, LifecycleManager will request Master side to check whether the current 
user has enough quota before registration of shuffle. Fallback to the default 
shuffle service when Master side checks that there is no enough quota for 
current user. | 0.2.0 |  | 
 | celeborn.redaction.regex | (?i)secret|password|token|access[.]key | false | 
Regex to decide which Celeborn configuration properties and environment 
variables in master and worker environments contain sensitive information. When 
this regex matches a property key or value, the value is redacted from the 
logging. | 0.5.0 |  | 
-| celeborn.storage.availableTypes | HDD | false | Enabled storages. Available 
options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. 
| 0.3.0 | celeborn.storage.activeTypes | 
+| celeborn.storage.availableTypes | HDD | false | Enabled storages. Available 
options: MEMORY,HDD,SSD,HDFS,S3. Note: HDD and SSD would be treated as 
identical. | 0.3.0 | celeborn.storage.activeTypes | 
 | celeborn.storage.hdfs.dir | &lt;undefined&gt; | false | HDFS base directory 
for Celeborn to store shuffle data. | 0.2.0 |  | 
 | celeborn.storage.hdfs.kerberos.keytab | &lt;undefined&gt; | false | Kerberos 
keytab file path for HDFS storage connection. | 0.3.2 |  | 
 | celeborn.storage.hdfs.kerberos.principal | &lt;undefined&gt; | false | 
Kerberos principal for HDFS storage connection. | 0.3.2 |  | 
diff --git a/docs/configuration/worker.md b/docs/configuration/worker.md
index 97e262718..29fb8457a 100644
--- a/docs/configuration/worker.md
+++ b/docs/configuration/worker.md
@@ -42,7 +42,7 @@ license: |
 | celeborn.redaction.regex | (?i)secret|password|token|access[.]key | false | 
Regex to decide which Celeborn configuration properties and environment 
variables in master and worker environments contain sensitive information. When 
this regex matches a property key or value, the value is redacted from the 
logging. | 0.5.0 |  | 
 | celeborn.shuffle.chunk.size | 8m | false | Max chunk size of reducer's 
merged shuffle data. For example, if a reducer's shuffle data is 128M and the 
data will need 16 fetch chunk requests to fetch. | 0.2.0 |  | 
 | celeborn.shuffle.sortPartition.block.compactionFactor | 0.25 | false | 
Combine sorted shuffle blocks such that size of compacted shuffle block does 
not exceed compactionFactor * celeborn.shuffle.chunk.size | 0.4.2 |  | 
-| celeborn.storage.availableTypes | HDD | false | Enabled storages. Available 
options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. 
| 0.3.0 | celeborn.storage.activeTypes | 
+| celeborn.storage.availableTypes | HDD | false | Enabled storages. Available 
options: MEMORY,HDD,SSD,HDFS,S3. Note: HDD and SSD would be treated as 
identical. | 0.3.0 | celeborn.storage.activeTypes | 
 | celeborn.storage.hdfs.dir | &lt;undefined&gt; | false | HDFS base directory 
for Celeborn to store shuffle data. | 0.2.0 |  | 
 | celeborn.storage.hdfs.kerberos.keytab | &lt;undefined&gt; | false | Kerberos 
keytab file path for HDFS storage connection. | 0.3.2 |  | 
 | celeborn.storage.hdfs.kerberos.principal | &lt;undefined&gt; | false | 
Kerberos principal for HDFS storage connection. | 0.3.2 |  | 

Reply via email to