This is an automated email from the ASF dual-hosted git repository.
feiwang pushed a commit to branch branch-0.6
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/branch-0.6 by this push:
new d6c850b2e [CELEBORN-1577][FOLLOWUP] Improve check quota message
d6c850b2e is described below
commit d6c850b2e4ec0f5852d435ad0dc9a9fb8fabf766
Author: Xianming Lei <[email protected]>
AuthorDate: Thu Jun 12 11:01:18 2025 -0700
[CELEBORN-1577][FOLLOWUP] Improve check quota message
### What changes were proposed in this pull request?
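Improve the check quota messages: the master-side reason constants are shortened to state only which quota was hit (cluster, tenant or user), the reason now names the affected user or tenant, the tenant check uses TENANT_EXHAUSTED instead of USER_EXHAUSTED, and the client prepends "Application interrupted caused by storage quota exceeded" when it cancels active stages.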
### Why are the changes needed?
To make the check quota messages clearer.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing UTs.
Closes #3328 from leixm/follow_CELEBORN-1577.
Authored-by: Xianming Lei <[email protected]>
Signed-off-by: Wang, Fei <[email protected]>
(cherry picked from commit 03f97e6166ec492ea37e14d849cb5f437c919203)
Signed-off-by: Wang, Fei <[email protected]>
---
.../celeborn/client/ApplicationHeartbeater.scala | 3 +-
.../service/deploy/master/quota/QuotaManager.scala | 8 ++--
.../service/deploy/master/quota/QuotaStatus.scala | 6 +--
.../deploy/master/quota/QuotaManagerSuite.scala | 52 ++++++++++++----------
4 files changed, 37 insertions(+), 32 deletions(-)
diff --git
a/client/src/main/scala/org/apache/celeborn/client/ApplicationHeartbeater.scala
b/client/src/main/scala/org/apache/celeborn/client/ApplicationHeartbeater.scala
index d8eef2ded..e91e236b6 100644
---
a/client/src/main/scala/org/apache/celeborn/client/ApplicationHeartbeater.scala
+++
b/client/src/main/scala/org/apache/celeborn/client/ApplicationHeartbeater.scala
@@ -165,7 +165,8 @@ class ApplicationHeartbeater(
private def checkQuotaExceeds(response: CheckQuotaResponse): Unit = {
if (conf.quotaInterruptShuffleEnabled && !response.isAvailable) {
- cancelAllActiveStages(response.reason)
+ cancelAllActiveStages(
+ s"Application interrupted caused by storage quota exceeded, reason:
${response.reason}")
}
}
diff --git
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala
index f9886383c..8da25f0dd 100644
---
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala
+++
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManager.scala
@@ -125,14 +125,14 @@ class QuotaManager(
user: UserIdentifier,
consumption: ResourceConsumption): QuotaStatus = {
val quota = getUserStorageQuota(user)
- checkQuotaSpace(s"$USER_EXHAUSTED user: $user. ", consumption, quota)
+ checkQuotaSpace(s"$USER_EXHAUSTED User: $user. ", consumption, quota)
}
private def checkTenantQuotaSpace(
tenantId: String,
consumption: ResourceConsumption): QuotaStatus = {
val quota = getTenantStorageQuota(tenantId)
- checkQuotaSpace(s"$USER_EXHAUSTED tenant: $tenantId. ", consumption, quota)
+ checkQuotaSpace(s"$TENANT_EXHAUSTED Tenant: $tenantId. ", consumption,
quota)
}
private def checkClusterQuotaSpace(consumption: ResourceConsumption):
QuotaStatus = {
@@ -282,7 +282,7 @@ class QuotaManager(
notExpiredUserConsumptions,
getUserStorageQuota(userIdentifier),
notExpired,
- USER_EXHAUSTED)
+ s"$USER_EXHAUSTED User: $userIdentifier. ")
appConsumptions
}
@@ -305,7 +305,7 @@ class QuotaManager(
notExpiredResourceConsumption,
getTenantStorageQuota(tenantId),
notExpired,
- TENANT_EXHAUSTED)
+ s"$TENANT_EXHAUSTED Tenant: $tenantId. ")
appConsumptions
}
diff --git
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaStatus.scala
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaStatus.scala
index 2d7d23835..306f8d872 100644
---
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaStatus.scala
+++
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/quota/QuotaStatus.scala
@@ -24,9 +24,9 @@ case class QuotaStatus(exceed: Boolean = false, exceedReason:
String = NORMAL)
object QuotaStatus {
val NORMAL: String = ""
val CLUSTER_EXHAUSTED: String =
- "Interrupt application caused by the cluster storage usage reach
threshold."
+ "Cluster storage usage reach threshold."
val TENANT_EXHAUSTED: String =
- "Interrupt application caused by the tenant storage usage reach threshold."
+ "Tenant storage usage reach threshold."
val USER_EXHAUSTED: String =
- "Interrupt or reject application caused by the user storage usage reach
threshold."
+ "User storage usage reach threshold."
}
diff --git
a/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala
b/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala
index 9988f0c1f..ee1d550b9 100644
---
a/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala
+++
b/master/src/test/scala/org/apache/celeborn/service/deploy/master/quota/QuotaManagerSuite.scala
@@ -154,13 +154,13 @@ class QuotaManagerSuite extends CelebornFunSuite
val exp1 = CheckQuotaResponse(true, "")
val exp2 = CheckQuotaResponse(
false,
- s"Interrupt or reject application caused by the user storage usage reach
threshold. " +
- s"user: `tenant_01`.`Jerry`. " +
+ s"User storage usage reach threshold. " +
+ s"User: `tenant_01`.`Jerry`. " +
s"HDFS_BYTES_WRITTEN(30.0 GiB) exceeds quota(10.0 GiB). ")
val exp3 = CheckQuotaResponse(
false,
- s"Interrupt or reject application caused by the user storage usage reach
threshold. " +
- s"user: `tenant_01`.`Jerry`. " +
+ s"User storage usage reach threshold. " +
+ s"User: `tenant_01`.`Jerry`. " +
s"DISK_BYTES_WRITTEN(200.0 GiB) exceeds quota(100.0 GiB). " +
s"DISK_FILE_COUNT(20000) exceeds quota(10000). " +
s"HDFS_BYTES_WRITTEN(30.0 GiB) exceeds quota(10.0 GiB). ")
@@ -203,16 +203,16 @@ class QuotaManagerSuite extends CelebornFunSuite
val succeed = CheckQuotaResponse(true, "")
val failed = CheckQuotaResponse(
false,
- s"Interrupt or reject application caused by the user storage usage reach
threshold. " +
- s"user: `tenant_01`.`Jerry`. " +
+ s"User storage usage reach threshold. " +
+ s"User: `tenant_01`.`Jerry`. " +
s"DISK_BYTES_WRITTEN(200.0 GiB) exceeds quota(100.0 GiB). " +
s"DISK_FILE_COUNT(20000) exceeds quota(10000). " +
s"HDFS_BYTES_WRITTEN(30.0 GiB) exceeds quota(10.0 GiB). ")
assert(res1 == failed)
assert(res2 == CheckQuotaResponse(
false,
- "Interrupt or reject application caused by the user storage usage reach
threshold. " +
- "Used: " +
+ "User storage usage reach threshold. " +
+ "User: `tenant_01`.`Jerry`. Used: " +
"ResourceConsumption(" +
"diskBytesWritten: 150.0 GiB, " +
"diskFileCount: 15000, " +
@@ -236,7 +236,7 @@ class QuotaManagerSuite extends CelebornFunSuite
assert(res1 == failed)
assert(res2 == CheckQuotaResponse(
false,
- "Interrupt or reject application caused by the user storage usage reach
threshold. " +
+ "User storage usage reach threshold. User: `tenant_01`.`Jerry`. " +
"Used: ResourceConsumption(" +
"diskBytesWritten: 150.0 GiB, " +
"diskFileCount: 15000, " +
@@ -249,7 +249,7 @@ class QuotaManagerSuite extends CelebornFunSuite
"hdfsFileCount=9223372036854775807]"))
assert(res3 == CheckQuotaResponse(
false,
- "Interrupt application caused by the cluster storage usage reach
threshold. " +
+ "Cluster storage usage reach threshold. " +
"Used: ResourceConsumption(" +
"diskBytesWritten: 50.0 GiB, " +
"diskFileCount: 5000, " +
@@ -293,11 +293,11 @@ class QuotaManagerSuite extends CelebornFunSuite
assert(res1 == CheckQuotaResponse(
false,
- "Interrupt application caused by the cluster storage usage reach
threshold. " +
+ "Cluster storage usage reach threshold. " +
"DISK_BYTES_WRITTEN(50.0 GiB) exceeds quota(20.0 GiB). "))
assert(res2 == CheckQuotaResponse(
false,
- "Interrupt application caused by the cluster storage usage reach
threshold. " +
+ "Cluster storage usage reach threshold. " +
"Used: " +
"ResourceConsumption(" +
"diskBytesWritten: 40.0 GiB, " +
@@ -502,14 +502,15 @@ class QuotaManagerSuite extends CelebornFunSuite
val res4 = quotaManager1.checkApplicationQuotaStatus("app3")
assert(res1 == CheckQuotaResponse(
false,
- s"Interrupt or reject application caused by the user storage usage reach
threshold. " +
- s"user: `tenant_01`.`Jerry`. " +
+ s"User storage usage reach threshold. " +
+ s"User: `tenant_01`.`Jerry`. " +
s"DISK_BYTES_WRITTEN(200.0 GiB) exceeds quota(100.0 GiB). " +
s"DISK_FILE_COUNT(20000) exceeds quota(10000). " +
s"HDFS_BYTES_WRITTEN(30.0 GiB) exceeds quota(10.0 GiB). "))
assert(res2 == CheckQuotaResponse(
false,
- "Interrupt or reject application caused by the user storage usage reach
threshold. " +
+ "User storage usage reach threshold. " +
+ "User: `tenant_01`.`Jerry`. " +
"Used: ResourceConsumption(" +
"diskBytesWritten: 150.0 GiB, " +
"diskFileCount: 15000, " +
@@ -524,8 +525,8 @@ class QuotaManagerSuite extends CelebornFunSuite
assert(res3 == CheckQuotaResponse(true, ""))
assert(res4 == CheckQuotaResponse(
false,
- "Interrupt or reject application caused by the user storage usage reach
threshold. " +
- "Used: " +
+ "User storage usage reach threshold. " +
+ "User: `tenant_01`.`John`. Used: " +
"ResourceConsumption(" +
"diskBytesWritten: 80.0 GiB, " +
"diskFileCount: 0, " +
@@ -619,15 +620,16 @@ class QuotaManagerSuite extends CelebornFunSuite
assert(res1 == CheckQuotaResponse(
false,
"" +
- "Interrupt or reject application caused by the user storage usage
reach threshold. " +
- "user: `tenant_01`.`Jerry`. DISK_BYTES_WRITTEN(230.0 GiB) exceeds
quota(100.0 GiB). "))
+ "User storage usage reach threshold. " +
+ "User: `tenant_01`.`Jerry`. DISK_BYTES_WRITTEN(230.0 GiB) exceeds
quota(100.0 GiB). "))
assert(res2 == CheckQuotaResponse(
false,
- "Interrupt or reject application caused by the user storage usage reach
threshold. " +
- "user: `tenant_01`.`John`. DISK_BYTES_WRITTEN(220.0 GiB) exceeds
quota(100.0 GiB). "))
+ "User storage usage reach threshold. " +
+ "User: `tenant_01`.`John`. DISK_BYTES_WRITTEN(220.0 GiB) exceeds
quota(100.0 GiB). "))
assert(res3 == CheckQuotaResponse(
false,
- "Interrupt or reject application caused by the user storage usage reach
threshold. " +
+ "User storage usage reach threshold. " +
+ "User: `tenant_01`.`Jerry`. " +
"Used: ResourceConsumption(" +
"diskBytesWritten: 150.0 GiB, " +
"diskFileCount: 0, " +
@@ -640,7 +642,8 @@ class QuotaManagerSuite extends CelebornFunSuite
"hdfsFileCount=9223372036854775807]"))
assert(res4 == CheckQuotaResponse(
false,
- "Interrupt application caused by the tenant storage usage reach
threshold. " +
+ "Tenant storage usage reach threshold. " +
+ "Tenant: tenant_01. " +
"Used: ResourceConsumption(" +
"diskBytesWritten: 80.0 GiB, " +
"diskFileCount: 0, " +
@@ -653,7 +656,8 @@ class QuotaManagerSuite extends CelebornFunSuite
"hdfsFileCount=9223372036854775807]"))
assert(res5 == CheckQuotaResponse(
false,
- "Interrupt or reject application caused by the user storage usage reach
threshold. " +
+ "User storage usage reach threshold. " +
+ "User: `tenant_01`.`John`. " +
"Used: ResourceConsumption(" +
"diskBytesWritten: 150.0 GiB, " +
"diskFileCount: 0, " +