This is an automated email from the ASF dual-hosted git repository.
nicholasjiang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new 6f5ad2dde [MINOR] Refine the log for fetch failure and rpc metrics dump
6f5ad2dde is described below
commit 6f5ad2dde891263d4f84874361a3b957b08ede50
Author: Wang, Fei <[email protected]>
AuthorDate: Mon Mar 10 10:56:53 2025 +0800
[MINOR] Refine the log for fetch failure and rpc metrics dump
### What changes were proposed in this pull request?
Minor change for the log, to make the fetch failure message and rpc metrics
dump much clearer.
### Why are the changes needed?
As title.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
GA.
Closes #3136 from turboFei/log_shuffle.
Authored-by: Wang, Fei <[email protected]>
Signed-off-by: SteNicholas <[email protected]>
---
.../main/scala/org/apache/spark/celeborn/ExceptionMakerHelper.scala | 2 +-
.../main/scala/org/apache/celeborn/common/rpc/RpcMetricsTracker.scala | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git
a/client-spark/spark-2/src/main/scala/org/apache/spark/celeborn/ExceptionMakerHelper.scala
b/client-spark/spark-2/src/main/scala/org/apache/spark/celeborn/ExceptionMakerHelper.scala
index 6456f3f44..8e4e511b1 100644
---
a/client-spark/spark-2/src/main/scala/org/apache/spark/celeborn/ExceptionMakerHelper.scala
+++
b/client-spark/spark-2/src/main/scala/org/apache/spark/celeborn/ExceptionMakerHelper.scala
@@ -23,7 +23,7 @@ import org.apache.celeborn.common.util.ExceptionMaker
object ExceptionMakerHelper {
- val FETCH_FAILURE_ERROR_MSG = "Celeborn FetchFailure with shuffle id "
+ val FETCH_FAILURE_ERROR_MSG = "Celeborn FetchFailure with
appShuffleId/shuffleId: "
val SHUFFLE_FETCH_FAILURE_EXCEPTION_MAKER = new ExceptionMaker() {
override def makeFetchFailureException(
diff --git
a/common/src/main/scala/org/apache/celeborn/common/rpc/RpcMetricsTracker.scala
b/common/src/main/scala/org/apache/celeborn/common/rpc/RpcMetricsTracker.scala
index 5774b7a99..f9fffe306 100644
---
a/common/src/main/scala/org/apache/celeborn/common/rpc/RpcMetricsTracker.scala
+++
b/common/src/main/scala/org/apache/celeborn/common/rpc/RpcMetricsTracker.scala
@@ -86,7 +86,7 @@ private[celeborn] class RpcMetricsTracker(
if (slowRpcInterval < 0 || System.currentTimeMillis() - lastLogTime >
slowRpcInterval &&
lastSlowLogTime.compareAndSet(lastLogTime,
System.currentTimeMillis())) {
logWarning(
- s"slow rpc detected: currentQueueSize = ${queueLengthFunc()},
queueTime=$queueTime processTime=$processTime message=$message")
+ s"slow rpc detected: currentQueueSize = ${queueLengthFunc()},
queueTime=$queueTime(ns) processTime=$processTime(ns) message=$message")
}
val lastTime = lastDumpTime.get
@@ -144,7 +144,7 @@ private[celeborn] class RpcMetricsTracker(
return
val builder = new StringBuilder();
- builder.append(s"RPC statistics for $name").append("\n")
+ builder.append(s"RPC statistics for $name (time unit: ns)").append("\n")
builder.append(s"current queue size = ${queueLengthFunc()}").append("\n")
builder.append(s"max queue length = ${maxQueueLength.get()}").append("\n")
histogramMap.entrySet.asScala.foreach(entry => {