This is an automated email from the ASF dual-hosted git repository.
zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new 318050144 [#1267] improvement(client): throw the detailed stacktrace
when exceptions happened (#1411)
318050144 is described below
commit 318050144b62c0256c151a278bb04ac73c6e544b
Author: RickyMa <[email protected]>
AuthorDate: Mon Jan 15 10:30:49 2024 +0800
[#1267] improvement(client): throw the detailed stacktrace when exceptions
happened (#1411)
### What changes were proposed in this pull request?
Save the previous exception in advance to prevent it from being lost during
the next retry.
### Why are the changes needed?
This is a follow-up PR to
[#1344](https://github.com/apache/incubator-uniffle/pull/1344).
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing unit tests (UTs).
---
.../apache/spark/shuffle/DelegationRssShuffleManager.java | 5 +----
.../java/org/apache/uniffle/common/util/RetryUtils.java | 14 ++++++++++++--
.../uniffle/client/impl/grpc/ShuffleServerGrpcClient.java | 2 +-
.../client/impl/grpc/ShuffleServerGrpcNettyClient.java | 2 +-
4 files changed, 15 insertions(+), 8 deletions(-)
diff --git
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
index cd44694eb..c99f9b057 100644
---
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
+++
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
@@ -164,10 +164,7 @@ public class DelegationRssShuffleManager implements
ShuffleManager {
return canAccess;
} catch (Throwable e) {
LOG.warn(
- "Fail to access cluster {} using {} for {}",
- coordinatorClient.getDesc(),
- accessId,
- e.getMessage());
+ "Fail to access cluster {} using {} for ",
coordinatorClient.getDesc(), accessId, e);
}
}
diff --git
a/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
b/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
index d3134b48e..961fb4fcf 100644
--- a/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
+++ b/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
@@ -82,8 +82,18 @@ public class RetryUtils {
} catch (Throwable t) {
retry++;
if (isRetryFunc.apply(t) && retry < retryTimes) {
- LOG.info("Retry due to Throwable, " + t.getClass().getName() + " " +
t.getMessage());
- LOG.info("Waiting " + intervalMs + " milliseconds before next
connection attempt.");
+ if (LOG.isDebugEnabled()) {
+ LOG.error("Retry due to Throwable ", t);
+ } else {
+ LOG.error(
+ "Retry due to Throwable {}. Use DEBUG level to see the full
stack: {}",
+ t.getClass().getName(),
+ t.getMessage());
+ }
+ LOG.error(
+ "Will retry {} more time(s) after waiting {} milliseconds.",
+ retryTimes - retry,
+ intervalMs);
Thread.sleep(intervalMs);
if (callBack != null) {
callBack.execute();
diff --git
a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcClient.java
b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcClient.java
index a235d31b1..cd6f43e41 100644
---
a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcClient.java
+++
b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcClient.java
@@ -464,7 +464,7 @@ public class ShuffleServerGrpcClient extends GrpcClient
implements ShuffleServer
maxRetryAttempts,
t -> !(t instanceof OutOfMemoryError));
} catch (Throwable throwable) {
- LOG.warn(throwable.getMessage());
+ LOG.warn("Failed to send shuffle data due to ", throwable);
isSuccessful = false;
break;
}
diff --git
a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcNettyClient.java
b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcNettyClient.java
index 3a98b40e0..881934ac4 100644
---
a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcNettyClient.java
+++
b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcNettyClient.java
@@ -154,7 +154,7 @@ public class ShuffleServerGrpcNettyClient extends
ShuffleServerGrpcClient {
maxRetryAttempts,
t -> !(t instanceof OutOfMemoryError));
} catch (Throwable throwable) {
- LOG.warn(throwable.getMessage());
+ LOG.warn("Failed to send shuffle data due to ", throwable);
isSuccessful = false;
break;
}