This is an automated email from the ASF dual-hosted git repository.

zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new 318050144 [#1267] improvement(client): throw the detailed stacktrace 
when exceptions happened (#1411)
318050144 is described below

commit 318050144b62c0256c151a278bb04ac73c6e544b
Author: RickyMa <[email protected]>
AuthorDate: Mon Jan 15 10:30:49 2024 +0800

    [#1267] improvement(client): throw the detailed stacktrace when exceptions 
happened (#1411)
    
    ### What changes were proposed in this pull request?
    
    Save the previous exception in advance to prevent it from being lost during 
the next retry.
    
    ### Why are the changes needed?
    
    This is a follow-up PR to
[#1344](https://github.com/apache/incubator-uniffle/pull/1344).
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing UTs.
---
 .../apache/spark/shuffle/DelegationRssShuffleManager.java  |  5 +----
 .../java/org/apache/uniffle/common/util/RetryUtils.java    | 14 ++++++++++++--
 .../uniffle/client/impl/grpc/ShuffleServerGrpcClient.java  |  2 +-
 .../client/impl/grpc/ShuffleServerGrpcNettyClient.java     |  2 +-
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git 
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
index cd44694eb..c99f9b057 100644
--- 
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
+++ 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
@@ -164,10 +164,7 @@ public class DelegationRssShuffleManager implements 
ShuffleManager {
         return canAccess;
       } catch (Throwable e) {
         LOG.warn(
-            "Fail to access cluster {} using {} for {}",
-            coordinatorClient.getDesc(),
-            accessId,
-            e.getMessage());
+            "Fail to access cluster {} using {} for ", 
coordinatorClient.getDesc(), accessId, e);
       }
     }
 
diff --git 
a/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java 
b/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
index d3134b48e..961fb4fcf 100644
--- a/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
+++ b/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
@@ -82,8 +82,18 @@ public class RetryUtils {
       } catch (Throwable t) {
         retry++;
         if (isRetryFunc.apply(t) && retry < retryTimes) {
-          LOG.info("Retry due to Throwable, " + t.getClass().getName() + " " + 
t.getMessage());
-          LOG.info("Waiting " + intervalMs + " milliseconds before next 
connection attempt.");
+          if (LOG.isDebugEnabled()) {
+            LOG.error("Retry due to Throwable ", t);
+          } else {
+            LOG.error(
+                "Retry due to Throwable {}. Use DEBUG level to see the full 
stack: {}",
+                t.getClass().getName(),
+                t.getMessage());
+          }
+          LOG.error(
+              "Will retry {} more time(s) after waiting {} milliseconds.",
+              retryTimes - retry,
+              intervalMs);
           Thread.sleep(intervalMs);
           if (callBack != null) {
             callBack.execute();
diff --git 
a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcClient.java
 
b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcClient.java
index a235d31b1..cd6f43e41 100644
--- 
a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcClient.java
+++ 
b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcClient.java
@@ -464,7 +464,7 @@ public class ShuffleServerGrpcClient extends GrpcClient 
implements ShuffleServer
             maxRetryAttempts,
             t -> !(t instanceof OutOfMemoryError));
       } catch (Throwable throwable) {
-        LOG.warn(throwable.getMessage());
+        LOG.warn("Failed to send shuffle data due to ", throwable);
         isSuccessful = false;
         break;
       }
diff --git 
a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcNettyClient.java
 
b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcNettyClient.java
index 3a98b40e0..881934ac4 100644
--- 
a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcNettyClient.java
+++ 
b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/ShuffleServerGrpcNettyClient.java
@@ -154,7 +154,7 @@ public class ShuffleServerGrpcNettyClient extends 
ShuffleServerGrpcClient {
             maxRetryAttempts,
             t -> !(t instanceof OutOfMemoryError));
       } catch (Throwable throwable) {
-        LOG.warn(throwable.getMessage());
+        LOG.warn("Failed to send shuffle data due to ", throwable);
         isSuccessful = false;
         break;
       }

Reply via email to