This is an automated email from the ASF dual-hosted git repository.

zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new 2eb026bea [#1342] improvement(server): dump appId when clearing resource fails (#1343)
2eb026bea is described below

commit 2eb026bea685450d40df782b93d11747a332275e
Author: Junfan Zhang <[email protected]>
AuthorDate: Fri Dec 1 16:58:21 2023 +0800

    [#1342] improvement(server): dump appId when clearing resource fails (#1343)
    
    ### What changes were proposed in this pull request?
    
    Record the appId and shuffleIds in the log when clearing resources fails.
    
    ### Why are the changes needed?
    
    Fix: #1342
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Not needed.
---
 .../org/apache/uniffle/server/ShuffleTaskManager.java   | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git 
a/server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java 
b/server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java
index 4021c7d33..3aaba8c18 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java
@@ -169,8 +169,9 @@ public class ShuffleTaskManager {
     clearResourceThread =
         () -> {
           while (true) {
+            PurgeEvent event = null;
             try {
-              PurgeEvent event = expiredAppIdQueue.take();
+              event = expiredAppIdQueue.take();
               long startTime = System.currentTimeMillis();
               if (event instanceof AppPurgeEvent) {
                 removeResources(event.getAppId(), true);
@@ -185,7 +186,19 @@ public class ShuffleTaskManager {
                 
ShuffleServerMetrics.summaryTotalRemoveResourceByShuffleIdsTime.observe(usedTime);
               }
             } catch (Exception e) {
-              LOG.error("Exception happened when clear resource for expired 
application", e);
+              StringBuilder diagnosticMessageBuilder =
+                  new StringBuilder(
+                      "Exception happened when clearing resource for expired 
application");
+              if (event != null) {
+                diagnosticMessageBuilder.append(" for appId: ");
+                diagnosticMessageBuilder.append(event.getAppId());
+
+                if (CollectionUtils.isNotEmpty(event.getShuffleIds())) {
+                  diagnosticMessageBuilder.append(", shuffleIds: ");
+                  diagnosticMessageBuilder.append(event.getShuffleIds());
+                }
+              }
+              LOG.error("{}", diagnosticMessageBuilder, e);
             }
           }
         };

Reply via email to