This is an automated email from the ASF dual-hosted git repository.

karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 1e7bd56d395 Improved failure messages for MSQCompactionRunner. (#18787)
1e7bd56d395 is described below

commit 1e7bd56d395152deba97b77d01f21b2cab73e686
Author: Gian Merlino <[email protected]>
AuthorDate: Sat Dec 13 18:00:19 2025 -0800

    Improved failure messages for MSQCompactionRunner. (#18787)
    
    * Improved failure messages for MSQCompactionRunner.
    
    Include the first failure message in the task status itself, so it is
    not necessary to fetch task logs to see the error message.
    
    Also, don't log the entire task JSON for failed subtasks. It is logged
    once when a subtask is initially run, and that's enough.
    
    * Improve messages.
---
 .../druid/msq/indexing/MSQCompactionRunner.java    | 45 +++++++++++++++-------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java
index c764a436fe4..db92290b1f9 100644
--- a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java
+++ b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java
@@ -717,40 +717,57 @@ public class MSQCompactionRunner implements CompactionRunner
     final int totalNumSpecs = tasks.size();
     log.info("Generated [%d] MSQControllerTask specs", totalNumSpecs);
 
+    TaskStatus firstFailure = null;
     int failCnt = 0;
 
-    for (MSQControllerTask eachTask : tasks) {
-      final String json = 
toolbox.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(eachTask);
-      if (!currentSubTaskHolder.setTask(eachTask)) {
+    for (int taskCnt = 0; taskCnt < tasks.size(); taskCnt++) {
+      final MSQControllerTask task = tasks.get(taskCnt);
+      final String json = 
toolbox.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(task);
+      if (!currentSubTaskHolder.setTask(task)) {
         String errMsg = "Task was asked to stop. Finish as failed.";
-        log.info(errMsg);
+        log.info("%s", errMsg);
         return TaskStatus.failure(compactionTaskId, errMsg);
       }
       try {
-        if (eachTask.isReady(toolbox.getTaskActionClient())) {
-          log.info("Running MSQControllerTask: " + json);
-          final TaskStatus eachResult = eachTask.run(toolbox);
-          if (!eachResult.isSuccess()) {
+        if (task.isReady(toolbox.getTaskActionClient())) {
+          log.info("Running MSQControllerTask number[%d]: %s", taskCnt, json);
+          final TaskStatus taskStatus = task.run(toolbox);
+          if (!taskStatus.isSuccess()) {
             failCnt++;
-            log.warn("Failed to run MSQControllerTask: [%s].\nTrying the next 
MSQControllerTask.", json);
+            log.warn("Failed to run MSQControllerTask number[%d]: %s", 
taskCnt, taskStatus.getErrorMsg());
+            if (firstFailure == null) {
+              firstFailure = taskStatus;
+            }
           }
         } else {
           failCnt++;
-          log.warn("MSQControllerTask is not ready: [%s].\nTrying the next 
MSQControllerTask.", json);
+          log.warn("MSQControllerTask number[%d] is not ready.", taskCnt);
         }
       }
       catch (Exception e) {
         failCnt++;
-        log.warn(e, "Failed to run MSQControllerTask: [%s].\nTrying the next 
MSQControllerTask.", json);
+        log.warn(e, "Failed to run MSQControllerTask number[%d].", taskCnt);
       }
     }
-    String msg = StringUtils.format(
+
+    log.info(
         "Ran [%d] MSQControllerTasks, [%d] succeeded, [%d] failed",
         totalNumSpecs,
         totalNumSpecs - failCnt,
         failCnt
     );
-    log.info(msg);
-    return failCnt == 0 ? TaskStatus.success(compactionTaskId) : 
TaskStatus.failure(compactionTaskId, msg);
+
+    if (failCnt == 0) {
+      return TaskStatus.success(compactionTaskId);
+    } else if (firstFailure != null && failCnt == 1) {
+      return TaskStatus.failure(compactionTaskId, firstFailure.getErrorMsg());
+    } else {
+      final StringBuilder msgBuilder =
+          new 
StringBuilder().append(failCnt).append("/").append(totalNumSpecs).append(" jobs 
failed");
+      if (firstFailure != null) {
+        msgBuilder.append("; first failure was: 
").append(firstFailure.getErrorMsg());
+      }
+      return TaskStatus.failure(compactionTaskId, msgBuilder.toString());
+    }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to