This is an automated email from the ASF dual-hosted git repository.

kfaraz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 7de79402af2 Add task status description to `task/run/time` metric (#18083)
7de79402af2 is described below

commit 7de79402af2cc9c0b8a6c771aa56a603e470ad0a
Author: Kashif Faraz <[email protected]>
AuthorDate: Thu Jun 5 20:41:26 2025 +0530

    Add task status description to `task/run/time` metric (#18083)
    
    Changes
    ---------
    - Emit the task's error message as dimension `description` (truncated to 100 characters) with the `task/run/time` metric
    - Minor clean up of task shutdown message in `SeekableStreamSupervisor`
---
 docs/operations/metrics.md                                          | 2 +-
 .../java/org/apache/druid/indexing/common/task/IndexTaskUtils.java  | 6 ++++++
 .../seekablestream/supervisor/SeekableStreamSupervisor.java         | 3 +--
 .../test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java | 5 +++--
 processing/src/main/java/org/apache/druid/query/DruidMetrics.java   | 1 +
 5 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md
index 7ac1217821d..a68cafac85c 100644
--- a/docs/operations/metrics.md
+++ b/docs/operations/metrics.md
@@ -295,7 +295,7 @@ If the JVM does not support CPU time measurement for the 
current thread, `ingest
 
 |Metric|Description|Dimensions|Normal value|
 |------|-----------|----------|------------|
-|`task/run/time`|Milliseconds taken to run a task.| `dataSource`, `taskId`, 
`taskType`, `groupId`, `taskStatus`, `tags`|Varies|
+|`task/run/time`|Milliseconds taken to run a task.| `dataSource`, `taskId`, 
`taskType`, `groupId`, `taskStatus`, `description`, `tags`|Varies|
 |`task/pending/time`|Milliseconds taken for a task to wait for running.| 
`dataSource`, `taskId`, `taskType`, `groupId`, `tags`|Varies|
 |`task/action/log/time`|Milliseconds taken to log a task action to the audit 
log.| `dataSource`, `taskId`, `taskType`, `groupId`, `taskActionType`, `tags`|< 
1000 (subsecond)|
 |`task/action/run/time`|Milliseconds taken to execute a task action.| 
`dataSource`, `taskId`, `taskType`, `groupId`, `taskActionType`, `tags`|Varies 
from subsecond to a few seconds, based on action type.|
diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTaskUtils.java
 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTaskUtils.java
index 3108c31ce83..e08c096d393 100644
--- 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTaskUtils.java
+++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTaskUtils.java
@@ -64,6 +64,12 @@ public class IndexTaskUtils
   {
     metricBuilder.setDimension(DruidMetrics.TASK_ID, taskStatus.getId());
     metricBuilder.setDimension(DruidMetrics.TASK_STATUS, 
taskStatus.getStatusCode().toString());
+
+    final String errorMsg = taskStatus.getErrorMsg();
+    if (errorMsg != null && !errorMsg.isEmpty()) {
+      final String statusDescription = errorMsg.length() > 100 ? 
errorMsg.substring(0, 100) : errorMsg;
+      metricBuilder.setDimension(DruidMetrics.DESCRIPTION, statusDescription);
+    }
   }
 
   public static void setSegmentDimensions(
diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java
 
b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java
index 7b89f6e9f26..e0cec3edf62 100644
--- 
a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java
+++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java
@@ -2056,8 +2056,7 @@ public abstract class 
SeekableStreamSupervisor<PartitionIdType, SequenceOffsetTy
         if (!inactivePartitionsInTask.isEmpty()) {
           killTaskWithSuccess(
               taskId,
-              "Task[%s] with partition set[%s] has inactive partitions[%s], 
stopping task.",
-              taskId,
+              "Task partition set[%s] has inactive partitions[%s].",
               taskPartitions,
               inactivePartitionsInTask
           );
diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java
 
b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java
index 040d54fea62..0579f0de1a1 100644
--- 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java
+++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java
@@ -82,6 +82,7 @@ import org.apache.druid.java.util.metrics.StubServiceEmitter;
 import org.apache.druid.metadata.DefaultPasswordProvider;
 import org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory;
 import org.apache.druid.metadata.SQLMetadataConnector;
+import org.apache.druid.query.DruidMetrics;
 import org.apache.druid.segment.indexing.DataSchema;
 import org.apache.druid.server.DruidNode;
 import org.apache.druid.server.coordinator.stats.CoordinatorRunStats;
@@ -435,14 +436,14 @@ public class TaskQueueTest extends IngestionTestBase
     // Kill the task, send announcement and wait for TaskQueue to handle update
     taskQueue.shutdown(taskId, "shutdown");
     taskRunner.taskAddedOrUpdated(
-        TaskAnnouncement.create(task, TaskStatus.failure(taskId, "shutdown"), 
taskLocation),
+        TaskAnnouncement.create(task, TaskStatus.failure(taskId, "shutdown on 
runner"), taskLocation),
         workerHolder
     );
     taskQueue.manageQueuedTasks();
     Thread.sleep(100);
 
     // Verify that metrics are emitted on receiving announcement
-    serviceEmitter.verifyEmitted("task/run/time", 1);
+    serviceEmitter.verifyEmitted("task/run/time", 
Map.of(DruidMetrics.DESCRIPTION, "shutdown on runner"), 1);
     CoordinatorRunStats stats = taskQueue.getQueueStats();
     Assert.assertEquals(0L, 
stats.get(Stats.TaskQueue.STATUS_UPDATES_IN_QUEUE));
     Assert.assertEquals(1L, stats.get(Stats.TaskQueue.HANDLED_STATUS_UPDATES));
diff --git a/processing/src/main/java/org/apache/druid/query/DruidMetrics.java 
b/processing/src/main/java/org/apache/druid/query/DruidMetrics.java
index 6551fc5c600..1ac944ba548 100644
--- a/processing/src/main/java/org/apache/druid/query/DruidMetrics.java
+++ b/processing/src/main/java/org/apache/druid/query/DruidMetrics.java
@@ -42,6 +42,7 @@ public class DruidMetrics
   public static final String GROUP_ID = "groupId";
   public static final String TASK_TYPE = "taskType";
   public static final String TASK_STATUS = "taskStatus";
+  public static final String DESCRIPTION = "description";
 
   // Ingestion dimensions
   public static final String PARTITIONING_TYPE = "partitioningType";


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to