This is an automated email from the ASF dual-hosted git repository.
swaminathanmanish pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 6ed151a4c85 Fix minion wait &run time alerts to self-resolve using
gauge (#18517)
6ed151a4c85 is described below
commit 6ed151a4c858269bb634ec0ad3a8f5e8d141632f
Author: swaminathanmanish <[email protected]>
AuthorDate: Mon Jun 1 18:52:51 2026 +0530
Fix minion wait &run time alerts to self-resolve using gauge (#18517)
* Fix MinionSubTaskHighWaitTime alert to self-resolve using gauge
Replace ControllerTimer.SUBTASK_WAITING_TIME (histogram) with a new
MAX_SUBTASK_WAIT_TIME_MS gauge in TaskMetricsEmitter.
The timer's _Max stat retains its peak value across emission cycles and
does not decay when the queue drains, causing the alert to stay firing
after all subtasks have finished. The new gauge is written every cycle
with the current max wait time across waiting subtasks for each
(table, taskType) pair, defaulting to 0 when no subtasks are waiting,
so the alert self-resolves on the next emit cycle.
Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
* Remove unused SUBTASK_WAITING_TIME timer — replaced by
MAX_SUBTASK_WAIT_TIME_MS gauge
Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
* Replace SUBTASK_RUNNING_TIME timer with MAX_SUBTASK_RUNNING_TIME_MS gauge
Timer histograms retain peak _Max values across emit cycles and never decay,
preventing alerts from self-resolving. Replace with a per-table gauge that
writes the current max running time each cycle (0 when no running subtasks),
matching the same approach used for MAX_SUBTASK_WAIT_TIME_MS.
Also removes the now-unused SUBTASK_RUNNING_TIME ControllerTimer entry and
its imports from TaskMetricsEmitter.
Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
---------
Co-authored-by: Claude Sonnet 4.6 <[email protected]>
---
.../pinot/common/metrics/ControllerGauge.java | 2 ++
.../pinot/common/metrics/ControllerTimer.java | 3 ---
.../helix/core/minion/TaskMetricsEmitter.java | 22 ++++++++++-----
.../helix/core/minion/TaskMetricsEmitterTest.java | 31 ++++++++++++++++++----
4 files changed, 44 insertions(+), 14 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
index 2dfc9d96540..58354a72f35 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
@@ -71,7 +71,9 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
// TODO: Unify below subtask metrics into a single metric with status label
NUM_MINION_TASKS_IN_PROGRESS("NumMinionTasksInProgress", true),
NUM_MINION_SUBTASKS_WAITING("NumMinionSubtasksWaiting", true),
+ MAX_SUBTASK_WAIT_TIME_MS("MaxSubtaskWaitTimeMs", false),
NUM_MINION_SUBTASKS_RUNNING("NumMinionSubtasksRunning", true),
+ MAX_SUBTASK_RUNNING_TIME_MS("MaxSubtaskRunningTimeMs", false),
NUM_MINION_SUBTASKS_ERROR("NumMinionSubtasksError", true),
NUM_MINION_SUBTASKS_UNKNOWN("NumMinionSubtasksUnknown", true),
NUM_MINION_SUBTASKS_DROPPED("NumMinionSubtasksDropped", true),
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerTimer.java
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerTimer.java
index 00000559434..40b9c39c959 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerTimer.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerTimer.java
@@ -39,9 +39,6 @@ public enum ControllerTimer implements AbstractMetrics.Timer {
// Audit logging timers
AUDIT_REQUEST_PROCESSING_TIME("auditRequestProcessingTime", true),
AUDIT_RESPONSE_PROCESSING_TIME("auditResponseProcessingTime", true),
- // Log subtask waiting (until not started) and running (until not completed)
time
- SUBTASK_WAITING_TIME("subtaskWaitingTime", false),
- SUBTASK_RUNNING_TIME("subtaskRunningTime", false),
// Query workload propagation metrics
QUERY_WORKLOAD_PROPAGATE_TIME_MS("queryWorkloadPropagateTimeMs", false),
QUERY_WORKLOAD_SEND_MESSAGE_TIME_MS("queryWorkloadSendMessageTimeMs", false),
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
index 98c99211fd2..b562118ab10 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java
@@ -25,10 +25,8 @@ import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
-import java.util.concurrent.TimeUnit;
import org.apache.pinot.common.metrics.ControllerGauge;
import org.apache.pinot.common.metrics.ControllerMetrics;
-import org.apache.pinot.common.metrics.ControllerTimer;
import org.apache.pinot.controller.ControllerConf;
import org.apache.pinot.controller.LeadControllerManager;
import org.apache.pinot.controller.helix.core.PinotHelixResourceManager;
@@ -109,6 +107,8 @@ public class TaskMetricsEmitter extends BasePeriodicTask {
for (String taskType : taskTypes) {
TaskCount taskTypeAccumulatedCount = new TaskCount();
Map<String, TaskCount> tableAccumulatedCount = new HashMap<>();
+ Map<String, Long> tableMaxWaitTimeMs = new HashMap<>();
+ Map<String, Long> tableMaxRunningTimeMs = new HashMap<>();
try {
// Capture the current execution timestamp for this task type
collection cycle
long currentExecutionTimestamp = System.currentTimeMillis();
@@ -147,12 +147,10 @@ public class TaskMetricsEmitter extends BasePeriodicTask {
}
count.accumulate(taskCount);
taskStatusSummary.getSubtaskWaitingTimes().values().forEach(subtaskWaitingTime
-> {
- _controllerMetrics.addTimedTableValue(tableNameWithType,
ControllerTimer.SUBTASK_WAITING_TIME,
- subtaskWaitingTime, TimeUnit.MILLISECONDS);
+ tableMaxWaitTimeMs.merge(tableNameWithType,
subtaskWaitingTime, Math::max);
});
taskStatusSummary.getSubtaskRunningTimes().values().forEach(subtaskRunningTime
-> {
- _controllerMetrics.addTimedTableValue(tableNameWithType,
ControllerTimer.SUBTASK_RUNNING_TIME,
- subtaskRunningTime, TimeUnit.MILLISECONDS);
+ tableMaxRunningTimeMs.merge(tableNameWithType,
subtaskRunningTime, Math::max);
});
return count;
});
@@ -211,6 +209,16 @@ public class TaskMetricsEmitter extends BasePeriodicTask {
ControllerGauge.PERCENT_MINION_SUBTASKS_IN_ERROR, tablePercent);
});
+ // Emit 0 for tables with no waiting/running subtasks so the gauge
(and alert) self-resolves
+ tableAccumulatedCount.keySet().forEach(tableNameWithType -> {
+ _controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType,
+ ControllerGauge.MAX_SUBTASK_WAIT_TIME_MS,
+ tableMaxWaitTimeMs.getOrDefault(tableNameWithType, 0L));
+ _controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType,
+ ControllerGauge.MAX_SUBTASK_RUNNING_TIME_MS,
+ tableMaxRunningTimeMs.getOrDefault(tableNameWithType, 0L));
+ });
+
if (_preReportedTables.containsKey(taskType)) {
Set<String> tableNameWithTypeSet = _preReportedTables.get(taskType);
tableNameWithTypeSet.removeAll(tableAccumulatedCount.keySet());
@@ -286,6 +294,8 @@ public class TaskMetricsEmitter extends BasePeriodicTask {
ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType,
ControllerGauge.PERCENT_MINION_SUBTASKS_IN_ERROR);
+ _controllerMetrics.removeTableGauge(tableNameWithType, taskType,
ControllerGauge.MAX_SUBTASK_WAIT_TIME_MS);
+ _controllerMetrics.removeTableGauge(tableNameWithType, taskType,
ControllerGauge.MAX_SUBTASK_RUNNING_TIME_MS);
});
}
}
diff --git
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitterTest.java
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitterTest.java
index 5d95f996b03..3ce31119956 100644
---
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitterTest.java
+++
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitterTest.java
@@ -174,8 +174,9 @@ public class TaskMetricsEmitterTest {
private void runAndAssertForTaskType1WithTwoTables() {
PinotMetricsRegistry metricsRegistry =
_controllerMetrics.getMetricsRegistry();
_taskMetricsEmitter.runTask(null);
- // Expected 31 metrics: 29 original + 2 timing metrics
(SUBTASK_WAITING_TIME and SUBTASK_RUNNING_TIME)
- Assert.assertEquals(metricsRegistry.allMetrics().size(), 31);
+ // Expected 33 metrics: 29 original + MAX_SUBTASK_WAIT_TIME_MS gauge (x2
tables)
+ // + MAX_SUBTASK_RUNNING_TIME_MS gauge (x2 tables)
+ Assert.assertEquals(metricsRegistry.allMetrics().size(), 33);
Assert.assertTrue(metricsRegistry.allMetrics().containsKey(
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.onlineMinionInstances")));
@@ -250,6 +251,26 @@ public class TaskMetricsEmitterTest {
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.percentMinionSubtasksInError.table2_OFFLINE.taskType1"))
.getMetric()).value(), 50L);
+
+ // table1 has a waiting subtask (subtask2, 3000ms); table2 has none (0ms)
+ Assert.assertEquals(((YammerSettableGauge<?>)
metricsRegistry.allMetrics().get(
+ new YammerMetricName(ControllerMetrics.class,
+
"pinot.controller.maxSubtaskWaitTimeMs.table1_OFFLINE.taskType1"))
+ .getMetric()).value(), 3000L);
+ Assert.assertEquals(((YammerSettableGauge<?>)
metricsRegistry.allMetrics().get(
+ new YammerMetricName(ControllerMetrics.class,
+
"pinot.controller.maxSubtaskWaitTimeMs.table2_OFFLINE.taskType1"))
+ .getMetric()).value(), 0L);
+
+ // table2 has a running subtask (subtask1, 5000ms); table1 has none (0ms)
+ Assert.assertEquals(((YammerSettableGauge<?>)
metricsRegistry.allMetrics().get(
+ new YammerMetricName(ControllerMetrics.class,
+
"pinot.controller.maxSubtaskRunningTimeMs.table1_OFFLINE.taskType1"))
+ .getMetric()).value(), 0L);
+ Assert.assertEquals(((YammerSettableGauge<?>)
metricsRegistry.allMetrics().get(
+ new YammerMetricName(ControllerMetrics.class,
+
"pinot.controller.maxSubtaskRunningTimeMs.table2_OFFLINE.taskType1"))
+ .getMetric()).value(), 5000L);
}
@Test
@@ -281,9 +302,9 @@ public class TaskMetricsEmitterTest {
PinotMetricsRegistry metricsRegistry =
_controllerMetrics.getMetricsRegistry();
_taskMetricsEmitter.runTask(null);
- // Expected at least 21 metrics: 20 original + 1 timing metric
(SUBTASK_WAITING_TIME)
- // The actual count may vary slightly based on test execution order
- Assert.assertTrue(metricsRegistry.allMetrics().size() >= 21);
+ // Expected at least 22 metrics: 20 original + MAX_SUBTASK_WAIT_TIME_MS
gauge (x1 table)
+ // + MAX_SUBTASK_RUNNING_TIME_MS gauge (x1 table). Count may vary based on
test execution order.
+ Assert.assertTrue(metricsRegistry.allMetrics().size() >= 22);
Assert.assertTrue(metricsRegistry.allMetrics().containsKey(
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.onlineMinionInstances")));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]