This is an automated email from the ASF dual-hosted git repository.
chufenggao pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/dolphinscheduler.git
The following commit(s) were added to refs/heads/dev by this push:
new 0e15ce3389 [Improvement][Metrics] Switch to use tags to indicate task
/ workflow execution status for metrics (#11128)
0e15ce3389 is described below
commit 0e15ce33894f4cdcdfdbff55fefc85530724dea2
Author: Eric Gao <[email protected]>
AuthorDate: Tue Jul 26 13:44:02 2022 +0800
[Improvement][Metrics] Switch to use tags to indicate task / workflow
execution status for metrics (#11128)
* [Improvement][Metrics] Switch to use tags to indicate task / workflow
execution status for metrics (#10867)
* Update docs and grafana demo dashboards
---
docs/docs/en/guide/metrics/metrics.md | 30 +-
docs/docs/zh/guide/metrics/metrics.md | 32 +-
.../master/event/TaskRetryStateEventHandler.java | 2 +-
.../server/master/event/TaskStateEventHandler.java | 8 +-
.../master/event/TaskTimeoutStateEventHandler.java | 2 +-
.../master/event/WorkflowStartEventHandler.java | 3 +-
.../master/event/WorkflowStateEventHandler.java | 8 +-
.../event/WorkflowTimeoutStateEventHandler.java | 2 +-
.../master/metrics/ProcessInstanceMetrics.java | 85 +-
.../server/master/metrics/TaskMetrics.java | 93 +-
.../master/runner/WorkflowExecuteRunnable.java | 2 +-
.../master/service/MasterFailoverService.java | 4 +-
.../master/service/WorkerFailoverService.java | 3 +-
.../resources/grafana/DolphinSchedulerMaster.json | 2694 ++++++++++----------
14 files changed, 1443 insertions(+), 1525 deletions(-)
diff --git a/docs/docs/en/guide/metrics/metrics.md
b/docs/docs/en/guide/metrics/metrics.md
index 9ecdc3619f..1b5d099baf 100644
--- a/docs/docs/en/guide/metrics/metrics.md
+++ b/docs/docs/en/guide/metrics/metrics.md
@@ -60,14 +60,15 @@ For example, you can get the master metrics by `curl
http://localhost:5679/actua
### Task Related Metrics
-- ds.task.timeout.count: (counter) the number of timeout tasks
-- ds.task.finish.count: (counter) the number of finished tasks, both succeeded
and failed included
-- ds.task.success.count: (counter) the number of successful tasks
-- ds.task.failure.count: (counter) the number of failed tasks
-- ds.task.stop.count: (counter) the number of stopped tasks
-- ds.task.retry.count: (counter) the number of retried tasks
-- ds.task.submit.count: (counter) the number of submitted tasks
-- ds.task.failover.count: (counter) the number of task fail-overs
+- ds.task.instance.count: (counter) the number of task instances, sliced by
the tag `state`:
+ - timeout: the number of timeout tasks
+ - finish: the number of finished tasks, both succeeded and failed included
+ - success: the number of successful tasks
+ - fail: the number of failed tasks
+ - stop: the number of stopped tasks
+ - retry: the number of retried tasks
+ - submit: the number of submitted tasks
+ - failover: the number of task fail-overs
- ds.task.dispatch.count: (counter) the number of tasks dispatched to worker
- ds.task.dispatch.failure.count: (counter) the number of tasks failed to
dispatch, retry failure included
- ds.task.dispatch.error.count: (counter) the number of task dispatch errors
@@ -83,12 +84,13 @@ For example, you can get the master metrics by `curl
http://localhost:5679/actua
- ds.workflow.create.command.count: (counter) the number of commands created
and inserted by workflows
- ds.workflow.instance.submit.count: (counter) the number of submitted
workflow instances
- ds.workflow.instance.running: (gauge) the number of running workflow
instances
-- ds.workflow.instance.timeout.count: (counter) the number of timeout workflow
instances
-- ds.workflow.instance.finish.count: (counter) indicates the number of
finished workflow instances, both successes and failures included
-- ds.workflow.instance.success.count: (counter) the number of successful
workflow instances
-- ds.workflow.instance.failure.count: (counter) the number of failed workflow
instances
-- ds.workflow.instance.stop.count: (counter) the number of stopped workflow
instances
-- ds.workflow.instance.failover.count: (counter) the number of workflow
instance fail-overs
+- ds.workflow.instance.count: (counter) the number of workflow instances,
sliced by the tag `state`:
+ - submit: the number of submitted workflow instances
+ - timeout: the number of timeout workflow instances
+ - finish: the number of finished workflow instances, both successes and
failures included
+ - success: the number of successful workflow instances
+ - fail: the number of failed workflow instances
+ - stop: the number of stopped workflow instances
+ - failover: the number of workflow instance fail-overs
### Master Server Metrics
diff --git a/docs/docs/zh/guide/metrics/metrics.md
b/docs/docs/zh/guide/metrics/metrics.md
index 3116f5445b..9101935cc0 100644
--- a/docs/docs/zh/guide/metrics/metrics.md
+++ b/docs/docs/zh/guide/metrics/metrics.md
@@ -61,14 +61,15 @@ metrics exporter端口`server.port`是在application.yaml里定义的:
master: `
### 任务相关指标
-- ds.task.timeout.count: (counter) 超时的任务数量
-- ds.task.finish.count: (counter) 完成的任务数量,成功和失败的任务都算在内
-- ds.task.success.count: (counter) 成功完成的任务数量
-- ds.task.failure.count: (counter) 失败的任务数量
-- ds.task.stop.count: (counter) 暂停的任务数量
-- ds.task.retry.count: (counter) 重试的任务数量
-- ds.task.submit.count: (counter) 已提交的任务数量
-- ds.task.failover.count: (counter) 容错的任务数量
+- ds.task.instance.count: (counter) 任务实例数量,由tag `state`按状态切分:
+ - timeout:超时的任务数量
+ - finish:完成的任务数量,成功和失败的任务都算在内
+ - success:成功完成的任务数量
+ - fail:失败的任务数量
+ - stop:停止的任务数量
+ - retry:重试的任务数量
+ - submit:已提交的任务数量
+ - failover:容错的任务数量
- ds.task.dispatch.count: (counter) 分发到worker上的任务数量
- ds.task.dispatch.failure.count: (counter) 分发失败的任务数量,重试也包含在内
- ds.task.dispatch.error.count: (counter) 分发任务的错误数量
@@ -82,14 +83,15 @@ metrics exporter端口`server.port`是在application.yaml里定义的:
master: `
### 工作流相关指标
- ds.workflow.create.command.count: (counter) 工作流创建并插入的命令数量
-- ds.workflow.instance.submit.count: (counter) 已提交的工作量实例数量
- ds.workflow.instance.running: (gauge) 正在运行的工作流实例数量
-- ds.workflow.instance.timeout.count: (counter) 运行超时的工作流实例数量
-- ds.workflow.instance.finish.count: (counter) 已完成的工作流实例数量,包含成功和失败
-- ds.workflow.instance.success.count: (counter) 运行成功的工作流实例数量
-- ds.workflow.instance.failure.count: (counter) 运行失败的工作流实例数量
-- ds.workflow.instance.stop.count: (counter) 停止的工作流实例数量
-- ds.workflow.instance.failover.count: (counter) 容错的工作流实例数量
+- ds.workflow.instance.count: (counter) 工作流实例数量,由tag `state`按状态切分:
+ - submit:已提交的工作流实例数量
+ - timeout:运行超时的工作流实例数量
+ - finish:已完成的工作流实例数量,包含成功和失败
+ - success:运行成功的工作流实例数量
+ - fail:运行失败的工作流实例数量
+ - stop:停止的工作流实例数量
+ - failover:容错的工作流实例数量
### Master Server指标
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskRetryStateEventHandler.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskRetryStateEventHandler.java
index ee8168856a..f6f7069ab3 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskRetryStateEventHandler.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskRetryStateEventHandler.java
@@ -31,7 +31,7 @@ public class TaskRetryStateEventHandler implements
StateEventHandler {
@Override
public boolean handleStateEvent(WorkflowExecuteRunnable
workflowExecuteRunnable, StateEvent stateEvent)
throws StateEventHandleException {
- TaskMetrics.incTaskRetry();
+ TaskMetrics.incTaskInstanceByState("retry");
Map<Long, TaskInstance> waitToRetryTaskInstanceMap =
workflowExecuteRunnable.getWaitToRetryTaskInstanceMap();
TaskInstance taskInstance =
waitToRetryTaskInstanceMap.get(stateEvent.getTaskCode());
workflowExecuteRunnable.addTaskToStandByList(taskInstance);
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskStateEventHandler.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskStateEventHandler.java
index e3ad268f97..c0cf864d31 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskStateEventHandler.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskStateEventHandler.java
@@ -96,17 +96,17 @@ public class TaskStateEventHandler implements
StateEventHandler {
return;
}
if (taskStateEvent.getExecutionStatus().typeIsFinished()) {
- TaskMetrics.incTaskFinish();
+ TaskMetrics.incTaskInstanceByState("finish");
}
switch (taskStateEvent.getExecutionStatus()) {
case STOP:
- TaskMetrics.incTaskStop();
+ TaskMetrics.incTaskInstanceByState("stop");
break;
case SUCCESS:
- TaskMetrics.incTaskSuccess();
+ TaskMetrics.incTaskInstanceByState("success");
break;
case FAILURE:
- TaskMetrics.incTaskFailure();
+ TaskMetrics.incTaskInstanceByState("fail");
break;
default:
break;
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskTimeoutStateEventHandler.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskTimeoutStateEventHandler.java
index 240f10ff2c..c43c0bcbf2 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskTimeoutStateEventHandler.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskTimeoutStateEventHandler.java
@@ -35,7 +35,7 @@ public class TaskTimeoutStateEventHandler implements
StateEventHandler {
@Override
public boolean handleStateEvent(WorkflowExecuteRunnable
workflowExecuteRunnable, StateEvent stateEvent)
throws StateEventHandleError {
- TaskMetrics.incTaskTimeout();
+ TaskMetrics.incTaskInstanceByState("timeout");
workflowExecuteRunnable.checkTaskInstanceByStateEvent(stateEvent);
TaskInstance taskInstance =
workflowExecuteRunnable.getTaskInstance(stateEvent.getTaskInstanceId()).get();
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStartEventHandler.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStartEventHandler.java
index b4d9fc1f85..c598cb5a90 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStartEventHandler.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStartEventHandler.java
@@ -59,8 +59,7 @@ public class WorkflowStartEventHandler implements
WorkflowEventHandler {
"The workflow start event is invalid, cannot find the workflow
instance from cache");
}
ProcessInstance processInstance =
workflowExecuteRunnable.getProcessInstance();
-
- ProcessInstanceMetrics.incProcessInstanceSubmit();
+ ProcessInstanceMetrics.incProcessInstanceByState("submit");
CompletableFuture<WorkflowSubmitStatue> workflowSubmitFuture =
CompletableFuture.supplyAsync(workflowExecuteRunnable::call,
workflowExecuteThreadPool);
workflowSubmitFuture.thenAccept(workflowSubmitStatue -> {
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStateEventHandler.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStateEventHandler.java
index 3abdd879bb..a37b3023a3 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStateEventHandler.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowStateEventHandler.java
@@ -75,17 +75,17 @@ public class WorkflowStateEventHandler implements
StateEventHandler {
private void measureProcessState(StateEvent processStateEvent) {
if (processStateEvent.getExecutionStatus().typeIsFinished()) {
- ProcessInstanceMetrics.incProcessInstanceFinish();
+ ProcessInstanceMetrics.incProcessInstanceByState("finish");
}
switch (processStateEvent.getExecutionStatus()) {
case STOP:
- ProcessInstanceMetrics.incProcessInstanceStop();
+ ProcessInstanceMetrics.incProcessInstanceByState("stop");
break;
case SUCCESS:
- ProcessInstanceMetrics.incProcessInstanceSuccess();
+ ProcessInstanceMetrics.incProcessInstanceByState("success");
break;
case FAILURE:
- ProcessInstanceMetrics.incProcessInstanceFailure();
+ ProcessInstanceMetrics.incProcessInstanceByState("fail");
break;
default:
break;
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowTimeoutStateEventHandler.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowTimeoutStateEventHandler.java
index c2fc873bdc..b04866a76a 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowTimeoutStateEventHandler.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/WorkflowTimeoutStateEventHandler.java
@@ -27,7 +27,7 @@ import com.google.auto.service.AutoService;
public class WorkflowTimeoutStateEventHandler implements StateEventHandler {
@Override
public boolean handleStateEvent(WorkflowExecuteRunnable
workflowExecuteRunnable, StateEvent stateEvent) {
- ProcessInstanceMetrics.incProcessInstanceTimeout();
+ ProcessInstanceMetrics.incProcessInstanceByState("timeout");
workflowExecuteRunnable.processTimeout();
return true;
}
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/ProcessInstanceMetrics.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/ProcessInstanceMetrics.java
index 8edf3f0c86..4cd8b3715a 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/ProcessInstanceMetrics.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/ProcessInstanceMetrics.java
@@ -17,9 +17,14 @@
package org.apache.dolphinscheduler.server.master.metrics;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
+import com.google.common.collect.ImmutableSet;
+
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Metrics;
@@ -31,6 +36,24 @@ public final class ProcessInstanceMetrics {
throw new UnsupportedOperationException("Utility class");
}
+ private static Map<String, Counter> PROCESS_INSTANCE_COUNTERS = new
HashMap<>();
+
+ private static final Set<String> PROCESS_INSTANCE_STATES = ImmutableSet.of(
+ "submit", "timeout", "finish", "failover", "success", "fail",
"stop");
+
+ static {
+ for (final String state : PROCESS_INSTANCE_STATES) {
+ PROCESS_INSTANCE_COUNTERS.put(
+ state,
+ Counter.builder("ds.workflow.instance.count")
+ .tag("state", state)
+ .description(String.format("Process instance %s
total count", state))
+ .register(Metrics.globalRegistry)
+ );
+ }
+
+ }
+
private static final Timer COMMAND_QUERY_TIMETER =
Timer.builder("ds.workflow.command.query.duration")
.description("Command query duration")
@@ -41,41 +64,6 @@ public final class ProcessInstanceMetrics {
.description("Process instance generated duration")
.register(Metrics.globalRegistry);
- private static final Counter PROCESS_INSTANCE_SUBMIT_COUNTER =
- Counter.builder("ds.workflow.instance.submit.count")
- .description("Process instance submit total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter PROCESS_INSTANCE_TIMEOUT_COUNTER =
- Counter.builder("ds.workflow.instance.timeout.count")
- .description("Process instance timeout total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter PROCESS_INSTANCE_FINISH_COUNTER =
- Counter.builder("ds.workflow.instance.finish.count")
- .description("Process instance finish total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter PROCESS_INSTANCE_SUCCESS_COUNTER =
- Counter.builder("ds.workflow.instance.success.count")
- .description("Process instance success total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter PROCESS_INSTANCE_FAILURE_COUNTER =
- Counter.builder("ds.workflow.instance.failure.count")
- .description("Process instance failure total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter PROCESS_INSTANCE_STOP_COUNTER =
- Counter.builder("ds.workflow.instance.stop.count")
- .description("Process instance stop total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter PROCESS_INSTANCE_FAILOVER_COUNTER =
- Counter.builder("ds.workflow.instance.failover.count")
- .description("Process instance failover total count")
- .register(Metrics.globalRegistry);
-
public static void recordCommandQueryTime(long milliseconds) {
COMMAND_QUERY_TIMETER.record(milliseconds, TimeUnit.MILLISECONDS);
}
@@ -96,31 +84,8 @@ public final class ProcessInstanceMetrics {
.register(Metrics.globalRegistry);
}
- public static void incProcessInstanceSubmit() {
- PROCESS_INSTANCE_SUBMIT_COUNTER.increment();
- }
-
- public static void incProcessInstanceTimeout() {
- PROCESS_INSTANCE_TIMEOUT_COUNTER.increment();
- }
-
- public static void incProcessInstanceFinish() {
- PROCESS_INSTANCE_FINISH_COUNTER.increment();
+ public static void incProcessInstanceByState(final String state) {
+ PROCESS_INSTANCE_COUNTERS.get(state).increment();
}
- public static void incProcessInstanceSuccess() {
- PROCESS_INSTANCE_SUCCESS_COUNTER.increment();
- }
-
- public static void incProcessInstanceFailure() {
- PROCESS_INSTANCE_FAILURE_COUNTER.increment();
- }
-
- public static void incProcessInstanceStop() {
- PROCESS_INSTANCE_STOP_COUNTER.increment();
- }
-
- public static void incProcessInstanceFailover() {
- PROCESS_INSTANCE_FAILOVER_COUNTER.increment();
- }
}
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/TaskMetrics.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/TaskMetrics.java
index 3f2ed1544f..5b20c59bac 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/TaskMetrics.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/TaskMetrics.java
@@ -17,8 +17,12 @@
package org.apache.dolphinscheduler.server.master.metrics;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
import java.util.function.Supplier;
+import com.facebook.presto.jdbc.internal.guava.collect.ImmutableSet;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Metrics;
@@ -29,45 +33,24 @@ public final class TaskMetrics {
throw new UnsupportedOperationException("Utility class");
}
- private static final Counter TASK_SUBMIT_COUNTER =
- Counter.builder("ds.task.submit.count")
- .description("Task submit total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter TASK_FINISH_COUNTER =
- Counter.builder("ds.task.finish.count")
- .description("Task finish total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter TASK_SUCCESS_COUNTER =
- Counter.builder("ds.task.success.count")
- .description("Task success total count")
- .register(Metrics.globalRegistry);
- private static final Counter TASK_FAILURE_COUNTER =
- Counter.builder("ds.task.failure.count")
- .description("Task failure total count")
- .register(Metrics.globalRegistry);
+ private static Map<String, Counter> TASK_INSTANCE_COUNTERS = new
HashMap<>();
- private static final Counter TASK_TIMEOUT_COUNTER =
- Counter.builder("ds.task.timeout.count")
- .description("Task timeout total count")
- .register(Metrics.globalRegistry);
+ private static final Set<String> TASK_INSTANCE_STATES = ImmutableSet.of(
+ "submit", "timeout", "finish", "failover", "retry", "dispatch",
"success", "fail", "stop");
- private static final Counter TASK_RETRY_COUNTER =
- Counter.builder("ds.task.retry.count")
- .description("Task retry total count")
- .register(Metrics.globalRegistry);
+ static {
+ for (final String state : TASK_INSTANCE_STATES) {
+ TASK_INSTANCE_COUNTERS.put(
+ state,
+ Counter.builder("ds.task.instance.count")
+ .tags("state", state)
+ .description(String.format("Process instance %s
total count", state))
+ .register(Metrics.globalRegistry)
+ );
+ }
- private static final Counter TASK_STOP_COUNTER =
- Counter.builder("ds.task.stop.count")
- .description("Task stop total count")
- .register(Metrics.globalRegistry);
-
- private static final Counter TASK_FAILOVER_COUNTER =
- Counter.builder("ds.task.failover.count")
- .description("Task failover total count")
- .register(Metrics.globalRegistry);
+ }
private static final Counter TASK_DISPATCH_COUNTER =
Counter.builder("ds.task.dispatch.count")
@@ -76,52 +59,20 @@ public final class TaskMetrics {
private static final Counter TASK_DISPATCHER_FAILED =
Counter.builder("ds.task.dispatch.failure.count")
- .description("Task dispatch failed count")
+ .description("Task dispatch failures count, retried ones
included")
.register(Metrics.globalRegistry);
private static final Counter TASK_DISPATCH_ERROR =
Counter.builder("ds.task.dispatch.error.count")
- .description("Task dispatch error")
+ .description("Number of errors during task dispatch")
.register(Metrics.globalRegistry);
- public static void incTaskSubmit() {
- TASK_SUBMIT_COUNTER.increment();
- }
-
public synchronized static void registerTaskPrepared(Supplier<Number>
consumer) {
Gauge.builder("ds.task.prepared", consumer)
.description("Task prepared count")
.register(Metrics.globalRegistry);
}
- public static void incTaskFinish() {
- TASK_FINISH_COUNTER.increment();
- }
-
- public static void incTaskSuccess() {
- TASK_SUCCESS_COUNTER.increment();
- }
-
- public static void incTaskFailure() {
- TASK_FAILURE_COUNTER.increment();
- }
-
- public static void incTaskTimeout() {
- TASK_TIMEOUT_COUNTER.increment();
- }
-
- public static void incTaskRetry() {
- TASK_RETRY_COUNTER.increment();
- }
-
- public static void incTaskStop() {
- TASK_STOP_COUNTER.increment();
- }
-
- public static void incTaskFailover() {
- TASK_FAILOVER_COUNTER.increment();
- }
-
public static void incTaskDispatchFailed(int failedCount) {
TASK_DISPATCHER_FAILED.increment(failedCount);
}
@@ -134,4 +85,8 @@ public final class TaskMetrics {
TASK_DISPATCH_COUNTER.increment();
}
+ public static void incTaskInstanceByState(final String state) {
+ TASK_INSTANCE_COUNTERS.get(state).increment();
+ }
+
}
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java
index 3de9299d22..27f90d3d00 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java
@@ -1673,7 +1673,7 @@ public class WorkflowExecuteRunnable implements
Callable<WorkflowSubmitStatue> {
taskInstance.getName(),
taskInstance.getId(),
taskInstance.getTaskCode());
- TaskMetrics.incTaskSubmit();
+ TaskMetrics.incTaskInstanceByState("submit");
readyToSubmitTaskQueue.put(taskInstance);
}
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/MasterFailoverService.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/MasterFailoverService.java
index efe33de5ff..f89c872784 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/MasterFailoverService.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/MasterFailoverService.java
@@ -169,7 +169,7 @@ public class MasterFailoverService {
}
}
- ProcessInstanceMetrics.incProcessInstanceFailover();
+ ProcessInstanceMetrics.incProcessInstanceByState("failover");
//updateProcessInstance host is null to mark this
processInstance has been failover
// and insert a failover command
processInstance.setHost(Constants.NULL);
@@ -211,7 +211,7 @@ public class MasterFailoverService {
* @param taskInstance
*/
private void failoverTaskInstance(@NonNull ProcessInstance
processInstance, @NonNull TaskInstance taskInstance) {
- TaskMetrics.incTaskFailover();
+ TaskMetrics.incTaskInstanceByState("failover");
boolean isMasterTask =
TaskProcessorFactory.isMasterTask(taskInstance.getTaskType());
taskInstance.setProcessInstance(processInstance);
diff --git
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.java
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.java
index 9a4578d8e6..d817e67fe2 100644
---
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.java
+++
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/WorkerFailoverService.java
@@ -156,8 +156,7 @@ public class WorkerFailoverService {
* @param taskInstance
*/
private void failoverTaskInstance(@NonNull ProcessInstance
processInstance, @NonNull TaskInstance taskInstance) {
-
- TaskMetrics.incTaskFailover();
+ TaskMetrics.incTaskInstanceByState("failover");
boolean isMasterTask =
TaskProcessorFactory.isMasterTask(taskInstance.getTaskType());
taskInstance.setProcessInstance(processInstance);
diff --git
a/dolphinscheduler-meter/src/main/resources/grafana/DolphinSchedulerMaster.json
b/dolphinscheduler-meter/src/main/resources/grafana/DolphinSchedulerMaster.json
index 5461759c8c..aa02dfae6a 100644
---
a/dolphinscheduler-meter/src/main/resources/grafana/DolphinSchedulerMaster.json
+++
b/dolphinscheduler-meter/src/main/resources/grafana/DolphinSchedulerMaster.json
@@ -43,12 +43,11 @@
"fiscalYearStartMonth": 0,
"gnetId": 4701,
"graphTooltip": 1,
- "iteration": 1654674717443,
"links": [],
"liveNow": false,
"panels": [
{
- "collapsed": true,
+ "collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
@@ -56,1442 +55,1439 @@
"y": 0
},
"id": 164,
- "panels": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
+ "panels": [],
+ "title": "MasterServer",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 0,
- "y": 1
- },
- "id": 148,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "tooltip": {
- "mode": "single",
- "sort": "none"
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
},
- "editorMode": "code",
- "expr": "increase(ds_master_overload_count_total[1m])",
- "legendFormat": "",
- "range": true,
- "refId": "A"
- }
- ],
- "title": "Master Overload/1m",
- "type": "timeseries"
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 0,
+ "y": 1
+ },
+ "id": 148,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
},
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
+ "editorMode": "code",
+ "expr": "increase(ds_master_overload_count_total[1m])",
+ "legendFormat": "",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Master Overload/1m",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 6,
- "y": 1
- },
- "id": 150,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "tooltip": {
- "mode": "single",
- "sort": "none"
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
},
- "editorMode": "code",
- "expr": "increase(ds_master_consume_command_count_total{}[1m])",
- "legendFormat": "master_consume_command",
- "range": true,
- "refId": "A"
- }
- ],
- "title": "Master Consume Command/1m",
- "type": "timeseries"
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 6,
+ "y": 1
+ },
+ "id": 150,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
},
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 12,
- "y": 1
- },
- "id": 168,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
+ "editorMode": "code",
+ "expr": "increase(ds_master_consume_command_count_total{}[1m])",
+ "legendFormat": "master_consume_command",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Master Consume Command/1m",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
},
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "editorMode": "code",
- "exemplar": false,
- "expr": "jvm_threads_live_threads{application=\"master-server\"}",
- "legendFormat": "live_thread",
- "range": true,
- "refId": "A"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "editorMode": "code",
- "expr": "jvm_threads_daemon_threads{application=\"master-server\"}",
- "hide": false,
- "legendFormat": "daemon_thread",
- "range": true,
- "refId": "B"
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "editorMode": "code",
- "expr": "jvm_threads_peak_threads{application=\"master-server\"}",
- "hide": false,
- "legendFormat": "peak_thread",
- "range": true,
- "refId": "C"
- }
- ],
- "title": "JVM Thread",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 18,
- "y": 1
- },
- "id": 170,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "tooltip": {
- "mode": "single",
- "sort": "none"
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
},
- "expr": "jvm_threads_states_threads{application=\"master-server\"}",
- "refId": "A"
- }
- ],
- "title": "Thread Status",
- "type": "timeseries"
- }
- ],
- "title": "MasterServer",
- "type": "row"
- },
- {
- "collapsed": true,
- "datasource": {
- "type": "datasource",
- "uid": "grafana"
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
},
"gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
+ "h": 8,
+ "w": 6,
+ "x": 12,
"y": 1
},
- "id": 126,
- "panels": [
+ "id": 168,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "thresholds"
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- }
- ]
- },
- "unit": "JOBS"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 2
- },
- "id": 63,
- "links": [],
- "maxDataPoints": 100,
- "options": {
- "colorMode": "value",
- "graphMode": "area",
- "justifyMode": "auto",
- "orientation": "horizontal",
- "reduceOptions": {
- "calcs": [
- "lastNotNull"
- ],
- "fields": "",
- "values": false
- },
- "text": {},
- "textMode": "auto"
- },
- "pluginVersion": "8.5.3",
- "targets": [
- {
- "exemplar": true,
- "expr": "sum(ds_master_quartz_job_executed_total)",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 14400
- }
- ],
- "title": "Job Total Count",
- "type": "stat"
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "jvm_threads_live_threads{application=\"master-server\"}",
+ "legendFormat": "live_thread",
+ "range": true,
+ "refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "thresholds"
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "red",
- "value": null
- },
- {
- "color": "green",
- "value": 80
- }
- ]
- },
- "unit": "percent"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 2
- },
- "id": 144,
- "links": [],
- "maxDataPoints": 100,
- "options": {
- "orientation": "horizontal",
- "reduceOptions": {
- "calcs": [
- "lastNotNull"
- ],
- "fields": "",
- "values": false
- },
- "showThresholdLabels": false,
- "showThresholdMarkers": true,
- "text": {}
- },
- "pluginVersion": "8.5.3",
- "targets": [
- {
- "exemplar": true,
- "expr": "sum(ds_master_quartz_job_executed_total{result=\"success\"}) / sum(ds_master_quartz_job_executed_total) * 100",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 14400
- }
- ],
- "title": "Job Successful Rate",
- "type": "gauge"
+ "editorMode": "code",
+ "expr": "jvm_threads_daemon_threads{application=\"master-server\"}",
+ "hide": false,
+ "legendFormat": "daemon_thread",
+ "range": true,
+ "refId": "B"
},
{
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "description": "",
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 9,
- "w": 6,
- "x": 0,
- "y": 10
- },
- "hiddenSeries": false,
- "id": 139,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "8.5.3",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "exemplar": true,
- "expr": "sum(ds_master_quartz_job_executed_total{})",
- "hide": false,
- "interval": "",
- "legendFormat": "Total",
- "refId": "A"
+ "editorMode": "code",
+ "expr": "jvm_threads_peak_threads{application=\"master-server\"}",
+ "hide": false,
+ "legendFormat": "peak_thread",
+ "range": true,
+ "refId": "C"
+ }
+ ],
+ "title": "JVM Thread",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- {
- "exemplar": true,
- "expr": "ds_master_quartz_job_executed_total{result=\"success\"}",
- "format": "time_series",
- "hide": false,
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Successful",
- "refId": "B"
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- {
- "exemplar": true,
- "expr": "ds_master_quartz_job_executed_total{result=\"failure\"}",
- "format": "time_series",
- "hide": false,
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Failed ({{exception}})",
- "refId": "C"
- }
- ],
- "thresholds": [],
- "timeRegions": [],
- "title": "Quartz Job Executed Count",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "mode": "time",
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "$$hashKey": "object:1516",
- "format": "short",
- "logBase": 1,
- "min": "0",
- "show": true
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- {
- "$$hashKey": "object:1517",
- "format": "short",
- "logBase": 1,
- "show": true
+ "thresholdsStyle": {
+ "mode": "off"
}
- ],
- "yaxis": {
- "align": false
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
},
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 9,
- "w": 6,
- "x": 6,
- "y": 10
- },
- "hiddenSeries": false,
- "id": 101,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "8.5.3",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "exemplar": true,
- "expr": "rate(ds_master_quartz_job_execution_time_seconds_sum[1m])/rate(ds_master_quartz_job_execution_time_seconds_count[1m])",
- "format": "time_series",
- "hide": false,
- "instant": false,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "avg ({{exception}})",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "quartz_job_execution_seconds_max",
- "format": "time_series",
- "hide": false,
- "instant": false,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "max ({{exception}})",
- "refId": "B"
- }
- ],
- "thresholds": [],
- "timeRegions": [],
- "title": "Quartz Job Execution Time",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "mode": "time",
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "$$hashKey": "object:1671",
- "format": "s",
- "logBase": 1,
- "min": "0",
- "show": true
- },
- {
- "$$hashKey": "object:1672",
- "format": "short",
- "label": "",
- "logBase": 1,
- "show": true
- }
- ],
- "yaxis": {
- "align": false
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "description": "",
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 9,
- "w": 6,
- "x": 12,
- "y": 10
- },
- "hiddenSeries": false,
- "id": 119,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "8.5.3",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "exemplar": true,
- "expr": "sum(increase(ds_master_quartz_job_executed_total[1m]))",
- "hide": false,
- "interval": "",
- "legendFormat": "Total",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "increase(ds_master_quartz_job_executed_total{result=\"success\"}[1m])",
- "format": "time_series",
- "hide": false,
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Successful",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "increase(ds_master_quartz_job_executed_total{result=\"failure\"}[1m])",
- "format": "time_series",
- "hide": false,
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Failed ({{exception}})",
- "refId": "C"
- }
- ],
- "thresholds": [],
- "timeRegions": [],
- "title": "Quartz Job Executed Count / Minute",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "mode": "time",
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "$$hashKey": "object:1516",
- "format": "short",
- "logBase": 1,
- "min": "0",
- "show": true
- },
- {
- "$$hashKey": "object:1517",
- "format": "short",
- "logBase": 1,
- "show": true
- }
- ],
- "yaxis": {
- "align": false
- }
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 18,
+ "y": 1
+ },
+ "id": 170,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
},
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
{
- "cards": {},
- "color": {
- "cardColor": "#F2495C",
- "colorScale": "sqrt",
- "colorScheme": "interpolateReds",
- "exponent": 0.5,
- "mode": "opacity"
- },
- "dataFormat": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "gridPos": {
- "h": 9,
- "w": 6,
- "x": 18,
- "y": 10
- },
- "heatmap": {},
- "hideZeroBuckets": false,
- "highlightCards": true,
- "id": 146,
- "legend": {
- "show": true
- },
- "pluginVersion": "8.2.3",
- "reverseYBuckets": false,
- "targets": [
- {
- "exemplar": true,
- "expr": "histogram_quantile(0.95, sum(rate(ds_master_quartz_job_execution_time_seconds_bucket[5m])) by (le))",
- "interval": "",
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "title": "Quartz Job Execution Time Distribution",
- "tooltip": {
- "show": true,
- "showHistogram": false
- },
- "type": "heatmap",
- "xAxis": {
- "show": true
- },
- "yAxis": {
- "format": "s",
- "logBase": 1,
- "show": true
- },
- "yBucketBound": "auto"
+ "expr": "jvm_threads_states_threads{application=\"master-server\"}",
+ "refId": "A"
}
],
- "title": "Scheduler",
- "type": "row"
+ "title": "Thread Status",
+ "type": "timeseries"
},
{
- "collapsed": true,
+ "collapsed": false,
+ "datasource": {
+ "type": "datasource",
+ "uid": "grafana"
+ },
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
- "y": 2
+ "y": 9
},
- "id": 166,
- "panels": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
+ "id": 126,
+ "panels": [],
+ "title": "Scheduler",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
}
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 3
- },
- "id": 152,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
+ ]
},
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "increase(ds_workflow_instance_submit_count_total{}[1m])",
- "refId": "A"
- }
- ],
- "title": "Process Instance Submit/1m",
- "type": "timeseries"
+ "unit": "JOBS"
},
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 3
- },
- "id": 162,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "increase(ds_workflow_instance_finish_count_total{}[1m])",
- "refId": "A"
- }
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "id": 63,
+ "links": [],
+ "maxDataPoints": 100,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
],
- "title": "Process Instance Finish/1m",
- "type": "timeseries"
+ "fields": "",
+ "values": false
},
+ "text": {},
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.5.3",
+ "targets": [
{
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
+ "exemplar": true,
+ "expr": "sum(ds_master_quartz_job_executed_total)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 14400
+ }
+ ],
+ "title": "Job Total Count",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ {
+ "color": "green",
+ "value": 80
}
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 0,
- "y": 11
- },
- "id": 156,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
+ ]
},
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "increase(ds_workflow_instance_success_count_total{}[1m])",
- "refId": "A"
- }
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 10
+ },
+ "id": 144,
+ "links": [],
+ "maxDataPoints": 100,
+ "options": {
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
],
- "title": "Process Instance Success /1m",
- "type": "timeseries"
+ "fields": "",
+ "values": false
},
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true,
+ "text": {}
+ },
+ "pluginVersion": "8.5.3",
+ "targets": [
{
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 6,
- "y": 11
- },
- "id": 160,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "increase(ds_workflow_instance_stop_count_total{}[1m])",
- "refId": "A"
- }
- ],
- "title": "Process Instance Stop/1m",
- "type": "timeseries"
+ "exemplar": true,
+ "expr": "sum(ds_master_quartz_job_executed_total{result=\"success\"}) / sum(ds_master_quartz_job_executed_total) * 100",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 14400
+ }
+ ],
+ "title": "Job Successful Rate",
+ "type": "gauge"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 0,
+ "y": 18
+ },
+ "hiddenSeries": false,
+ "id": 139,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "8.5.3",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(ds_master_quartz_job_executed_total{})",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Total",
+ "refId": "A"
},
+ {
+ "exemplar": true,
+ "expr": "ds_master_quartz_job_executed_total{result=\"success\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Successful",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "ds_master_quartz_job_executed_total{result=\"failure\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Failed ({{exception}})",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeRegions": [],
+ "title": "Quartz Job Executed Count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1516",
+ "format": "short",
+ "logBase": 1,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1517",
+ "format": "short",
+ "logBase": 1,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 6,
+ "y": 18
+ },
+ "hiddenSeries": false,
+ "id": 101,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "8.5.3",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "rate(ds_master_quartz_job_execution_time_seconds_sum[1m])/rate(ds_master_quartz_job_execution_time_seconds_count[1m])",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "avg ({{exception}})",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "quartz_job_execution_seconds_max",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "max ({{exception}})",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeRegions": [],
+ "title": "Quartz Job Execution Time",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1671",
+ "format": "s",
+ "logBase": 1,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1672",
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 12,
+ "y": 18
+ },
+ "hiddenSeries": false,
+ "id": 119,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "8.5.3",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(increase(ds_master_quartz_job_executed_total[1m]))",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Total",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(ds_master_quartz_job_executed_total{result=\"success\"}[1m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Successful",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(ds_master_quartz_job_executed_total{result=\"failure\"}[1m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Failed ({{exception}})",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeRegions": [],
+ "title": "Quartz Job Executed Count / Minute",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1516",
+ "format": "short",
+ "logBase": 1,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1517",
+ "format": "short",
+ "logBase": 1,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false
+ }
+ },
+ {
+ "cards": {},
+ "color": {
+ "cardColor": "#F2495C",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateReds",
+ "exponent": 0.5,
+ "mode": "opacity"
+ },
+ "dataFormat": "timeseries",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 18,
+ "y": 18
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 146,
+ "legend": {
+ "show": true
+ },
+ "pluginVersion": "8.2.3",
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(0.95, sum(rate(ds_master_quartz_job_execution_time_seconds_bucket[5m])) by (le))",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Quartz Job Execution Time Distribution",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "yAxis": {
+ "format": "s",
+ "logBase": 1,
+ "show": true
+ },
+ "yBucketBound": "auto"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 27
+ },
+ "id": 166,
+ "panels": [],
+ "title": "ProcessInstance",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 28
+ },
+ "id": 152,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "expr": "sum(increase(ds_workflow_instance_count_total{state=\"submit\"}[1m]))",
+ "refId": "A"
+ }
+ ],
+ "title": "Process Instance Submit/1m",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 28
+ },
+ "id": 162,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "expr": "sum(increase(ds_workflow_instance_count_total{state=\"finish\"}[1m]))",
+ "refId": "A"
+ }
+ ],
+ "title": "Process Instance Finish/1m",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 0,
+ "y": 36
+ },
+ "id": 156,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
+ "expr":
"sum(increase(ds_workflow_instance_count_total{state=\"success\"}[1m]))",
+ "refId": "A"
+ }
+ ],
+ "title": "Process Instance Success/1m",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 12,
- "y": 11
- },
- "id": 154,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "tooltip": {
- "mode": "single",
- "sort": "none"
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
},
- "expr":
"increase(ds_workflow_instance_timeout_count_total{}[1m])",
- "refId": "A"
- }
- ],
- "title": "Process Instance Timeout/1m",
- "type": "timeseries"
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 6,
+ "y": 36
+ },
+ "id": 160,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
},
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "expr":
"sum(increase(ds_workflow_instance_count_total{state=\"stop\"}[1m]))",
+ "refId": "A"
+ }
+ ],
+ "title": "Process Instance Stop/1m",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ {
+ "color": "red",
+ "value": 80
}
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 12,
+ "y": 36
+ },
+ "id": 154,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "expr":
"sum(increase(ds_workflow_instance_count_total{state=\"timeout\"}[1m]))",
+ "refId": "A"
+ }
+ ],
+ "title": "Process Instance Timeout/1m",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 18,
- "y": 11
- },
- "id": 158,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "tooltip": {
- "mode": "single",
- "sort": "none"
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
},
- "expr":
"increase(ds_workflow_instance_failure_count_total{}[1m])",
- "refId": "A"
- }
- ],
- "title": "Process Instance Failure/1m",
- "type": "timeseries"
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 18,
+ "y": 36
+ },
+ "id": 158,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "PBFA97CFB590B2093"
+ },
+ "expr":
"sum(increase(ds_workflow_instance_count_total{state=\"fail\"}[1m]))",
+ "refId": "A"
}
],
- "title": "ProcessInstance",
- "type": "row"
+ "title": "Process Instance Failure/1m",
+ "type": "timeseries"
},
{
"collapsed": false,
@@ -1499,7 +1495,7 @@
"h": 1,
"w": 24,
"x": 0,
- "y": 3
+ "y": 44
},
"id": 172,
"panels": [],
@@ -1565,7 +1561,7 @@
"h": 8,
"w": 8,
"x": 0,
- "y": 4
+ "y": 45
},
"id": 178,
"options": {
@@ -1669,7 +1665,7 @@
"h": 8,
"w": 8,
"x": 8,
- "y": 4
+ "y": 45
},
"id": 180,
"options": {
@@ -1689,7 +1685,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "expr": "sum(increase(ds_task_submit_count_total{}[1m]))",
+ "expr":
"sum(increase(ds_task_instance_count_total{state=\"submit\"}[1m]))",
"refId": "A"
}
],
@@ -1755,7 +1751,7 @@
"h": 8,
"w": 8,
"x": 16,
- "y": 4
+ "y": 45
},
"id": 182,
"options": {
@@ -1775,7 +1771,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "expr": "sum(increase(ds_task_finish_count_total{}[1m]))",
+ "expr":
"sum(increase(ds_task_instance_count_total{state=\"finish\"}[1m]))",
"refId": "A"
}
],
@@ -1841,7 +1837,7 @@
"h": 8,
"w": 8,
"x": 0,
- "y": 12
+ "y": 53
},
"id": 184,
"options": {
@@ -1861,7 +1857,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "expr": "sum(increase(ds_task_success_count_total{}[1m]))",
+ "expr":
"sum(increase(ds_task_instance_count_total{state=\"success\"}[1m]))",
"refId": "A"
}
],
@@ -1927,7 +1923,7 @@
"h": 8,
"w": 8,
"x": 8,
- "y": 12
+ "y": 53
},
"id": 186,
"options": {
@@ -1947,7 +1943,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "expr": "sum(increase(ds_task_failure_count_total{}[1m]))",
+ "expr":
"sum(increase(ds_task_instance_count_total{state=\"fail\"}[1m]))",
"refId": "A"
}
],
@@ -2013,7 +2009,7 @@
"h": 8,
"w": 8,
"x": 16,
- "y": 12
+ "y": 53
},
"id": 188,
"options": {
@@ -2033,7 +2029,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "expr": "sum(increase(ds_task_timeout_count_total{}[1m]))",
+ "expr":
"sum(increase(ds_task_instance_count_total{state=\"timeout\"}[1m]))",
"refId": "A"
}
],
@@ -2099,7 +2095,7 @@
"h": 8,
"w": 8,
"x": 0,
- "y": 20
+ "y": 61
},
"id": 190,
"options": {
@@ -2119,7 +2115,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "expr": "sum(increase(ds_task_retry_count_total{}[1m]))",
+ "expr":
"sum(increase(ds_task_instance_count_total{state=\"retry\"}[1m]))",
"refId": "A"
}
],
@@ -2185,7 +2181,7 @@
"h": 8,
"w": 8,
"x": 8,
- "y": 20
+ "y": 61
},
"id": 192,
"options": {
@@ -2205,7 +2201,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "expr": "sum(increase(ds_task_stop_count_total{}[1m]))",
+ "expr":
"sum(increase(ds_task_instance_count_total{state=\"stop\"}[1m]))",
"refId": "A"
}
],
@@ -2271,7 +2267,7 @@
"h": 8,
"w": 8,
"x": 16,
- "y": 20
+ "y": 61
},
"id": 194,
"options": {
@@ -2291,7 +2287,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
- "expr": "sum(increase(ds_task_failover_count_total{}[1m]))",
+ "expr":
"sum(increase(ds_task_instance_count_total{state=\"failover\"}[1m]))",
"refId": "A"
}
],
@@ -2308,8 +2304,8 @@
{
"current": {
"selected": false,
- "text": "master-server",
- "value": "master-server"
+ "text": "standalone-server",
+ "value": "standalone-server"
},
"datasource": {
"type": "prometheus",
@@ -2339,8 +2335,8 @@
"allFormat": "glob",
"current": {
"selected": false,
- "text": "host.docker.internal:5679",
- "value": "host.docker.internal:5679"
+ "text": "host.docker.internal:12345",
+ "value": "host.docker.internal:12345"
},
"datasource": {
"type": "prometheus",
@@ -2434,7 +2430,7 @@
]
},
"time": {
- "from": "now-5m",
+ "from": "now-30m",
"to": "now"
},
"timepicker": {
@@ -2466,6 +2462,6 @@
"timezone": "browser",
"title": "Master",
"uid": "6XgATOcnz",
- "version": 2,
+ "version": 1,
"weekStart": ""
}
\ No newline at end of file