This is an automated email from the ASF dual-hosted git repository.

weichiu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e4e5f34361 HDDS-14039. Create Grafana dashboard for Ozone SCM safemode rules and exit (#9400)
7e4e5f34361 is described below

commit 7e4e5f34361f34da49d41ba75161463248f5f0e7
Author: sreejasahithi <[email protected]>
AuthorDate: Tue Jan 13 10:33:48 2026 +0530

    HDDS-14039. Create Grafana dashboard for Ozone SCM safemode rules and exit (#9400)
---
 .../hdds/scm/safemode/DataNodeSafeModeRule.java    |   8 +-
 .../hdds/scm/safemode/SCMSafeModeManager.java      |   1 +
 .../hadoop/hdds/scm/safemode/SafeModeMetrics.java  |  27 +
 .../scm/safemode/TestDataNodeSafeModeRule.java     |   8 +-
 .../hdds/scm/safemode/TestSCMSafeModeManager.java  |  32 +
 .../grafana/dashboards/Ozone - SCM Safemode.json   | 766 +++++++++++++++++++++
 6 files changed, 840 insertions(+), 2 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java
index 63be485e028..0cd763413e8 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java
@@ -51,6 +51,7 @@ public DataNodeSafeModeRule(EventQueue eventQueue,
     requiredDns = conf.getInt(
         HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE,
         HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE_DEFAULT);
+    getSafeModeMetrics().setNumRequiredDatanodesThreshold(requiredDns);
     registeredDnSet = new HashSet<>(requiredDns * 2);
     this.nodeManager = nodeManager;
   }
@@ -71,9 +72,14 @@ protected boolean validate() {
   @Override
   protected void process(NodeRegistrationContainerReport reportsProto) {
 
-    registeredDnSet.add(reportsProto.getDatanodeDetails().getID());
+    DatanodeID dnId = reportsProto.getDatanodeDetails().getID();
+    boolean added = registeredDnSet.add(dnId);
     registeredDns = registeredDnSet.size();
 
+    if (added) {
+      getSafeModeMetrics().incCurrentRegisteredDatanodesCount();
+    }
+    
     if (scmInSafeMode()) {
       SCMSafeModeManager.getLogger().info(
           "SCM in safe mode. {} DataNodes registered, {} required.",
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
index bc2a26fbf91..67d47d101df 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
@@ -119,6 +119,7 @@ public SafeModeMetrics getSafeModeMetrics() {
 
   private void emitSafeModeStatus() {
     final SafeModeStatus safeModeStatus = status.get();
+    safeModeMetrics.setScmInSafeMode(safeModeStatus.isInSafeMode());
     scmContext.updateSafeModeStatus(safeModeStatus);
 
     // notify SCMServiceManager
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java
index f5f4ce12992..ae65eafcb91 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
 import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
 
 /**
@@ -52,6 +53,12 @@ public class SafeModeMetrics {
   private @Metric MutableCounterLong
       currentPipelinesWithAtleastOneReplicaReportedCount;
 
+  @Metric("Metric will be set to 1 if SCM is in SafeMode, otherwise 0") 
+  private MutableGaugeInt scmInSafeMode;
+  
+  @Metric private MutableGaugeLong numRequiredDatanodesThreshold;
+  @Metric private MutableCounterLong currentRegisteredDatanodesCount;
+
   public static SafeModeMetrics create() {
     final MetricsSystem ms = DefaultMetricsSystem.instance();
     return ms.register(SOURCE_NAME, "SCM Safemode Metrics", new SafeModeMetrics());
@@ -86,6 +93,14 @@ public void setNumContainerReportedThreshold(HddsProtos.ReplicationType type, lo
     }
   }
 
+  public void setScmInSafeMode(boolean inSafeMode) {
+    this.scmInSafeMode.set(inSafeMode ? 1 : 0);
+  }
+
+  public void setNumRequiredDatanodesThreshold(long val) {
+    this.numRequiredDatanodesThreshold.set(val);
+  }
+
   public void incCurrentContainersWithOneReplicaReportedCount() {
     this.currentContainersWithOneReplicaReportedCount.incr();
   }
@@ -94,6 +109,10 @@ public void incCurrentContainersWithECDataReplicaReportedCount() {
     this.currentContainersWithECDataReplicaReportedCount.incr();
   }
 
+  public void incCurrentRegisteredDatanodesCount() {
+    this.currentRegisteredDatanodesCount.incr();
+  }
+
   MutableGaugeLong getNumHealthyPipelinesThreshold() {
     return numHealthyPipelinesThreshold;
   }
@@ -122,6 +141,14 @@ MutableGaugeLong getNumContainerWithECDataReplicaReportedThreshold() {
   MutableCounterLong getCurrentContainersWithOneReplicaReportedCount() {
     return currentContainersWithOneReplicaReportedCount;
   }
+  
+  MutableCounterLong getCurrentRegisteredDatanodesCount() {
+    return currentRegisteredDatanodesCount;
+  }
+
+  MutableGaugeInt getScmInSafeMode() {
+    return scmInSafeMode;
+  }
 
   public void unRegister() {
     MetricsSystem ms = DefaultMetricsSystem.instance();
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java
index c62293e7648..011e97aac99 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java
@@ -17,6 +17,7 @@
 
 package org.apache.hadoop.hdds.scm.safemode;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -53,6 +54,7 @@ public class TestDataNodeSafeModeRule {
   private EventQueue eventQueue;
   private NodeManager nodeManager;
   private SCMSafeModeManager mockSafeModeManager;
+  private SafeModeMetrics metrics;
 
   private void setup(int requiredDns) throws Exception {
     OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
@@ -65,6 +67,8 @@ private void setup(int requiredDns) throws Exception {
     eventQueue = new EventQueue();
 
     mockSafeModeManager = mock(SCMSafeModeManager.class);
+    metrics = SafeModeMetrics.create();
+    when(mockSafeModeManager.getSafeModeMetrics()).thenReturn(metrics);
 
     rule = new DataNodeSafeModeRule(eventQueue, ozoneConfiguration, nodeManager, mockSafeModeManager);
     assertNotNull(rule);
@@ -94,6 +98,7 @@ public void testDataNodeSafeModeRuleWithNoNodes() throws Exception {
         "SCM in safe mode. 1 DataNodes registered, 1 required."), 1000, 5000);
 
     assertTrue(rule.validate());
+    assertEquals(1, metrics.getCurrentRegisteredDatanodesCount().value());
   }
 
   @Test
@@ -120,7 +125,7 @@ public void testDataNodeSafeModeRuleWithMultipleNodes() throws Exception {
         "SCM in safe mode. 2 DataNodes registered, 3 required."), 1000, 5000);
 
     assertFalse(rule.validate());
-
+    assertEquals(2, metrics.getCurrentRegisteredDatanodesCount().value());
     DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails();
     NodeRegistrationContainerReport nodeReg = 
         new NodeRegistrationContainerReport(dd, null);
@@ -131,6 +136,7 @@ public void testDataNodeSafeModeRuleWithMultipleNodes() throws Exception {
         "SCM in safe mode. 3 DataNodes registered, 3 required."), 1000, 5000);
 
     assertTrue(rule.validate());
+    assertEquals(3, metrics.getCurrentRegisteredDatanodesCount().value());
   }
 
   @Test
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
index 1cbd6bc3725..1ef531f8bf8 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
@@ -109,6 +109,9 @@ public void setUp() throws IOException {
 
   @AfterEach
   public void destroyDbStore() throws Exception {
+    if (scmSafeModeManager != null) {
+      scmSafeModeManager.getSafeModeMetrics().unRegister();
+    }
     if (scmMetadataStore.getStore() != null) {
       scmMetadataStore.getStore().close();
     }
@@ -136,6 +139,7 @@ private void testSafeMode(int numContainers) throws Exception {
     scmSafeModeManager.start();
 
     assertTrue(scmSafeModeManager.getInSafeMode());
+    assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value());
     validateRuleStatus("DatanodeSafeModeRule", "registered datanodes 0");
     SCMDatanodeProtocolServer.NodeRegistrationContainerReport nodeRegistrationContainerReport =
         HddsTestUtils.createNodeRegistrationContainerReport(containers);
@@ -151,6 +155,9 @@ private void testSafeMode(int numContainers) throws Exception {
 
     GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(),
         100, 1000 * 5);
+    GenericTestUtils.waitFor(() ->
+            scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0,
+        100, 1000 * 5);
 
     assertEquals(cutOff, scmSafeModeManager.getSafeModeMetrics()
         .getCurrentContainersWithOneReplicaReportedCount().value());
@@ -182,6 +189,7 @@ public void testSafeModeExitRule() throws Exception {
         .getNumContainerWithOneReplicaReportedThreshold().value());
 
     assertTrue(scmSafeModeManager.getInSafeMode());
+    assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value());
     validateRuleStatus("ContainerSafeModeRule",
         "0.00% of [Ratis] Containers(0 / 100) with at least one reported");
     testContainerThreshold(containers.subList(0, 25), 0.25);
@@ -202,6 +210,9 @@ public void testSafeModeExitRule() throws Exception {
 
     GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(),
         100, 1000 * 5);
+    GenericTestUtils.waitFor(() ->
+            scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0,
+        100, 1000 * 5);
   }
 
   private OzoneConfiguration createConf(double healthyPercent,
@@ -306,6 +317,7 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck(
     scmSafeModeManager.start();
 
     assertTrue(scmSafeModeManager.getInSafeMode());
+    assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value());
     if (healthyPipelinePercent > 0) {
       validateRuleStatus("HealthyPipelineSafeModeRule",
           "healthy Ratis/THREE pipelines");
@@ -367,6 +379,9 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck(
 
     GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(),
         100, 1000 * 5);
+    GenericTestUtils.waitFor(() ->
+            scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0,
+        100, 1000 * 5);
   }
 
   /**
@@ -477,8 +492,10 @@ public void testContainerSafeModeRule() throws Exception {
 
     scmSafeModeManager = new SCMSafeModeManager(config, null, null,
         containerManager, serviceManager, queue, scmContext);
+    scmSafeModeManager.start();
 
     assertTrue(scmSafeModeManager.getInSafeMode());
+    assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value());
 
     // When 10 CLOSED containers are reported by DNs, the computed container
     // threshold should be 10/20 as there are only 20 CLOSED NON-EMPTY
@@ -494,6 +511,9 @@ public void testContainerSafeModeRule() throws Exception {
 
     GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(),
         100, 1000 * 5);
+    GenericTestUtils.waitFor(() ->
+            scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0,
+        100, 1000 * 5);
   }
 
   // We simulate common EC types: EC-2-2-1024K, EC-3-2-1024K, EC-6-3-1024K.
@@ -584,6 +604,7 @@ private void testSafeModeDataNodes(int numOfDns) throws Exception {
 
     // Assert SCM is in Safe mode.
     assertTrue(scmSafeModeManager.getInSafeMode());
+    assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value());
 
     // Register all DataNodes except last one and assert SCM is in safe mode.
     for (int i = 0; i < numOfDns - 1; i++) {
@@ -606,6 +627,9 @@ private void testSafeModeDataNodes(int numOfDns) throws Exception {
         HddsTestUtils.createNodeRegistrationContainerReport(containers));
     GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(),
         10, 1000 * 10);
+    GenericTestUtils.waitFor(() ->
+            scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0,
+        100, 1000 * 5);
   }
 
   private void testContainerThreshold(List<ContainerInfo> dnContainers,
@@ -700,11 +724,15 @@ public void testSafeModePipelineExitRule() throws Exception {
       
 
     assertTrue(scmSafeModeManager.getInSafeMode());
+    assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value());
 
     firePipelineEvent(pipelineManager, pipeline);
 
     GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(),
         100, 1000 * 10);
+    GenericTestUtils.waitFor(() ->
+            scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0,
+        100, 1000 * 5);
     pipelineManager.close();
   }
 
@@ -744,6 +772,7 @@ public void testPipelinesNotCreatedUntilPreCheckPasses() throws Exception {
 
     // Assert SCM is in Safe mode.
     assertTrue(scmSafeModeManager.getInSafeMode());
+    assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value());
 
     // stop background pipeline creator as we manually create
     // pipeline below
@@ -781,5 +810,8 @@ public void testPipelinesNotCreatedUntilPreCheckPasses() throws Exception {
     queue.processAll(5000);
     assertTrue(scmSafeModeManager.getPreCheckComplete());
     assertFalse(scmSafeModeManager.getInSafeMode());
+    GenericTestUtils.waitFor(() ->
+            scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0,
+        100, 1000 * 5);
   }
 }
diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json
new file mode 100644
index 00000000000..ac0c291b83a
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json
@@ -0,0 +1,766 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "prometheus"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 1,
+  "links": [],
+  "panels": [
+  {
+    "collapsed": false,
+    "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
+    "id": 100,
+    "panels": [],
+    "title": "SCM Safemode: Summary",
+    "type": "row"
+  },
+    {
+      "datasource": {
+        "type": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "short",
+          "decimals": 0,
+          "mappings": [
+            {
+              "options": {
+                "0": {
+                  "color": "green",
+                  "text": "Exited safemode"
+                },
+                "1": {
+                  "color": "red",
+                  "text": "In Safemode"
+                }
+              },
+              "type": "value"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red", 
+                "value": 1
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
+      "id": 101,
+      "options": {
+        "alignValue": "center",
+        "legend": {
+          "displayMode": "hidden",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "mergeValues": true,
+        "rowHeight": 0.9,
+        "showValue": "always",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "builder",
+          "expr": "safe_mode_metrics_scm_in_safe_mode",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "{{hostname}}",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        }
+      ],
+      "title": "SCM Safemode Status",
+      "type": "state-timeline"
+    },
+    {
+      "datasource": {
+        "type": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "min": 0,
+          "decimals": 0,
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "Containers",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Target Threshold"
+            },
+            "properties": [
+              {
+                "id": "custom.lineStyle",
+                "value": {
+                  "dash": [10, 10],
+                  "fill": "dash"
+                }
+              },
+              {
+                "id": "custom.lineWidth",
+                "value": 3
+              },
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "green",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
+      "id": 102,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "max(safe_mode_metrics_num_container_with_one_replica_reported_threshold)",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "Target Threshold",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "disableTextWrap": false,
+          "editorMode": "builder",
+          "expr": "safe_mode_metrics_current_containers_with_one_replica_reported_count",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "{{hostname}} actual",
+          "range": true,
+          "refId": "B",
+          "useBackend": false
+        }
+      ],
+      "title": "Ratis Containers: Target vs Actual",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "min": 0,
+          "decimals": 0,
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "Containers",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Target Threshold"
+            },
+            "properties": [
+              {
+                "id": "custom.lineStyle",
+                "value": {
+                  "dash": [10, 10],
+                  "fill": "dash"
+                }
+              },
+              {
+                "id": "custom.lineWidth",
+                "value": 3
+              },
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "green",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 },
+      "id": 103,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "max(safe_mode_metrics_num_container_with_ec_data_replica_reported_threshold)",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "Target Threshold",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "disableTextWrap": false,
+          "editorMode": "builder",
+          "expr": "safe_mode_metrics_current_containers_with_ec_data_replica_reported_count",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "{{hostname}} actual",
+          "range": true,
+          "refId": "B",
+          "useBackend": false
+        }
+      ],
+      "title": "EC Containers: Target vs Actual",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "min": 0,
+          "decimals": 0,
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "Pipelines",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Target Threshold"
+            },
+            "properties": [
+              {
+                "id": "custom.lineStyle",
+                "value": {
+                  "dash": [10, 10],
+                  "fill": "dash"
+                }
+              },
+              {
+                "id": "custom.lineWidth",
+                "value": 3
+              },
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "green",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 },
+      "id": 104,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "max(safe_mode_metrics_num_healthy_pipelines_threshold)",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "Target Threshold",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "disableTextWrap": false,
+          "editorMode": "builder",
+          "expr": "safe_mode_metrics_current_healthy_pipelines_count",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "{{hostname}} actual",
+          "range": true,
+          "refId": "B",
+          "useBackend": false
+        }
+      ],
+      "title": "Healthy Pipelines: Target vs Actual",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "min": 0,
+          "decimals": 0,
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "Pipelines",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false, 
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red", 
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Target Threshold"
+            },
+            "properties": [
+              {
+                "id": "custom.lineStyle",
+                "value": {
+                  "dash": [10, 10],
+                  "fill": "dash"
+                }
+              },
+              {
+                "id": "custom.lineWidth",
+                "value": 3
+              },
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "green",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 17 },
+      "id": 105,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "max(safe_mode_metrics_num_pipelines_with_atleast_one_replica_reported_threshold)",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "Target Threshold",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "disableTextWrap": false,
+          "editorMode": "builder",
+          "expr": "safe_mode_metrics_current_pipelines_with_atleast_one_replica_reported_count",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "{{hostname}} actual",
+          "range": true,
+          "refId": "B",
+          "useBackend": false
+        }
+      ],
+      "title": "One-Replica Pipelines: Target vs Actual",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "min": 0,
+          "decimals": 0,
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "DataNodes",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Target Threshold"
+            },
+            "properties": [
+              {
+                "id": "custom.lineStyle",
+                "value": {
+                  "dash": [10, 10],
+                  "fill": "dash"
+                }
+              },
+              {
+                "id": "custom.lineWidth",
+                "value": 3
+              },
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "green",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 17 },
+      "id": 106,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "max(safe_mode_metrics_num_required_datanodes_threshold)",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "Target Threshold",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "disableTextWrap": false,
+          "editorMode": "builder",
+          "expr": "safe_mode_metrics_current_registered_datanodes_count",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "{{hostname}} actual",
+          "range": true,
+          "refId": "B",
+          "useBackend": false
+        }
+      ],
+      "title": "Registered DataNodes: Target vs Actual",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "refresh": "45s",
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-30m", 
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Ozone - SCM Safemode",
+  "weekStart": ""
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to