This is an automated email from the ASF dual-hosted git repository.
ivandika pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 2c0e0b1563 HDDS-11272. Statistics some node status information (#7025)
2c0e0b1563 is described below
commit 2c0e0b156398aee24e8f2f66c925e857fec34aa4
Author: jianghuazhu <[email protected]>
AuthorDate: Tue Aug 6 18:19:25 2024 +0800
HDDS-11272. Statistics some node status information (#7025)
---
.../apache/hadoop/hdds/scm/node/DatanodeInfo.java | 13 ++++++
.../hadoop/hdds/scm/node/NodeStateManager.java | 52 ++++++++++++++++++++++
.../hadoop/hdds/scm/node/SCMNodeManager.java | 30 +++++++++++++
.../main/resources/webapps/scm/scm-overview.html | 32 ++++++++++++-
.../src/main/resources/webapps/scm/scm.js | 35 +++++++++++----
5 files changed, 152 insertions(+), 10 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java
index ab296fc52b..05ed833edb 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java
@@ -227,6 +227,19 @@ public class DatanodeInfo extends DatanodeDetails {
}
}
+  /**
+   * Returns count of failed volumes reported by the data node.
+   *
+   * <p>The field is read while holding the read lock.
+   *
+   * @return count of failed volumes
+   */
+  public int getFailedVolumeCount() {
+    // Acquire the lock before entering the try block: if lock() itself
+    // failed inside the try, the finally clause would attempt to release
+    // a lock this thread never obtained.
+    lock.readLock().lock();
+    try {
+      return failedVolumeCount;
+    } finally {
+      lock.readLock().unlock();
+    }
+  }
+
/**
* Returns count of healthy metadata volumes reported from datanode.
* @return count of healthy metdata log volumes
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
index 3307a292dc..3c3ff8fb83 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
@@ -29,6 +29,7 @@ import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
+import java.util.stream.Collectors;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -477,6 +478,38 @@ public class NodeStateManager implements Runnable,
Closeable {
return getNodes(null, NodeState.DEAD);
}
+  /**
+   * Looks up the nodes whose operational state is DECOMMISSIONING.
+   * No health filter is supplied (the health argument is {@code null}).
+   *
+   * @return list of decommissioning nodes
+   */
+  public List<DatanodeInfo> getDecommissioningNodes() {
+    final NodeOperationalState wantedOpState =
+        NodeOperationalState.DECOMMISSIONING;
+    return getNodes(wantedOpState, null);
+  }
+
+  /**
+   * Counts the nodes returned by {@link #getDecommissioningNodes()}.
+   *
+   * @return decommissioning node count
+   */
+  public int getDecommissioningNodeCount() {
+    final List<DatanodeInfo> decommissioning = getDecommissioningNodes();
+    return decommissioning.size();
+  }
+
+  /**
+   * Looks up the nodes whose operational state is ENTERING_MAINTENANCE.
+   * No health filter is supplied (the health argument is {@code null}).
+   *
+   * @return list of entering maintenance nodes
+   */
+  public List<DatanodeInfo> getEnteringMaintenanceNodes() {
+    final NodeOperationalState wantedOpState =
+        NodeOperationalState.ENTERING_MAINTENANCE;
+    return getNodes(wantedOpState, null);
+  }
+
+  /**
+   * Counts the nodes returned by {@link #getEnteringMaintenanceNodes()}.
+   *
+   * @return entering maintenance node count
+   */
+  public int getEnteringMaintenanceNodeCount() {
+    final List<DatanodeInfo> entering = getEnteringMaintenanceNodes();
+    return entering.size();
+  }
+
/**
* Returns all the nodes with the specified status.
*
@@ -501,6 +534,25 @@ public class NodeStateManager implements Runnable,
Closeable {
return nodeStateMap.getDatanodeInfos(opState, health);
}
+  /**
+   * Collects every datanode known to the node state map that reports at
+   * least one failed volume.
+   *
+   * @return list of nodes whose failed volume count is greater than zero
+   */
+  public List<DatanodeInfo> getVolumeFailuresNodes() {
+    return nodeStateMap.getAllDatanodeInfos()
+        .stream()
+        .filter(node -> node.getFailedVolumeCount() > 0)
+        .collect(Collectors.toList());
+  }
+
+  /**
+   * Counts the nodes returned by {@link #getVolumeFailuresNodes()}.
+   *
+   * @return number of nodes that have at least one failed volume
+   */
+  public int getVolumeFailuresNodeCount() {
+    final List<DatanodeInfo> withFailedVolumes = getVolumeFailuresNodes();
+    return withFailedVolumes.size();
+  }
+
/**
* Returns all the nodes which have registered to NodeStateManager.
*
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index 038f76b52e..3339b27f2c 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -1223,6 +1223,8 @@ public class SCMNodeManager implements NodeManager {
Map<String, String> nodeStatistics = new HashMap<>();
// Statistics node usaged
nodeUsageStatistics(nodeStatistics);
+ // Statistics node states
+ nodeStateStatistics(nodeStatistics);
// todo: Statistics of other instances
return nodeStatistics;
}
@@ -1265,6 +1267,19 @@ public class SCMNodeManager implements NodeManager {
nodeStatics.put(UsageStatics.STDEV.getLabel(), decimalFormat.format(dev));
}
+ private void nodeStateStatistics(Map<String, String> nodeStatics) {
+ int healthyNodeCount = nodeStateManager.getHealthyNodeCount();
+ int deadNodeCount = nodeStateManager.getDeadNodeCount();
+ int decommissioningNodeCount =
nodeStateManager.getDecommissioningNodeCount();
+ int enteringMaintenanceNodeCount =
nodeStateManager.getEnteringMaintenanceNodeCount();
+ int volumeFailuresNodeCount =
nodeStateManager.getVolumeFailuresNodeCount();
+ nodeStatics.put(StateStatistics.HEALTHY.getLabel(),
String.valueOf(healthyNodeCount));
+ nodeStatics.put(StateStatistics.DEAD.getLabel(),
String.valueOf(deadNodeCount));
+ nodeStatics.put(StateStatistics.DECOMMISSIONING.getLabel(),
String.valueOf(decommissioningNodeCount));
+ nodeStatics.put(StateStatistics.ENTERING_MAINTENANCE.getLabel(),
String.valueOf(enteringMaintenanceNodeCount));
+ nodeStatics.put(StateStatistics.VOLUME_FAILURES.getLabel(),
String.valueOf(volumeFailuresNodeCount));
+ }
+
/**
* Based on the current time and the last heartbeat, calculate the time
difference
* and get a string of the relative value. E.g. "2s ago", "1m 2s ago", etc.
@@ -1346,6 +1361,21 @@ public class SCMNodeManager implements NodeManager {
}
}
+  /**
+   * Labels under which the node state counts are stored in the node
+   * statistics map.
+   */
+  private enum StateStatistics {
+    HEALTHY("Healthy"),
+    DEAD("Dead"),
+    DECOMMISSIONING("Decommissioning"),
+    ENTERING_MAINTENANCE("EnteringMaintenance"),
+    VOLUME_FAILURES("VolumeFailures");
+
+    // Assigned once in the constructor; final so a constant's label can
+    // never be reassigned after construction.
+    private final String label;
+
+    StateStatistics(String label) {
+      this.label = label;
+    }
+
+    /**
+     * @return the key used for this statistic in the statistics map
+     */
+    public String getLabel() {
+      return label;
+    }
+  }
+
/**
* Returns the min of no healthy volumes reported out of the set
* of datanodes constituting the pipeline.
diff --git
a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html
b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html
index 67655b539f..5a4f2ff633 100644
--- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html
+++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html
@@ -28,7 +28,7 @@
</tbody>
</table>
-<h2>Statistics</h2>
+<h2>Usage Statistics</h2>
<table class="table table-bordered table-striped">
<tbody>
<tr>
@@ -54,6 +54,36 @@
</tbody>
</table>
+<h2>State Statistics</h2>
+<table class="table table-bordered table-striped">
+ <tbody>
+ <tr>
+ <th>Datanode State</th>
+ <th>Count</th>
+ </tr>
+ <tr>
+ <td>Healthy Nodes</td>
+ <td>{{statistics.nodes.state.healthy}}</td>
+ </tr>
+ <tr>
+ <td>Dead Nodes</td>
+ <td>{{statistics.nodes.state.dead}}</td>
+ </tr>
+ <tr>
+ <td>Decommissioning Nodes</td>
+ <td>{{statistics.nodes.state.decommissioning}}</td>
+ </tr>
+ <tr>
+ <td>Entering Maintenance Nodes</td>
+ <td>{{statistics.nodes.state.enteringmaintenance}}</td>
+ </tr>
+ <tr>
+ <td>Volume Failures Nodes</td>
+ <td>{{statistics.nodes.state.volumefailures}}</td>
+ </tr>
+ </tbody>
+</table>
+
<h2>Node Status</h2>
<div class="row">
<div class="col-md-6 text-left">
diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js
b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js
index 8ca9fb257c..41dc25cb65 100644
--- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js
+++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js
@@ -39,6 +39,13 @@
max : "N/A",
median : "N/A",
stdev : "N/A"
+ },
+ state : {
+ healthy : "N/A",
+ dead : "N/A",
+ decommissioning : "N/A",
+ enteringmaintenance : "N/A",
+ volumefailures : "N/A"
}
}
}
@@ -92,15 +99,25 @@
$scope.lastIndex = Math.ceil(nodeStatusCopy.length /
$scope.RecordsToDisplay);
$scope.nodeStatus = nodeStatusCopy.slice(0,
$scope.RecordsToDisplay);
-
ctrl.nodemanagermetrics.NodeStatistics.forEach(function(obj) {
- if(obj.key == "Min") {
- $scope.statistics.nodes.usages.min = obj.value;
- } else if(obj.key == "Max") {
- $scope.statistics.nodes.usages.max = obj.value;
- } else if(obj.key == "Median") {
- $scope.statistics.nodes.usages.median = obj.value;
- } else if(obj.key == "Stdev") {
- $scope.statistics.nodes.usages.stdev = obj.value;
+ ctrl.nodemanagermetrics.NodeStatistics.forEach(({key,
value}) => {
+ if(key == "Min") {
+ $scope.statistics.nodes.usages.min = value;
+ } else if(key == "Max") {
+ $scope.statistics.nodes.usages.max = value;
+ } else if(key == "Median") {
+ $scope.statistics.nodes.usages.median = value;
+ } else if(key == "Stdev") {
+ $scope.statistics.nodes.usages.stdev = value;
+ } else if(key == "Healthy") {
+ $scope.statistics.nodes.state.healthy = value;
+ } else if(key == "Dead") {
+ $scope.statistics.nodes.state.dead = value;
+ } else if(key == "Decommissioning") {
+ $scope.statistics.nodes.state.decommissioning =
value;
+ } else if(key == "EnteringMaintenance") {
+ $scope.statistics.nodes.state.enteringmaintenance
= value;
+ } else if(key == "VolumeFailures") {
+ $scope.statistics.nodes.state.volumefailures =
value;
}
});
});
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]