virajjasani commented on code in PR #5349:
URL: https://github.com/apache/hadoop/pull/5349#discussion_r1096487189
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java:
##########
@@ -294,4 +297,107 @@ public void testDataNodeMXBeanSlowDisksEnabled() throws
Exception {
if (cluster != null) {cluster.shutdown();}
}
}
+
+ @Test
+ public void testDataNodeMXBeanLastHeartbeats() throws Exception {
+ Configuration conf = new Configuration();
+ try (MiniDFSCluster cluster = new MiniDFSCluster
+ .Builder(conf)
+ .nnTopology(MiniDFSNNTopology.simpleHATopology(2))
+ .numDataNodes(1)
+ .build()) {
+ cluster.waitActive();
+ cluster.transitionToActive(0);
+ cluster.transitionToStandby(1);
+
+ DataNode datanode = cluster.getDataNodes().get(0);
+
+ MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+ ObjectName mxbeanName = new ObjectName(
+ "Hadoop:service=DataNode,name=DataNodeInfo");
+
+ // Verify and wait until one of the BP service actor identifies active
namenode as active
+ // and another as standby.
+ GenericTestUtils.waitFor(() -> {
+ List<Map<String, String>> bpServiceActorInfo =
datanode.getBPServiceActorInfoMap();
+ Map<String, String> bpServiceActorInfo1 = bpServiceActorInfo.get(0);
+ Map<String, String> bpServiceActorInfo2 = bpServiceActorInfo.get(1);
+ return (HAServiceProtocol.HAServiceState.ACTIVE.toString()
+ .equals(bpServiceActorInfo1.get("NamenodeHaState"))
+ && HAServiceProtocol.HAServiceState.STANDBY.toString()
+ .equals(bpServiceActorInfo2.get("NamenodeHaState")))
+ || (HAServiceProtocol.HAServiceState.ACTIVE.toString()
+ .equals(bpServiceActorInfo2.get("NamenodeHaState"))
+ && HAServiceProtocol.HAServiceState.STANDBY.toString()
+ .equals(bpServiceActorInfo1.get("NamenodeHaState")));
+ },
+ 500,
+ 8000,
+ "No namenode is reported active");
+
+ // basic metrics validation
+ String clusterId = (String) mbs.getAttribute(mxbeanName, "ClusterId");
+ Assert.assertEquals(datanode.getClusterId(), clusterId);
+ String version = (String)mbs.getAttribute(mxbeanName, "Version");
+ Assert.assertEquals(datanode.getVersion(),version);
+ String bpActorInfo = (String) mbs.getAttribute(mxbeanName,
"BPServiceActorInfo");
+ Assert.assertEquals(datanode.getBPServiceActorInfo(), bpActorInfo);
+
+ // Verify that last heartbeat sent to both namenodes in last 5 sec.
+ assertLastHeartbeatSentTime(datanode, "LastHeartbeat");
+ // Verify that last heartbeat response from both namenodes have been
received within
+ // last 5 sec.
+ assertLastHeartbeatSentTime(datanode, "LastHeartbeatResponseTime");
+
+
+ NameNode sbNameNode = cluster.getNameNode(1);
+
+ // Stopping standby namenode
+ sbNameNode.stop();
+
+ // Verify that last heartbeat response time from one of the namenodes
would stay much higher
+ // after stopping one namenode.
+ GenericTestUtils.waitFor(() -> {
+ List<Map<String, String>> bpServiceActorInfo =
datanode.getBPServiceActorInfoMap();
+ Map<String, String> bpServiceActorInfo1 = bpServiceActorInfo.get(0);
+ Map<String, String> bpServiceActorInfo2 = bpServiceActorInfo.get(1);
+
+ long lastHeartbeatResponseTime1 =
+
Long.parseLong(bpServiceActorInfo1.get("LastHeartbeatResponseTime"));
+ long lastHeartbeatResponseTime2 =
+
Long.parseLong(bpServiceActorInfo2.get("LastHeartbeatResponseTime"));
+
+ LOG.info("Last heartbeat response from namenode 1: {}",
lastHeartbeatResponseTime1);
+ LOG.info("Last heartbeat response from namenode 2: {}",
lastHeartbeatResponseTime2);
+
+ return (lastHeartbeatResponseTime1 < 5L && lastHeartbeatResponseTime2
> 5L) || (
+ lastHeartbeatResponseTime1 > 5L && lastHeartbeatResponseTime2 <
5L);
+
+ },
+ 200,
+ 15000,
+ "Last heartbeat response should be higher than 5s for at least one
namenode");
+
+ // Verify that last heartbeat sent to both namenodes in last 5 sec even
though
+ // the last heartbeat received from one of the namenodes is greater than
5 sec ago.
+ assertLastHeartbeatSentTime(datanode, "LastHeartbeat");
+ }
+ }
+
+ private static void assertLastHeartbeatSentTime(DataNode datanode, String
lastHeartbeat) {
+ List<Map<String, String>> bpServiceActorInfo =
datanode.getBPServiceActorInfoMap();
+ Map<String, String> bpServiceActorInfo1 = bpServiceActorInfo.get(0);
+ Map<String, String> bpServiceActorInfo2 = bpServiceActorInfo.get(1);
+
+ long lastHeartbeatSent1 =
+ Long.parseLong(bpServiceActorInfo1.get(lastHeartbeat));
+ long lastHeartbeatSent2 =
+ Long.parseLong(bpServiceActorInfo2.get(lastHeartbeat));
+
+ Assert.assertTrue(lastHeartbeat + " for first bp service actor is higher
than 5s",
+ lastHeartbeatSent1 < 5L);
+ Assert.assertTrue(lastHeartbeat + " for second bp service actor is higher
than 5s",
+ lastHeartbeatSent2 < 5L);
Review Comment:
Nice question. I have run the test a little more than 35 times locally so far,
and the heartbeat has never been more than 3s, so I feel 5s is a good value. But
I am happy to change it if you have any other suggestions.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]