ayushtkn commented on code in PR #5432:
URL: https://github.com/apache/hadoop/pull/5432#discussion_r1121273716


##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java:
##########
@@ -1101,15 +1099,12 @@ public void testReportBadBlocks() throws Exception {
 
       block = DFSTestUtil.getFirstBlock(fs, filePath);
       // Test for the overloaded method reportBadBlocks
-      dataNode.reportBadBlocks(block, dataNode.getFSDataset()
-          .getFsVolumeReferences().get(0));
-      Thread.sleep(3000);
-      BlockManagerTestUtil.updateState(cluster.getNamesystem()
-          .getBlockManager());
-      // Verify the bad block has been reported to namenode
-      Assert.assertEquals(1, 
cluster.getNamesystem().getCorruptReplicaBlocks());
-    } finally {
-      cluster.shutdown();
+      dataNode.reportBadBlocks(block, 
dataNode.getFSDataset().getFsVolumeReferences().get(0));
+      GenericTestUtils.waitFor(() -> {
+        
BlockManagerTestUtil.updateState(cluster.getNamesystem().getBlockManager());
+        // Verify the bad block has been reported to namenode
+        return 1 == cluster.getNamesystem().getCorruptReplicaBlocks();
+      }, 100, 10000, "Corrupted replica blocks could not be found");

Review Comment:
   I gave it a try and was able to repro this. With **triggerHeartbeat**, it 
worked for me. I think that is a standard practice for such cases running since 
legacy time for such cases(At least my time). Viraj I tried to reduce the 
ibrManager.wait(100) to even 1 and it still passes for me. I couldn't repro the 
case you are mentioning, do you intend to say if we put a sleep just before 
processQueueMessages, things should screw up? 
   
   Well touching that util for our use case won't be a good idea. If it works 
for others, the intent of that util is to just make sure the heartbeat got sent 
and if it does that correctly, means doesn't return before sending heartbeat, 
it is good enough to stay as is. If something breaks then it is on the guy 
using it, can't blame this util.
   
   If odds are against triggerHeartbeat, I would still try to avoid this sleep, 
Is something like this an option
   ```
   diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
   index fdd66cb05d3..1dc86adafc8 100644
   --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
   +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
   @@ -76,6 +76,10 @@
      private volatile String bpId;
      private final DataNode dn;
    
   +  BPServiceActor getBpServiceToActive() {
   +    return bpServiceToActive;
   +  }
   +
      /**
       * A reference to the BPServiceActor associated with the currently
       * ACTIVE NN. In the case that all NameNodes are in STANDBY mode,
   diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
   index e9f424604b4..456826f630c 100755
   --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
   +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
   @@ -1001,7 +1001,8 @@ public void bpThreadEnqueue(BPServiceActorAction 
action) {
        }
      }
    
   -  private void processQueueMessages() {
   +  @VisibleForTesting
   +  void processQueueMessages() {
        LinkedList<BPServiceActorAction> duplicateQueue;
        synchronized (bpThreadQueue) {
          duplicateQueue = new LinkedList<BPServiceActorAction>(bpThreadQueue);
   diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java
   index dba5a146f0c..b70d9afd30e 100644
   --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java
   +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java
   @@ -269,4 +269,13 @@ public Boolean get() {
          }
        }, 100, 10000);
      }
   +
   +  /**
   +   * Processes all the enqueued messages to active namenode
   +   * @param dataNode the datanode.
   +   */
   +  public static void processQueueMessagesToActive(DataNode dataNode) {
   +    dataNode.getAllBpOs().forEach(bpos -> 
bpos.getBpServiceToActive().processQueueMessages());
   +  }
   +
    }
   diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
   index d6f42f3d020..a4d400f2777 100644
   --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
   +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
   @@ -1101,9 +1101,8 @@ public void testReportBadBlocks() throws Exception {
    
          block = DFSTestUtil.getFirstBlock(fs, filePath);
          // Test for the overloaded method reportBadBlocks
   -      dataNode.reportBadBlocks(block, dataNode.getFSDataset()
   -          .getFsVolumeReferences().get(0));
   -      Thread.sleep(3000);
   +      dataNode.reportBadBlocks(block, 
dataNode.getFSDataset().getFsVolumeReferences().get(0));
   +      DataNodeTestUtils.processQueueMessagesToActive(dataNode);
          BlockManagerTestUtil.updateState(cluster.getNamesystem()
              .getBlockManager());
          // Verify the bad block has been reported to namenode
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to