goiri commented on code in PR #6018:
URL: https://github.com/apache/hadoop/pull/6018#discussion_r1315126577


##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java:
##########
@@ -199,28 +202,36 @@ private int doRun() throws IOException {
       return ERR_CODE_FAILED_CONNECT;
     }
 
-    if (!checkLayoutVersion(nsInfo)) {
-      LOG.error("Layout version on remote node (" + nsInfo.getLayoutVersion()
-          + ") does not match " + "this node's layout version ("
-          + HdfsServerConstants.NAMENODE_LAYOUT_VERSION + ")");
+    if (!checkLayoutVersion(nsInfo, isRollingUpgrade)) {
+      String msg = isRollingUpgrade ? "Layout version on remote node in 
rolling"

Review Comment:
   This is not very readable to be honest.
   Let's add a clear if and use the logger {} if possible.



##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java:
##########
@@ -405,8 +422,16 @@ private boolean checkLogsAvailableForRead(FSImage image, 
long imageTxId,
     }
   }
 
-  private boolean checkLayoutVersion(NamespaceInfo nsInfo) throws IOException {
-    return (nsInfo.getLayoutVersion() == 
HdfsServerConstants.NAMENODE_LAYOUT_VERSION);
+  private boolean checkLayoutVersion(NamespaceInfo nsInfo, boolean 
isRollingUpgrade) {
+    if (isRollingUpgrade) {
+      // During a rolling upgrade the service layout versions may be different,
+      // but we should check that the layout version being sent is compatible
+      return nsInfo.getLayoutVersion() <
+          HdfsServerConstants.MINIMUM_COMPATIBLE_NAMENODE_LAYOUT_VERSION;
+    } else {
+      return nsInfo.getLayoutVersion() ==

Review Comment:
   Avoid the else and single line.



##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java:
##########
@@ -172,6 +182,67 @@ public void testDownloadingLaterCheckpoint() throws 
Exception {
     restartNameNodesFromIndex(1);
   }
 
+  /**
+   * Test for downloading a checkpoint made at a later checkpoint
+   * from the active.

Review Comment:
   I guess there is more to this than this.



##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java:
##########
@@ -172,6 +182,67 @@ public void testDownloadingLaterCheckpoint() throws 
Exception {
     restartNameNodesFromIndex(1);
   }
 
+  /**
+   * Test for downloading a checkpoint made at a later checkpoint
+   * from the active.
+   */
+  @Test
+  public void testRollingUpgradeBootstrapStandby() throws Exception {
+    removeStandbyNameDirs();
+
+    int futureVersion = NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION - 1;
+
+    DistributedFileSystem fs = cluster.getFileSystem(0);
+    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+    fs.saveNamespace();
+    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+
+    // Setup BootstrapStandby to think it is a future NameNode version
+    BootstrapStandby bs = spy(new BootstrapStandby());
+    Answer<NNStorage> storageAnswer = storage ->  {
+      NNStorage storageSpy = (NNStorage) spy(storage.callRealMethod());
+      doReturn(futureVersion).when(storageSpy).getServiceLayoutVersion();
+      return storageSpy;
+    };
+    doAnswer(storageAnswer).when(bs).getStorage(any(), any(), any());
+
+    bs.setConf(cluster.getConfiguration(1));
+
+    // Start rolling upgrade
+    fs.rollingUpgrade(RollingUpgradeAction.PREPARE);
+    nn0 = spy(nn0);
+
+    // Make nn0 think it is a future version
+    Answer<FSImage> fsImageAnswer = fsImage -> {
+      FSImage fsImageSpy = (FSImage) spy(fsImage.callRealMethod());
+      doAnswer(storageAnswer).when(fsImageSpy).getStorage();
+      return fsImageSpy;
+    };
+    doAnswer(fsImageAnswer).when(nn0).getFSImage();
+
+    // Roll edit logs a few times to inflate txid
+    nn0.getRpcServer().rollEditLog();
+    nn0.getRpcServer().rollEditLog();
+    // Make checkpoint
+    NameNodeAdapter.enterSafeMode(nn0, false);
+    NameNodeAdapter.saveNamespace(nn0);
+    NameNodeAdapter.leaveSafeMode(nn0);
+
+    long expectedCheckpointTxId = NameNodeAdapter.getNamesystem(nn0)
+        .getFSImage().getMostRecentCheckpointTxId();
+    assertEquals(11, expectedCheckpointTxId);
+
+    bs.run(new String[]{"-force"});
+    FSImageTestUtil.assertNNHasCheckpoints(cluster, 1,
+        ImmutableList.of((int) expectedCheckpointTxId));
+
+    // Make sure the bootstrap was successful
+    FSImageTestUtil.assertNNFilesMatch(cluster);
+
+    // We should now be able to start the standby successfully
+    cluster.restartNameNode(1, true, "-rollingUpgrade", "started");

Review Comment:
   Anything we should assert right after?



##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java:
##########
@@ -1133,11 +1140,13 @@ LayoutVersion.Feature.TXID_BASED_LAYOUT, 
getLayoutVersion())) {
 
   @Override
   public NamespaceInfo getNamespaceInfo() {
-    return new NamespaceInfo(
+    NamespaceInfo nsInfo = new NamespaceInfo(

Review Comment:
   At some point it may make sense to extend the constructor to support storage 
info.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to