GlenGeng commented on a change in pull request #1936:
URL: https://github.com/apache/ozone/pull/1936#discussion_r586249039



##########
File path: 
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBStoreHAManager.java
##########
@@ -19,6 +19,7 @@
 
 import org.apache.hadoop.hdds.utils.db.Table;
 
+

Review comment:
       Please undo the change.

##########
File path: 
hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
##########
@@ -163,4 +163,6 @@ public static OzoneConfiguration removeSelfId(
     }
     return conf;
   }
+

Review comment:
       Please undo the change.

##########
File path: 
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
##########
@@ -82,4 +98,185 @@ public static ScmBlockLocationProtocol getScmBlockClient(
         .createProxy(scmBlockLocationClient, ScmBlockLocationProtocol.class,
             conf);
   }
+
+  /**
+   * Replace the current DB with the new DB checkpoint.
+   *
+   * @param lastAppliedIndex the last applied index in the current SCM DB.
+   * @param checkpointPath   path to the new DB checkpoint
+   * @return location of backup of the original DB
+   * @throws Exception
+   */
+  public static File replaceDBWithCheckpoint(long lastAppliedIndex,
+      File oldDB, Path checkpointPath, String dbPrefix) throws IOException {
+
+    // Take a backup of the current DB
+    String dbBackupName =
+        dbPrefix + lastAppliedIndex + "_" + System
+            .currentTimeMillis();
+    File dbDir = oldDB.getParentFile();
+    File dbBackup = new File(dbDir, dbBackupName);
+
+    try {
+      Files.move(oldDB.toPath(), dbBackup.toPath());
+    } catch (IOException e) {
+      LOG.error("Failed to create a backup of the current DB. Aborting "
+          + "snapshot installation.");
+      throw e;
+    }
+
+    // Move the new DB checkpoint into the metadata dir
+    Path markerFile = new File(dbDir, DB_TRANSIENT_MARKER).toPath();
+    try {
+      // Create a Transient Marker file. This file will be deleted if the
+      // checkpoint DB is successfully moved to the old DB location or if the
+      // old DB backup is reset to its location. If not, then the DB is in
+      // an inconsistent state and this marker file will fail it from
+      // starting up.
+      Files.createFile(markerFile);
+      Files.move(checkpointPath, oldDB.toPath());
+      Files.deleteIfExists(markerFile);
+    } catch (IOException e) {
+      LOG.error("Failed to move downloaded DB checkpoint {} to metadata "
+              + "directory {}. Resetting to original DB.", checkpointPath,
+          oldDB.toPath());
+      try {
+        Files.move(dbBackup.toPath(), oldDB.toPath());
+        Files.deleteIfExists(markerFile);
+      } catch (IOException ex) {
+        String errorMsg = "Failed to reset to original DB. SCM is in an "
+            + "inconsistent state.";
+        ExitUtils.terminate(1, errorMsg, ex, LOG);
+      }
+      throw e;
+    }
+    return dbBackup;
+  }
+
+  /**
+   * Obtain SCMTransactionInfo from Checkpoint.
+   */
+  public static TransactionInfo getTrxnInfoFromCheckpoint(
+      OzoneConfiguration conf, Path dbPath, DBDefinition definition)
+      throws Exception {
+
+    if (dbPath != null) {
+      Path dbDir = dbPath.getParent();
+      Path dbFile = dbPath.getFileName();
+      if (dbDir != null && dbFile != null) {
+        return getTransactionInfoFromDB(conf, dbDir, dbFile.toString(),
+            definition);
+      }
+    }
+
+    throw new IOException("Checkpoint " + dbPath + " does not have proper " +
+        "DB location");
+  }
+
+  /**
+   * Obtain Transaction info from DB.
+   * @param tempConfig
+   * @param dbDir path to DB
+   * @return TransactionInfo
+   * @throws Exception
+   */
+  private static TransactionInfo getTransactionInfoFromDB(
+      OzoneConfiguration tempConfig, Path dbDir, String dbName,
+      DBDefinition definition)
+      throws Exception {
+    DBStore dbStore = loadDB(tempConfig, dbDir.toFile(),
+        dbName, definition);
+
+    // Get the table name with TransactionInfo as the value. The transaction
+    // info table name are different in SCM and SCM.
+
+    // In case, a new table gets added where the value is TransactionInfo, this
+    // logic may not work.
+
+
+    Table<String, TransactionInfo> transactionInfoTable =
+        getTransactionInfoTable(dbStore, definition);
+
+    TransactionInfo transactionInfo =
+        transactionInfoTable.get(TRANSACTION_INFO_KEY);
+    dbStore.close();
+
+    if (transactionInfo == null) {
+      throw new IOException("Failed to read TransactionInfo from DB " +
+          definition.getName() + " at " + dbDir);
+    }
+    return transactionInfo;
+  }
+
+  public static Table<String, TransactionInfo> getTransactionInfoTable(
+      DBStore dbStore, DBDefinition definition) throws IOException {
+    return Arrays.stream(definition.getColumnFamilies())
+        .filter(t -> t.getValueType() == TransactionInfo.class).findFirst()
+        .get().getTable(dbStore);
+  }
+  /**

Review comment:
       Add a line here.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
##########
@@ -248,4 +248,9 @@ public boolean shouldRun() {
   public String getServiceName() {
     return SCMBlockDeletingService.class.getSimpleName();
   }
+
+  @Override
+  public void stop() {
+    shutdown();

Review comment:
       Replace `shutdown();` with `throw new RuntimeException("Un-supported 
operation.");`, in case it is called accidentally in the future.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java
##########
@@ -206,6 +206,15 @@ public void onFlush() {
     skippingRetryTxIDs.clear();
   }
 
+  @Override
+  public void reinitialize(
+      Table<Long, DeletedBlocksTransaction> deletedBlocksTXTable) {
+    // Before Reinitilization, flush will be called from Ratis StateMachine.
+    // Just the DeletedDb will be loaded here.
+    Preconditions.checkArgument(deletingTxIDs.isEmpty());
+    this.deletedTable = deletedBlocksTXTable;

Review comment:
       ```
   // We don't need to handle transactionBuffer, deletingTxIDs and 
skippingRetryTxIDs here, 
   // since onFlush() will be called before reinitialization. Just update 
deletedTable here.
   ```

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java
##########
@@ -128,6 +124,21 @@ public void addContainer(final ContainerInfo info)
     }
   }
 
+  /**
+   * The idea here is to rebuild the containerStateMap once the SCM 
StateMachine
+   * reloads from the the DBCheckpoint downloaded from leader SCM.
+   *
+   * Replica Map won't be rebuild but will remain as it is.
+   */
+  private void loadFromDB(final ContainerInfo info) throws SCMException {

Review comment:
       I guess we don't need this change, right?
   Also, `loadFromDB` is not a straightforward name, since there is no DB 
access inside this function.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMService.java
##########
@@ -16,6 +16,7 @@
  */
 package org.apache.hadoop.hdds.scm.ha;
 
+

Review comment:
       undo this change.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
##########
@@ -1110,7 +1114,6 @@ public void stop() {
 
     scmSafeModeManager.stop();
   }
-

Review comment:
       It would be better to keep the empty line here.

##########
File path: 
hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ha/utils/package-info.java
##########
@@ -0,0 +1,17 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.

Review comment:
       This is an empty directory; it would be better to remove it.

##########
File path: 
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
##########
@@ -24,13 +25,28 @@
 import 
org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider;
 import org.apache.hadoop.hdds.tracing.TracingUtil;
+import org.apache.hadoop.hdds.utils.db.*;

Review comment:
       expand the `db.*`

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
##########
@@ -342,6 +342,12 @@ public int getNumOfValidTransactions() throws IOException {
     }
   }
 
+  @Override
+  public void reinitialize(
+      Table<Long, DeletedBlocksTransaction> deletedBlocksTXTable) {
+

Review comment:
       in case it is called accidentally in the future.
   ```
   throw new RuntimeException("Un-supported operation.");
   ```

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/MockSCMHAManager.java
##########
@@ -127,72 +133,55 @@ public void start() {
 
     @Override
     public void registerStateMachineHandler(final RequestType handlerType,
-                                            final Object handler) {
+        final Object handler) {
       handlers.put(handlerType, handler);
     }
 
     @Override
     public SCMRatisResponse submitRequest(final SCMRatisRequest request)
         throws IOException {
-      final RaftGroupMemberId raftId = RaftGroupMemberId.valueOf(
-          RaftPeerId.valueOf("peer"), RaftGroupId.randomId());
+      final RaftGroupMemberId raftId = RaftGroupMemberId

Review comment:
       Please undo the changes within this file. There is quite a lot of 
affected code, but no real change.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/MockSCMHAManager.java
##########
@@ -204,32 +193,29 @@ private Message process(final SCMRatisRequest request)
       }
     }
 
-    @Override
-    public void stop() {
+    @Override public void stop() {

Review comment:
       Undo such changes ?

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImplV2.java
##########
@@ -298,6 +298,13 @@ public int getNumOfValidTransactions() throws IOException {
     }
   }
 
+  @Override
+  public void reinitialize(
+      Table<Long, DeletedBlocksTransaction> deletedTable) {
+    this.deletedBlocksTXTable = deletedTable;

Review comment:
       no need `this.deletedBlocksTXTable = deletedTable;`
   
   it is not used in DeletedBlockLogImplV2, and already deleted in PR 
`HDDS-4651: Distributed Sequence ID Gen`.
   
   Please add a comment
   ```
   // we don't need to handle transactionToDNsCommitMap and 
deletedBlockLogStateManager, since they will be cleared when becoming the leader.
   ```

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManager.java
##########
@@ -52,4 +53,6 @@
   void shutdown() throws IOException;
 
   boolean addSCM(AddSCMRequest request) throws IOException;
+
+  TermIndex installSnapshotFromLeader(String leaderId);

Review comment:
       Missing Javadoc.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java
##########
@@ -498,6 +498,22 @@ public void removeContainer(final HddsProtos.ContainerID 
id)
     }
   }
 
+  @Override
+  public void reinitialize(
+      Table<ContainerID, ContainerInfo> store) throws IOException {
+    lock.writeLock().lock();
+    try {
+      close();
+      containers = null;

Review comment:
       remove `containers = null;`

##########
File path: 
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateServer.java
##########
@@ -130,6 +131,9 @@ X509Certificate getCertificate(String certSerialId)
   List<X509Certificate> listCertificate(NodeType role,
       long startSerialId, int count, boolean isRevoked) throws IOException;
 
+
+  void reinitialize(SCMMetadataStore scmMetadataStore);

Review comment:
       Missing Javadoc.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerV2.java
##########
@@ -173,6 +174,9 @@ ContainerInfo getMatchingContainer(long size, String owner,
   void removeContainer(HddsProtos.ContainerID containerInfo)
       throws IOException;
 
+  void reinitialize(Table<ContainerID, ContainerInfo> containerStore)

Review comment:
       How about adding a
   ```
   /**
    *
    */
   ```

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerV2.java
##########
@@ -40,6 +41,8 @@
 public interface ContainerManagerV2 extends Closeable {
   // TODO: Rename this to ContainerManager
 
+  void reinitialize(Table<ContainerID, ContainerInfo> containerStore)

Review comment:
       Missing Javadoc.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
##########
@@ -122,4 +123,7 @@ default void waitPipelineReady(PipelineID pipelineID, long 
timeout)
    * @return boolean
    */
   boolean getSafeModeStatus();
+
+  void reinitialize(Table<PipelineID, Pipeline> pipelineStore)

Review comment:
       Missing Javadoc.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java
##########
@@ -121,6 +121,20 @@ public ContainerManagerImpl(
     this.scmContainerManagerMetrics = SCMContainerManagerMetrics.create();
   }
 
+  @Override
+  public void reinitialize(Table<ContainerID, ContainerInfo> containerStore)
+      throws IOException {
+    lock.lock();
+    try {
+      containerStateManager.reinitialize(containerStore);
+    } catch (IOException ioe) {
+      LOG.error("Failed to reinitialize container Manager", ioe);

Review comment:
       `container Manager` -> `ContainerManager`

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMService.java
##########
@@ -62,4 +63,8 @@ default void notifyEventTriggered(Event event) {
     NEW_NODE_HANDLER_TRIGGERED,
     UNHEALTHY_TO_HEALTHY_NODE_HANDLER_TRIGGERED
   }
+
+  void start();
+
+  void stop();

Review comment:
       Missing Javadoc.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/DBTransactionBuffer.java
##########
@@ -41,4 +41,8 @@
   void setLatestSnapshot(SnapshotInfo latestSnapshot);
 
   void flush() throws IOException;
+
+  void init() throws IOException;
+
+  void close() throws IOException;

Review comment:
       remove close here, since already `extends Closeable`.

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManagerV2Impl.java
##########
@@ -326,8 +326,25 @@ public void deactivatePipeline(PipelineID pipelineID) 
throws IOException {
     throw new IOException("Not supported.");
   }
 
-  // legacy interfaces end
+  @Override
+  public void reinitialize(Table<PipelineID, Pipeline> store)
+      throws IOException {
+    lock.writeLock().lock();
+    try {
+      pipelineStore.close();
+      this.pipelineStateMap = null;

Review comment:
       Why do we need to explicitly set `this.pipelineStateMap` to null?

##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java
##########
@@ -759,6 +759,11 @@ public boolean getSafeModeStatus() {
     return this.isInSafeMode.get();
   }
 
+  @Override
+  public void reinitialize(Table<PipelineID, Pipeline> store)
+      throws IOException {
+  }

Review comment:
       For safety,
   ```
   throw new RuntimeException("Un-supported operation.");
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to