This is an automated email from the ASF dual-hosted git repository.

szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new e2f2aeb2c6 HDDS-11548. Add some logging to the StateMachine (#7291)
e2f2aeb2c6 is described below

commit e2f2aeb2c691aa9ff9a83f86ca75f8ef77762d3f
Author: jianghuazhu <[email protected]>
AuthorDate: Fri Oct 11 18:49:00 2024 +0800

    HDDS-11548. Add some logging to the StateMachine (#7291)
---
 .../server/ratis/ContainerStateMachine.java        |  1 +
 .../hdds/scm/ha/SCMHADBTransactionBufferImpl.java  |  6 ++++
 .../apache/hadoop/hdds/scm/ha/SCMStateMachine.java |  7 +++++
 .../ozone/om/ratis/OzoneManagerStateMachine.java   | 32 ++++++++++++++--------
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
index e7eb0aa781..3a48ecaee5 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
@@ -284,6 +284,7 @@ public class ContainerStateMachine extends BaseStateMachine 
{
     storage.init(raftStorage);
     ratisServer.notifyGroupAdd(gid);
 
+    LOG.info("{}: initialize {}", server.getId(), id);
     loadSnapshot(storage.getLatestSnapshot());
   }
 
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java
index f1ee76a198..cd0346d72f 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java
@@ -26,6 +26,8 @@ import org.apache.hadoop.hdds.utils.TransactionInfo;
 import org.apache.hadoop.hdds.utils.db.BatchOperation;
 import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.ratis.statemachine.SnapshotInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.concurrent.atomic.AtomicLong;
@@ -41,6 +43,8 @@ import static 
org.apache.hadoop.ozone.OzoneConsts.TRANSACTION_INFO_KEY;
  * operation in DB.
  */
 public class SCMHADBTransactionBufferImpl implements SCMHADBTransactionBuffer {
+
+  public static final Logger LOG = 
LoggerFactory.getLogger(SCMHADBTransactionBufferImpl.class);
   private final StorageContainerManager scm;
   private SCMMetadataStore metadataStore;
   private BatchOperation currentBatchOperation;
@@ -107,6 +111,8 @@ public class SCMHADBTransactionBufferImpl implements 
SCMHADBTransactionBuffer {
 
   @Override
   public void setLatestSnapshot(SnapshotInfo latestSnapshot) {
+    LOG.info("{}: Set latest Snapshot to {}",
+        scm.getScmHAManager().getRatisServer().getDivision().getId(), 
latestSnapshot);
     this.latestSnapshot.set(latestSnapshot);
   }
 
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
index 1128accd2f..5805fe67e4 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
@@ -137,6 +137,7 @@ public class SCMStateMachine extends BaseStateMachine {
     getLifeCycle().startAndTransition(() -> {
       super.initialize(server, id, raftStorage);
       storage.init(raftStorage);
+      LOG.info("{}: initialize {}", server.getId(), id);
     });
   }
 
@@ -149,6 +150,9 @@ public class SCMStateMachine extends BaseStateMachine {
       final SCMRatisRequest request = SCMRatisRequest.decode(
           Message.valueOf(trx.getStateMachineLogEntry().getLogData()));
 
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("{}: applyTransaction {}", getId(), 
TermIndex.valueOf(trx.getLogEntry()));
+      }
       try {
         applyTransactionFuture.complete(process(request));
       } catch (SCMException ex) {
@@ -389,6 +393,7 @@ public class SCMStateMachine extends BaseStateMachine {
   @Override
   public void pause() {
     final LifeCycle lc = getLifeCycle();
+    LOG.info("{}: Try to pause from current LifeCycle state {}", getId(), lc);
     if (lc.getCurrentState() != LifeCycle.State.NEW) {
       lc.transition(LifeCycle.State.PAUSING);
       lc.transition(LifeCycle.State.PAUSED);
@@ -414,6 +419,8 @@ public class SCMStateMachine extends BaseStateMachine {
       throw new IOException(e);
     }
 
+    LOG.info("{}: SCMStateMachine is reinitializing. newTermIndex = {}", 
getId(), termIndex);
+
     // re-initialize the DBTransactionBuffer and update the lastAppliedIndex.
     try {
       transactionBuffer.init();
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
index 463afba942..aa7211fa03 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
@@ -46,6 +46,7 @@ import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRespo
 import org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler;
 import org.apache.hadoop.ozone.protocolPB.RequestHandler;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.Time;
 import org.apache.hadoop.util.concurrent.HadoopExecutors;
 import org.apache.ratis.proto.RaftProtos;
 import org.apache.ratis.proto.RaftProtos.StateMachineLogEntryProto;
@@ -88,7 +89,6 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
       new SimpleStateMachineStorage();
   private final OzoneManager ozoneManager;
   private RequestHandler handler;
-  private RaftGroupId raftGroupId;
   private volatile OzoneManagerDoubleBuffer ozoneManagerDoubleBuffer;
   private final ExecutorService executorService;
   private final ExecutorService installSnapshotExecutor;
@@ -134,8 +134,8 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
       RaftStorage raftStorage) throws IOException {
     getLifeCycle().startAndTransition(() -> {
       super.initialize(server, id, raftStorage);
-      this.raftGroupId = id;
       storage.init(raftStorage);
+      LOG.info("{}: initialize {} with {}", getId(), id, 
getLastAppliedTermIndex());
     });
   }
 
@@ -143,8 +143,9 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
   public synchronized void reinitialize() throws IOException {
     loadSnapshotInfoFromDB();
     if (getLifeCycleState() == LifeCycle.State.PAUSED) {
-      unpause(getLastAppliedTermIndex().getIndex(),
-          getLastAppliedTermIndex().getTerm());
+      final TermIndex lastApplied = getLastAppliedTermIndex();
+      unpause(lastApplied.getIndex(), lastApplied.getTerm());
+      LOG.info("{}: reinitialize {} with {}", getId(), getGroupId(), 
lastApplied);
     }
   }
 
@@ -160,6 +161,7 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
                                   RaftPeerId newLeaderId) {
     // Initialize OMHAMetrics
     ozoneManager.omHAMetricsInit(newLeaderId.toString());
+    LOG.info("{}: leader changed to {}", groupMemberId, newLeaderId);
   }
 
   /** Notified by Ratis for non-StateMachine term-index update. */
@@ -263,7 +265,7 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
         messageContent);
 
     Preconditions.checkArgument(raftClientRequest.getRaftGroupId().equals(
-        raftGroupId));
+        getGroupId()));
     try {
       handler.validateRequest(omRequest);
     } catch (IOException ioe) {
@@ -293,6 +295,10 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
 
     OzoneManagerPrepareState prepareState = ozoneManager.getPrepareState();
 
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("{}: preAppendTransaction {}", getId(), 
TermIndex.valueOf(trx.getLogEntry()));
+    }
+
     if (cmdType == OzoneManagerProtocolProtos.Type.Prepare) {
       // Must authenticate prepare requests here, since we must determine
       // whether or not to apply the prepare gate before proceeding with the
@@ -303,8 +309,7 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
       if (ozoneManager.getAclsEnabled()
           && !ozoneManager.isAdmin(userGroupInformation)) {
         String message = "Access denied for user " + userGroupInformation
-            + ". "
-            + "Superuser privilege is required to prepare ozone managers.";
+            + ". Superuser privilege is required to prepare 
upgrade/downgrade.";
         OMException cause =
             new OMException(message, OMException.ResultCodes.ACCESS_DENIED);
         // Leader should not step down because of this failure.
@@ -341,6 +346,7 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
           : OMRatisHelper.convertByteStringToOMRequest(
           trx.getStateMachineLogEntry().getLogData());
       final TermIndex termIndex = TermIndex.valueOf(trx.getLogEntry());
+      LOG.debug("{}: applyTransaction {}", getId(), termIndex);
       // In the current approach we have one single global thread executor.
       // with single thread. Right now this is being done for correctness, as
       // applyTransaction will be run on multiple OM's we want to execute the
@@ -427,12 +433,14 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
    */
   public synchronized void unpause(long newLastAppliedSnaphsotIndex,
       long newLastAppliedSnapShotTermIndex) {
-    LOG.info("OzoneManagerStateMachine is un-pausing");
     if (statePausedCount.decrementAndGet() == 0) {
       getLifeCycle().startAndTransition(() -> {
         this.ozoneManagerDoubleBuffer = buildDoubleBufferForRatis();
         this.setLastAppliedTermIndex(TermIndex.valueOf(
             newLastAppliedSnapShotTermIndex, newLastAppliedSnaphsotIndex));
+        LOG.info("{}: OzoneManagerStateMachine un-pause completed. " +
+            "newLastAppliedSnaphsotIndex: {}, newLastAppliedSnapShotTermIndex: 
{}",
+                getId(), newLastAppliedSnaphsotIndex, 
newLastAppliedSnapShotTermIndex);
       });
     }
   }
@@ -482,15 +490,15 @@ public class OzoneManagerStateMachine extends 
BaseStateMachine {
     final TermIndex applied = getLastAppliedTermIndex();
     final TermIndex notified = getLastNotifiedTermIndex();
     final TermIndex snapshot = applied.compareTo(notified) > 0 ? applied : 
notified;
-    LOG.info(" applied = {}", applied);
-    LOG.info(" skipped = {}", lastSkippedIndex);
-    LOG.info("notified = {}", notified);
-    LOG.info("snapshot = {}", snapshot);
 
+    long startTime = Time.monotonicNow();
     final TransactionInfo transactionInfo = TransactionInfo.valueOf(snapshot);
     ozoneManager.setTransactionInfo(transactionInfo);
     
ozoneManager.getMetadataManager().getTransactionInfoTable().put(TRANSACTION_INFO_KEY,
 transactionInfo);
     ozoneManager.getMetadataManager().getStore().flushDB();
+    LOG.info("{}: taking snapshot. applied = {}, skipped = {}, " +
+        "notified = {}, current snapshot index = {}, took {} ms",
+            getId(), applied, lastSkippedIndex, notified, snapshot, 
Time.monotonicNow() - startTime);
     return snapshot.getIndex();
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to