This is an automated email from the ASF dual-hosted git repository.
szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new e2f2aeb2c6 HDDS-11548. Add some logging to the StateMachine (#7291)
e2f2aeb2c6 is described below
commit e2f2aeb2c691aa9ff9a83f86ca75f8ef77762d3f
Author: jianghuazhu <[email protected]>
AuthorDate: Fri Oct 11 18:49:00 2024 +0800
HDDS-11548. Add some logging to the StateMachine (#7291)
---
.../server/ratis/ContainerStateMachine.java | 1 +
.../hdds/scm/ha/SCMHADBTransactionBufferImpl.java | 6 ++++
.../apache/hadoop/hdds/scm/ha/SCMStateMachine.java | 7 +++++
.../ozone/om/ratis/OzoneManagerStateMachine.java | 32 ++++++++++++++--------
4 files changed, 34 insertions(+), 12 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
index e7eb0aa781..3a48ecaee5 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
@@ -284,6 +284,7 @@ public class ContainerStateMachine extends BaseStateMachine
{
storage.init(raftStorage);
ratisServer.notifyGroupAdd(gid);
+ LOG.info("{}: initialize {}", server.getId(), id);
loadSnapshot(storage.getLatestSnapshot());
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java
index f1ee76a198..cd0346d72f 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java
@@ -26,6 +26,8 @@ import org.apache.hadoop.hdds.utils.TransactionInfo;
import org.apache.hadoop.hdds.utils.db.BatchOperation;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.ratis.statemachine.SnapshotInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
@@ -41,6 +43,8 @@ import static
org.apache.hadoop.ozone.OzoneConsts.TRANSACTION_INFO_KEY;
* operation in DB.
*/
public class SCMHADBTransactionBufferImpl implements SCMHADBTransactionBuffer {
+
+ public static final Logger LOG =
LoggerFactory.getLogger(SCMHADBTransactionBufferImpl.class);
private final StorageContainerManager scm;
private SCMMetadataStore metadataStore;
private BatchOperation currentBatchOperation;
@@ -107,6 +111,8 @@ public class SCMHADBTransactionBufferImpl implements
SCMHADBTransactionBuffer {
@Override
public void setLatestSnapshot(SnapshotInfo latestSnapshot) {
+ LOG.info("{}: Set latest Snapshot to {}",
+ scm.getScmHAManager().getRatisServer().getDivision().getId(),
latestSnapshot);
this.latestSnapshot.set(latestSnapshot);
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
index 1128accd2f..5805fe67e4 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
@@ -137,6 +137,7 @@ public class SCMStateMachine extends BaseStateMachine {
getLifeCycle().startAndTransition(() -> {
super.initialize(server, id, raftStorage);
storage.init(raftStorage);
+ LOG.info("{}: initialize {}", server.getId(), id);
});
}
@@ -149,6 +150,9 @@ public class SCMStateMachine extends BaseStateMachine {
final SCMRatisRequest request = SCMRatisRequest.decode(
Message.valueOf(trx.getStateMachineLogEntry().getLogData()));
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("{}: applyTransaction {}", getId(),
TermIndex.valueOf(trx.getLogEntry()));
+ }
try {
applyTransactionFuture.complete(process(request));
} catch (SCMException ex) {
@@ -389,6 +393,7 @@ public class SCMStateMachine extends BaseStateMachine {
@Override
public void pause() {
final LifeCycle lc = getLifeCycle();
+ LOG.info("{}: Try to pause from current LifeCycle state {}", getId(), lc);
if (lc.getCurrentState() != LifeCycle.State.NEW) {
lc.transition(LifeCycle.State.PAUSING);
lc.transition(LifeCycle.State.PAUSED);
@@ -414,6 +419,8 @@ public class SCMStateMachine extends BaseStateMachine {
throw new IOException(e);
}
+ LOG.info("{}: SCMStateMachine is reinitializing. newTermIndex = {}",
getId(), termIndex);
+
// re-initialize the DBTransactionBuffer and update the lastAppliedIndex.
try {
transactionBuffer.init();
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
index 463afba942..aa7211fa03 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
@@ -46,6 +46,7 @@ import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRespo
import org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler;
import org.apache.hadoop.ozone.protocolPB.RequestHandler;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.concurrent.HadoopExecutors;
import org.apache.ratis.proto.RaftProtos;
import org.apache.ratis.proto.RaftProtos.StateMachineLogEntryProto;
@@ -88,7 +89,6 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
new SimpleStateMachineStorage();
private final OzoneManager ozoneManager;
private RequestHandler handler;
- private RaftGroupId raftGroupId;
private volatile OzoneManagerDoubleBuffer ozoneManagerDoubleBuffer;
private final ExecutorService executorService;
private final ExecutorService installSnapshotExecutor;
@@ -134,8 +134,8 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
RaftStorage raftStorage) throws IOException {
getLifeCycle().startAndTransition(() -> {
super.initialize(server, id, raftStorage);
- this.raftGroupId = id;
storage.init(raftStorage);
+ LOG.info("{}: initialize {} with {}", getId(), id,
getLastAppliedTermIndex());
});
}
@@ -143,8 +143,9 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
public synchronized void reinitialize() throws IOException {
loadSnapshotInfoFromDB();
if (getLifeCycleState() == LifeCycle.State.PAUSED) {
- unpause(getLastAppliedTermIndex().getIndex(),
- getLastAppliedTermIndex().getTerm());
+ final TermIndex lastApplied = getLastAppliedTermIndex();
+ unpause(lastApplied.getIndex(), lastApplied.getTerm());
+ LOG.info("{}: reinitialize {} with {}", getId(), getGroupId(),
lastApplied);
}
}
@@ -160,6 +161,7 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
RaftPeerId newLeaderId) {
// Initialize OMHAMetrics
ozoneManager.omHAMetricsInit(newLeaderId.toString());
+ LOG.info("{}: leader changed to {}", groupMemberId, newLeaderId);
}
/** Notified by Ratis for non-StateMachine term-index update. */
@@ -263,7 +265,7 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
messageContent);
Preconditions.checkArgument(raftClientRequest.getRaftGroupId().equals(
- raftGroupId));
+ getGroupId()));
try {
handler.validateRequest(omRequest);
} catch (IOException ioe) {
@@ -293,6 +295,10 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
OzoneManagerPrepareState prepareState = ozoneManager.getPrepareState();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("{}: preAppendTransaction {}", getId(),
TermIndex.valueOf(trx.getLogEntry()));
+ }
+
if (cmdType == OzoneManagerProtocolProtos.Type.Prepare) {
// Must authenticate prepare requests here, since we must determine
// whether or not to apply the prepare gate before proceeding with the
@@ -303,8 +309,7 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
if (ozoneManager.getAclsEnabled()
&& !ozoneManager.isAdmin(userGroupInformation)) {
String message = "Access denied for user " + userGroupInformation
- + ". "
- + "Superuser privilege is required to prepare ozone managers.";
+ + ". Superuser privilege is required to prepare
upgrade/downgrade.";
OMException cause =
new OMException(message, OMException.ResultCodes.ACCESS_DENIED);
// Leader should not step down because of this failure.
@@ -341,6 +346,7 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
: OMRatisHelper.convertByteStringToOMRequest(
trx.getStateMachineLogEntry().getLogData());
final TermIndex termIndex = TermIndex.valueOf(trx.getLogEntry());
+ LOG.debug("{}: applyTransaction {}", getId(), termIndex);
// In the current approach we have one single global thread executor.
// with single thread. Right now this is being done for correctness, as
// applyTransaction will be run on multiple OM's we want to execute the
@@ -427,12 +433,14 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
*/
public synchronized void unpause(long newLastAppliedSnaphsotIndex,
long newLastAppliedSnapShotTermIndex) {
- LOG.info("OzoneManagerStateMachine is un-pausing");
if (statePausedCount.decrementAndGet() == 0) {
getLifeCycle().startAndTransition(() -> {
this.ozoneManagerDoubleBuffer = buildDoubleBufferForRatis();
this.setLastAppliedTermIndex(TermIndex.valueOf(
newLastAppliedSnapShotTermIndex, newLastAppliedSnaphsotIndex));
+ LOG.info("{}: OzoneManagerStateMachine un-pause completed. " +
+ "newLastAppliedSnaphsotIndex: {}, newLastAppliedSnapShotTermIndex:
{}",
+ getId(), newLastAppliedSnaphsotIndex,
newLastAppliedSnapShotTermIndex);
});
}
}
@@ -482,15 +490,15 @@ public class OzoneManagerStateMachine extends
BaseStateMachine {
final TermIndex applied = getLastAppliedTermIndex();
final TermIndex notified = getLastNotifiedTermIndex();
final TermIndex snapshot = applied.compareTo(notified) > 0 ? applied :
notified;
- LOG.info(" applied = {}", applied);
- LOG.info(" skipped = {}", lastSkippedIndex);
- LOG.info("notified = {}", notified);
- LOG.info("snapshot = {}", snapshot);
+ long startTime = Time.monotonicNow();
final TransactionInfo transactionInfo = TransactionInfo.valueOf(snapshot);
ozoneManager.setTransactionInfo(transactionInfo);
ozoneManager.getMetadataManager().getTransactionInfoTable().put(TRANSACTION_INFO_KEY,
transactionInfo);
ozoneManager.getMetadataManager().getStore().flushDB();
+ LOG.info("{}: taking snapshot. applied = {}, skipped = {}, " +
+ "notified = {}, current snapshot index = {}, took {} ms",
+ getId(), applied, lastSkippedIndex, notified, snapshot,
Time.monotonicNow() - startTime);
return snapshot.getIndex();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]