This is an automated email from the ASF dual-hosted git repository.
rexxiong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new 37f83a4a0 [CELEBORN-1087] Remove SimpleStateMachineStorageUtil in
master module
37f83a4a0 is described below
commit 37f83a4a0334fe16228e83f93174af844cdef01a
Author: xleoken <[email protected]>
AuthorDate: Thu Oct 26 20:49:57 2023 +0800
[CELEBORN-1087] Remove SimpleStateMachineStorageUtil in master module
### What changes were proposed in this pull request?
To complement the functionality of ratis, we added
SimpleStateMachineStorageUtil class in the master module, which contains two
functions, one is findLatestSnapshot and the other is getSmDir. We can
implement these functions in a more elegant way.
refer to https://github.com/apache/ratis/tree/master/ratis-examples
### How was this patch tested?
Local tested. 3 master with 1 worker.
**Test case one**
After patch, we can get the correct tmp directory as before.

**Test case two**
I stop one of the masters and clean up the ratis data directory, then
restart the stopped master, everything works fine.
Closes #2037 from xleoken/rm-ratis.
Authored-by: xleoken <[email protected]>
Signed-off-by: Shuang <[email protected]>
---
.../deploy/master/clustermeta/ha/HAHelper.java | 3 +-
.../deploy/master/clustermeta/ha/StateMachine.java | 27 ++++++++++++++----
.../impl/SimpleStateMachineStorageUtil.java | 33 ----------------------
3 files changed, 23 insertions(+), 40 deletions(-)
diff --git
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAHelper.java
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAHelper.java
index 962f19d9d..fdd22443e 100644
---
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAHelper.java
+++
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAHelper.java
@@ -23,7 +23,6 @@ import java.io.IOException;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.ratis.protocol.Message;
import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage;
-import org.apache.ratis.statemachine.impl.SimpleStateMachineStorageUtil;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.celeborn.common.client.MasterNotLeaderException;
@@ -121,7 +120,7 @@ public class HAHelper {
* @throws IOException if error occurred while creating the snapshot file
*/
public static File createTempSnapshotFile(SimpleStateMachineStorage storage)
throws IOException {
- File tempDir = new
File(SimpleStateMachineStorageUtil.getSmDir(storage).getParentFile(), "tmp");
+ File tempDir = storage.getTmpDir();
if (!tempDir.isDirectory() && !tempDir.mkdir()) {
throw new IOException(
"Cannot create temporary snapshot directory at " +
tempDir.getAbsolutePath());
diff --git
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/StateMachine.java
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/StateMachine.java
index db537671e..21d9bdb18 100644
---
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/StateMachine.java
+++
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/StateMachine.java
@@ -39,13 +39,13 @@ import org.apache.ratis.protocol.RaftGroupId;
import org.apache.ratis.server.RaftServer;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.ratis.server.raftlog.RaftLog;
+import org.apache.ratis.server.storage.FileInfo;
import org.apache.ratis.server.storage.RaftStorage;
import org.apache.ratis.statemachine.SnapshotInfo;
import org.apache.ratis.statemachine.StateMachineStorage;
import org.apache.ratis.statemachine.TransactionContext;
import org.apache.ratis.statemachine.impl.BaseStateMachine;
import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage;
-import org.apache.ratis.statemachine.impl.SimpleStateMachineStorageUtil;
import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.ratis.util.ExitUtils;
@@ -61,7 +61,22 @@ import
org.apache.celeborn.service.deploy.master.clustermeta.ResourceProtos.Reso
public class StateMachine extends BaseStateMachine {
private static final Logger LOG =
LoggerFactory.getLogger(StateMachine.class);
- private final SimpleStateMachineStorage storage = new
SimpleStateMachineStorage();
+ private final SimpleStateMachineStorage storage =
+ new SimpleStateMachineStorage() {
+
+ File tmpDir = null;
+
+ @Override
+ public void init(RaftStorage storage) throws IOException {
+ super.init(storage);
+ tmpDir = storage.getStorageDir().getTmpDir();
+ }
+
+ @Override
+ public File getTmpDir() {
+ return tmpDir;
+ }
+ };
private final HARaftServer masterRatisServer;
private RaftGroupId raftGroupId;
@@ -98,7 +113,6 @@ public class StateMachine extends BaseStateMachine {
public void reinitialize() throws IOException {
LOG.info("Reinitializing state machine.");
getLifeCycle().compareAndTransition(PAUSED, STARTING);
-
storage.updateLatestSnapshot(SimpleStateMachineStorageUtil.findLatestSnapshot(storage));
loadSnapshot(storage.getLatestSnapshot());
getLifeCycle().compareAndTransition(STARTING, RUNNING);
}
@@ -253,7 +267,7 @@ public class StateMachine extends BaseStateMachine {
LOG.warn("Failed to create temp snapshot file.", e);
return RaftLog.INVALID_LOG_INDEX;
}
- LOG.debug("Taking a snapshot to file {}.", tempFile);
+ LOG.info("Taking a snapshot to file {}.", tempFile);
final File snapshotFile =
storage.getSnapshotFile(lastTermIndex.getTerm(),
lastTermIndex.getIndex());
try {
@@ -266,7 +280,10 @@ public class StateMachine extends BaseStateMachine {
LOG.warn("Failed to rename snapshot from {} to {}.", tempFile,
snapshotFile);
return RaftLog.INVALID_LOG_INDEX;
}
-
storage.updateLatestSnapshot(SimpleStateMachineStorageUtil.findLatestSnapshot(storage));
+
+ // update storage
+ final FileInfo info = new FileInfo(snapshotFile.toPath(), digest);
+ storage.updateLatestSnapshot(new SingleFileSnapshotInfo(info,
lastTermIndex));
} catch (Exception e) {
tempFile.delete();
LOG.warn("Failed to complete snapshot: {}.", snapshotFile, e);
diff --git
a/master/src/main/java/org/apache/ratis/statemachine/impl/SimpleStateMachineStorageUtil.java
b/master/src/main/java/org/apache/ratis/statemachine/impl/SimpleStateMachineStorageUtil.java
deleted file mode 100644
index ceea13a74..000000000
---
a/master/src/main/java/org/apache/ratis/statemachine/impl/SimpleStateMachineStorageUtil.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ratis.statemachine.impl;
-
-import java.io.File;
-import java.io.IOException;
-
-public interface SimpleStateMachineStorageUtil {
- static SingleFileSnapshotInfo findLatestSnapshot(SimpleStateMachineStorage
storage)
- throws IOException {
- final File dir = storage.getStateMachineDir();
- return SimpleStateMachineStorage.findLatestSnapshot(dir.toPath());
- }
-
- static File getSmDir(SimpleStateMachineStorage storage) {
- return storage.getStateMachineDir();
- }
-}