This is an automated email from the ASF dual-hosted git repository.

szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ratis.git


The following commit(s) were added to refs/heads/master by this push:
     new 35615d936 RATIS-2261. Intermittent failure in 
TestRaftSnapshotWithGrpc.testInstallSnapshotDuringBootstrap. (#1287)
35615d936 is described below

commit 35615d9368c6299195071d6c0383a977eb0040cd
Author: Tsz-Wo Nicholas Sze <szets...@apache.org>
AuthorDate: Mon Sep 22 10:12:59 2025 -0700

    RATIS-2261. Intermittent failure in 
TestRaftSnapshotWithGrpc.testInstallSnapshotDuringBootstrap. (#1287)
---
 .../ratis/statemachine/RaftSnapshotBaseTest.java   | 81 ++++++++++------------
 .../impl/SimpleStateMachine4Testing.java           | 11 +--
 .../ratis/grpc/TestRaftSnapshotWithGrpc.java       | 12 +---
 .../ratis/netty/TestRaftSnapshotWithNetty.java     | 11 ++-
 .../TestRaftSnapshotWithSimulatedRpc.java          | 11 ++-
 5 files changed, 54 insertions(+), 72 deletions(-)

diff --git 
a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java
 
b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java
index 2c4ac2eee..44ae74c4c 100644
--- 
a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java
+++ 
b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java
@@ -49,15 +49,12 @@ import org.apache.ratis.util.FileUtils;
 import org.apache.ratis.util.JavaUtils;
 import org.apache.ratis.util.LifeCycle;
 import org.apache.ratis.util.Slf4jUtils;
-import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
-import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Optional;
@@ -67,11 +64,18 @@ import java.util.stream.LongStream;
 import org.apache.ratis.thirdparty.com.codahale.metrics.Timer;
 import org.slf4j.event.Level;
 
-public abstract class RaftSnapshotBaseTest extends BaseTest {
+public abstract class RaftSnapshotBaseTest<CLUSTER extends MiniRaftCluster>
+    extends BaseTest
+    implements MiniRaftCluster.Factory.Get<CLUSTER> {
   {
     Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG);
     Slf4jUtils.setLogLevel(RaftLog.LOG, Level.DEBUG);
-    Slf4jUtils.setLogLevel(RaftClient.LOG, Level.DEBUG);
+
+    final RaftProperties p = getProperties();
+    p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, 
SimpleStateMachine4Testing.class, StateMachine.class);
+    RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold(p, 
SNAPSHOT_TRIGGER_THRESHOLD);
+    RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(p, true);
+    RaftServerConfigKeys.LeaderElection.setMemberMajorityAdd(p, true);
   }
 
   static final Logger LOG = 
LoggerFactory.getLogger(RaftSnapshotBaseTest.class);
@@ -119,29 +123,6 @@ public abstract class RaftSnapshotBaseTest extends 
BaseTest {
     }
   }
 
-  private MiniRaftCluster cluster;
-
-  public abstract MiniRaftCluster.Factory<?> getFactory();
-
-  @BeforeEach
-  public void setup() throws IOException {
-    final RaftProperties prop = new RaftProperties();
-    prop.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY,
-        SimpleStateMachine4Testing.class, StateMachine.class);
-    RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold(
-        prop, SNAPSHOT_TRIGGER_THRESHOLD);
-    RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(prop, true);
-    this.cluster = getFactory().newCluster(1, prop);
-    cluster.start();
-  }
-
-  @AfterEach
-  public void tearDown() {
-    if (cluster != null) {
-      cluster.shutdown();
-    }
-  }
-
   /**
    * Keep generating writing traffic and make sure snapshots are taken.
    * We then restart the whole raft peer and check if it can correctly load
@@ -149,8 +130,13 @@ public abstract class RaftSnapshotBaseTest extends 
BaseTest {
    */
   @Test
   public void testRestartPeer() throws Exception {
-    RaftTestUtil.waitForLeader(cluster);
-    final RaftPeerId leaderId = cluster.getLeader().getId();
+    runWithNewCluster(1, this::runTestRestartPeer);
+
+  }
+
+  void runTestRestartPeer(CLUSTER cluster) throws Exception {
+    LOG.info("runTestRestartPeer");
+    final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId();
     int i = 0;
     try(final RaftClient client = cluster.createClient(leaderId)) {
       for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) {
@@ -180,7 +166,7 @@ public abstract class RaftSnapshotBaseTest extends BaseTest 
{
 
   public static boolean exists(File f) {
     if (f.exists()) {
-      LOG.info("File exists: " + f);
+      LOG.info("File exists: {}", f);
       return true;
     }
     return false;
@@ -193,11 +179,15 @@ public abstract class RaftSnapshotBaseTest extends 
BaseTest {
    */
   @Test
   public void testBasicInstallSnapshot() throws Exception {
+    runWithNewCluster(1, this::runTestBasicInstallSnapshot);
+  }
+
+  void runTestBasicInstallSnapshot(CLUSTER cluster) throws Exception {
+    LOG.info("runTestBasicInstallSnapshot");
     final List<LogSegmentPath> logs;
     int i = 0;
     try {
-      RaftTestUtil.waitForLeader(cluster);
-      final RaftPeerId leaderId = cluster.getLeader().getId();
+      final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId();
 
       try(final RaftClient client = cluster.createClient(leaderId)) {
         for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) {
@@ -236,16 +226,14 @@ public abstract class RaftSnapshotBaseTest extends 
BaseTest {
         Assertions.assertTrue(client.io().send(new SimpleMessage("m" + 
i)).isSuccess());
       }
 
-      // add two more peers
-      String[] newPeers = new String[]{"s3", "s4"};
-      MiniRaftCluster.PeerChanges change = cluster.addNewPeers(
-          newPeers, true, false);
+      // add a new peer
+      final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true, 
true);
       // trigger setConfiguration
       RaftServerTestUtil.runWithMinorityPeers(cluster, 
Arrays.asList(change.allPeersInNewConf),
           peers -> 
cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray())));
 
-      for (String newPeer : newPeers) {
-        final RaftServer.Division s = 
cluster.getDivision(RaftPeerId.valueOf(newPeer));
+      for (RaftPeer newPeer : change.newPeers) {
+        final RaftServer.Division s = cluster.getDivision(newPeer.getId());
         SimpleStateMachine4Testing simpleStateMachine = 
SimpleStateMachine4Testing.get(s);
         Assertions.assertSame(LifeCycle.State.RUNNING, 
simpleStateMachine.getLifeCycleState());
       }
@@ -275,6 +263,11 @@ public abstract class RaftSnapshotBaseTest extends 
BaseTest {
    */
   @Test
   public void testInstallSnapshotDuringBootstrap() throws Exception {
+    runWithNewCluster(1, this::runTestInstallSnapshotDuringBootstrap);
+  }
+
+  void runTestInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exception 
{
+    LOG.info("runTestInstallSnapshotDuringBootstrap");
     int i = 0;
     try {
       RaftTestUtil.waitForLeader(cluster);
@@ -299,16 +292,14 @@ public abstract class RaftSnapshotBaseTest extends 
BaseTest {
 
       assertLeaderContent(cluster);
 
-      // add two more peers
-      String[] newPeers = new String[]{"s3", "s4"};
-      MiniRaftCluster.PeerChanges change = cluster.addNewPeers(
-          newPeers, true, false);
+      // add a new peer
+      final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true, 
true);
       // trigger setConfiguration
       RaftServerTestUtil.runWithMinorityPeers(cluster, 
Arrays.asList(change.allPeersInNewConf),
           peers -> 
cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray())));
 
-      for (String newPeer : newPeers) {
-        final RaftServer.Division s = 
cluster.getDivision(RaftPeerId.valueOf(newPeer));
+      for (RaftPeer newPeer : change.newPeers) {
+        final RaftServer.Division s = cluster.getDivision(newPeer.getId());
         SimpleStateMachine4Testing simpleStateMachine = 
SimpleStateMachine4Testing.get(s);
         Assertions.assertSame(LifeCycle.State.RUNNING, 
simpleStateMachine.getLifeCycleState());
       }
diff --git 
a/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java
 
b/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java
index afab27680..1ffbdbcb9 100644
--- 
a/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java
+++ 
b/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java
@@ -210,7 +210,7 @@ public class SimpleStateMachine4Testing extends 
BaseStateMachine {
   @Override
   public synchronized void initialize(RaftServer server, RaftGroupId 
raftGroupId,
       RaftStorage raftStorage) throws IOException {
-    LOG.info("Initializing " + this);
+    LOG.info("Initializing {}", this);
     this.groupId = raftGroupId;
     getLifeCycle().startAndTransition(() -> {
       super.initialize(server, raftGroupId, raftStorage);
@@ -233,7 +233,10 @@ public class SimpleStateMachine4Testing extends 
BaseStateMachine {
 
   @Override
   public synchronized void reinitialize() throws IOException {
-    LOG.info("Reinitializing " + this);
+    LOG.info("Reinitializing {}", this);
+    indexMap.clear();
+    dataMap.clear();
+
     loadSnapshot(storage.getLatestSnapshot());
     if (getLifeCycleState() == LifeCycle.State.PAUSED) {
       getLifeCycle().transition(LifeCycle.State.STARTING);
@@ -328,14 +331,14 @@ public class SimpleStateMachine4Testing extends 
BaseStateMachine {
     final String string = request.getContent().toStringUtf8();
     Exception exception;
     try {
-      LOG.info("query " + string);
+      LOG.info("query {}", string);
       final LogEntryProto entry = dataMap.get(string);
       if (entry != null) {
         return 
CompletableFuture.completedFuture(Message.valueOf(entry.toByteString()));
       }
       exception = new IndexOutOfBoundsException(getId() + ": LogEntry not 
found for query " + string);
     } catch (Exception e) {
-      LOG.warn("Failed request " + request, e);
+      LOG.warn("Failed request {}", request, e);
       exception = e;
     }
     return JavaUtils.completeExceptionally(new StateMachineException(
diff --git 
a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java 
b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java
index e6c2f6613..7c94fb3bf 100644
--- 
a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java
+++ 
b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java
@@ -20,22 +20,16 @@ package org.apache.ratis.grpc;
 import java.util.Optional;
 
 import org.apache.ratis.metrics.LongCounter;
-import org.apache.ratis.server.impl.MiniRaftCluster;
 import org.apache.ratis.metrics.MetricRegistries;
 import org.apache.ratis.metrics.MetricRegistryInfo;
 import org.apache.ratis.metrics.RatisMetricRegistry;
 import org.apache.ratis.server.RaftServer;
 import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
-import org.apache.ratis.test.tag.Flaky;
 import org.junit.jupiter.api.Assertions;
 
-@Flaky("RATIS-2261")
-public class TestRaftSnapshotWithGrpc extends RaftSnapshotBaseTest {
-  @Override
-  public MiniRaftCluster.Factory<?> getFactory() {
-    return MiniRaftClusterWithGrpc.FACTORY;
-  }
-
+public class TestRaftSnapshotWithGrpc
+    extends RaftSnapshotBaseTest<MiniRaftClusterWithGrpc>
+    implements MiniRaftClusterWithGrpc.FactoryGet {
   @Override
   protected void verifyInstallSnapshotMetric(RaftServer.Division leader) {
     MetricRegistryInfo info = new 
MetricRegistryInfo(leader.getMemberId().toString(),
diff --git 
a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java
 
b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java
index f1340efc7..ae16f41ed 100644
--- 
a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java
+++ 
b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -17,12 +17,9 @@
  */
 package org.apache.ratis.netty;
 
-import org.apache.ratis.server.impl.MiniRaftCluster;
 import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
 
-public class TestRaftSnapshotWithNetty extends RaftSnapshotBaseTest {
-  @Override
-  public MiniRaftCluster.Factory<?> getFactory() {
-    return MiniRaftClusterWithNetty.FACTORY;
-  }
+public class TestRaftSnapshotWithNetty
+    extends RaftSnapshotBaseTest<MiniRaftClusterWithNetty>
+    implements MiniRaftClusterWithNetty.FactoryGet {
 }
diff --git 
a/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java
 
b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java
index 1c76f7b00..62ee387de 100644
--- 
a/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java
+++ 
b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -17,12 +17,9 @@
  */
 package org.apache.ratis.server.simulation;
 
-import org.apache.ratis.server.impl.MiniRaftCluster;
 import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
 
-public class TestRaftSnapshotWithSimulatedRpc extends RaftSnapshotBaseTest {
-  @Override
-  public MiniRaftCluster.Factory<?> getFactory() {
-    return MiniRaftClusterWithSimulatedRpc.FACTORY;
-  }
+public class TestRaftSnapshotWithSimulatedRpc
+    extends RaftSnapshotBaseTest<MiniRaftClusterWithSimulatedRpc>
+    implements MiniRaftClusterWithSimulatedRpc.FactoryGet {
 }

Reply via email to