This is an automated email from the ASF dual-hosted git repository. szetszwo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/ratis.git
The following commit(s) were added to refs/heads/master by this push: new 35615d936 RATIS-2261. Intermittent failure in TestRaftSnapshotWithGrpc.testInstallSnapshotDuringBootstrap. (#1287) 35615d936 is described below commit 35615d9368c6299195071d6c0383a977eb0040cd Author: Tsz-Wo Nicholas Sze <szets...@apache.org> AuthorDate: Mon Sep 22 10:12:59 2025 -0700 RATIS-2261. Intermittent failure in TestRaftSnapshotWithGrpc.testInstallSnapshotDuringBootstrap. (#1287) --- .../ratis/statemachine/RaftSnapshotBaseTest.java | 81 ++++++++++------------ .../impl/SimpleStateMachine4Testing.java | 11 +-- .../ratis/grpc/TestRaftSnapshotWithGrpc.java | 12 +--- .../ratis/netty/TestRaftSnapshotWithNetty.java | 11 ++- .../TestRaftSnapshotWithSimulatedRpc.java | 11 ++- 5 files changed, 54 insertions(+), 72 deletions(-) diff --git a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java index 2c4ac2eee..44ae74c4c 100644 --- a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java @@ -49,15 +49,12 @@ import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.Slf4jUtils; -import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; -import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Optional; @@ -67,11 +64,18 @@ import java.util.stream.LongStream; import org.apache.ratis.thirdparty.com.codahale.metrics.Timer; import org.slf4j.event.Level; -public abstract class RaftSnapshotBaseTest extends BaseTest { +public abstract class RaftSnapshotBaseTest<CLUSTER extends MiniRaftCluster> + extends BaseTest + implements MiniRaftCluster.Factory.Get<CLUSTER> { { Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG); Slf4jUtils.setLogLevel(RaftLog.LOG, Level.DEBUG); - Slf4jUtils.setLogLevel(RaftClient.LOG, Level.DEBUG); + + final RaftProperties p = getProperties(); + p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, SimpleStateMachine4Testing.class, StateMachine.class); + RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold(p, SNAPSHOT_TRIGGER_THRESHOLD); + RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(p, true); + RaftServerConfigKeys.LeaderElection.setMemberMajorityAdd(p, true); } static final Logger LOG = LoggerFactory.getLogger(RaftSnapshotBaseTest.class); @@ -119,29 +123,6 @@ public abstract class RaftSnapshotBaseTest extends BaseTest { } } - private MiniRaftCluster cluster; - - public abstract MiniRaftCluster.Factory<?> getFactory(); - - @BeforeEach - public void setup() throws IOException { - final RaftProperties prop = new RaftProperties(); - prop.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, - SimpleStateMachine4Testing.class, StateMachine.class); - RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold( - prop, SNAPSHOT_TRIGGER_THRESHOLD); - RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(prop, true); - this.cluster = getFactory().newCluster(1, prop); - cluster.start(); - } - - @AfterEach - public void tearDown() { - if (cluster != null) { - cluster.shutdown(); - } - } - /** * Keep generating writing traffic and make sure snapshots are taken. * We then restart the whole raft peer and check if it can correctly load @@ -149,8 +130,13 @@ public abstract class RaftSnapshotBaseTest extends BaseTest { */ @Test public void testRestartPeer() throws Exception { - RaftTestUtil.waitForLeader(cluster); - final RaftPeerId leaderId = cluster.getLeader().getId(); + runWithNewCluster(1, this::runTestRestartPeer); + + } + + void runTestRestartPeer(CLUSTER cluster) throws Exception { + LOG.info("runTestRestartPeer"); + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); int i = 0; try(final RaftClient client = cluster.createClient(leaderId)) { for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) { @@ -180,7 +166,7 @@ public abstract class RaftSnapshotBaseTest extends BaseTest { public static boolean exists(File f) { if (f.exists()) { - LOG.info("File exists: " + f); + LOG.info("File exists: {}", f); return true; } return false; @@ -193,11 +179,15 @@ public abstract class RaftSnapshotBaseTest extends BaseTest { */ @Test public void testBasicInstallSnapshot() throws Exception { + runWithNewCluster(1, this::runTestBasicInstallSnapshot); + } + + void runTestBasicInstallSnapshot(CLUSTER cluster) throws Exception { + LOG.info("runTestBasicInstallSnapshot"); final List<LogSegmentPath> logs; int i = 0; try { - RaftTestUtil.waitForLeader(cluster); - final RaftPeerId leaderId = cluster.getLeader().getId(); + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); try(final RaftClient client = cluster.createClient(leaderId)) { for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) { @@ -236,16 +226,14 @@ public abstract class RaftSnapshotBaseTest extends BaseTest { Assertions.assertTrue(client.io().send(new SimpleMessage("m" + i)).isSuccess()); } - // add two more peers - String[] newPeers = new String[]{"s3", "s4"}; - MiniRaftCluster.PeerChanges change = cluster.addNewPeers( - newPeers, true, false); + // add a new peer + final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true, true); // trigger setConfiguration RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf), peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); - for (String newPeer : newPeers) { - final RaftServer.Division s = cluster.getDivision(RaftPeerId.valueOf(newPeer)); + for (RaftPeer newPeer : change.newPeers) { + final RaftServer.Division s = cluster.getDivision(newPeer.getId()); SimpleStateMachine4Testing simpleStateMachine = SimpleStateMachine4Testing.get(s); Assertions.assertSame(LifeCycle.State.RUNNING, simpleStateMachine.getLifeCycleState()); } @@ -275,6 +263,11 @@ public abstract class RaftSnapshotBaseTest extends BaseTest { */ @Test public void testInstallSnapshotDuringBootstrap() throws Exception { + runWithNewCluster(1, this::runTestInstallSnapshotDuringBootstrap); + } + + void runTestInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exception { + LOG.info("runTestInstallSnapshotDuringBootstrap"); int i = 0; try { RaftTestUtil.waitForLeader(cluster); @@ -299,16 +292,14 @@ public abstract class RaftSnapshotBaseTest extends BaseTest { assertLeaderContent(cluster); - // add two more peers - String[] newPeers = new String[]{"s3", "s4"}; - MiniRaftCluster.PeerChanges change = cluster.addNewPeers( - newPeers, true, false); + // add a new peer + final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true, true); // trigger setConfiguration RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf), peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); - for (String newPeer : newPeers) { - final RaftServer.Division s = cluster.getDivision(RaftPeerId.valueOf(newPeer)); + for (RaftPeer newPeer : change.newPeers) { + final RaftServer.Division s = cluster.getDivision(newPeer.getId()); SimpleStateMachine4Testing simpleStateMachine = SimpleStateMachine4Testing.get(s); Assertions.assertSame(LifeCycle.State.RUNNING, simpleStateMachine.getLifeCycleState()); } diff --git a/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java b/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java index afab27680..1ffbdbcb9 100644 --- a/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java +++ b/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java @@ -210,7 +210,7 @@ public class SimpleStateMachine4Testing extends BaseStateMachine { @Override public synchronized void initialize(RaftServer server, RaftGroupId raftGroupId, RaftStorage raftStorage) throws IOException { - LOG.info("Initializing " + this); + LOG.info("Initializing {}", this); this.groupId = raftGroupId; getLifeCycle().startAndTransition(() -> { super.initialize(server, raftGroupId, raftStorage); @@ -233,7 +233,10 @@ public class SimpleStateMachine4Testing extends BaseStateMachine { @Override public synchronized void reinitialize() throws IOException { - LOG.info("Reinitializing " + this); + LOG.info("Reinitializing {}", this); + indexMap.clear(); + dataMap.clear(); + loadSnapshot(storage.getLatestSnapshot()); if (getLifeCycleState() == LifeCycle.State.PAUSED) { getLifeCycle().transition(LifeCycle.State.STARTING); @@ -328,14 +331,14 @@ public class SimpleStateMachine4Testing extends BaseStateMachine { final String string = request.getContent().toStringUtf8(); Exception exception; try { - LOG.info("query " + string); + LOG.info("query {}", string); final LogEntryProto entry = dataMap.get(string); if (entry != null) { return CompletableFuture.completedFuture(Message.valueOf(entry.toByteString())); } exception = new IndexOutOfBoundsException(getId() + ": LogEntry not found for query " + string); } catch (Exception e) { - LOG.warn("Failed request " + request, e); + LOG.warn("Failed request {}", request, e); exception = e; } return JavaUtils.completeExceptionally(new StateMachineException( diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java index e6c2f6613..7c94fb3bf 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java @@ -20,22 +20,16 @@ package org.apache.ratis.grpc; import java.util.Optional; import org.apache.ratis.metrics.LongCounter; -import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.metrics.MetricRegistries; import org.apache.ratis.metrics.MetricRegistryInfo; import org.apache.ratis.metrics.RatisMetricRegistry; import org.apache.ratis.server.RaftServer; import org.apache.ratis.statemachine.RaftSnapshotBaseTest; -import org.apache.ratis.test.tag.Flaky; import org.junit.jupiter.api.Assertions; -@Flaky("RATIS-2261") -public class TestRaftSnapshotWithGrpc extends RaftSnapshotBaseTest { - @Override - public MiniRaftCluster.Factory<?> getFactory() { - return MiniRaftClusterWithGrpc.FACTORY; - } - +public class TestRaftSnapshotWithGrpc + extends RaftSnapshotBaseTest<MiniRaftClusterWithGrpc> + implements MiniRaftClusterWithGrpc.FactoryGet { @Override protected void verifyInstallSnapshotMetric(RaftServer.Division leader) { MetricRegistryInfo info = new MetricRegistryInfo(leader.getMemberId().toString(), diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java index f1340efc7..ae16f41ed 100644 --- a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java +++ b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,12 +17,9 @@ */ package org.apache.ratis.netty; -import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.statemachine.RaftSnapshotBaseTest; -public class TestRaftSnapshotWithNetty extends RaftSnapshotBaseTest { - @Override - public MiniRaftCluster.Factory<?> getFactory() { - return MiniRaftClusterWithNetty.FACTORY; - } +public class TestRaftSnapshotWithNetty + extends RaftSnapshotBaseTest<MiniRaftClusterWithNetty> + implements MiniRaftClusterWithNetty.FactoryGet { } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java index 1c76f7b00..62ee387de 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,12 +17,9 @@ */ package org.apache.ratis.server.simulation; -import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.statemachine.RaftSnapshotBaseTest; -public class TestRaftSnapshotWithSimulatedRpc extends RaftSnapshotBaseTest { - @Override - public MiniRaftCluster.Factory<?> getFactory() { - return MiniRaftClusterWithSimulatedRpc.FACTORY; - } +public class TestRaftSnapshotWithSimulatedRpc + extends RaftSnapshotBaseTest<MiniRaftClusterWithSimulatedRpc> + implements MiniRaftClusterWithSimulatedRpc.FactoryGet { }