Cyrill commented on code in PR #7286:
URL: https://github.com/apache/ignite-3/pull/7286#discussion_r2650925392
##########
modules/raft/src/test/java/org/apache/ignite/internal/raft/client/PhysicalTopologyAwareRaftGroupServiceRunTest.java:
##########
@@ -436,6 +472,217 @@ void
testBoundedTimeoutSucceedsWhenLeaderAppearsBeforeTimeout() {
assertThat(result, willCompleteSuccessfully());
}
+ /**
+ * Tests that with bounded timeout, the command fails if peer responses
are slow and exceed the deadline.
+ */
+ @Test
+ void testBoundedTimeoutExpiresWhileTryingPeers() {
+ // Each peer response takes 200ms and returns EPERM (no leader).
+ // With 3 peers and 500ms timeout, we should exceed the deadline.
+ when(messagingService.invoke(
+ any(InternalClusterNode.class),
+ argThat(this::isTestWriteCommand),
+ anyLong()
+ )).thenAnswer(invocation -> {
+ return CompletableFuture.supplyAsync(() -> {
+ try {
+ Thread.sleep(200);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ }
+ return FACTORY.errorResponse()
+ .errorCode(RaftError.EPERM.getNumber())
+ .build();
+ }, executor);
+ });
+
+ PhysicalTopologyAwareRaftGroupService svc = startService();
+
+ // With 500ms timeout and 3 peers each taking 200ms, total time ~600ms
> 500ms.
+ CompletableFuture<Object> result = svc.run(testWriteCommand(), 500);
+
+ assertThat(result,
willThrow(ReplicationGroupUnavailableException.class, 2, TimeUnit.SECONDS));
+ }
+
+ /**
+ * Tests that individual peer request timeouts are bounded by the deadline.
+ *
+ * <p>If the deadline is 500ms but the default response timeout is 3000ms,
the first peer request
+ * should timeout at ~500ms, not wait the full 3000ms.
+ */
+ @Test
+ void testBoundedTimeoutLimitsIndividualPeerRequestTimeout() {
+ // First peer never responds (simulating network issue).
+ // Default response timeout is 3000ms, but our deadline is 500ms.
+ when(messagingService.invoke(
+ any(InternalClusterNode.class),
+ argThat(this::isTestWriteCommand),
+ anyLong()
+ )).thenReturn(new CompletableFuture<>()); // Never completes
+
+ PhysicalTopologyAwareRaftGroupService svc = startService();
+
+ long startTime = System.currentTimeMillis();
+
+ // With 500ms timeout, should fail within ~600ms (500ms + some
margin), not 3000ms.
+ CompletableFuture<Object> result = svc.run(testWriteCommand(), 500);
+
+ assertThat(result,
willThrow(ReplicationGroupUnavailableException.class, 1, TimeUnit.SECONDS));
+
+ long elapsed = System.currentTimeMillis() - startTime;
+ // Should complete much faster than the default response timeout
(3000ms).
+ assertTrue(elapsed < 1500, "Expected to fail within 1500ms but took "
+ elapsed + "ms");
+ }
+
+ // ==================== Shutdown Tests ====================
+
+ /**
+ * Tests that with timeout=0, if the client is shutting down during peer
trying,
+ * the future completes with ComponentStoppingException.
+ */
+ @Test
+ void testZeroTimeoutShutdownDuringPeerTrying() throws Exception {
+ List<CompletableFuture<Object>> pendingInvokes = new
CopyOnWriteArrayList<>();
+ CountDownLatch invokeStarted = new CountDownLatch(1);
+
+ // Return a future that doesn't complete immediately - we'll complete
it after shutdown.
+ when(messagingService.invoke(
+ any(InternalClusterNode.class),
+ argThat(this::isTestWriteCommand),
+ anyLong()
+ )).thenAnswer(invocation -> {
+ var pendingFuture = new CompletableFuture<>();
+ pendingInvokes.add(pendingFuture);
+ invokeStarted.countDown();
+ return pendingFuture;
+ });
+
+ PhysicalTopologyAwareRaftGroupService svc = startService();
+
+ // Start the command with timeout=0.
+ CompletableFuture<Object> result = svc.run(testWriteCommand(), 0);
+
+ // Wait for at least one invoke to be pending.
+ assertTrue(invokeStarted.await(5, TimeUnit.SECONDS), "Invoke should
start");
+
+ // Shutdown the service.
+ svc.shutdown();
+
+ // Complete the pending invoke with EPERM - the callback should see
shutdown.
+ for (var pending : pendingInvokes) {
+ pending.complete(FACTORY.errorResponse()
+ .errorCode(RaftError.EPERM.getNumber())
+ .build());
+ }
+
+ // The result should complete with ComponentStoppingException.
+ assertThat(result, willThrow(ComponentStoppingException.class, 5,
TimeUnit.SECONDS));
Review Comment:
Well, not really. See usages of StoppingExceptionFactories.
But for system groups indicateNodeStop() method is used, so will change it
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]