This is an automated email from the ASF dual-hosted git repository.

davidarthur pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/kafka.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 71875ec58eb KAFKA-18845: Fix flaky 
QuorumControllerTest#testUncleanShutdownBrokerElrEnabled (#19240)
71875ec58eb is described below

commit 71875ec58eb922773163c61335bcdb9980d4bf34
Author: PoAn Yang <[email protected]>
AuthorDate: Fri Mar 21 01:37:09 2025 +0800

    KAFKA-18845: Fix flaky 
QuorumControllerTest#testUncleanShutdownBrokerElrEnabled (#19240)
    
    There're two root causes:
    1. When we unclean shutdown `brokerToBeTheLeader`, we didn't wait for
    the result. That means when we send heartbeat to unfence broker, it has
    chance to use stale broker epoch to send the request. [0]
    2. We use different replica directory to unclean shutdown broker. Even
    if broker is unfenced, it cannot get an online directory, so the
    `brokerToBeTheLeader` cannot be elected as a new leader. [1]
    
    
    [0]
    
https://github.com/apache/kafka/blob/a5325e029e2493f22925af99482ad9fa1eb06947/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java#L484-L497
    
    [1]
    
https://github.com/apache/kafka/blob/a5325e029e2493f22925af99482ad9fa1eb06947/metadata/src/main/java/org/apache/kafka/controller/ReplicationControlManager.java#L2470-L2477
    
    Reviewers: David Arthur <[email protected]>
---
 .../apache/kafka/controller/QuorumControllerTest.java  | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git 
a/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java 
b/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java
index 3c2ae307955..908a850652c 100644
--- 
a/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java
+++ 
b/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java
@@ -380,6 +380,9 @@ public class QuorumControllerTest {
     @Test
     public void testUncleanShutdownBrokerElrEnabled() throws Throwable {
         List<Integer> allBrokers = List.of(1, 2, 3);
+        Map<Integer, Uuid> brokerLogDirs = allBrokers.stream().collect(
+            Collectors.toMap(identity(), brokerId -> Uuid.randomUuid())
+        );
         short replicationFactor = (short) allBrokers.size();
         long sessionTimeoutMillis = 500;
 
@@ -406,7 +409,7 @@ public class QuorumControllerTest {
                         setClusterId(active.clusterId()).
                         setFeatures(features).
                         setIncarnationId(Uuid.randomUuid()).
-                        setLogDirs(List.of(Uuid.randomUuid())).
+                        setLogDirs(List.of(brokerLogDirs.get(brokerId))).
                         setListeners(listeners));
                 brokerEpochs.put(brokerId, reply.get().epoch());
             }
@@ -474,7 +477,7 @@ public class QuorumControllerTest {
                     setClusterId(active.clusterId()).
                     setFeatures(features).
                     setIncarnationId(Uuid.randomUuid()).
-                    setLogDirs(List.of(Uuid.randomUuid())).
+                    
setLogDirs(List.of(brokerLogDirs.get(brokerToUncleanShutdown))).
                     setListeners(listeners));
             brokerEpochs.put(brokerToUncleanShutdown, reply.get().epoch());
             partition = active.replicationControl().getPartition(topicIdFoo, 
0);
@@ -482,7 +485,7 @@ public class QuorumControllerTest {
             assertArrayEquals(lastKnownElr, partition.lastKnownElr, 
partition.toString());
 
             // Unclean shutdown should not remove the last known ELR members.
-            active.registerBroker(
+            CompletableFuture<BrokerRegistrationReply> replyLeader = 
active.registerBroker(
                 anonymousContextFor(ApiKeys.BROKER_REGISTRATION),
                 new BrokerRegistrationRequestData().
                     setBrokerId(brokerToBeTheLeader).
@@ -490,8 +493,15 @@ public class QuorumControllerTest {
                     setFeatures(features).
                     setIncarnationId(Uuid.randomUuid()).
                     
setPreviousBrokerEpoch(brokerEpochs.get(brokerToBeTheLeader)).
-                    setLogDirs(List.of(Uuid.randomUuid())).
+                    
setLogDirs(List.of(brokerLogDirs.get(brokerToBeTheLeader))).
                     setListeners(listeners));
+            brokerEpochs.put(brokerToBeTheLeader, replyLeader.get().epoch());
+            partition = active.replicationControl().getPartition(topicIdFoo, 
0);
+            int[] expectedIsr = {brokerToBeTheLeader};
+            assertArrayEquals(expectedIsr, partition.elr, "The ELR for topic 
partition foo-0 was " + Arrays.toString(partition.elr) +
+                ". It is expected to be " + Arrays.toString(expectedIsr));
+            assertArrayEquals(lastKnownElr, partition.lastKnownElr, "The last 
known ELR for topic partition foo-0 was " + 
Arrays.toString(partition.lastKnownElr) +
+                ". It is expected to be " + Arrays.toString(lastKnownElr));
 
             // Unfence the last one in the ELR, it should be elected.
             sendBrokerHeartbeatToUnfenceBrokers(active, 
List.of(brokerToBeTheLeader), brokerEpochs);

Reply via email to