junrao commented on code in PR #13451: URL: https://github.com/apache/kafka/pull/13451#discussion_r1501252737
########## core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala: ########## @@ -572,6 +572,48 @@ class ControllerIntegrationTest extends QuorumTestHarness { "failed to remove reassign partitions path after completion") } + @Test + def testAutoPreferredReplicaLeaderElectionFollowedByAnotherNewReassignment(): Unit = { + servers = makeServers(5, autoLeaderRebalanceEnable = true) + val controller = getController().kafkaController + val tp = new TopicPartition("t", 0) + val assignment = Map(tp.partition -> Seq(0, 2, 4)) + TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers) + + // Shutdown broker 2 and reassign partition tp from [0, 2, 4] to [1, 0, 2] to create a stuck reassignment. + servers(2).shutdown() + servers(2).awaitShutdown() + val reassignment = Map(tp -> Some(Seq(1, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment, _ => ())) + + // Make sure broker 1 is elected as leader (preferred) of partition tp automatically + // even though the reassignment is still ongoing. + waitForPartitionState(tp, firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 3, Review Comment: autoLeaderRebalanceEnable is false. So, the leader may never move to broker 1. Perhaps we could do a manual preferred leader election first. 
########## core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala: ########## @@ -572,6 +572,48 @@ class ControllerIntegrationTest extends QuorumTestHarness { "failed to remove reassign partitions path after completion") } + @Test + def testAutoPreferredReplicaLeaderElectionFollowedByAnotherNewReassignment(): Unit = { + servers = makeServers(5, autoLeaderRebalanceEnable = true) + val controller = getController().kafkaController + val tp = new TopicPartition("t", 0) + val assignment = Map(tp.partition -> Seq(0, 2, 4)) + TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers) + + // Shutdown broker 2 and reassign partition tp from [0, 2, 4] to [1, 0, 2] to create a stuck reassignment. + servers(2).shutdown() + servers(2).awaitShutdown() + val reassignment = Map(tp -> Some(Seq(1, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment, _ => ())) Review Comment: Controller could have been on broker 2. Could we keep obtaining the controller until it's not on broker 2 before using it? ########## core/src/main/scala/kafka/controller/KafkaController.scala: ########## @@ -1894,10 +1894,16 @@ class KafkaController(val config: KafkaConfig, val currentAssignment = controllerContext.partitionFullReplicaAssignment(topicPartition) val newAssignment = currentAssignment.reassignTo(replicas) val areNewReplicasAlive = newAssignment.addingReplicas.toSet.subsetOf(controllerContext.liveBrokerIds) + val currentLeader = controllerContext.partitionLeadershipInfo(topicPartition).get.leaderAndIsr.leader + val unneededAddingReplicas = currentAssignment.replicas.diff(newAssignment.replicas) if (!areNewReplicasAlive) Some(new ApiError(Errors.INVALID_REPLICA_ASSIGNMENT, s"Replica assignment has brokers that are not alive. Replica list: " + s"${newAssignment.addingReplicas}, live broker list: ${controllerContext.liveBrokerIds}")) + else if (unneededAddingReplicas.contains(currentLeader)) Review Comment: Thanks. 
This change looks fine then. ########## core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala: ########## @@ -572,6 +572,48 @@ class ControllerIntegrationTest extends QuorumTestHarness { "failed to remove reassign partitions path after completion") } + @Test + def testAutoPreferredReplicaLeaderElectionFollowedByAnotherNewReassignment(): Unit = { + servers = makeServers(5, autoLeaderRebalanceEnable = true) + val controller = getController().kafkaController + val tp = new TopicPartition("t", 0) + val assignment = Map(tp.partition -> Seq(0, 2, 4)) + TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers) + + // Shutdown broker 2 and reassign partition tp from [0, 2, 4] to [1, 0, 2] to create a stuck reassignment. + servers(2).shutdown() + servers(2).awaitShutdown() + val reassignment = Map(tp -> Some(Seq(1, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment, _ => ())) + + // Make sure broker 1 is elected as leader (preferred) of partition tp automatically + // even though the reassignment is still ongoing. + waitForPartitionState(tp, firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 3, + "failed to get expected partition state after auto preferred replica leader election") + + // Submit another reassignment to replace the current leader 1 with broker 3 which + // should be rejected. + val reassignment_update = Map(tp -> Some(Seq(3, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment_update, _ => ())) + + // Start broker 2 and make sure the reassignment which was stuck can be fulfilled. 
+ servers(2).startup() + waitForPartitionState(tp, firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 4, + "failed to get expected partition state after broker startup") + + TestUtils.waitUntilTrue(() => { + val leaderIsrAndControllerEpochMap = zkClient.getTopicPartitionStates(Seq(tp)) + leaderIsrAndControllerEpochMap.contains(tp) && + isExpectedPartitionState(leaderIsrAndControllerEpochMap(tp), firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 6) && Review Comment: Why does the epoch go up by 2? Does it always happen? ########## core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala: ########## @@ -572,6 +572,48 @@ class ControllerIntegrationTest extends QuorumTestHarness { "failed to remove reassign partitions path after completion") } + @Test + def testAutoPreferredReplicaLeaderElectionFollowedByAnotherNewReassignment(): Unit = { + servers = makeServers(5, autoLeaderRebalanceEnable = true) + val controller = getController().kafkaController + val tp = new TopicPartition("t", 0) + val assignment = Map(tp.partition -> Seq(0, 2, 4)) + TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers) + + // Shutdown broker 2 and reassign partition tp from [0, 2, 4] to [1, 0, 2] to create a stuck reassignment. + servers(2).shutdown() + servers(2).awaitShutdown() + val reassignment = Map(tp -> Some(Seq(1, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment, _ => ())) + + // Make sure broker 1 is elected as leader (preferred) of partition tp automatically + // even though the reassignment is still ongoing. + waitForPartitionState(tp, firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 3, + "failed to get expected partition state after auto preferred replica leader election") + + // Submit another reassignment to replace the current leader 1 with broker 3 which + // should be rejected. 
+ val reassignment_update = Map(tp -> Some(Seq(3, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment_update, _ => ())) + + // Start broker 2 and make sure the reassignment which was stuck can be fulfilled. + servers(2).startup() + waitForPartitionState(tp, firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 4, + "failed to get expected partition state after broker startup") + + TestUtils.waitUntilTrue(() => { + val leaderIsrAndControllerEpochMap = zkClient.getTopicPartitionStates(Seq(tp)) + leaderIsrAndControllerEpochMap.contains(tp) && + isExpectedPartitionState(leaderIsrAndControllerEpochMap(tp), firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 6) && + leaderIsrAndControllerEpochMap(tp).leaderAndIsr.isr.toSet == Set(1, 0, 2) + }, "failed to get expected partition state for assignment") + + controller.eventManager.put(ListPartitionReassignments(None, { + case Left(results) => assert(results.isEmpty) Review Comment: This is called asynchronously. How do we make sure that it's called before the test ends? ########## core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala: ########## @@ -572,6 +572,48 @@ class ControllerIntegrationTest extends QuorumTestHarness { "failed to remove reassign partitions path after completion") } + @Test + def testAutoPreferredReplicaLeaderElectionFollowedByAnotherNewReassignment(): Unit = { + servers = makeServers(5, autoLeaderRebalanceEnable = true) + val controller = getController().kafkaController + val tp = new TopicPartition("t", 0) + val assignment = Map(tp.partition -> Seq(0, 2, 4)) + TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers) + + // Shutdown broker 2 and reassign partition tp from [0, 2, 4] to [1, 0, 2] to create a stuck reassignment. 
+ servers(2).shutdown() + servers(2).awaitShutdown() + val reassignment = Map(tp -> Some(Seq(1, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment, _ => ())) + + // Make sure broker 1 is elected as leader (preferred) of partition tp automatically + // even though the reassignment is still ongoing. + waitForPartitionState(tp, firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 3, + "failed to get expected partition state after auto preferred replica leader election") + + // Submit another reassignment to replace the current leader 1 with broker 3 which + // should be rejected. + val reassignment_update = Map(tp -> Some(Seq(3, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment_update, _ => ())) + + // Start broker 2 and make sure the reassignment which was stuck can be fulfilled. + servers(2).startup() + waitForPartitionState(tp, firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 4, + "failed to get expected partition state after broker startup") + + TestUtils.waitUntilTrue(() => { + val leaderIsrAndControllerEpochMap = zkClient.getTopicPartitionStates(Seq(tp)) + leaderIsrAndControllerEpochMap.contains(tp) && + isExpectedPartitionState(leaderIsrAndControllerEpochMap(tp), firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 6) && + leaderIsrAndControllerEpochMap(tp).leaderAndIsr.isr.toSet == Set(1, 0, 2) + }, "failed to get expected partition state for assignment") + + controller.eventManager.put(ListPartitionReassignments(None, { + case Left(results) => assert(results.isEmpty) + case Right(e) => assertEquals(Errors.NOT_CONTROLLER, e.error()) Review Comment: Why would we get Errors.NOT_CONTROLLER? The controller hasn't changed, right? 
########## core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala: ########## @@ -572,6 +572,48 @@ class ControllerIntegrationTest extends QuorumTestHarness { "failed to remove reassign partitions path after completion") } + @Test + def testAutoPreferredReplicaLeaderElectionFollowedByAnotherNewReassignment(): Unit = { + servers = makeServers(5, autoLeaderRebalanceEnable = true) + val controller = getController().kafkaController + val tp = new TopicPartition("t", 0) + val assignment = Map(tp.partition -> Seq(0, 2, 4)) + TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers) + + // Shutdown broker 2 and reassign partition tp from [0, 2, 4] to [1, 0, 2] to create a stuck reassignment. + servers(2).shutdown() + servers(2).awaitShutdown() + val reassignment = Map(tp -> Some(Seq(1, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment, _ => ())) + + // Make sure broker 1 is elected as leader (preferred) of partition tp automatically + // even though the reassignment is still ongoing. + waitForPartitionState(tp, firstControllerEpoch, 1, LeaderAndIsr.InitialLeaderEpoch + 3, + "failed to get expected partition state after auto preferred replica leader election") + + // Submit another reassignment to replace the current leader 1 with broker 3 which + // should be rejected. + val reassignment_update = Map(tp -> Some(Seq(3, 0, 2))) + controller.eventManager.put(ApiPartitionReassignment(reassignment_update, _ => ())) Review Comment: Could we check in the callback that we get an error for the new re-assignment? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org