soarez commented on code in PR #14790: URL: https://github.com/apache/kafka/pull/14790#discussion_r1398483277
########## core/src/main/java/kafka/server/AssignmentsManager.java: ########## @@ -146,6 +153,9 @@ public void run() throws Exception { log.debug("Received new assignment {}", this); } pending.put(partition, this); + if (callback != null) { + callback.accept(DirectoryEventRequestState.QUEUED); + } Review Comment: Is this necessary? Can't we simply assume it's queued as soon as `onAssignment` returns? ########## core/src/main/java/kafka/server/AssignmentsManager.java: ########## @@ -336,6 +356,27 @@ private static boolean responseIsError(ClientResponse response) { return false; } + private static void applyCallbackOnComplete( + AssignReplicasToDirsResponseData data, + Map<TopicIdPartition, AssignmentEvent> sent) { + for (AssignReplicasToDirsResponseData.DirectoryData directory : data.directories()) { + for (AssignReplicasToDirsResponseData.TopicData topic : directory.topics()) { + for (AssignReplicasToDirsResponseData.PartitionData partition : topic.partitions()) { + TopicIdPartition topicPartition = new TopicIdPartition(topic.topicId(), partition.partitionIndex()); + AssignmentEvent event = sent.get(topicPartition); + if (event == null) { + log.error("AssignReplicasToDirsResponse contains unexpected partition {} into directory {}. No callback to apply.", partition, directory.id()); + } else { + Errors error = Errors.forCode(partition.errorCode()); + if (error == Errors.NONE && event.callback != null) { + event.callback.accept(DirectoryEventRequestState.COMPLETED); + } Review Comment: Instead of repeating the processing of the response for errors, we can Set.diff the result of `filterFailures` against `inflight` at the call site of this function. 
########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -76,13 +82,49 @@ class ReplicaAlterLogDirsThread(name: String, futureLog.updateHighWatermark(partitionData.highWatermark) futureLog.maybeIncrementLogStartOffset(partitionData.logStartOffset, LogStartOffsetIncrementReason.LeaderOffsetIncremented) - if (partition.maybeReplaceCurrentWithFutureReplica()) - removePartitions(Set(topicPartition)) + directoryEventHandler match { + case DirectoryEventHandler.NOOP => + if (partition.maybeReplaceCurrentWithFutureReplica()) + removePartitions(Set(topicPartition)) + case _ => + maybePromoteFutureReplica(topicPartition, partition) + } quota.record(records.sizeInBytes) logAppendInfo } + // Visible for testing + def updatedAssignmentRequestStat(topicPartition: TopicPartition)(state: DirectoryEventRequestState): Unit = { + assignmentRequestStates.put(topicPartition, state) + } + private def maybePromoteFutureReplica(topicPartition: TopicPartition, partition: Partition) = { + val partitionRequestState = Option(assignmentRequestStates.get(topicPartition)) + val topicId = partition.topicId + if (topicId.isEmpty) + throw new IllegalStateException(s"Topic ${topicPartition.topic()} exists but its ID doesn't exist.") + + partitionRequestState match { + case None => + // Schedule assignment request and don't promote the future replica yet until the controller accepted the request. 
+ partition.maybeFutureReplicaCaughtUp(_ => { + partition.futureReplicaDirectoryId() + .map { + directoryEventHandler.handleAssignment(new TopicIdPartition(topicId.get, topicPartition.partition()), _, + updatedAssignmentRequestStat(topicPartition)(_)) Review Comment: ```suggestion updatedAssignmentRequestState(topicPartition)(_)) ``` ########## core/src/main/java/kafka/server/AssignmentsManager.java: ########## @@ -210,6 +220,9 @@ public void run() throws Exception { channelManager.sendRequest(new AssignReplicasToDirsRequest.Builder( buildRequestData(brokerId, brokerEpochSupplier.get(), assignment)), new AssignReplicasToDirsRequestCompletionHandler()); + inflight.values().stream() + .filter(assignmentEvent -> assignmentEvent.callback != null) + .forEach(assignmentEvent -> assignmentEvent.callback.accept(DirectoryEventRequestState.DISPATCHED)); Review Comment: I don't think we need the state in the callback. It makes sense to distinguish between the 3 states, but I don't see why the callback should fire for anything but `COMPLETED`. ########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -40,6 +44,8 @@ class ReplicaAlterLogDirsThread(name: String, isInterruptible = false, brokerTopicStats) { + private val assignmentRequestStates: ConcurrentHashMap[TopicPartition, DirectoryEventRequestState] = new ConcurrentHashMap() Review Comment: Don't we also need to consider this state when the reassignment is cancelled? 
########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -76,13 +82,49 @@ class ReplicaAlterLogDirsThread(name: String, futureLog.updateHighWatermark(partitionData.highWatermark) futureLog.maybeIncrementLogStartOffset(partitionData.logStartOffset, LogStartOffsetIncrementReason.LeaderOffsetIncremented) - if (partition.maybeReplaceCurrentWithFutureReplica()) - removePartitions(Set(topicPartition)) + directoryEventHandler match { + case DirectoryEventHandler.NOOP => + if (partition.maybeReplaceCurrentWithFutureReplica()) + removePartitions(Set(topicPartition)) + case _ => + maybePromoteFutureReplica(topicPartition, partition) + } quota.record(records.sizeInBytes) logAppendInfo } + // Visible for testing + def updatedAssignmentRequestStat(topicPartition: TopicPartition)(state: DirectoryEventRequestState): Unit = { + assignmentRequestStates.put(topicPartition, state) + } + private def maybePromoteFutureReplica(topicPartition: TopicPartition, partition: Partition) = { + val partitionRequestState = Option(assignmentRequestStates.get(topicPartition)) + val topicId = partition.topicId + if (topicId.isEmpty) + throw new IllegalStateException(s"Topic ${topicPartition.topic()} exists but its ID doesn't exist.") + + partitionRequestState match { + case None => + // Schedule assignment request and don't promote the future replica yet until the controller accepted the request. + partition.maybeFutureReplicaCaughtUp(_ => { + partition.futureReplicaDirectoryId() + .map { + directoryEventHandler.handleAssignment(new TopicIdPartition(topicId.get, topicPartition.partition()), _, + updatedAssignmentRequestStat(topicPartition)(_)) + } + }) + case Some(DirectoryEventRequestState.COMPLETED) => + // Promote future replica if controller accepted the request and the replica caught-up with the original log. 
+ if (partition.maybeReplaceCurrentWithFutureReplica()) { + removePartitions(Set(topicPartition)) + assignmentRequestStates.remove(topicPartition) + } + case _ => Review Comment: It seems we don't actually care about `DISPATCHED`, maybe we don't need that state. ########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -76,13 +82,49 @@ class ReplicaAlterLogDirsThread(name: String, futureLog.updateHighWatermark(partitionData.highWatermark) futureLog.maybeIncrementLogStartOffset(partitionData.logStartOffset, LogStartOffsetIncrementReason.LeaderOffsetIncremented) - if (partition.maybeReplaceCurrentWithFutureReplica()) - removePartitions(Set(topicPartition)) + directoryEventHandler match { + case DirectoryEventHandler.NOOP => + if (partition.maybeReplaceCurrentWithFutureReplica()) + removePartitions(Set(topicPartition)) + case _ => + maybePromoteFutureReplica(topicPartition, partition) + } quota.record(records.sizeInBytes) logAppendInfo } + // Visible for testing + def updatedAssignmentRequestStat(topicPartition: TopicPartition)(state: DirectoryEventRequestState): Unit = { + assignmentRequestStates.put(topicPartition, state) + } + private def maybePromoteFutureReplica(topicPartition: TopicPartition, partition: Partition) = { + val partitionRequestState = Option(assignmentRequestStates.get(topicPartition)) + val topicId = partition.topicId + if (topicId.isEmpty) + throw new IllegalStateException(s"Topic ${topicPartition.topic()} exists but its ID doesn't exist.") + + partitionRequestState match { + case None => + // Schedule assignment request and don't promote the future replica yet until the controller accepted the request. Review Comment: ```suggestion // Schedule assignment request and don't promote the future replica yet until the controller has accepted the request. 
``` ########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -76,13 +82,49 @@ class ReplicaAlterLogDirsThread(name: String, futureLog.updateHighWatermark(partitionData.highWatermark) futureLog.maybeIncrementLogStartOffset(partitionData.logStartOffset, LogStartOffsetIncrementReason.LeaderOffsetIncremented) - if (partition.maybeReplaceCurrentWithFutureReplica()) - removePartitions(Set(topicPartition)) + directoryEventHandler match { + case DirectoryEventHandler.NOOP => + if (partition.maybeReplaceCurrentWithFutureReplica()) + removePartitions(Set(topicPartition)) + case _ => + maybePromoteFutureReplica(topicPartition, partition) + } quota.record(records.sizeInBytes) logAppendInfo } + // Visible for testing + def updatedAssignmentRequestStat(topicPartition: TopicPartition)(state: DirectoryEventRequestState): Unit = { Review Comment: ```suggestion def updatedAssignmentRequestState(topicPartition: TopicPartition)(state: DirectoryEventRequestState): Unit = { ``` ########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -76,13 +82,49 @@ class ReplicaAlterLogDirsThread(name: String, futureLog.updateHighWatermark(partitionData.highWatermark) futureLog.maybeIncrementLogStartOffset(partitionData.logStartOffset, LogStartOffsetIncrementReason.LeaderOffsetIncremented) - if (partition.maybeReplaceCurrentWithFutureReplica()) - removePartitions(Set(topicPartition)) + directoryEventHandler match { + case DirectoryEventHandler.NOOP => + if (partition.maybeReplaceCurrentWithFutureReplica()) + removePartitions(Set(topicPartition)) + case _ => + maybePromoteFutureReplica(topicPartition, partition) + } quota.record(records.sizeInBytes) logAppendInfo } + // Visible for testing + def updatedAssignmentRequestStat(topicPartition: TopicPartition)(state: DirectoryEventRequestState): Unit = { + assignmentRequestStates.put(topicPartition, state) + } + private def maybePromoteFutureReplica(topicPartition: TopicPartition, 
partition: Partition) = { + val partitionRequestState = Option(assignmentRequestStates.get(topicPartition)) + val topicId = partition.topicId + if (topicId.isEmpty) + throw new IllegalStateException(s"Topic ${topicPartition.topic()} exists but its ID doesn't exist.") Review Comment: ```suggestion throw new IllegalStateException(s"Topic ${topicPartition.topic()} does not have an ID") ``` ########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -76,13 +82,49 @@ class ReplicaAlterLogDirsThread(name: String, futureLog.updateHighWatermark(partitionData.highWatermark) futureLog.maybeIncrementLogStartOffset(partitionData.logStartOffset, LogStartOffsetIncrementReason.LeaderOffsetIncremented) - if (partition.maybeReplaceCurrentWithFutureReplica()) - removePartitions(Set(topicPartition)) + directoryEventHandler match { + case DirectoryEventHandler.NOOP => + if (partition.maybeReplaceCurrentWithFutureReplica()) + removePartitions(Set(topicPartition)) + case _ => + maybePromoteFutureReplica(topicPartition, partition) + } Review Comment: Perhaps we can make `DirectoryEventHandler.NOOP` immediately call back with success so we don't need this extra logic. 
########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -76,13 +82,49 @@ class ReplicaAlterLogDirsThread(name: String, futureLog.updateHighWatermark(partitionData.highWatermark) futureLog.maybeIncrementLogStartOffset(partitionData.logStartOffset, LogStartOffsetIncrementReason.LeaderOffsetIncremented) - if (partition.maybeReplaceCurrentWithFutureReplica()) - removePartitions(Set(topicPartition)) + directoryEventHandler match { + case DirectoryEventHandler.NOOP => + if (partition.maybeReplaceCurrentWithFutureReplica()) + removePartitions(Set(topicPartition)) + case _ => + maybePromoteFutureReplica(topicPartition, partition) + } quota.record(records.sizeInBytes) logAppendInfo } + // Visible for testing + def updatedAssignmentRequestStat(topicPartition: TopicPartition)(state: DirectoryEventRequestState): Unit = { + assignmentRequestStates.put(topicPartition, state) + } + private def maybePromoteFutureReplica(topicPartition: TopicPartition, partition: Partition) = { + val partitionRequestState = Option(assignmentRequestStates.get(topicPartition)) + val topicId = partition.topicId + if (topicId.isEmpty) + throw new IllegalStateException(s"Topic ${topicPartition.topic()} exists but its ID doesn't exist.") + + partitionRequestState match { + case None => + // Schedule assignment request and don't promote the future replica yet until the controller accepted the request. + partition.maybeFutureReplicaCaughtUp(_ => { + partition.futureReplicaDirectoryId() + .map { + directoryEventHandler.handleAssignment(new TopicIdPartition(topicId.get, topicPartition.partition()), _, + updatedAssignmentRequestStat(topicPartition)(_)) + } + }) + case Some(DirectoryEventRequestState.COMPLETED) => + // Promote future replica if controller accepted the request and the replica caught-up with the original log. 
+ if (partition.maybeReplaceCurrentWithFutureReplica()) { + removePartitions(Set(topicPartition)) + assignmentRequestStates.remove(topicPartition) + } + case _ => + log.info("Waiting for AssignmentRequest to succeed before promoting the future replica.") Review Comment: ```suggestion log.trace("Waiting for AssignmentRequest to succeed before promoting the future replica.") ``` ########## core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala: ########## @@ -76,13 +82,49 @@ class ReplicaAlterLogDirsThread(name: String, futureLog.updateHighWatermark(partitionData.highWatermark) futureLog.maybeIncrementLogStartOffset(partitionData.logStartOffset, LogStartOffsetIncrementReason.LeaderOffsetIncremented) - if (partition.maybeReplaceCurrentWithFutureReplica()) - removePartitions(Set(topicPartition)) + directoryEventHandler match { + case DirectoryEventHandler.NOOP => + if (partition.maybeReplaceCurrentWithFutureReplica()) + removePartitions(Set(topicPartition)) + case _ => + maybePromoteFutureReplica(topicPartition, partition) + } quota.record(records.sizeInBytes) logAppendInfo } + // Visible for testing + def updatedAssignmentRequestStat(topicPartition: TopicPartition)(state: DirectoryEventRequestState): Unit = { + assignmentRequestStates.put(topicPartition, state) + } + private def maybePromoteFutureReplica(topicPartition: TopicPartition, partition: Partition) = { + val partitionRequestState = Option(assignmentRequestStates.get(topicPartition)) + val topicId = partition.topicId + if (topicId.isEmpty) + throw new IllegalStateException(s"Topic ${topicPartition.topic()} exists but its ID doesn't exist.") + + partitionRequestState match { + case None => + // Schedule assignment request and don't promote the future replica yet until the controller accepted the request. 
+ partition.maybeFutureReplicaCaughtUp(_ => { + partition.futureReplicaDirectoryId() + .map { + directoryEventHandler.handleAssignment(new TopicIdPartition(topicId.get, topicPartition.partition()), _, + updatedAssignmentRequestStat(topicPartition)(_)) Review Comment: I think we can just update the state to `QUEUED` after the `directoryEventHandler.handleAssignment()` call. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org