C0urante commented on code in PR #13178: URL: https://github.com/apache/kafka/pull/13178#discussion_r1104643146
########## connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSyncStore.java: ########## @@ -16,40 +16,76 @@ */ package org.apache.kafka.connect.mirror; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.errors.WakeupException; import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.utils.Time; import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.connect.util.KafkaBasedLog; +import org.apache.kafka.connect.util.TopicAdmin; import java.util.Map; import java.util.HashMap; -import java.util.Collections; import java.time.Duration; import java.util.Optional; import java.util.OptionalLong; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; /** Used internally by MirrorMaker. Stores offset syncs and performs offset translation. */ class OffsetSyncStore implements AutoCloseable { - private final KafkaConsumer<byte[], byte[]> consumer; + private final KafkaBasedLog<byte[], byte[]> backingStore; private final Map<TopicPartition, OffsetSync> offsetSyncs = new HashMap<>(); - private final TopicPartition offsetSyncTopicPartition; + private final TopicAdmin admin; OffsetSyncStore(MirrorCheckpointConfig config) { - consumer = new KafkaConsumer<>(config.offsetSyncsTopicConsumerConfig(), - new ByteArrayDeserializer(), new ByteArrayDeserializer()); - offsetSyncTopicPartition = new TopicPartition(config.offsetSyncsTopic(), 0); Review Comment: This leads to a change in behavior since we'll end up consuming from all partitions in the offset syncs topic instead of just partition 0. 
We intentionally [write every offset sync to partition zero](https://github.com/apache/kafka/blob/8cfafba2794562840b0f1c537e304f084b9359cf/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorSourceTask.java#L249) and [create the topic with a single partition](https://github.com/apache/kafka/blob/8cfafba2794562840b0f1c537e304f084b9359cf/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorSourceConnector.java#L361), but the topic may have been created out-of-band and there may be other information in it which has not been produced by MM2 that we shouldn't consume. Could we expand the `KafkaBasedLog` API to support reading from a specific subset of the partitions for a topic, possibly by adding a `protected List<TopicPartition> assignedPartitions(List<PartitionInfo> partitionInfos)` method that can be overridden by subclasses? This would allow us to completely preserve the existing behavior. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org