kowshik commented on a change in pull request #10579: URL: https://github.com/apache/kafka/pull/10579#discussion_r643693257
########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ProducerManager.java ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; + +/** + * This class is responsible for publishing messages into the remote log metadata topic partitions. 
+ */ +public class ProducerManager implements Closeable { + private static final Logger log = LoggerFactory.getLogger(ProducerManager.class); + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaProducer<byte[], byte[]> producer; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + private final TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + + private volatile boolean close = false; + + public ProducerManager(TopicBasedRemoteLogMetadataManagerConfig rlmmConfig, + RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner) { + this.rlmmConfig = rlmmConfig; + this.producer = new KafkaProducer<>(rlmmConfig.producerProperties()); + topicPartitioner = rlmmTopicPartitioner; + } + + public RecordMetadata publishMessage(TopicIdPartition topicIdPartition, + RemoteLogMetadata remoteLogMetadata) throws KafkaException { + ensureNotClosed(); + + int metadataPartitionNo = topicPartitioner.metadataPartition(topicIdPartition); + log.debug("Publishing metadata message of partition:[{}] into metadata topic partition:[{}] with payload: [{}]", + topicIdPartition, metadataPartitionNo, remoteLogMetadata); + + ProducerCallback callback = new ProducerCallback(); + try { + if (metadataPartitionNo >= rlmmConfig.metadataTopicPartitionsCount()) { + // This should never occur as long as metadata partitions always remain the same. 
+ throw new KafkaException("Chosen partition no " + metadataPartitionNo + + " is more than the partition count: " + rlmmConfig.metadataTopicPartitionsCount()); + } + producer.send(new ProducerRecord<>(rlmmConfig.remoteLogMetadataTopicName(), metadataPartitionNo, null, + serde.serialize(remoteLogMetadata)), callback).get(); + } catch (KafkaException e) { + throw e; + } catch (Exception e) { + throw new KafkaException("Exception occurred while publishing message for topicIdPartition: " + topicIdPartition, e); + } + + if (callback.exception() != null) { + Exception ex = callback.exception(); + if (ex instanceof KafkaException) { + throw (KafkaException) ex; + } else { + throw new KafkaException(ex); + } + } else { + return callback.recordMetadata(); + } + } + + private void ensureNotClosed() { + if (close) { + throw new IllegalStateException("This instance is already set in close state."); Review comment: s/set in close state/closed ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ProducerManager.java ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; + +/** + * This class is responsible for publishing messages into the remote log metadata topic partitions. + */ +public class ProducerManager implements Closeable { + private static final Logger log = LoggerFactory.getLogger(ProducerManager.class); + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaProducer<byte[], byte[]> producer; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + private final TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + + private volatile boolean close = false; + + public ProducerManager(TopicBasedRemoteLogMetadataManagerConfig rlmmConfig, + RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner) { + this.rlmmConfig = rlmmConfig; + this.producer = new KafkaProducer<>(rlmmConfig.producerProperties()); + topicPartitioner = rlmmTopicPartitioner; + } + + public RecordMetadata publishMessage(TopicIdPartition topicIdPartition, + RemoteLogMetadata remoteLogMetadata) throws KafkaException { Review comment: It appears you could eliminate the additional `topicIdPartition` parameter and instead use the value returned by `remoteLogMetadata.topicIdPartition()` API. 
########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerConfig.java ########## @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.apache.kafka.clients.CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG; +import static org.apache.kafka.common.config.ConfigDef.Importance.LOW; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; +import static org.apache.kafka.common.config.ConfigDef.Type.INT; +import static org.apache.kafka.common.config.ConfigDef.Type.LONG; + +public final class TopicBasedRemoteLogMetadataManagerConfig 
{ + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerConfig.class.getName()); + + public static final String REMOTE_LOG_METADATA_TOPIC_NAME = "__remote_log_metadata"; Review comment: It seems that we have internal topics specified in `org.apache.kafka.common.internals.Topic` class. Don't we want this new internal topic to be defined in the `Topic` class, together with other internal topics? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerConfig.java ########## @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.apache.kafka.clients.CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG; +import static org.apache.kafka.common.config.ConfigDef.Importance.LOW; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; +import static org.apache.kafka.common.config.ConfigDef.Type.INT; +import static org.apache.kafka.common.config.ConfigDef.Type.LONG; + +public final class TopicBasedRemoteLogMetadataManagerConfig { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerConfig.class.getName()); + + public static final String REMOTE_LOG_METADATA_TOPIC_NAME = "__remote_log_metadata"; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP = "remote.log.metadata.topic.replication.factor"; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP = "remote.log.metadata.topic.num.partitions"; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP = "remote.log.metadata.topic.retention.ms"; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP = "remote.log.metadata.publish.wait.ms"; + + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS = 50; + public static final long DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS = -1L; + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR = 3; + public static final long 
DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS = 60 * 1000L; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC = "Replication factor of remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC = "The number of partitions for remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC = "Remote log metadata topic log retention in milli seconds." + + "Default: -1, that means unlimited. Users can configure this value based on their use cases. " + + "To avoid any data loss, this value should be more than the maximum retention period of any topic enabled with " + + "tiered storage in the cluster."; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC = "The amount of time in milli seconds to wait for the local consumer to " + Review comment: Is this the timeout for how long we want the client to wait to consume the message that it produces to `__remote_log_metadata` topic? If yes, then don't we want this timeout to be unlimited i.e. we wait as long as it takes to consume the published event? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ProducerManager.java ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; + +/** + * This class is responsible for publishing messages into the remote log metadata topic partitions. + */ +public class ProducerManager implements Closeable { + private static final Logger log = LoggerFactory.getLogger(ProducerManager.class); + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaProducer<byte[], byte[]> producer; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + private final TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + + private volatile boolean close = false; Review comment: Can you please document what state this boolean represents? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ConsumerTask.java ########## @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_NAME; + +/** + * This class is responsible for consuming messages from remote log metadata topic ({@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}) + * partitions and maintain the state of the remote log segment metadata. 
It gives an API to add or remove + * for what topic partition's metadata should be consumed by this instance using + * {{@link #addAssignmentsForPartitions(Set)}} and {@link #removeAssignmentsForPartitions(Set)} respectively. + * <p> + * When a broker is started, controller sends topic partitions that this broker is leader or follower for and the + * partitions to be deleted. This class receives those notifications with + * {@link #addAssignmentsForPartitions(Set)} and {@link #removeAssignmentsForPartitions(Set)} assigns consumer for the + * respective remote log metadata partitions by using {@link RemoteLogMetadataTopicPartitioner#metadataPartition(TopicIdPartition)}. + * Any leadership changes later are called through the same API. We will remove the partitions that are deleted from + * this broker which are received through {@link #removeAssignmentsForPartitions(Set)}. + * <p> + * After receiving these events it invokes {@link RemotePartitionMetadataEventHandler#handleRemoteLogSegmentMetadata(RemoteLogSegmentMetadata)}, + * which maintains in-memory representation of the state of {@link RemoteLogSegmentMetadata}. + */ +class ConsumerTask implements Runnable, Closeable { + private static final Logger log = LoggerFactory.getLogger(ConsumerTask.class); + + private static final long POLL_INTERVAL_MS = 30L; + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaConsumer<byte[], byte[]> consumer; + private final RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + + private volatile boolean close = false; + private volatile boolean assignPartitions = false; + + private final Object assignPartitionsLock = new Object(); + + // Remote log metadata topic partitions that consumer is assigned to. 
+ private volatile Set<Integer> assignedMetaPartitions = Collections.emptySet(); + + // User topic partitions that this broker is a leader/follower for. + private Set<TopicIdPartition> assignedTopicPartitions = Collections.emptySet(); + + // Map of remote log metadata topic partition to consumed offsets. + private final Map<Integer, Long> partitionToConsumedOffsets = new ConcurrentHashMap<>(); + + public ConsumerTask(KafkaConsumer<byte[], byte[]> consumer, + RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler, + RemoteLogMetadataTopicPartitioner topicPartitioner) { + this.consumer = consumer; + this.remotePartitionMetadataEventHandler = remotePartitionMetadataEventHandler; + this.topicPartitioner = topicPartitioner; + } + + @Override + public void run() { + log.info("Started Consumer task thread."); + try { + while (!close) { + Set<Integer> assignedMetaPartitionsSnapshot = maybeWaitForPartitionsAssignment(); + + if (!assignedMetaPartitionsSnapshot.isEmpty()) { + executeReassignment(assignedMetaPartitionsSnapshot); + } + + log.info("Polling consumer to receive remote log metadata topic records"); + ConsumerRecords<byte[], byte[]> consumerRecords + = consumer.poll(Duration.ofSeconds(POLL_INTERVAL_MS)); + for (ConsumerRecord<byte[], byte[]> record : consumerRecords) { Review comment: Is it useful to assert that `record.key()` is empty before the key is ignored below? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogMetadataTopicPartitioner.java ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; + +public class RemoteLogMetadataTopicPartitioner { + public static final Logger log = LoggerFactory.getLogger(RemoteLogMetadataTopicPartitioner.class); + private final int noOfMetadataTopicPartitions; + + public RemoteLogMetadataTopicPartitioner(int noOfMetadataTopicPartitions) { Review comment: s/noOfMetadataTopicPartitions/numMetadataTopicPartitions ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemotePartitionMetadataStore.java ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentId; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteState; +import org.apache.kafka.server.log.remote.storage.RemoteResourceNotFoundException; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; + +public class RemotePartitionMetadataStore extends RemotePartitionMetadataEventHandler implements Closeable { + private static final Logger log = LoggerFactory.getLogger(RemotePartitionMetadataStore.class); + + private Map<TopicIdPartition, RemotePartitionDeleteMetadata> idToPartitionDeleteMetadata = Review comment: It seems these 2 attributes can be marked final if you call `Map.clear()` in `close()` instead of replacing the reference. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ProducerManager.java ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; + +/** + * This class is responsible for publishing messages into the remote log metadata topic partitions. + */ +public class ProducerManager implements Closeable { Review comment: It appears this class does not have unit tests currently. Is there a plan to add unit tests? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerConfig.java ########## @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.apache.kafka.clients.CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG; +import static org.apache.kafka.common.config.ConfigDef.Importance.LOW; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; +import static org.apache.kafka.common.config.ConfigDef.Type.INT; +import static org.apache.kafka.common.config.ConfigDef.Type.LONG; + +public final class TopicBasedRemoteLogMetadataManagerConfig { Review comment: Can we add a comment for this class please? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ProducerManager.java ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; + +/** + * This class is responsible for publishing messages into the remote log metadata topic partitions. 
+ */ +public class ProducerManager implements Closeable { + private static final Logger log = LoggerFactory.getLogger(ProducerManager.class); + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaProducer<byte[], byte[]> producer; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + private final TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + + private volatile boolean close = false; + + public ProducerManager(TopicBasedRemoteLogMetadataManagerConfig rlmmConfig, + RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner) { + this.rlmmConfig = rlmmConfig; + this.producer = new KafkaProducer<>(rlmmConfig.producerProperties()); + topicPartitioner = rlmmTopicPartitioner; + } + + public RecordMetadata publishMessage(TopicIdPartition topicIdPartition, + RemoteLogMetadata remoteLogMetadata) throws KafkaException { + ensureNotClosed(); + + int metadataPartitionNo = topicPartitioner.metadataPartition(topicIdPartition); Review comment: s/metadataPartitionNo/metadataPartition ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogMetadataTopicPartitioner.java ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; + +public class RemoteLogMetadataTopicPartitioner { + public static final Logger log = LoggerFactory.getLogger(RemoteLogMetadataTopicPartitioner.class); + private final int noOfMetadataTopicPartitions; + + public RemoteLogMetadataTopicPartitioner(int noOfMetadataTopicPartitions) { + this.noOfMetadataTopicPartitions = noOfMetadataTopicPartitions; + } + + public int metadataPartition(TopicIdPartition topicIdPartition) { + Objects.requireNonNull(topicIdPartition, "TopicPartition can not be null"); + + int partitionNo = Utils.toPositive(Utils.murmur2(toBytes(topicIdPartition))) % noOfMetadataTopicPartitions; Review comment: s/partitionNo/partition ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerConfig.java ########## @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.apache.kafka.clients.CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG; +import static org.apache.kafka.common.config.ConfigDef.Importance.LOW; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; +import static org.apache.kafka.common.config.ConfigDef.Type.INT; +import static org.apache.kafka.common.config.ConfigDef.Type.LONG; + +public final class TopicBasedRemoteLogMetadataManagerConfig { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerConfig.class.getName()); + + public static final String REMOTE_LOG_METADATA_TOPIC_NAME = "__remote_log_metadata"; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP = "remote.log.metadata.topic.replication.factor"; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP = "remote.log.metadata.topic.num.partitions"; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP = "remote.log.metadata.topic.retention.ms"; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP = "remote.log.metadata.publish.wait.ms"; + + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS = 50; + public static final long DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS = -1L; + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR = 3; + public static final long 
DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS = 60 * 1000L; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC = "Replication factor of remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC = "The number of partitions for remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC = "Remote log metadata topic log retention in milli seconds." + + "Default: -1, that means unlimited. Users can configure this value based on their use cases. " + + "To avoid any data loss, this value should be more than the maximum retention period of any topic enabled with " + + "tiered storage in the cluster."; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC = "The amount of time in milli seconds to wait for the local consumer to " + + "receive the published event."; + + public static final String REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX = "remote.log.metadata.common.client."; + public static final String REMOTE_LOG_METADATA_PRODUCER_PREFIX = "remote.log.metadata.producer."; + public static final String REMOTE_LOG_METADATA_CONSUMER_PREFIX = "remote.log.metadata.consumer."; + + private static final String REMOTE_LOG_METADATA_CLIENT_PREFIX = "__remote_log_metadata_client"; + private static final String BROKER_ID = "broker.id"; + + private static final ConfigDef CONFIG = new ConfigDef(); + static { + CONFIG.define(REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP, INT, DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR, atLeast(1), LOW, + REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC) + .define(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP, INT, DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS, atLeast(1), LOW, + REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC) + .define(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP, LONG, DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS, LOW, + REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC) + 
.define(REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP, LONG, DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS, atLeast(0), LOW, + REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC); + } + + private final String clientIdPrefix; + private final int metadataTopicPartitionsCount; + private final String bootstrapServers; + private final long consumeWaitMs; + private final long metadataTopicRetentionMillis; Review comment: s/Millis/Ms ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerConfig.java ########## @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.apache.kafka.clients.CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG; +import static org.apache.kafka.common.config.ConfigDef.Importance.LOW; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; +import static org.apache.kafka.common.config.ConfigDef.Type.INT; +import static org.apache.kafka.common.config.ConfigDef.Type.LONG; + +public final class TopicBasedRemoteLogMetadataManagerConfig { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerConfig.class.getName()); + + public static final String REMOTE_LOG_METADATA_TOPIC_NAME = "__remote_log_metadata"; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP = "remote.log.metadata.topic.replication.factor"; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP = "remote.log.metadata.topic.num.partitions"; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP = "remote.log.metadata.topic.retention.ms"; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP = "remote.log.metadata.publish.wait.ms"; + + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS = 50; + public static final long DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS = -1L; + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR = 3; + public static final long 
DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS = 60 * 1000L; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC = "Replication factor of remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC = "The number of partitions for remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC = "Remote log metadata topic log retention in milli seconds." + + "Default: -1, that means unlimited. Users can configure this value based on their use cases. " + + "To avoid any data loss, this value should be more than the maximum retention period of any topic enabled with " + + "tiered storage in the cluster."; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC = "The amount of time in milli seconds to wait for the local consumer to " + + "receive the published event."; + + public static final String REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX = "remote.log.metadata.common.client."; + public static final String REMOTE_LOG_METADATA_PRODUCER_PREFIX = "remote.log.metadata.producer."; + public static final String REMOTE_LOG_METADATA_CONSUMER_PREFIX = "remote.log.metadata.consumer."; + + private static final String REMOTE_LOG_METADATA_CLIENT_PREFIX = "__remote_log_metadata_client"; + private static final String BROKER_ID = "broker.id"; + + private static final ConfigDef CONFIG = new ConfigDef(); + static { + CONFIG.define(REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP, INT, DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR, atLeast(1), LOW, + REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC) + .define(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP, INT, DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS, atLeast(1), LOW, + REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC) + .define(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP, LONG, DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS, LOW, + REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC) + 
.define(REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP, LONG, DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS, atLeast(0), LOW, + REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC); + } + + private final String clientIdPrefix; + private final int metadataTopicPartitionsCount; + private final String bootstrapServers; + private final long consumeWaitMs; + private final long metadataTopicRetentionMillis; + + private Map<String, Object> consumerProps; + private Map<String, Object> producerProps; + + public TopicBasedRemoteLogMetadataManagerConfig(Map<String, ?> props) { + log.info("Received props: [{}]", props); + Objects.requireNonNull(props, "props can not be null"); + + Map<String, Object> parsedConfigs = CONFIG.parse(props); + + bootstrapServers = (String) props.get(BOOTSTRAP_SERVERS_CONFIG); + if (bootstrapServers == null || bootstrapServers.isEmpty()) { + throw new IllegalArgumentException(BOOTSTRAP_SERVERS_CONFIG + " config must not be null or empty."); + } + + consumeWaitMs = (long) parsedConfigs.get(REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP); + metadataTopicPartitionsCount = (int) parsedConfigs.get(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP); + metadataTopicRetentionMillis = (long) parsedConfigs.get(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP); + if (metadataTopicRetentionMillis != -1 && metadataTopicRetentionMillis <= 0) { + throw new IllegalArgumentException("Invalid metadata topic retention in millis: " + metadataTopicRetentionMillis); + } + + clientIdPrefix = REMOTE_LOG_METADATA_CLIENT_PREFIX + "_" + props.get(BROKER_ID); + + initializeProducerConsumerProperties(props); + } + + private void initializeProducerConsumerProperties(Map<String, ?> configs) { + Map<String, Object> commonClientConfigs = new HashMap<>(); + commonClientConfigs.put(BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + + Map<String, Object> producerOnlyConfigs = new HashMap<>(); + Map<String, Object> consumerOnlyConfigs = new HashMap<>(); + + for (Map.Entry<String, ?> entry : configs.entrySet()) { + String 
key = entry.getKey(); + if (key.startsWith(REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX)) { + commonClientConfigs.put(key.substring(REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX.length()), entry.getValue()); + } else if (key.startsWith(REMOTE_LOG_METADATA_PRODUCER_PREFIX)) { + producerOnlyConfigs.put(key.substring(REMOTE_LOG_METADATA_PRODUCER_PREFIX.length()), entry.getValue()); + } else if (key.startsWith(REMOTE_LOG_METADATA_CONSUMER_PREFIX)) { + consumerOnlyConfigs.put(key.substring(REMOTE_LOG_METADATA_CONSUMER_PREFIX.length()), entry.getValue()); + } + } + + HashMap<String, Object> allProducerConfigs = new HashMap<>(commonClientConfigs); + allProducerConfigs.putAll(producerOnlyConfigs); + producerProps = createProducerProps(allProducerConfigs); + + HashMap<String, Object> allConsumerConfigs = new HashMap<>(commonClientConfigs); + allConsumerConfigs.putAll(consumerOnlyConfigs); + consumerProps = createConsumerProps(allConsumerConfigs); + } + + public String remoteLogMetadataTopicName() { + return REMOTE_LOG_METADATA_TOPIC_NAME; + } + + public int metadataTopicPartitionsCount() { + return metadataTopicPartitionsCount; + } + + public long consumeWaitMs() { + return consumeWaitMs; + } + + public Map<String, Object> consumerProperties() { + return consumerProps; + } + + public Map<String, Object> producerProperties() { + return producerProps; + } + + private Map<String, Object> createConsumerProps(HashMap<String, Object> allConsumerConfigs) { + Map<String, Object> props = new HashMap<>(allConsumerConfigs); + + props.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientIdPrefix + "_consumer"); + props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + props.put(ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_CONFIG, false); Review comment: Hmm, why do we need this exclusion to be true? 
########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ProducerManager.java ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; + +/** + * This class is responsible for publishing messages into the remote log metadata topic partitions. 
+ */ +public class ProducerManager implements Closeable { + private static final Logger log = LoggerFactory.getLogger(ProducerManager.class); + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaProducer<byte[], byte[]> producer; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + private final TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + + private volatile boolean close = false; + + public ProducerManager(TopicBasedRemoteLogMetadataManagerConfig rlmmConfig, + RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner) { + this.rlmmConfig = rlmmConfig; + this.producer = new KafkaProducer<>(rlmmConfig.producerProperties()); + topicPartitioner = rlmmTopicPartitioner; + } + + public RecordMetadata publishMessage(TopicIdPartition topicIdPartition, + RemoteLogMetadata remoteLogMetadata) throws KafkaException { + ensureNotClosed(); + + int metadataPartitionNo = topicPartitioner.metadataPartition(topicIdPartition); + log.debug("Publishing metadata message of partition:[{}] into metadata topic partition:[{}] with payload: [{}]", + topicIdPartition, metadataPartitionNo, remoteLogMetadata); + + ProducerCallback callback = new ProducerCallback(); + try { + if (metadataPartitionNo >= rlmmConfig.metadataTopicPartitionsCount()) { + // This should never occur as long as metadata partitions always remain the same. 
+ throw new KafkaException("Chosen partition no " + metadataPartitionNo + + " is more than the partition count: " + rlmmConfig.metadataTopicPartitionsCount()); + } + producer.send(new ProducerRecord<>(rlmmConfig.remoteLogMetadataTopicName(), metadataPartitionNo, null, + serde.serialize(remoteLogMetadata)), callback).get(); + } catch (KafkaException e) { + throw e; + } catch (Exception e) { + throw new KafkaException("Exception occurred while publishing message for topicIdPartition: " + topicIdPartition, e); + } + + if (callback.exception() != null) { + Exception ex = callback.exception(); + if (ex instanceof KafkaException) { + throw (KafkaException) ex; + } else { Review comment: For readability, it'll be useful to place the positive case under `if` and negative case under `else`, such as: ``` if (callback.exception() == null) { ... } else { ... } ``` ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerConfig.java ########## @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.apache.kafka.clients.CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG; +import static org.apache.kafka.common.config.ConfigDef.Importance.LOW; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; +import static org.apache.kafka.common.config.ConfigDef.Type.INT; +import static org.apache.kafka.common.config.ConfigDef.Type.LONG; + +public final class TopicBasedRemoteLogMetadataManagerConfig { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerConfig.class.getName()); + + public static final String REMOTE_LOG_METADATA_TOPIC_NAME = "__remote_log_metadata"; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP = "remote.log.metadata.topic.replication.factor"; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP = "remote.log.metadata.topic.num.partitions"; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP = "remote.log.metadata.topic.retention.ms"; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP = "remote.log.metadata.publish.wait.ms"; + + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS = 50; + public static final long DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS = -1L; + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR = 3; + public static final long 
DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS = 60 * 1000L; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC = "Replication factor of remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC = "The number of partitions for remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC = "Remote log metadata topic log retention in milli seconds." + + "Default: -1, that means unlimited. Users can configure this value based on their use cases. " + + "To avoid any data loss, this value should be more than the maximum retention period of any topic enabled with " + + "tiered storage in the cluster."; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC = "The amount of time in milli seconds to wait for the local consumer to " + + "receive the published event."; + + public static final String REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX = "remote.log.metadata.common.client."; + public static final String REMOTE_LOG_METADATA_PRODUCER_PREFIX = "remote.log.metadata.producer."; + public static final String REMOTE_LOG_METADATA_CONSUMER_PREFIX = "remote.log.metadata.consumer."; + + private static final String REMOTE_LOG_METADATA_CLIENT_PREFIX = "__remote_log_metadata_client"; + private static final String BROKER_ID = "broker.id"; + + private static final ConfigDef CONFIG = new ConfigDef(); + static { + CONFIG.define(REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP, INT, DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR, atLeast(1), LOW, + REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC) + .define(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP, INT, DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS, atLeast(1), LOW, + REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC) + .define(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP, LONG, DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS, LOW, + REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC) + 
.define(REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP, LONG, DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS, atLeast(0), LOW, + REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC); + } + + private final String clientIdPrefix; + private final int metadataTopicPartitionsCount; + private final String bootstrapServers; + private final long consumeWaitMs; + private final long metadataTopicRetentionMillis; + + private Map<String, Object> consumerProps; + private Map<String, Object> producerProps; + + public TopicBasedRemoteLogMetadataManagerConfig(Map<String, ?> props) { + log.info("Received props: [{}]", props); + Objects.requireNonNull(props, "props can not be null"); + + Map<String, Object> parsedConfigs = CONFIG.parse(props); + + bootstrapServers = (String) props.get(BOOTSTRAP_SERVERS_CONFIG); + if (bootstrapServers == null || bootstrapServers.isEmpty()) { + throw new IllegalArgumentException(BOOTSTRAP_SERVERS_CONFIG + " config must not be null or empty."); + } + + consumeWaitMs = (long) parsedConfigs.get(REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP); + metadataTopicPartitionsCount = (int) parsedConfigs.get(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP); + metadataTopicRetentionMillis = (long) parsedConfigs.get(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP); + if (metadataTopicRetentionMillis != -1 && metadataTopicRetentionMillis <= 0) { + throw new IllegalArgumentException("Invalid metadata topic retention in millis: " + metadataTopicRetentionMillis); + } + + clientIdPrefix = REMOTE_LOG_METADATA_CLIENT_PREFIX + "_" + props.get(BROKER_ID); + + initializeProducerConsumerProperties(props); + } + + private void initializeProducerConsumerProperties(Map<String, ?> configs) { + Map<String, Object> commonClientConfigs = new HashMap<>(); + commonClientConfigs.put(BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + + Map<String, Object> producerOnlyConfigs = new HashMap<>(); + Map<String, Object> consumerOnlyConfigs = new HashMap<>(); + + for (Map.Entry<String, ?> entry : configs.entrySet()) { + String 
key = entry.getKey(); + if (key.startsWith(REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX)) { + commonClientConfigs.put(key.substring(REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX.length()), entry.getValue()); + } else if (key.startsWith(REMOTE_LOG_METADATA_PRODUCER_PREFIX)) { + producerOnlyConfigs.put(key.substring(REMOTE_LOG_METADATA_PRODUCER_PREFIX.length()), entry.getValue()); + } else if (key.startsWith(REMOTE_LOG_METADATA_CONSUMER_PREFIX)) { + consumerOnlyConfigs.put(key.substring(REMOTE_LOG_METADATA_CONSUMER_PREFIX.length()), entry.getValue()); + } + } + + HashMap<String, Object> allProducerConfigs = new HashMap<>(commonClientConfigs); + allProducerConfigs.putAll(producerOnlyConfigs); + producerProps = createProducerProps(allProducerConfigs); + + HashMap<String, Object> allConsumerConfigs = new HashMap<>(commonClientConfigs); + allConsumerConfigs.putAll(consumerOnlyConfigs); + consumerProps = createConsumerProps(allConsumerConfigs); + } + + public String remoteLogMetadataTopicName() { + return REMOTE_LOG_METADATA_TOPIC_NAME; + } + + public int metadataTopicPartitionsCount() { + return metadataTopicPartitionsCount; + } + + public long consumeWaitMs() { + return consumeWaitMs; + } + + public Map<String, Object> consumerProperties() { + return consumerProps; + } + + public Map<String, Object> producerProperties() { + return producerProps; + } + + private Map<String, Object> createConsumerProps(HashMap<String, Object> allConsumerConfigs) { + Map<String, Object> props = new HashMap<>(allConsumerConfigs); + + props.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientIdPrefix + "_consumer"); + props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + props.put(ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_CONFIG, false); + props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + 
+ return props; + } + + private Map<String, Object> createProducerProps(HashMap<String, Object> allProducerConfigs) { + Map<String, Object> props = new HashMap<>(allProducerConfigs); + + props.put(ProducerConfig.CLIENT_ID_CONFIG, clientIdPrefix + "_producer"); + props.put(ProducerConfig.ACKS_CONFIG, "all"); + props.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, 1); + props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); + props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); + + return Collections.unmodifiableMap(props); + } + + @Override + public String toString() { + return "TopicBasedRemoteLogMetadataManagerConfig{" + + "clientIdPrefix='" + clientIdPrefix + '\'' + + ", metadataTopicPartitionsCount=" + metadataTopicPartitionsCount + + ", bootstrapServers='" + bootstrapServers + '\'' + + ", consumeWaitMs=" + consumeWaitMs + + ", metadataTopicRetentionMillis=" + metadataTopicRetentionMillis + + ", consumerProps=" + consumerProps + Review comment: `consumerProps` and `producerProps` are of type `Map`, therefore the `.toString()` is probably not readable. So we need to convert these into a comma-separated list, something like `K1=V1,K2=V2,...Kn=Vn`. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogMetadataTopicPartitioner.java ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; + +public class RemoteLogMetadataTopicPartitioner { + public static final Logger log = LoggerFactory.getLogger(RemoteLogMetadataTopicPartitioner.class); + private final int noOfMetadataTopicPartitions; + + public RemoteLogMetadataTopicPartitioner(int noOfMetadataTopicPartitions) { + this.noOfMetadataTopicPartitions = noOfMetadataTopicPartitions; + } + + public int metadataPartition(TopicIdPartition topicIdPartition) { + Objects.requireNonNull(topicIdPartition, "TopicPartition can not be null"); + + int partitionNo = Utils.toPositive(Utils.murmur2(toBytes(topicIdPartition))) % noOfMetadataTopicPartitions; + log.debug("No of partitions [{}], partitionNo: [{}] for given topic: [{}]", noOfMetadataTopicPartitions, partitionNo, topicIdPartition); Review comment: s/No of.../Num of... Also it feels overkill to me to log this message for each call. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ConsumerTask.java ########## @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_NAME; + +/** + * This class is responsible for consuming messages from remote log metadata topic ({@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}) + * partitions and maintain the state of the remote log segment metadata. 
It gives an API to add or remove + * for what topic partition's metadata should be consumed by this instance using + * {{@link #addAssignmentsForPartitions(Set)}} and {@link #removeAssignmentsForPartitions(Set)} respectively. + * <p> + * When a broker is started, controller sends topic partitions that this broker is leader or follower for and the + * partitions to be deleted. This class receives those notifications with + * {@link #addAssignmentsForPartitions(Set)} and {@link #removeAssignmentsForPartitions(Set)} assigns consumer for the + * respective remote log metadata partitions by using {@link RemoteLogMetadataTopicPartitioner#metadataPartition(TopicIdPartition)}. + * Any leadership changes later are called through the same API. We will remove the partitions that are deleted from + * this broker which are received through {@link #removeAssignmentsForPartitions(Set)}. + * <p> + * After receiving these events it invokes {@link RemotePartitionMetadataEventHandler#handleRemoteLogSegmentMetadata(RemoteLogSegmentMetadata)}, + * which maintains in-memory representation of the state of {@link RemoteLogSegmentMetadata}. + */ +class ConsumerTask implements Runnable, Closeable { + private static final Logger log = LoggerFactory.getLogger(ConsumerTask.class); + + private static final long POLL_INTERVAL_MS = 30L; + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaConsumer<byte[], byte[]> consumer; + private final RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + + private volatile boolean close = false; + private volatile boolean assignPartitions = false; + + private final Object assignPartitionsLock = new Object(); Review comment: Do you really need this explicit lock? It seems you could just use `wait()` and `notify()` APIs on the `ConsumerTask` object instead, combined with `synchronized` keyword. 
########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ConsumerTask.java ########## @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +import static 
org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_NAME; + +/** + * This class is responsible for consuming messages from remote log metadata topic ({@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}) + * partitions and maintain the state of the remote log segment metadata. It gives an API to add or remove + * for what topic partition's metadata should be consumed by this instance using + * {{@link #addAssignmentsForPartitions(Set)}} and {@link #removeAssignmentsForPartitions(Set)} respectively. + * <p> + * When a broker is started, controller sends topic partitions that this broker is leader or follower for and the + * partitions to be deleted. This class receives those notifications with + * {@link #addAssignmentsForPartitions(Set)} and {@link #removeAssignmentsForPartitions(Set)} assigns consumer for the + * respective remote log metadata partitions by using {@link RemoteLogMetadataTopicPartitioner#metadataPartition(TopicIdPartition)}. + * Any leadership changes later are called through the same API. We will remove the partitions that are deleted from + * this broker which are received through {@link #removeAssignmentsForPartitions(Set)}. + * <p> + * After receiving these events it invokes {@link RemotePartitionMetadataEventHandler#handleRemoteLogSegmentMetadata(RemoteLogSegmentMetadata)}, + * which maintains in-memory representation of the state of {@link RemoteLogSegmentMetadata}. 
+ */ +class ConsumerTask implements Runnable, Closeable { + private static final Logger log = LoggerFactory.getLogger(ConsumerTask.class); + + private static final long POLL_INTERVAL_MS = 30L; + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaConsumer<byte[], byte[]> consumer; + private final RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + + private volatile boolean close = false; + private volatile boolean assignPartitions = false; + + private final Object assignPartitionsLock = new Object(); + + // Remote log metadata topic partitions that consumer is assigned to. + private volatile Set<Integer> assignedMetaPartitions = Collections.emptySet(); + + // User topic partitions that this broker is a leader/follower for. + private Set<TopicIdPartition> assignedTopicPartitions = Collections.emptySet(); + + // Map of remote log metadata topic partition to consumed offsets. 
+ private final Map<Integer, Long> partitionToConsumedOffsets = new ConcurrentHashMap<>(); + + public ConsumerTask(KafkaConsumer<byte[], byte[]> consumer, + RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler, + RemoteLogMetadataTopicPartitioner topicPartitioner) { + this.consumer = consumer; + this.remotePartitionMetadataEventHandler = remotePartitionMetadataEventHandler; + this.topicPartitioner = topicPartitioner; + } + + @Override + public void run() { + log.info("Started Consumer task thread."); + try { + while (!close) { + Set<Integer> assignedMetaPartitionsSnapshot = maybeWaitForPartitionsAssignment(); + + if (!assignedMetaPartitionsSnapshot.isEmpty()) { + executeReassignment(assignedMetaPartitionsSnapshot); + } + + log.info("Polling consumer to receive remote log metadata topic records"); + ConsumerRecords<byte[], byte[]> consumerRecords + = consumer.poll(Duration.ofSeconds(POLL_INTERVAL_MS)); + for (ConsumerRecord<byte[], byte[]> record : consumerRecords) { + handleRemoteLogMetadata(serde.deserialize(record.value())); + partitionToConsumedOffsets.put(record.partition(), record.offset()); + } + } + } catch (Exception e) { + log.error("Error occurred in consumer task, close:[{}]", close, e); + } + + closeConsumer(); Review comment: Hmm, should you be setting `close` to true here? (it depends on the meaning of `close`, which I don't fully understand....) ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemotePartitionMetadataStore.java ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentId; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteState; +import org.apache.kafka.server.log.remote.storage.RemoteResourceNotFoundException; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; + +public class RemotePartitionMetadataStore extends RemotePartitionMetadataEventHandler implements Closeable { + private static final Logger log = LoggerFactory.getLogger(RemotePartitionMetadataStore.class); + + private Map<TopicIdPartition, RemotePartitionDeleteMetadata> idToPartitionDeleteMetadata = + new ConcurrentHashMap<>(); + + private Map<TopicIdPartition, RemoteLogMetadataCache> idToRemoteLogMetadataCache = + new ConcurrentHashMap<>(); + + @Override + public void handleRemoteLogSegmentMetadata(RemoteLogSegmentMetadata remoteLogSegmentMetadata) { + log.debug("Adding remote log segment : 
[{}]", remoteLogSegmentMetadata); + + RemoteLogSegmentId remoteLogSegmentId = remoteLogSegmentMetadata.remoteLogSegmentId(); + + idToRemoteLogMetadataCache + .computeIfAbsent(remoteLogSegmentId.topicIdPartition(), id -> new RemoteLogMetadataCache()) + .addCopyInProgressSegment(remoteLogSegmentMetadata); + } + + @Override + public void handleRemoteLogSegmentMetadataUpdate(RemoteLogSegmentMetadataUpdate rlsmUpdate) { + log.debug("Updating remote log segment: [{}]", rlsmUpdate); + RemoteLogSegmentId remoteLogSegmentId = rlsmUpdate.remoteLogSegmentId(); + TopicIdPartition topicIdPartition = remoteLogSegmentId.topicIdPartition(); + RemoteLogMetadataCache remoteLogMetadataCache = idToRemoteLogMetadataCache.get(topicIdPartition); + if (remoteLogMetadataCache != null) { + try { + remoteLogMetadataCache.updateRemoteLogSegmentMetadata(rlsmUpdate); + } catch (RemoteResourceNotFoundException e) { + log.error("Error occurred while updating the remote logs segment."); Review comment: s/logs/log ? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerConfig.java ########## @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.apache.kafka.clients.CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG; +import static org.apache.kafka.common.config.ConfigDef.Importance.LOW; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; +import static org.apache.kafka.common.config.ConfigDef.Type.INT; +import static org.apache.kafka.common.config.ConfigDef.Type.LONG; + +public final class TopicBasedRemoteLogMetadataManagerConfig { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerConfig.class.getName()); + + public static final String REMOTE_LOG_METADATA_TOPIC_NAME = "__remote_log_metadata"; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP = "remote.log.metadata.topic.replication.factor"; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP = "remote.log.metadata.topic.num.partitions"; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP = "remote.log.metadata.topic.retention.ms"; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP = "remote.log.metadata.publish.wait.ms"; + + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS = 50; + public static final long DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS = -1L; + public static final int DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR = 3; + public static final long 
DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS = 60 * 1000L; + + public static final String REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC = "Replication factor of remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC = "The number of partitions for remote log metadata Topic."; + public static final String REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC = "Remote log metadata topic log retention in milli seconds." + + "Default: -1, that means unlimited. Users can configure this value based on their use cases. " + + "To avoid any data loss, this value should be more than the maximum retention period of any topic enabled with " + + "tiered storage in the cluster."; + public static final String REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC = "The amount of time in milli seconds to wait for the local consumer to " + + "receive the published event."; + + public static final String REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX = "remote.log.metadata.common.client."; + public static final String REMOTE_LOG_METADATA_PRODUCER_PREFIX = "remote.log.metadata.producer."; + public static final String REMOTE_LOG_METADATA_CONSUMER_PREFIX = "remote.log.metadata.consumer."; + + private static final String REMOTE_LOG_METADATA_CLIENT_PREFIX = "__remote_log_metadata_client"; + private static final String BROKER_ID = "broker.id"; + + private static final ConfigDef CONFIG = new ConfigDef(); + static { + CONFIG.define(REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP, INT, DEFAULT_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR, atLeast(1), LOW, + REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_DOC) + .define(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP, INT, DEFAULT_REMOTE_LOG_METADATA_TOPIC_PARTITIONS, atLeast(1), LOW, + REMOTE_LOG_METADATA_TOPIC_PARTITIONS_DOC) + .define(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP, LONG, DEFAULT_REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS, LOW, + REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_DOC) + 
.define(REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP, LONG, DEFAULT_REMOTE_LOG_METADATA_CONSUME_WAIT_MS, atLeast(0), LOW, + REMOTE_LOG_METADATA_CONSUME_WAIT_MS_DOC); + } + + private final String clientIdPrefix; + private final int metadataTopicPartitionsCount; + private final String bootstrapServers; + private final long consumeWaitMs; + private final long metadataTopicRetentionMillis; + + private Map<String, Object> consumerProps; + private Map<String, Object> producerProps; + + public TopicBasedRemoteLogMetadataManagerConfig(Map<String, ?> props) { + log.info("Received props: [{}]", props); + Objects.requireNonNull(props, "props can not be null"); + + Map<String, Object> parsedConfigs = CONFIG.parse(props); + + bootstrapServers = (String) props.get(BOOTSTRAP_SERVERS_CONFIG); + if (bootstrapServers == null || bootstrapServers.isEmpty()) { + throw new IllegalArgumentException(BOOTSTRAP_SERVERS_CONFIG + " config must not be null or empty."); + } + + consumeWaitMs = (long) parsedConfigs.get(REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP); + metadataTopicPartitionsCount = (int) parsedConfigs.get(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP); + metadataTopicRetentionMillis = (long) parsedConfigs.get(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP); + if (metadataTopicRetentionMillis != -1 && metadataTopicRetentionMillis <= 0) { + throw new IllegalArgumentException("Invalid metadata topic retention in millis: " + metadataTopicRetentionMillis); + } + + clientIdPrefix = REMOTE_LOG_METADATA_CLIENT_PREFIX + "_" + props.get(BROKER_ID); + + initializeProducerConsumerProperties(props); + } + + private void initializeProducerConsumerProperties(Map<String, ?> configs) { + Map<String, Object> commonClientConfigs = new HashMap<>(); + commonClientConfigs.put(BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + + Map<String, Object> producerOnlyConfigs = new HashMap<>(); + Map<String, Object> consumerOnlyConfigs = new HashMap<>(); + + for (Map.Entry<String, ?> entry : configs.entrySet()) { + String 
key = entry.getKey(); + if (key.startsWith(REMOTE_LOG_METADATA_COMMON_CLIENT_PREFIX)) { Review comment: I had the same question. It appears better to just duplicate the properties. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogMetadataTopicPartitioner.java ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; + +public class RemoteLogMetadataTopicPartitioner { + public static final Logger log = LoggerFactory.getLogger(RemoteLogMetadataTopicPartitioner.class); + private final int noOfMetadataTopicPartitions; + + public RemoteLogMetadataTopicPartitioner(int noOfMetadataTopicPartitions) { + this.noOfMetadataTopicPartitions = noOfMetadataTopicPartitions; + } + + public int metadataPartition(TopicIdPartition topicIdPartition) { + Objects.requireNonNull(topicIdPartition, "TopicPartition can not be null"); + + int partitionNo = Utils.toPositive(Utils.murmur2(toBytes(topicIdPartition))) % noOfMetadataTopicPartitions; + log.debug("No of partitions [{}], partitionNo: [{}] for given topic: [{}]", noOfMetadataTopicPartitions, partitionNo, topicIdPartition); + return partitionNo; + } + + private byte[] toBytes(TopicIdPartition topicIdPartition) { + // We do not want to depend upon hash code generation of Uuid as that may change. + int hash = Objects.hash(topicIdPartition.topicId().getLeastSignificantBits(), + topicIdPartition.topicId().getMostSignificantBits(), + topicIdPartition.topicPartition().topic(), Review comment: Is topic necessary here, when the UUID and partition are already sufficient input for the hash? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ConsumerTask.java ########## @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_NAME; + +/** + * This class is responsible for consuming messages from remote log metadata topic ({@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}) + * partitions and maintain the state of the remote log segment metadata. 
It gives an API to add or remove + * for what topic partition's metadata should be consumed by this instance using + * {{@link #addAssignmentsForPartitions(Set)}} and {@link #removeAssignmentsForPartitions(Set)} respectively. + * <p> + * When a broker is started, controller sends topic partitions that this broker is leader or follower for and the + * partitions to be deleted. This class receives those notifications with + * {@link #addAssignmentsForPartitions(Set)} and {@link #removeAssignmentsForPartitions(Set)} assigns consumer for the + * respective remote log metadata partitions by using {@link RemoteLogMetadataTopicPartitioner#metadataPartition(TopicIdPartition)}. + * Any leadership changes later are called through the same API. We will remove the partitions that are deleted from + * this broker which are received through {@link #removeAssignmentsForPartitions(Set)}. + * <p> + * After receiving these events it invokes {@link RemotePartitionMetadataEventHandler#handleRemoteLogSegmentMetadata(RemoteLogSegmentMetadata)}, + * which maintains in-memory representation of the state of {@link RemoteLogSegmentMetadata}. + */ +class ConsumerTask implements Runnable, Closeable { + private static final Logger log = LoggerFactory.getLogger(ConsumerTask.class); + + private static final long POLL_INTERVAL_MS = 30L; + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaConsumer<byte[], byte[]> consumer; + private final RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + + private volatile boolean close = false; Review comment: Could you please mention what state this boolean represents? 
########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManager.java ########## @@ -0,0 +1,335 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.errors.RetriableException; +import org.apache.kafka.common.utils.KafkaThread; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadataManager; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentState; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + 
+import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * This is the {@link RemoteLogMetadataManager} implementation with storage as an internal topic with name {@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}. + * This is used to publish and fetch {@link RemoteLogMetadata} for the registered user topic partitions with + * {@link #onPartitionLeadershipChanges(Set, Set)}. Each broker will have an instance of this class and it subscribes + * to metadata updates for the registered user topic partitions. + */ +public class TopicBasedRemoteLogMetadataManager implements RemoteLogMetadataManager { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManager.class); + + private volatile boolean configured = false; + + // Using AtomicBoolean instead of volatile as it may encounter http://findbugs.sourceforge.net/bugDescriptions.html#SP_SPIN_ON_FIELD + // if the field is read but not updated in a spin loop like in #initializeResources() method. + private final AtomicBoolean close = new AtomicBoolean(false); Review comment: Could you please add a comment on what state `close` represents? ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ConsumerTask.java ########## @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Duration; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_NAME; + +/** + * This class is responsible for consuming messages from remote log metadata topic ({@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}) + * partitions and maintain the state of the remote log segment metadata. 
It gives an API to add or remove + * for what topic partition's metadata should be consumed by this instance using + * {{@link #addAssignmentsForPartitions(Set)}} and {@link #removeAssignmentsForPartitions(Set)} respectively. + * <p> + * When a broker is started, controller sends topic partitions that this broker is leader or follower for and the + * partitions to be deleted. This class receives those notifications with + * {@link #addAssignmentsForPartitions(Set)} and {@link #removeAssignmentsForPartitions(Set)} assigns consumer for the + * respective remote log metadata partitions by using {@link RemoteLogMetadataTopicPartitioner#metadataPartition(TopicIdPartition)}. + * Any leadership changes later are called through the same API. We will remove the partitions that are deleted from + * this broker which are received through {@link #removeAssignmentsForPartitions(Set)}. + * <p> + * After receiving these events it invokes {@link RemotePartitionMetadataEventHandler#handleRemoteLogSegmentMetadata(RemoteLogSegmentMetadata)}, + * which maintains in-memory representation of the state of {@link RemoteLogSegmentMetadata}. + */ +class ConsumerTask implements Runnable, Closeable { + private static final Logger log = LoggerFactory.getLogger(ConsumerTask.class); + + private static final long POLL_INTERVAL_MS = 30L; + + private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); + private final KafkaConsumer<byte[], byte[]> consumer; + private final RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler; + private final RemoteLogMetadataTopicPartitioner topicPartitioner; + + private volatile boolean close = false; + private volatile boolean assignPartitions = false; + + private final Object assignPartitionsLock = new Object(); + + // Remote log metadata topic partitions that consumer is assigned to. 
+ private volatile Set<Integer> assignedMetaPartitions = Collections.emptySet(); + + // User topic partitions that this broker is a leader/follower for. + private Set<TopicIdPartition> assignedTopicPartitions = Collections.emptySet(); + + // Map of remote log metadata topic partition to target end offsets to be consumed. + private final Map<Integer, Long> partitionToTargetEndOffsets = new ConcurrentHashMap<>(); + + // Map of remote log metadata topic partition to consumed offsets. + private final Map<Integer, Long> partitionToConsumedOffsets = new ConcurrentHashMap<>(); + + public ConsumerTask(KafkaConsumer<byte[], byte[]> consumer, + RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler, + RemoteLogMetadataTopicPartitioner topicPartitioner) { + this.consumer = consumer; + this.remotePartitionMetadataEventHandler = remotePartitionMetadataEventHandler; + this.topicPartitioner = topicPartitioner; + } + + @Override + public void run() { + log.info("Started Consumer task thread."); + try { + while (!close) { + Set<Integer> assignedMetaPartitionsSnapshot = maybeWaitForPartitionsAssignment(); + + if (!assignedMetaPartitionsSnapshot.isEmpty()) { + executeReassignment(assignedMetaPartitionsSnapshot); + } + + log.info("Polling consumer to receive remote log metadata topic records"); + ConsumerRecords<byte[], byte[]> consumerRecords + = consumer.poll(Duration.ofSeconds(POLL_INTERVAL_MS)); + for (ConsumerRecord<byte[], byte[]> record : consumerRecords) { + handleRemoteLogMetadata(serde.deserialize(record.value())); + partitionToConsumedOffsets.put(record.partition(), record.offset()); + } + + // Check whether messages are received till end offsets or not for the assigned metadata partitions. 
+ if (!partitionToTargetEndOffsets.isEmpty()) { + for (Map.Entry<Integer, Long> entry : partitionToTargetEndOffsets.entrySet()) { + final Long offset = partitionToConsumedOffsets.getOrDefault(entry.getKey(), 0L); + if (offset >= entry.getValue()) { + partitionToTargetEndOffsets.remove(entry.getKey()); + } + } + } + } + } catch (Exception e) { + log.error("Error occurred in consumer task, close:[{}]", close, e); + } + + closeConsumer(); + log.info("Exiting from consumer task thread"); + } + + private void closeConsumer() { + log.info("Closing the consumer instance"); + if (consumer != null) { + try { + consumer.close(Duration.ofSeconds(30)); + } catch (Exception e) { + log.error("Error encountered while closing the consumer", e); + } + } + } + + private Set<Integer> maybeWaitForPartitionsAssignment() { + Set<Integer> assignedMetaPartitionsSnapshot = Collections.emptySet(); + synchronized (assignPartitionsLock) { + while (assignedMetaPartitions.isEmpty()) { + // If no partitions are assigned, wait until they are assigned. 
+ log.info("Waiting for assigned remote log metadata partitions.."); + try { + assignPartitionsLock.wait(); + } catch (InterruptedException e) { + throw new KafkaException(e); + } + } + + if (assignPartitions) { + assignedMetaPartitionsSnapshot = new HashSet<>(assignedMetaPartitions); + assignPartitions = false; + } + } + return assignedMetaPartitionsSnapshot; + } + + private void handleRemoteLogMetadata(RemoteLogMetadata remoteLogMetadata) { + remotePartitionMetadataEventHandler.handleRemoteLogMetadata(remoteLogMetadata); + } + + private void executeReassignment(Set<Integer> assignedMetaPartitionsSnapshot) { + Set<TopicPartition> assignedMetaTopicPartitions = assignedMetaPartitionsSnapshot.stream() + .map(partitionNum -> new TopicPartition(REMOTE_LOG_METADATA_TOPIC_NAME, partitionNum)) + .collect(Collectors.toSet()); + log.info("Reassigning partitions to consumer task [{}]", assignedMetaTopicPartitions); + consumer.assign(assignedMetaTopicPartitions); + + log.debug("Fetching end offsets to consumer task [{}]", assignedMetaTopicPartitions); + Map<TopicPartition, Long> endOffsets; + while (true) { + try { + endOffsets = consumer.endOffsets(assignedMetaTopicPartitions, Duration.ofSeconds(30)); + break; + } catch (Exception e) { + // ignore exception + log.debug("Error encountered in fetching end offsets", e); + } + } + log.debug("Fetched end offsets to consumer task [{}]", endOffsets); + + for (Map.Entry<TopicPartition, Long> entry : endOffsets.entrySet()) { + if (entry.getValue() > 0) { + partitionToTargetEndOffsets.put(entry.getKey().partition(), entry.getValue()); + } + } + } + + public void addAssignmentsForPartitions(Set<TopicIdPartition> updatedPartitions) { + updateAssignmentsForPartitions(updatedPartitions, Collections.emptySet()); + } + + public void removeAssignmentsForPartitions(Set<TopicIdPartition> partitions) { + updateAssignmentsForPartitions(Collections.emptySet(), partitions); + } + + private void updateAssignmentsForPartitions(Set<TopicIdPartition> 
addedPartitions, + Set<TopicIdPartition> removedPartitions) { + log.info("Updating assignments for addedPartitions: {} and removedPartition: {}", addedPartitions, removedPartitions); + ensureNotClosed(); + + Objects.requireNonNull(addedPartitions, "addedPartitions must not be null"); + Objects.requireNonNull(removedPartitions, "removedPartitions must not be null"); + + if (addedPartitions.isEmpty() && removedPartitions.isEmpty()) { + return; + } + + synchronized (assignPartitionsLock) { + Set<TopicIdPartition> updatedReassignedPartitions = new HashSet<>(assignedTopicPartitions); + updatedReassignedPartitions.addAll(addedPartitions); + updatedReassignedPartitions.removeAll(removedPartitions); + Set<Integer> updatedAssignedMetaPartitions = new HashSet<>(); + for (TopicIdPartition tp : updatedReassignedPartitions) { + updatedAssignedMetaPartitions.add(topicPartitioner.metadataPartition(tp)); + } + + if (!updatedAssignedMetaPartitions.equals(assignedMetaPartitions)) { + assignedTopicPartitions = Collections.unmodifiableSet(updatedReassignedPartitions); + assignedMetaPartitions = Collections.unmodifiableSet(updatedAssignedMetaPartitions); Review comment: It appears that once this logic is implemented, there is probably no need to wait for `maybeWaitForPartitionsAssignment`. The reason is that whenever a partition is assigned, we will bootstrap the remote state by consuming from the beginning. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManager.java ########## @@ -0,0 +1,346 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.errors.RetriableException; +import org.apache.kafka.common.utils.KafkaThread; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadataManager; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentState; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * This is the {@link RemoteLogMetadataManager} implementation with storage as an internal topic with name {@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}. 
+ * This is used to publish and fetch {@link RemoteLogMetadata} for the registered user topic partitions with + * {@link #onPartitionLeadershipChanges(Set, Set)}. Each broker will have an instance of this class and it subscribes + * to metadata updates for the registered user topic partitions. + */ +public class TopicBasedRemoteLogMetadataManager implements RemoteLogMetadataManager { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManager.class); + + // Take these as configs with the respective default values. + private static final long INITIALIZATION_RETRY_INTERVAL_MS = 30_000L; + + private volatile boolean configured = false; + + // Using AtomicBoolean instead of volatile as it may encounter http://findbugs.sourceforge.net/bugDescriptions.html#SP_SPIN_ON_FIELD + // if the field is read but not updated in a spin loop like in #initializeResources() method. + private final AtomicBoolean close = new AtomicBoolean(false); + private final AtomicBoolean initialized = new AtomicBoolean(false); + + private Thread initializationThread; + private Time time = Time.SYSTEM; + private ProducerManager producerManager; + private ConsumerManager consumerManager; + + // This allows to gracefully close this instance using {@link #close()} method while there are some pending or new + // requests calling different methods which use the resources like producer/consumer managers. 
+ private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + private final RemotePartitionMetadataStore remotePartitionMetadataStore = new RemotePartitionMetadataStore(); + private volatile TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + private RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner; + + @Override + public void addRemoteLogSegmentMetadata(RemoteLogSegmentMetadata remoteLogSegmentMetadata) + throws RemoteStorageException { + Objects.requireNonNull(remoteLogSegmentMetadata, "remoteLogSegmentMetadata can not be null"); + + ensureInitializedAndNotClosed(); + + // This allows gracefully rejecting the requests while closing of this instance is in progress, which triggers + // closing the producer/consumer manager instances. + lock.readLock().lock(); + try { + + // This method is allowed only to add remote log segment with the initial state(which is RemoteLogSegmentState.COPY_SEGMENT_STARTED) + // but not to update the existing remote log segment metadata. + if (remoteLogSegmentMetadata.state() != RemoteLogSegmentState.COPY_SEGMENT_STARTED) { + throw new IllegalArgumentException( + "Given remoteLogSegmentMetadata should have state as " + RemoteLogSegmentState.COPY_SEGMENT_STARTED + + " but it contains state as: " + remoteLogSegmentMetadata.state()); + } + + // Publish the message to the topic. + doPublishMetadata(remoteLogSegmentMetadata.remoteLogSegmentId().topicIdPartition(), + remoteLogSegmentMetadata); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public void updateRemoteLogSegmentMetadata(RemoteLogSegmentMetadataUpdate segmentMetadataUpdate) + throws RemoteStorageException { + Objects.requireNonNull(segmentMetadataUpdate, "segmentMetadataUpdate can not be null"); + + ensureInitializedAndNotClosed(); + + lock.readLock().lock(); + try { + // Callers should use addRemoteLogSegmentMetadata to add RemoteLogSegmentMetadata with state as + // RemoteLogSegmentState.COPY_SEGMENT_STARTED. 
+ if (segmentMetadataUpdate.state() == RemoteLogSegmentState.COPY_SEGMENT_STARTED) { + throw new IllegalArgumentException("Given remoteLogSegmentMetadata should not have the state as: " + + RemoteLogSegmentState.COPY_SEGMENT_STARTED); + } + + // Publish the message to the topic. + doPublishMetadata(segmentMetadataUpdate.remoteLogSegmentId().topicIdPartition(), segmentMetadataUpdate); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public void putRemotePartitionDeleteMetadata(RemotePartitionDeleteMetadata remotePartitionDeleteMetadata) + throws RemoteStorageException { + Objects.requireNonNull(remotePartitionDeleteMetadata, "remotePartitionDeleteMetadata can not be null"); + + ensureInitializedAndNotClosed(); + + lock.readLock().lock(); + try { + + doPublishMetadata(remotePartitionDeleteMetadata.topicIdPartition(), remotePartitionDeleteMetadata); + } finally { + lock.readLock().unlock(); + } + } + + private void doPublishMetadata(TopicIdPartition topicIdPartition, RemoteLogMetadata remoteLogMetadata) + throws RemoteStorageException { + log.debug("Publishing metadata for partition: [{}] with context: [{}]", topicIdPartition, remoteLogMetadata); + ensureInitializedAndNotClosed(); + + try { + // Publish the message to the topic. + RecordMetadata recordMetadata = producerManager.publishMessage(topicIdPartition, + remoteLogMetadata); + // Wait until the consumer catches up with this offset. This will ensure read-after-write consistency + // semantics. + consumerManager.waitTillConsumptionCatchesUp(recordMetadata); Review comment: I agree with the question here. This can become more expensive than it seems. The alternative is to pursue an asynchronous notification model to improve the throughput. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org