mimaison commented on code in PR #19762: URL: https://github.com/apache/kafka/pull/19762#discussion_r2140129567
########## raft/src/main/java/org/apache/kafka/raft/KafkaRaftLog.java: ########## @@ -0,0 +1,843 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.raft; + +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.Uuid; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.errors.CorruptRecordException; +import org.apache.kafka.common.errors.InvalidConfigurationException; +import org.apache.kafka.common.record.MemoryRecords; +import org.apache.kafka.common.record.Records; +import org.apache.kafka.common.utils.LogContext; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.common.OffsetAndEpoch; +import org.apache.kafka.server.config.ServerLogConfigs; +import org.apache.kafka.server.storage.log.FetchIsolation; +import org.apache.kafka.server.util.Scheduler; +import org.apache.kafka.snapshot.FileRawSnapshotReader; +import org.apache.kafka.snapshot.FileRawSnapshotWriter; +import org.apache.kafka.snapshot.NotifyingRawSnapshotWriter; +import org.apache.kafka.snapshot.RawSnapshotReader; +import 
org.apache.kafka.snapshot.RawSnapshotWriter; +import org.apache.kafka.snapshot.SnapshotPath; +import org.apache.kafka.snapshot.Snapshots; +import org.apache.kafka.storage.internals.log.AppendOrigin; +import org.apache.kafka.storage.internals.log.FetchDataInfo; +import org.apache.kafka.storage.internals.log.LogConfig; +import org.apache.kafka.storage.internals.log.LogDirFailureChannel; +import org.apache.kafka.storage.internals.log.LogStartOffsetIncrementReason; +import org.apache.kafka.storage.internals.log.ProducerStateManagerConfig; +import org.apache.kafka.storage.internals.log.UnifiedLog; +import org.apache.kafka.storage.log.metrics.BrokerTopicStats; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.Properties; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; + +public class KafkaRaftLog implements ReplicatedLog { + + private static final Logger LOG = LoggerFactory.getLogger(KafkaRaftLog.class); + + private final Logger logger; + private final UnifiedLog log; + private final Time time; + private final Scheduler scheduler; + // Access to this object needs to be synchronized because it is used by the snapshotting thread to notify the + // polling thread when snapshots are created. This object is also used to store any opened snapshot reader. 
+ private final TreeMap<OffsetAndEpoch, Optional<FileRawSnapshotReader>> snapshots; + private final TopicPartition topicPartition; + private final MetadataLogConfig config; + private final String logIdent; + + public static KafkaRaftLog createLog( + TopicPartition topicPartition, + Uuid topicId, + File dataDir, + Time time, + Scheduler scheduler, + MetadataLogConfig config, + int nodeId) throws IOException { + Properties props = new Properties(); + props.setProperty(TopicConfig.MAX_MESSAGE_BYTES_CONFIG, String.valueOf(config.maxBatchSizeInBytes())); + props.setProperty(TopicConfig.SEGMENT_BYTES_CONFIG, String.valueOf(config.logSegmentBytes())); + props.setProperty(TopicConfig.SEGMENT_MS_CONFIG, String.valueOf(config.logSegmentMillis())); + props.setProperty(TopicConfig.FILE_DELETE_DELAY_MS_CONFIG, String.valueOf(ServerLogConfigs.LOG_DELETE_DELAY_MS_DEFAULT)); + + // Disable time and byte retention when deleting segments + props.setProperty(TopicConfig.RETENTION_MS_CONFIG, "-1"); + props.setProperty(TopicConfig.RETENTION_BYTES_CONFIG, "-1"); + LogConfig.validate(props); + LogConfig defaultLogConfig = new LogConfig(props); + + if (config.logSegmentBytes() < config.logSegmentMinBytes()) { + throw new InvalidConfigurationException( + "Cannot set " + MetadataLogConfig.METADATA_LOG_SEGMENT_BYTES_CONFIG + " below " + config.logSegmentMinBytes() + ": " + config.logSegmentBytes() + ); + } else if (defaultLogConfig.retentionMs >= 0) { + throw new InvalidConfigurationException( + "Cannot set " + TopicConfig.RETENTION_MS_CONFIG + " above -1: " + defaultLogConfig.retentionMs + ); + } else if (defaultLogConfig.retentionSize >= 0) { + throw new InvalidConfigurationException( + "Cannot set " + TopicConfig.RETENTION_BYTES_CONFIG + " above -1: " + defaultLogConfig.retentionSize + ); + } + + UnifiedLog log = UnifiedLog.create( + dataDir, + defaultLogConfig, + 0L, + 0L, + scheduler, + new BrokerTopicStats(), + time, + Integer.MAX_VALUE, + new 
ProducerStateManagerConfig(Integer.MAX_VALUE, false), + Integer.MAX_VALUE, + new LogDirFailureChannel(5), + false, + Optional.of(topicId) + ); + + KafkaRaftLog metadataLog = new KafkaRaftLog( + log, + time, + scheduler, + recoverSnapshots(log), + topicPartition, + config, + nodeId + ); + + // Print a warning if users have overridden the internal config + if (config.logSegmentMinBytes() != KafkaRaftClient.MAX_BATCH_SIZE_BYTES) { + metadataLog.logger.error("Overriding " + MetadataLogConfig.METADATA_LOG_SEGMENT_MIN_BYTES_CONFIG + + " is only supported for testing. Setting this value too low may lead to an inability to write batches of metadata records."); + } + + // When recovering, truncate fully if the latest snapshot is after the log end offset. This can happen to a follower + // when the follower crashes after downloading a snapshot from the leader but before it could truncate the log fully. + metadataLog.truncateToLatestSnapshot(); + + return metadataLog; + } + + public KafkaRaftLog( + UnifiedLog log, + Time time, + Scheduler scheduler, + // Access to this object needs to be synchronized because it is used by the snapshotting thread to notify the + // polling thread when snapshots are created. This object is also used to store any opened snapshot reader. 
+ TreeMap<OffsetAndEpoch, Optional<FileRawSnapshotReader>> snapshots, + TopicPartition topicPartition, + MetadataLogConfig config, + int nodeId) { + this.log = log; + this.time = time; + this.scheduler = scheduler; + this.snapshots = snapshots; + this.topicPartition = topicPartition; + this.config = config; + this.logIdent = "[MetadataLog partition=" + topicPartition + ", nodeId=" + nodeId + "] "; + this.logger = new LogContext(logIdent).logger(KafkaRaftLog.class); + } + + // for testing + UnifiedLog log() { + return log; + } + + @Override + public LogFetchInfo read(long startOffset, Isolation readIsolation) { + FetchIsolation isolation = switch (readIsolation) { + case COMMITTED -> FetchIsolation.HIGH_WATERMARK; + case UNCOMMITTED -> FetchIsolation.LOG_END; + }; + + try { + FetchDataInfo fetchInfo = log.read(startOffset, config.maxFetchSizeInBytes(), isolation, true); + return new LogFetchInfo( + fetchInfo.records, + new LogOffsetMetadata( + fetchInfo.fetchOffsetMetadata.messageOffset, + Optional.of(new SegmentPosition( + fetchInfo.fetchOffsetMetadata.segmentBaseOffset, + fetchInfo.fetchOffsetMetadata.relativePositionInSegment)) + ) + ); + } catch (IOException ioe) { + throw new UncheckedIOException(ioe); Review Comment: The reason I used `UncheckedIOException` is that none of the method definitions in `RaftLog` are marked as throwing exceptions. None of the calling logic, mostly in `KafkaRaftClient`, has logic to handle checked exceptions. So any exception thrown will be passed back up the call stack to `KafkaRaftClientDriver.doWork()`, which catches `Throwable`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org