TanYuxin-tyx commented on code in PR #22833: URL: https://github.com/apache/flink/pull/22833#discussion_r1241087057
########## flink-runtime/src/main/java/org/apache/flink/runtime/io/network/partition/hybrid/tiered/storage/SortBufferAccumulator.java: ########## @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage; + +import org.apache.flink.core.memory.MemorySegment; +import org.apache.flink.runtime.io.network.buffer.Buffer; +import org.apache.flink.runtime.io.network.buffer.BufferBuilder; +import org.apache.flink.runtime.io.network.buffer.BufferRecycler; +import org.apache.flink.runtime.io.network.buffer.NetworkBuffer; +import org.apache.flink.runtime.io.network.partition.hybrid.tiered.common.TieredStorageSubpartitionId; + +import org.apache.commons.lang3.tuple.Pair; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.function.BiConsumer; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * The sort-based implementation of the {@link BufferAccumulator}. 
The {@link BufferAccumulator} + * receives the records from {@link TieredStorageProducerClient}, and the records will be accumulated and + * transformed into finished buffers. The accumulated buffers will be transferred to the corresponding + * tier dynamically. + * + * <p>The {@link BufferAccumulator} can help use fewer buffers to accumulate data, which decouples + * the buffer usage from the parallelism. The number of buffers used by the {@link + * SortBufferAccumulator} will be numBuffers at most. Once the {@link SortBufferContainer} is full, + * or a different type of buffer is received, or the end-of-partition event is received, the buffer in + * the sort buffer container will be flushed to the tiers. + * + * <p>Note that this class need not be thread-safe, because it should only be accessed from the main + * thread. + */ +public class SortBufferAccumulator implements BufferAccumulator { + + /** The number of the subpartitions. */ + private final int numSubpartitions; + + /** The total number of the buffers used by the {@link SortBufferAccumulator}. */ + private final int numBuffers; + + /** The byte size of one single buffer. */ + private final int bufferSizeBytes; + + /** The empty buffers without storing data. */ + private final LinkedList<MemorySegment> freeSegments = new LinkedList<>(); + + /** The memory manager of the tiered storage. */ + private final TieredStorageMemoryManager storeMemoryManager; + + /** The number of buffers for sorting used in the {@link SortBufferContainer}. */ + private int numBuffersForSort; + + /** + * The {@link SortBufferContainer} for accumulating broadcast data. Note that this can be null + * before using it to store records, and this buffer container will be released once flushed. + */ + @Nullable private SortBufferContainer broadcastDataBuffer; + + /** + * The {@link SortBufferContainer} for accumulating non-broadcast data. 
Note that this can be + * null before using it to store records, and this buffer container will be released once + * flushed. + */ + @Nullable private SortBufferContainer unicastDataBuffer; + + /** + * The buffer recycler. Note that this can be null before requesting buffers from the memory + * manager. + */ + @Nullable private BufferRecycler bufferRecycler; + + /** + * The {@link SortBufferAccumulator}'s accumulated buffer flusher is not prepared during + * construction, requiring the field to be initialized during setup. Therefore, it is necessary + * to verify whether this field is null before using it. + */ + @Nullable + private BiConsumer<TieredStorageSubpartitionId, List<Buffer>> accumulatedBufferFlusher; + + public SortBufferAccumulator( + int numSubpartitions, + int numBuffers, + int bufferSizeBytes, + TieredStorageMemoryManager storeMemoryManager) { + this.numSubpartitions = numSubpartitions; + this.bufferSizeBytes = bufferSizeBytes; + this.numBuffers = numBuffers; + this.storeMemoryManager = storeMemoryManager; + } + + @Override + public void setup(BiConsumer<TieredStorageSubpartitionId, List<Buffer>> bufferFlusher) { + this.accumulatedBufferFlusher = bufferFlusher; + } + + @Override + public void receive( + ByteBuffer record, + TieredStorageSubpartitionId subpartitionId, + Buffer.DataType dataType, + boolean isBroadcast) + throws IOException { + int targetSubpartition = subpartitionId.getSubpartitionId(); + SortBufferContainer sortBufferContainer = + isBroadcast ? getBroadcastDataBuffer() : getUnicastDataBuffer(); + if (!sortBufferContainer.writeRecord(record, targetSubpartition, dataType)) { + return; + } + + if (!sortBufferContainer.hasRemaining()) { + sortBufferContainer.release(); + writeLargeRecord(record, targetSubpartition, dataType); + return; + } + + flushDataBuffer(sortBufferContainer); + sortBufferContainer.release(); + if (record.hasRemaining()) { Review Comment: yes, replace this with a `checkState`. 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
