[ https://issues.apache.org/jira/browse/HADOOP-19613?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18009295#comment-18009295 ]
ASF GitHub Bot commented on HADOOP-19613: ----------------------------------------- anujmodi2021 commented on code in PR #7801: URL: https://github.com/apache/hadoop/pull/7801#discussion_r2225893577 ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManagerV1.java: ########## @@ -0,0 +1,612 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.concurrent.CountDownLatch; + +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.classification.VisibleForTesting; + +/** + * The Read Buffer Manager for Rest AbfsClient. + * V1 implementation of ReadBufferManager. + */ +final class ReadBufferManagerV1 extends ReadBufferManager { + + private static final int NUM_BUFFERS = 16; + private static final int NUM_THREADS = 8; + private static final int DEFAULT_THRESHOLD_AGE_MILLISECONDS = 3000; + + private Thread[] threads = new Thread[NUM_THREADS]; + private byte[][] buffers; + private static ReadBufferManagerV1 bufferManager; + + // hide instance constructor + private ReadBufferManagerV1() { + LOGGER.trace("Creating readbuffer manager with HADOOP-18546 patch"); + } + + /** + * Sets the read buffer manager configurations. + * @param readAheadBlockSize the size of the read-ahead block in bytes + */ + static void setReadBufferManagerConfigs(int readAheadBlockSize) { + if (bufferManager == null) { + LOGGER.debug( + "ReadBufferManagerV1 not initialized yet. Overriding readAheadBlockSize as {}", + readAheadBlockSize); + setReadAheadBlockSize(readAheadBlockSize); + setThresholdAgeMilliseconds(DEFAULT_THRESHOLD_AGE_MILLISECONDS); + } + } + + /** + * Returns the singleton instance of ReadBufferManagerV1. + * @return the singleton instance of ReadBufferManagerV1 + */ + static ReadBufferManagerV1 getBufferManager() { + if (bufferManager == null) { + LOCK.lock(); + try { + if (bufferManager == null) { + bufferManager = new ReadBufferManagerV1(); + bufferManager.init(); + } + } finally { + LOCK.unlock(); + } + } + return bufferManager; + } + + /** + * {@inheritDoc} + */ + @Override + void init() { + buffers = new byte[NUM_BUFFERS][]; + for (int i = 0; i < NUM_BUFFERS; i++) { + buffers[i] = new byte[getReadAheadBlockSize()]; // same buffers are reused. The byte array never goes back to GC + getFreeList().add(i); + } + for (int i = 0; i < NUM_THREADS; i++) { + Thread t = new Thread(new ReadBufferWorker(i, this)); + t.setDaemon(true); + threads[i] = t; + t.setName("ABFS-prefetch-" + i); + t.start(); + } + ReadBufferWorker.UNLEASH_WORKERS.countDown(); + } + + /** + * {@inheritDoc} + */ + @Override + public void queueReadAhead(final AbfsInputStream stream, final long requestedOffset, final int requestedLength, + TracingContext tracingContext) { + if (LOGGER.isTraceEnabled()) { Review Comment: Not very sure but i think it will be computationally faster to check and not post the log if trace is anyway disabled. Also, this was always there in RBM as multiple threads hit this part and logs can grow really large. Therefore, decided to retain it. > ABFS: [ReadAheadV2] Refactor ReadBufferManager to isolate new code with the > current working code > ------------------------------------------------------------------------------------------------ > > Key: HADOOP-19613 > URL: https://issues.apache.org/jira/browse/HADOOP-19613 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs/azure > Affects Versions: 3.5.0, 3.4.1 > Reporter: Anuj Modi > Assignee: Anuj Modi > Priority: Major > Labels: pull-request-available > > Read Buffer Manager used today was introduced way back and has been stable > for quite a while. > Read Buffer Manager to be introduced as part of > https://issues.apache.org/jira/browse/HADOOP-19596 will introduce many > changes incrementally over time. While the development goes on and we are > able to fully stabilise the optimized version we need the current flow to be > functional and undisturbed. > This work item is to isolate that from new code by refactoring > ReadBufferManager class to have 2 different implementations with same public > interfaces: ReadBufferManagerV1 and ReadBufferManagerV2. > This will also introduce new configs that can be used to toggle between new > and old code. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org