bhattmanish98 commented on code in PR #7832:
URL: https://github.com/apache/hadoop/pull/7832#discussion_r2465910989
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java:
##########
@@ -128,13 +128,20 @@ public final class FileSystemConfigurations {
public static final long
DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS = 120;
public static final boolean DEFAULT_ENABLE_READAHEAD = true;
- public static final boolean DEFAULT_ENABLE_READAHEAD_V2 = false;
+ public static final boolean DEFAULT_ENABLE_READAHEAD_V2 = true;
+ public static final boolean DEFAULT_ENABLE_READAHEAD_V2_DYNAMIC_SCALING =
true;
public static final int DEFAULT_READAHEAD_V2_MIN_THREAD_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MAX_THREAD_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MIN_BUFFER_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MAX_BUFFER_POOL_SIZE = -1;
- public static final int DEFAULT_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLIS =
3_000;
+ public static final int DEFAULT_READAHEAD_V2_CPU_MONITORING_INTERVAL_MILLIS
= 6_000;
+ public static final int DEFAULT_READAHEAD_V2_THREAD_POOL_UPSCALE_PERCENTAGE
= 20;
+ public static final int
DEFAULT_READAHEAD_V2_THREAD_POOL_DOWNSCALE_PERCENTAGE = 30;
Review Comment:
Same as above
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java:
##########
@@ -128,13 +128,20 @@ public final class FileSystemConfigurations {
public static final long
DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS = 120;
public static final boolean DEFAULT_ENABLE_READAHEAD = true;
- public static final boolean DEFAULT_ENABLE_READAHEAD_V2 = false;
+ public static final boolean DEFAULT_ENABLE_READAHEAD_V2 = true;
+ public static final boolean DEFAULT_ENABLE_READAHEAD_V2_DYNAMIC_SCALING =
true;
public static final int DEFAULT_READAHEAD_V2_MIN_THREAD_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MAX_THREAD_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MIN_BUFFER_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MAX_BUFFER_POOL_SIZE = -1;
- public static final int DEFAULT_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLIS =
3_000;
+ public static final int DEFAULT_READAHEAD_V2_CPU_MONITORING_INTERVAL_MILLIS
= 6_000;
+ public static final int DEFAULT_READAHEAD_V2_THREAD_POOL_UPSCALE_PERCENTAGE
= 20;
Review Comment:
For some variables you have used "Percentage" and for others "Percent"; we
should keep it consistent across all places.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManagerV2.java:
##########
@@ -17,67 +17,91 @@
*/
package org.apache.hadoop.fs.azurebfs.services;
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
+
+import com.sun.management.OperatingSystemMXBean;
+
import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Stack;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReentrantLock;
-import org.apache.hadoop.classification.VisibleForTesting;
-import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
-import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.classification.VisibleForTesting;
-final class ReadBufferManagerV2 extends ReadBufferManager {
+import static
org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_HUNDRED;
+
+/**
+ * The Improved Read Buffer Manager for Rest AbfsClient.
+ */
+public class ReadBufferManagerV2 extends ReadBufferManager {
+ // Internal constants
+ private static final ReentrantLock LOCK = new ReentrantLock();
// Thread Pool Configurations
private static int minThreadPoolSize;
private static int maxThreadPoolSize;
+ private static int cpuMonitoringIntervalInMilliSec;
+ private static double cpuThreshold;
+ private static int threadPoolUpscalePercentage;
+ private static int threadPoolDownscalePercentage;
private static int executorServiceKeepAliveTimeInMilliSec;
+ private static final double THREAD_POOL_REQUIREMENT_BUFFER = 1.2; // 20%
more threads than the queue size
+ private static boolean isDynamicScalingEnabled;
+
+ private ScheduledExecutorService cpuMonitorThread;
private ThreadPoolExecutor workerPool;
+ private final List<ReadBufferWorker> workerRefs = new ArrayList<>();
// Buffer Pool Configurations
private static int minBufferPoolSize;
private static int maxBufferPoolSize;
+ private static int memoryMonitoringIntervalInMilliSec;
+ private static double memoryThreshold;
+
private int numberOfActiveBuffers = 0;
private byte[][] bufferPool;
+ private Stack<Integer> removedBufferList = new Stack<>();
+ private ScheduledExecutorService memoryMonitorThread;
+ // Buffer Manager Structures
private static ReadBufferManagerV2 bufferManager;
-
- // hide instance constructor
- private ReadBufferManagerV2() {
- LOGGER.trace("Creating readbuffer manager with HADOOP-18546 patch");
- }
+ private static boolean isConfigured = false;
/**
- * Sets the read buffer manager configurations.
- * @param readAheadBlockSize the size of the read-ahead block in bytes
- * @param abfsConfiguration the AbfsConfiguration instance for other
configurations
+ * Private constructor to prevent instantiation as this needs to be
singleton.
*/
- static void setReadBufferManagerConfigs(int readAheadBlockSize,
AbfsConfiguration abfsConfiguration) {
- if (bufferManager == null) {
- minThreadPoolSize = abfsConfiguration.getMinReadAheadV2ThreadPoolSize();
- maxThreadPoolSize = abfsConfiguration.getMaxReadAheadV2ThreadPoolSize();
- executorServiceKeepAliveTimeInMilliSec =
abfsConfiguration.getReadAheadExecutorServiceTTLInMillis();
-
- minBufferPoolSize = abfsConfiguration.getMinReadAheadV2BufferPoolSize();
- maxBufferPoolSize = abfsConfiguration.getMaxReadAheadV2BufferPoolSize();
-
setThresholdAgeMilliseconds(abfsConfiguration.getReadAheadV2CachedBufferTTLMillis());
- setReadAheadBlockSize(readAheadBlockSize);
- }
+ private ReadBufferManagerV2() {
+ printTraceLog("Creating Read Buffer Manager V2 with HADOOP-18546 patch");
Review Comment:
We should use LOG.trace instead of printTraceLog.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManagerV2.java:
##########
@@ -106,123 +166,731 @@ void init() {
executorServiceKeepAliveTimeInMilliSec,
TimeUnit.MILLISECONDS,
new SynchronousQueue<>(),
- namedThreadFactory);
+ workerThreadFactory);
workerPool.allowCoreThreadTimeOut(true);
for (int i = 0; i < minThreadPoolSize; i++) {
- ReadBufferWorker worker = new ReadBufferWorker(i, this);
+ ReadBufferWorker worker = new ReadBufferWorker(i, getBufferManager());
+ workerRefs.add(worker);
workerPool.submit(worker);
}
ReadBufferWorker.UNLEASH_WORKERS.countDown();
+
+ if (isDynamicScalingEnabled) {
+ cpuMonitorThread = Executors.newSingleThreadScheduledExecutor(runnable
-> {
+ Thread t = new Thread(runnable, "ReadAheadV2-CPU-Monitor");
+ t.setDaemon(true);
+ return t;
+ });
+ cpuMonitorThread.scheduleAtFixedRate(this::adjustThreadPool,
+ getCpuMonitoringIntervalInMilliSec(),
getCpuMonitoringIntervalInMilliSec(),
+ TimeUnit.MILLISECONDS);
+ }
+
+ printTraceLog("ReadBufferManagerV2 initialized with {} buffers and {}
worker threads",
Review Comment:
Same as above; please change it wherever you have used it.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManagerV2.java:
##########
@@ -17,67 +17,91 @@
*/
package org.apache.hadoop.fs.azurebfs.services;
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
+
+import com.sun.management.OperatingSystemMXBean;
+
import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Stack;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReentrantLock;
-import org.apache.hadoop.classification.VisibleForTesting;
-import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
-import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.classification.VisibleForTesting;
-final class ReadBufferManagerV2 extends ReadBufferManager {
+import static
org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_HUNDRED;
+
+/**
+ * The Improved Read Buffer Manager for Rest AbfsClient.
+ */
+public class ReadBufferManagerV2 extends ReadBufferManager {
+ // Internal constants
+ private static final ReentrantLock LOCK = new ReentrantLock();
// Thread Pool Configurations
private static int minThreadPoolSize;
private static int maxThreadPoolSize;
+ private static int cpuMonitoringIntervalInMilliSec;
+ private static double cpuThreshold;
+ private static int threadPoolUpscalePercentage;
+ private static int threadPoolDownscalePercentage;
private static int executorServiceKeepAliveTimeInMilliSec;
+ private static final double THREAD_POOL_REQUIREMENT_BUFFER = 1.2; // 20%
more threads than the queue size
Review Comment:
Is this configurable, or have we fixed this number based on POC data? If so,
can we explain it a little more for future understanding?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]