This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch HDDS-3816-ec
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-3816-ec by this push:
     new c660483  HDDS-5952. EC: ECBlockReconstructedStripeInputStream should 
read from blocks in parallel (#2899)
c660483 is described below

commit c6604833d1833b4ee796f1fbe9eb13b955857470
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Wed Dec 15 18:22:07 2021 +0000

    HDDS-5952. EC: ECBlockReconstructedStripeInputStream should read from 
blocks in parallel (#2899)
---
 .../ozone/client/io/ECBlockInputStreamProxy.java   |  17 ++++
 .../io/ECBlockReconstructedStripeInputStream.java  | 108 ++++++++++++++++-----
 .../TestECBlockReconstructedStripeInputStream.java |   7 +-
 3 files changed, 103 insertions(+), 29 deletions(-)

diff --git 
a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStreamProxy.java
 
b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStreamProxy.java
index 2ed173c..2f7680e 100644
--- 
a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStreamProxy.java
+++ 
b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStreamProxy.java
@@ -57,6 +57,7 @@ public class ECBlockInputStreamProxy extends 
BlockExtendedInputStream {
   private BlockExtendedInputStream blockReader;
   private boolean reconstructionReader = false;
   private List<DatanodeDetails> failedLocations = new ArrayList<>();
+  private boolean closed = false;
 
   /**
    * Given the ECReplicationConfig and the block length, calculate how many
@@ -142,6 +143,7 @@ public class ECBlockInputStreamProxy extends 
BlockExtendedInputStream {
 
   @Override
   public synchronized int read(ByteBuffer buf) throws IOException {
+    ensureNotClosed();
     if (blockReader.getRemaining() == 0) {
       return EOF;
     }
@@ -208,6 +210,7 @@ public class ECBlockInputStreamProxy extends 
BlockExtendedInputStream {
 
   @Override
   public synchronized void seek(long pos) throws IOException {
+    ensureNotClosed();
     try {
       blockReader.seek(pos);
     } catch (IOException e) {
@@ -217,4 +220,18 @@ public class ECBlockInputStreamProxy extends 
BlockExtendedInputStream {
       failoverToReconstructionRead(null, pos);
     }
   }
+
+  @Override
+  public void close() throws IOException {
+    if (blockReader != null) {
+      blockReader.close();
+    }
+    closed = true;
+  }
+
+  private void ensureNotClosed() throws IOException {
+    if (closed) {
+      throw new IOException("The stream is closed");
+    }
+  }
 }
diff --git 
a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockReconstructedStripeInputStream.java
 
b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockReconstructedStripeInputStream.java
index a918eb4..46dd3e1 100644
--- 
a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockReconstructedStripeInputStream.java
+++ 
b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockReconstructedStripeInputStream.java
@@ -17,7 +17,9 @@
  */
 package org.apache.hadoop.ozone.client.io;
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.commons.lang3.NotImplementedException;
+import org.apache.commons.lang3.tuple.ImmutablePair;
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.client.ECReplicationConfig;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -34,13 +36,20 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Queue;
 import java.util.Random;
 import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ThreadFactory;
 import java.util.function.Function;
 
 /**
@@ -104,6 +113,8 @@ public class ECBlockReconstructedStripeInputStream extends 
ECBlockInputStream {
 
   private boolean initialized = false;
 
+  private ExecutorService executor;
+
   public ECBlockReconstructedStripeInputStream(ECReplicationConfig repConfig,
       OmKeyLocationInfo blockInfo, boolean verifyChecksum,
        XceiverClientFactory xceiverClientFactory, Function<BlockID,
@@ -118,6 +129,10 @@ public class ECBlockReconstructedStripeInputStream extends 
ECBlockInputStream {
     // The EC decoder needs an array data+parity long, with missing or not
     // needed indexes set to null.
     decoderInputBuffers = new ByteBuffer[getRepConfig().getRequiredNodes()];
+
+    ThreadFactory threadFactory = new ThreadFactoryBuilder().setNameFormat(
+        "ec-reader-for-" + blockInfo.getBlockID() + "-TID-%d").build();
+    executor = Executors.newFixedThreadPool(repConfig.getData(), 
threadFactory);
   }
 
   /**
@@ -271,6 +286,9 @@ public class ECBlockReconstructedStripeInputStream extends 
ECBlockInputStream {
         for (ByteBuffer b : bufs) {
           b.position(0);
         }
+      } catch (InterruptedException ie) {
+        Thread.currentThread().interrupt();
+        throw new IOException("Interrupted waiting for reads to complete", ie);
       }
     }
     if (missingIndexes.length > 0) {
@@ -438,33 +456,69 @@ public class ECBlockReconstructedStripeInputStream 
extends ECBlockInputStream {
     }
   }
 
-  protected void loadDataBuffersFromStream() throws IOException {
+  protected void loadDataBuffersFromStream()
+      throws IOException, InterruptedException {
+    Queue<ImmutablePair<Integer, Future<Void>>> pendingReads
+        = new ArrayDeque<>();
     for (int i : dataIndexes) {
+      pendingReads.add(new ImmutablePair<>(i, executor.submit(() -> {
+        readIntoBuffer(i, decoderInputBuffers[i]);
+        return null;
+      })));
+    }
+    boolean exceptionOccurred = false;
+    while(!pendingReads.isEmpty()) {
+      int index = -1;
       try {
-        BlockExtendedInputStream stream = getOrOpenStream(i);
-        seekStreamIfNecessary(stream, 0);
-        ByteBuffer b = decoderInputBuffers[i];
-        while (b.hasRemaining()) {
-          int read = stream.read(b);
-          if (read == EOF) {
-            // We should not reach EOF, as the block should have enough data to
-            // fill the buffer. If the block does not, then it indicates the
-            // block is not as long as it should be, based on the block length
-            // stored in OM. Therefore if there is any remaining space in the
-            // buffer, we should throw an exception.
-            if (b.hasRemaining()) {
-              throw new IOException("Expected to read " + b.remaining() +
-                  " bytes from block " + getBlockID() + " EC index " + (i + 1) 
+
-                  " but reached EOF");
-            }
-            break;
-          }
-        }
-      } catch (IOException e) {
+        ImmutablePair<Integer, Future<Void>> pair = pendingReads.poll();
+        index = pair.getKey();
+        // Should this future.get() have a timeout? At the end of the call 
chain
+        // we eventually call a grpc or ratis client to read the block data. 
It's
+        // the call to the DNs which could potentially block. There is a 
timeout
+        // on that call controlled by:
+        //     OZONE_CLIENT_READ_TIMEOUT = "ozone.client.read.timeout";
+        // Which defaults to 30s. So if there is a DN communication problem, it
+        // should time out in the client which should propagate up the stack as
+        // an IOException.
+        pair.getValue().get();
+      } catch (ExecutionException ee) {
         LOG.warn("Failed to read from block {} EC index {}. Excluding the " +
-                "block", getBlockID(), i + 1, e);
-        failedDataIndexes.add(i);
-        throw e;
+            "block", getBlockID(), index + 1, ee.getCause());
+        failedDataIndexes.add(index);
+        exceptionOccurred = true;
+      } catch (InterruptedException ie) {
+        // Catch each InterruptedException to ensure all the futures have been
+        // handled, and then throw the exception later
+        LOG.debug("Interrupted while waiting for reads to complete", ie);
+        Thread.currentThread().interrupt();
+      }
+    }
+    if (Thread.currentThread().isInterrupted()) {
+      throw new InterruptedException(
+          "Interrupted while waiting for reads to complete");
+    }
+    if (exceptionOccurred) {
+      throw new IOException("One or more errors occurred reading blocks");
+    }
+  }
+
+  private void readIntoBuffer(int ind, ByteBuffer buf) throws IOException {
+    BlockExtendedInputStream stream = getOrOpenStream(ind);
+    seekStreamIfNecessary(stream, 0);
+    while (buf.hasRemaining()) {
+      int read = stream.read(buf);
+      if (read == EOF) {
+        // We should not reach EOF, as the block should have enough data to
+        // fill the buffer. If the block does not, then it indicates the
+        // block is not as long as it should be, based on the block length
+        // stored in OM. Therefore if there is any remaining space in the
+        // buffer, we should throw an exception.
+        if (buf.hasRemaining()) {
+          throw new IOException("Expected to read " + buf.remaining() +
+              " bytes from block " + getBlockID() + " EC index " + (ind + 1) +
+              " but reached EOF");
+        }
+        break;
       }
     }
   }
@@ -521,6 +575,12 @@ public class ECBlockReconstructedStripeInputStream extends 
ECBlockInputStream {
   }
 
   @Override
+  public synchronized void close() {
+    super.close();
+    executor.shutdownNow();
+  }
+
+  @Override
   public synchronized void seek(long pos) throws IOException {
     if (pos % getStripeSize() != 0) {
       // As this reader can only return full stripes, we only seek to the start
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestECBlockReconstructedStripeInputStream.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestECBlockReconstructedStripeInputStream.java
index 0dda350..589ceac 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestECBlockReconstructedStripeInputStream.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestECBlockReconstructedStripeInputStream.java
@@ -33,7 +33,6 @@ import org.junit.Test;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.SplittableRandom;
@@ -74,11 +73,9 @@ public class TestECBlockReconstructedStripeInputStream {
         keyInfo, true, null, null, new TestBlockInputStreamFactory())) {
       Assert.assertTrue(ecb.hasSufficientLocations());
     }
-
-    Map<DatanodeDetails, Integer> dnMap = new HashMap<>();
-
     // Two Chunks, but missing data block 2.
-    dnMap = ECStreamTestUtil.createIndexMap(1, 4, 5);
+    Map<DatanodeDetails, Integer> dnMap
+        = ECStreamTestUtil.createIndexMap(1, 4, 5);
     keyInfo = ECStreamTestUtil.createKeyInfo(repConfig, ONEMB * 2, dnMap);
     try (ECBlockInputStream ecb =
         new ECBlockReconstructedStripeInputStream(repConfig,

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to