snvijaya commented on a change in pull request #2464:
URL: https://github.com/apache/hadoop/pull/2464#discussion_r543108306
##########
File path:
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java
##########
@@ -206,11 +229,120 @@ private int readOneBlock(final byte[] b, final int off,
final int len) throws IO
fCursor += bytesRead;
fCursorAfterLastRead = fCursor;
}
+ return copyToUserBuffer(b, off, len);
+ }
+
+ private int readFileCompletely(final byte[] b, final int off, final int len)
+ throws IOException {
+ if (len == 0) {
+ return 0;
+ }
+ if (!validate(b, off, len)) {
+ return -1;
+ }
+ savePointerState();
+
+ buffer = new byte[bufferSize];
+ // data needs to be copied to the user buffer from index bCursor;
+ // bCursor has to be the current fCursor
+ bCursor = (int) fCursor;
+ return optimisedRead(b, off, len, 0, contentLength);
+ }
+
+ private int readLastBlock(final byte[] b, final int off, final int len)
+ throws IOException {
+ if (len == 0) {
+ return 0;
+ }
+ if (!validate(b, off, len)) {
+ return -1;
+ }
+ savePointerState();
+
+ buffer = new byte[bufferSize];
+ // data needs to be copied to the user buffer from index bCursor; the
+ // AbfsInputStream buffer is going to contain data from the footer start.
+ // In that case bCursor will be set to footerStart - fCursor
+ long lastBlockStart = max(0, contentLength - bufferSize);
+ bCursor = (int) (fCursor - lastBlockStart);
+ // A read API call is considered one single operation; in reality the
+ // server could return partial data and the client has to retry until the
+ // last full block is read. So set fCursorAfterLastRead before the
+ // possible multiple server calls
+ fCursorAfterLastRead = fCursor;
+ // equals contentLength when contentLength is < bufferSize
+ long actualLenToRead = min(bufferSize, contentLength);
+ return optimisedRead(b, off, len, lastBlockStart, actualLenToRead);
+ }
+
+ private int optimisedRead(final byte[] b, final int off, final int len,
+ final long readFrom, final long actualLen) throws IOException {
+ int totalBytesRead = 0;
+ fCursor = readFrom;
+ for (int i = 0; i < 2 && fCursor < contentLength; i++) {
Review comment:
Replace the magic number 2 with a named constant or field.
##########
File path:
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java
##########
@@ -206,11 +229,120 @@ private int readOneBlock(final byte[] b, final int off,
final int len) throws IO
fCursor += bytesRead;
fCursorAfterLastRead = fCursor;
}
+ return copyToUserBuffer(b, off, len);
+ }
+
+ private int readFileCompletely(final byte[] b, final int off, final int len)
+ throws IOException {
+ if (len == 0) {
+ return 0;
+ }
+ if (!validate(b, off, len)) {
+ return -1;
+ }
+ savePointerState();
+
+ buffer = new byte[bufferSize];
+ // data needs to be copied to the user buffer from index bCursor;
+ // bCursor has to be the current fCursor
+ bCursor = (int) fCursor;
+ return optimisedRead(b, off, len, 0, contentLength);
+ }
+
+ private int readLastBlock(final byte[] b, final int off, final int len)
+ throws IOException {
+ if (len == 0) {
+ return 0;
+ }
+ if (!validate(b, off, len)) {
+ return -1;
+ }
+ savePointerState();
+
+ buffer = new byte[bufferSize];
+ // data needs to be copied to the user buffer from index bCursor; the
+ // AbfsInputStream buffer is going to contain data from the footer start.
+ // In that case bCursor will be set to footerStart - fCursor
+ long lastBlockStart = max(0, contentLength - bufferSize);
+ bCursor = (int) (fCursor - lastBlockStart);
+ // A read API call is considered one single operation; in reality the
+ // server could return partial data and the client has to retry until the
+ // last full block is read. So set fCursorAfterLastRead before the
+ // possible multiple server calls
+ fCursorAfterLastRead = fCursor;
+ // equals contentLength when contentLength is < bufferSize
+ long actualLenToRead = min(bufferSize, contentLength);
+ return optimisedRead(b, off, len, lastBlockStart, actualLenToRead);
+ }
+
+ private int optimisedRead(final byte[] b, final int off, final int len,
+ final long readFrom, final long actualLen) throws IOException {
+ int totalBytesRead = 0;
+ fCursor = readFrom;
+ for (int i = 0; i < 2 && fCursor < contentLength; i++) {
+ int bytesRead = readInternal(fCursor, buffer, limit,
+ (int) actualLen - limit, true);
+ if (bytesRead > 0) {
+ totalBytesRead += bytesRead;
+ limit += bytesRead;
+ fCursor += bytesRead;
+ }
+ }
+ // if the read was not success and the user requested part of data has
Review comment:
Please re-write this comment — fix the typos and clarify what it describes.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]