Repository: hbase
Updated Branches:
  refs/heads/0.98 c6d98c339 -> e6bb5f4f6


HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the end of the data

Amending-Author: Andrew Purtell <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e6bb5f4f
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e6bb5f4f
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e6bb5f4f

Branch: refs/heads/0.98
Commit: e6bb5f4f62b0984671c55bb4d57ea402aedaa4ac
Parents: c6d98c3
Author: stack <[email protected]>
Authored: Tue Feb 9 20:55:20 2016 -0800
Committer: Andrew Purtell <[email protected]>
Committed: Wed Feb 10 00:59:56 2016 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/io/hfile/ChecksumUtil.java     | 16 +++++++-------
 .../hadoop/hbase/io/hfile/FixedFileTrailer.java |  9 ++++----
 .../hadoop/hbase/io/hfile/HFileBlock.java       |  9 ++++----
 .../hadoop/hbase/io/hfile/HFileReaderV2.java    | 22 ++++++++++++--------
 .../hadoop/hbase/io/hfile/TestChecksum.java     |  2 +-
 5 files changed, 31 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/e6bb5f4f/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 3282213..adb5d11 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -34,11 +34,11 @@ public class ChecksumUtil {
   /** This is used to reserve space in a byte buffer */
   private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
 
-  /** 
-   * This is used by unit tests to make checksum failures throw an 
-   * exception instead of returning null. Returning a null value from 
-   * checksum validation will cause the higher layer to retry that 
-   * read with hdfs-level checksums. Instead, we would like checksum 
+  /**
+   * This is used by unit tests to make checksum failures throw an
+   * exception instead of returning null. Returning a null value from
+   * checksum validation will cause the higher layer to retry that
+   * read with hdfs-level checksums. Instead, we would like checksum
    * failures to cause the entire unit test to fail.
    */
   private static boolean generateExceptions = false;
@@ -95,7 +95,7 @@ public class ChecksumUtil {
    * The header is extracted from the specified HFileBlock while the
    * data-to-be-verified is extracted from 'data'.
    */
-  static boolean validateBlockChecksum(Path path, HFileBlock block, 
+  static boolean validateBlockChecksum(Path path, long offset, HFileBlock 
block,
     byte[] data, int hdrSize) throws IOException {
 
     // If this is an older version of the block that does not have
@@ -109,7 +109,7 @@ public class ChecksumUtil {
     }
 
     // Get a checksum object based on the type of checksum that is
-    // set in the HFileBlock header. A ChecksumType.NULL indicates that 
+    // set in the HFileBlock header. A ChecksumType.NULL indicates that
     // the caller is not interested in validating checksums, so we
     // always return true.
     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
@@ -179,7 +179,7 @@ public class ChecksumUtil {
    * @return The number of bytes needed to store the checksum values
    */
   static long numBytes(long datasize, int bytesPerChecksum) {
-    return numChunks(datasize, bytesPerChecksum) * 
+    return numChunks(datasize, bytesPerChecksum) *
                      HFileBlock.CHECKSUM_SIZE;
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/e6bb5f4f/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
index 56510f0..6735036 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
@@ -41,8 +41,7 @@ import org.apache.hadoop.hbase.util.Bytes;
  * trailer size is fixed within a given {@link HFile} format version only, but
  * we always store the version number as the last four-byte integer of the 
file.
  * The version number itself is split into two portions, a major 
- * version and a minor version. 
- * The last three bytes of a file is the major
+ * version and a minor version. The last three bytes of a file are the major
  * version and a single preceding byte is the minor number. The major version
  * determines which readers/writers to use to read/write a hfile while a minor
  * version determines smaller changes in hfile format that do not need a new
@@ -50,7 +49,6 @@ import org.apache.hadoop.hbase.util.Bytes;
  */
 @InterfaceAudience.Private
 public class FixedFileTrailer {
-
   /**
    * We store the comparator class name as a fixed-length field in the trailer.
    */
@@ -65,8 +63,9 @@ public class FixedFileTrailer {
   /**
    * In version 1, the offset to the data block index. Starting from version 2,
    * the meaning of this field is the offset to the section of the file that
-   * should be loaded at the time the file is being opened, and as of the time
-   * of writing, this happens to be the offset of the file info section.
+   * should be loaded at the time the file is being opened: i.e. on open we 
load
+   * the root index, file info, etc. See 
http://hbase.apache.org/book.html#_hfile_format_2
+   * in the reference guide.
    */
   private long loadOnOpenDataOffset;
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/e6bb5f4f/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 704cbbb..46e2ba1 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -1628,7 +1628,7 @@ public class HFileBlock implements Cacheable {
         b.assumeUncompressed();
       }
 
-      if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
+      if (verifyChecksum && !validateBlockChecksum(b, offset, onDiskBlock, 
hdrSize)) {
         return null;             // checksum mismatch
       }
 
@@ -1678,9 +1678,10 @@ public class HFileBlock implements Cacheable {
      * If there is a checksum mismatch, then return false. Otherwise
      * return true.
      */
-    protected boolean validateBlockChecksum(HFileBlock block,  byte[] data, 
int hdrSize)
-        throws IOException {
-      return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize);
+    protected boolean validateBlockChecksum(HFileBlock block, long offset, 
byte[] data,
+        int hdrSize)
+    throws IOException {
+      return ChecksumUtil.validateBlockChecksum(path, offset, block, data, 
hdrSize);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/e6bb5f4f/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
index 9fb156a..dc9af99 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
@@ -183,10 +183,14 @@ public class HFileReaderV2 extends AbstractHFileReader {
     if (cacheConf.shouldPrefetchOnOpen()) {
       PrefetchExecutor.request(path, new Runnable() {
         public void run() {
+          long offset = 0;
+          long end = 0;
           try {
-            long offset = 0;
-            long end = fileSize - getTrailer().getTrailerSize();
+            end = getTrailer().getLoadOnOpenDataOffset();
             HFileBlock prevBlock = null;
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", 
end=" + end);
+            }
             while (offset < end) {
               if (Thread.interrupted()) {
                 break;
@@ -202,11 +206,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
           } catch (IOException e) {
             // IOExceptions are probably due to region closes (relocation, 
etc.)
             if (LOG.isTraceEnabled()) {
-              LOG.trace("Exception encountered while prefetching " + path + 
":", e);
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", 
end=" + end, e);
             }
           } catch (Exception e) {
             // Other exceptions are interesting
-            LOG.warn("Exception encountered while prefetching " + path + ":", 
e);
+            LOG.warn("File=" + path.toString() + ", offset=" + offset + ", 
end=" + end, e);
           } finally {
             PrefetchExecutor.complete(path);
           }
@@ -328,11 +332,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
     if (dataBlockIndexReader == null) {
       throw new IOException("Block index not loaded");
     }
-    if (dataBlockOffset < 0
-        || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
-      throw new IOException("Requested block is out of range: "
-          + dataBlockOffset + ", lastDataBlockOffset: "
-          + trailer.getLastDataBlockOffset());
+    long trailerOffset = trailer.getLoadOnOpenDataOffset();
+    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
+      throw new IOException("Requested block is out of range: " + 
dataBlockOffset +
+        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
+        ", trailer.getLoadOnOpenDataOffset: " + trailerOffset);
     }
     // For any given block from any given file, synchronize reads for said
     // block.

http://git-wip-us.apache.org/repos/asf/hbase/blob/e6bb5f4f/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index d81b16d..c36a33e 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -291,7 +291,7 @@ public class TestChecksum {
     }
 
     @Override
-    protected boolean validateBlockChecksum(HFileBlock block, 
+    protected boolean validateBlockChecksum(HFileBlock block, long offset,
       byte[] data, int hdrSize) throws IOException {
       return false;  // checksum validation failure
     }

Reply via email to