joshelser commented on code in PR #4414:
URL: https://github.com/apache/hbase/pull/4414#discussion_r868474782
##########
hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java:
##########
@@ -92,6 +107,102 @@ public void testReadFully() throws IOException {
assertArrayEquals(Bytes.toBytes(s), heapBuf);
}
+ @Test
+ public void testPreadWithReadFullBytes() throws IOException {
+ testPreadReadFullBytesInternal(true);
+ }
+
+ @Test
+ public void testPreadWithoutReadFullBytes() throws IOException {
+ testPreadReadFullBytesInternal(false);
+ }
+
+ private void testPreadReadFullBytesInternal(boolean readAllBytes) throws
IOException {
+ Configuration conf = TEST_UTIL.getConfiguration();
+ conf.setBoolean(HConstants.HFILE_PREAD_ALL_BYTES_ENABLED_KEY,
readAllBytes);
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+ Path path = new Path(TEST_UTIL.getDataTestDirOnTestFS(),
testName.getMethodName());
+ Random rand = new Random();
Review Comment:
There may be value in being able to specify a specific seed here, so that
if you do see a test failure in a specific case, we can actually reproduce
that failure.
##########
hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/BlockIOUtils.java:
##########
@@ -284,6 +311,10 @@ private static boolean preadWithExtraDirectly(ByteBuff
buff, FSDataInputStream d
throw e;
}
if (ret < 0) {
+ if (remain <= extraLen) {
+ // break for the "extra data" when hitting end of stream and
remaining is necessary
+ break;
Review Comment:
If I rephrase your comment: the client is asking for data which cannot
possibly exist because the `extraLen` would read off the end of the file. The
client told us to try to read an extra header after the current datablock but
there is no "next header" to read because we're at the end of the file.
I worry that this may mask a problem where a file is corrupt but we would
miss the "premature EOF" IOException which should be thrown.
##########
hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java:
##########
@@ -92,6 +107,102 @@ public void testReadFully() throws IOException {
assertArrayEquals(Bytes.toBytes(s), heapBuf);
}
+ @Test
+ public void testPreadWithReadFullBytes() throws IOException {
+ testPreadReadFullBytesInternal(true);
+ }
+
+ @Test
+ public void testPreadWithoutReadFullBytes() throws IOException {
+ testPreadReadFullBytesInternal(false);
+ }
+
+ private void testPreadReadFullBytesInternal(boolean readAllBytes) throws
IOException {
+ Configuration conf = TEST_UTIL.getConfiguration();
+ conf.setBoolean(HConstants.HFILE_PREAD_ALL_BYTES_ENABLED_KEY,
readAllBytes);
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+ Path path = new Path(TEST_UTIL.getDataTestDirOnTestFS(),
testName.getMethodName());
+ Random rand = new Random();
+ long totalDataBlockBytes = writeBlocks(TEST_UTIL.getConfiguration(), rand,
+ Compression.Algorithm.GZ, path);
+ readDataBlocksAndVerify(fs, path, totalDataBlockBytes);
+ }
+
+ private long writeBlocks(Configuration conf, Random rand,
Compression.Algorithm compressAlgo,
+ Path path) throws IOException {
+ FileSystem fs = HFileSystem.get(conf);
+ FSDataOutputStream os = fs.create(path);
+ HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true)
+
.withCompression(compressAlgo).withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build();
Review Comment:
`.withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)` is unnecessary. As
is `withHBaseCheckSum(true)`
##########
hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java:
##########
@@ -92,6 +107,102 @@ public void testReadFully() throws IOException {
assertArrayEquals(Bytes.toBytes(s), heapBuf);
}
+ @Test
+ public void testPreadWithReadFullBytes() throws IOException {
+ testPreadReadFullBytesInternal(true);
+ }
+
+ @Test
+ public void testPreadWithoutReadFullBytes() throws IOException {
+ testPreadReadFullBytesInternal(false);
+ }
+
+ private void testPreadReadFullBytesInternal(boolean readAllBytes) throws
IOException {
+ Configuration conf = TEST_UTIL.getConfiguration();
+ conf.setBoolean(HConstants.HFILE_PREAD_ALL_BYTES_ENABLED_KEY,
readAllBytes);
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+ Path path = new Path(TEST_UTIL.getDataTestDirOnTestFS(),
testName.getMethodName());
+ Random rand = new Random();
+ long totalDataBlockBytes = writeBlocks(TEST_UTIL.getConfiguration(), rand,
+ Compression.Algorithm.GZ, path);
+ readDataBlocksAndVerify(fs, path, totalDataBlockBytes);
+ }
+
+ private long writeBlocks(Configuration conf, Random rand,
Compression.Algorithm compressAlgo,
+ Path path) throws IOException {
+ FileSystem fs = HFileSystem.get(conf);
+ FSDataOutputStream os = fs.create(path);
+ HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true)
+
.withCompression(compressAlgo).withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build();
+ HFileBlock.Writer hbw = new HFileBlock.Writer(conf, null, meta);
+ long totalDataBlockBytes = 0;
+ for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
+ int blockTypeOrdinal = rand.nextInt(BlockType.values().length);
+ if (blockTypeOrdinal == BlockType.ENCODED_DATA.ordinal()) {
+ blockTypeOrdinal = BlockType.DATA.ordinal();
+ }
+ BlockType bt = BlockType.values()[blockTypeOrdinal];
+ DataOutputStream dos = hbw.startWriting(bt);
+ int size = rand.nextInt(500);
+ for (int j = 0; j < size; ++j) {
+ dos.writeShort(i + 1);
+ dos.writeInt(j + 1);
+ }
+
+ hbw.writeHeaderAndData(os);
+ totalDataBlockBytes += hbw.getOnDiskSizeWithHeader();
+ }
+ // append a dummy trailer and in an actual HFile it should have more data.
+ FixedFileTrailer trailer = new FixedFileTrailer(3, 3);
+ trailer.setFirstDataBlockOffset(0);
+ trailer.setLastDataBlockOffset(totalDataBlockBytes);
+ trailer.setComparatorClass(meta.getCellComparator().getClass());
+ trailer.setDataIndexCount(NUM_TEST_BLOCKS);
+ trailer.setCompressionCodec(compressAlgo);
+ trailer.serialize(os);
+ // close the stream
+ os.close();
+ return totalDataBlockBytes;
+ }
+
+ private void readDataBlocksAndVerify(FileSystem fs, Path path, long
totalDataBlockBytes)
+ throws IOException {
+ FSDataInputStream is = fs.open(path);
+ HFileContext fileContext =
+ new HFileContextBuilder().withHBaseCheckSum(true)
+ .withCompression(Compression.Algorithm.GZ).build();
Review Comment:
`Compression.Algorithm.GZ` was passed into `writeBlocks(..)`. Could pass it
into this method too so that you avoid having to change the constant in two
places.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]