Re: [PR] [HUDI-7170][WIP] Implement HFile reader independent of HBase [hudi]

via GitHub Mon, 11 Dec 2023 14:24:27 -0800


vinothchandar commented on code in PR #10241:
URL: https://github.com/apache/hudi/pull/10241#discussion_r1422901356



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java:
##########
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.io.compress.CompressionCodec;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_BYTE;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT32;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT64;
+import static org.apache.hudi.io.util.IOUtils.readInt;
+
+/**
+ * Represents a block in a HFile. The types of blocks are defined in {@link 
HFileBlockType}.
+ */
+public abstract class HFileBlock {
+  // The HFile block header size without checksum
+  public static final int HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM =
+      MAGIC_LENGTH + 2 * SIZEOF_INT32 + SIZEOF_INT64;
+  // The HFile block header size with checksum
+  // There is a 1 byte checksum type, followed by a 4 byte bytesPerChecksum
+  // followed by another 4 byte value to store sizeofDataOnDisk.
+  public static final int HFILEBLOCK_HEADER_SIZE =
+      HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM + SIZEOF_BYTE + 2 * SIZEOF_INT32;
+
+  /**
+   * Each checksum value is an integer that can be stored in 4 bytes.
+   */
+  static final int CHECKSUM_SIZE = SIZEOF_INT32;
+  static final int BYTES_PER_CHECKSUM = 16384;
+
+  static class Header {
+    // Format of header is:
+    // 8 bytes - block magic
+    // 4 bytes int - onDiskSizeWithoutHeader
+    // 4 bytes int - uncompressedSizeWithoutHeader
+    // 8 bytes long - prevBlockOffset
+    // The following 3 are only present if header contains checksum information
+    // 1 byte - checksum type
+    // 4 byte int - bytes per checksum
+    // 4 byte int - onDiskDataSizeWithHeader
+    static int BLOCK_MAGIC_INDEX = 0;
+    static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
+    static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
+    static int PREV_BLOCK_OFFSET_INDEX = 16;
+    static int CHECKSUM_TYPE_INDEX = 24;
+    static int BYTES_PER_CHECKSUM_INDEX = 25;
+    static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
+  }
+
+  protected final HFileContext context;
+  protected final byte[] byteBuff;
+  protected final int startOffsetInBuff;
+  private final HFileBlockType blockType;
+  protected int onDiskSizeWithoutHeader;
+  protected int uncompressedSizeWithoutHeader;
+
+  protected HFileBlock(HFileContext context,
+                       HFileBlockType blockType,
+                       byte[] byteBuff,
+                       int startOffsetInBuff) {
+    this.context = context;
+    this.byteBuff = byteBuff;
+    this.startOffsetInBuff = startOffsetInBuff;
+    this.blockType = blockType;
+    this.onDiskSizeWithoutHeader = readInt(
+        byteBuff, startOffsetInBuff + 
Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+    this.uncompressedSizeWithoutHeader = readInt(
+        byteBuff, startOffsetInBuff + 
Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
+  }
+
+  /**
+   * Parses the HFile block header and returns the {@link HFileBlock} instance 
based on the input.
+   *
+   * @param context           HFile context.
+   * @param byteBuff          Input data.
+   * @param startOffsetInBuff Offset to start parsing.
+   * @return The {@link HFileBlock} instance based on the input.
+   * @throws IOException if the block cannot be parsed.
+   */
+  public static HFileBlock parse(HFileContext context, byte[] byteBuff, int 
startOffsetInBuff)
+      throws IOException {
+    HFileBlockType blockType = HFileBlockType.parse(byteBuff, 
startOffsetInBuff);
+    switch (blockType) {
+      case ROOT_INDEX:
+        return new HFileRootIndexBlock(context, byteBuff, startOffsetInBuff);
+      case FILE_INFO:
+        return new HFileFileInfoBlock(context, byteBuff, startOffsetInBuff);
+      case DATA:
+        return new HFileDataBlock(context, byteBuff, startOffsetInBuff);
+      default:
+        throw new IOException(
+            "Parsing of the HFile block type " + blockType + " is not 
supported");
+    }
+  }
+
+  /**
+   * Returns the number of bytes needed to store the checksums for
+   * a specified data size.
+   *
+   * @param numBytes number of bytes of data.
+   * @return The number of bytes needed to store the checksum values.
+   */
+  static int numBytes(long numBytes) {
+    return numChunks(numBytes, BYTES_PER_CHECKSUM) * HFileBlock.CHECKSUM_SIZE;
+  }
+
+  /**
+   * Returns the number of checksum chunks needed to store the checksums for
+   * a specified data size.
+   *
+   * @param numBytes         number of bytes of data
+   * @param bytesPerChecksum number of bytes in a checksum chunk
+   * @return The number of checksum chunks
+   */
+  static int numChunks(long numBytes, int bytesPerChecksum) {
+    long numChunks = numBytes / bytesPerChecksum;
+    if (numBytes % bytesPerChecksum != 0) {
+      numChunks++;
+    }
+    if (numChunks > Integer.MAX_VALUE / HFileBlock.CHECKSUM_SIZE) {
+      throw new IllegalArgumentException("The number of chunks is too large: " 
+ numChunks);
+    }
+    return (int) numChunks;
+  }
+
+  /**
+   * Allocates a new byte buffer for uncompressed data and returns a new 
{@link HFileBlock}
+   * instance for the decompressed content.
+   *
+   * @return
+   */
+  public abstract HFileBlock cloneForUnpack();
+
+  public HFileBlockType getBlockType() {
+    return blockType;
+  }
+
+  public byte[] getByteBuff() {
+    return byteBuff;
+  }
+
+  public int getOnDiskSizeWithHeader() {
+    return onDiskSizeWithoutHeader + HFILEBLOCK_HEADER_SIZE;
+  }
+
+  /**
+   * Decodes and decompresses the block content if the block content is 
compressed.
+   *
+   * @return {@link HFileBlock} instance
+   * @throws IOException upon decoding and decompression error.
+   */
+  public HFileBlock unpack() throws IOException {
+    // Should only be called for compressed blocks
+    CompressionCodec compression = context.getCompressionCodec();
+    if (compression != CompressionCodec.NONE) {
+      HFileBlock unpacked = this.cloneForUnpack();
+      try (InputStream byteBuffInputStream = new ByteArrayInputStream(
+          byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, 
onDiskSizeWithoutHeader)) {
+        context.getDecompressor().decompress(
+            byteBuffInputStream,
+            unpacked.getByteBuff(),
+            HFILEBLOCK_HEADER_SIZE,
+            unpacked.getByteBuff().length - HFILEBLOCK_HEADER_SIZE);
+      }
+      return unpacked;
+    }
+    return this;
+  }
+
+  /**
+   * Allocates new byte buffer for the uncompressed bytes.
+   *
+   * @return A new byte array based on the size of uncompressed data, holding 
the same header
+   * bytes.
+   */
+  protected byte[] allocateBuffer() {
+    int checksumSize = numBytes(getOnDiskSizeWithHeader());
+    int headerSize = HFILEBLOCK_HEADER_SIZE;
+    int capacity = headerSize + uncompressedSizeWithoutHeader + checksumSize;

Review Comment:
   so - we allocate for the worst case that this is uncompressed?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java:
##########
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.io.compress.CompressionCodec;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_BYTE;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT32;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT64;
+import static org.apache.hudi.io.util.IOUtils.readInt;
+
+/**
+ * Represents a block in a HFile. The types of blocks are defined in {@link 
HFileBlockType}.
+ */
+public abstract class HFileBlock {
+  // The HFile block header size without checksum
+  public static final int HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM =
+      MAGIC_LENGTH + 2 * SIZEOF_INT32 + SIZEOF_INT64;
+  // The HFile block header size with checksum
+  // There is a 1 byte checksum type, followed by a 4 byte bytesPerChecksum
+  // followed by another 4 byte value to store sizeofDataOnDisk.
+  public static final int HFILEBLOCK_HEADER_SIZE =
+      HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM + SIZEOF_BYTE + 2 * SIZEOF_INT32;
+
+  /**
+   * Each checksum value is an integer that can be stored in 4 bytes.
+   */
+  static final int CHECKSUM_SIZE = SIZEOF_INT32;
+  static final int BYTES_PER_CHECKSUM = 16384;
+
+  static class Header {
+    // Format of header is:
+    // 8 bytes - block magic
+    // 4 bytes int - onDiskSizeWithoutHeader
+    // 4 bytes int - uncompressedSizeWithoutHeader
+    // 8 bytes long - prevBlockOffset
+    // The following 3 are only present if header contains checksum information
+    // 1 byte - checksum type
+    // 4 byte int - bytes per checksum
+    // 4 byte int - onDiskDataSizeWithHeader
+    static int BLOCK_MAGIC_INDEX = 0;
+    static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
+    static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
+    static int PREV_BLOCK_OFFSET_INDEX = 16;
+    static int CHECKSUM_TYPE_INDEX = 24;
+    static int BYTES_PER_CHECKSUM_INDEX = 25;
+    static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
+  }
+
+  protected final HFileContext context;
+  protected final byte[] byteBuff;
+  protected final int startOffsetInBuff;
+  private final HFileBlockType blockType;
+  protected int onDiskSizeWithoutHeader;
+  protected int uncompressedSizeWithoutHeader;
+
+  protected HFileBlock(HFileContext context,
+                       HFileBlockType blockType,
+                       byte[] byteBuff,
+                       int startOffsetInBuff) {
+    this.context = context;
+    this.byteBuff = byteBuff;
+    this.startOffsetInBuff = startOffsetInBuff;
+    this.blockType = blockType;
+    this.onDiskSizeWithoutHeader = readInt(
+        byteBuff, startOffsetInBuff + 
Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+    this.uncompressedSizeWithoutHeader = readInt(
+        byteBuff, startOffsetInBuff + 
Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
+  }
+
+  /**
+   * Parses the HFile block header and returns the {@link HFileBlock} instance 
based on the input.
+   *
+   * @param context           HFile context.
+   * @param byteBuff          Input data.
+   * @param startOffsetInBuff Offset to start parsing.
+   * @return The {@link HFileBlock} instance based on the input.
+   * @throws IOException if the block cannot be parsed.
+   */
+  public static HFileBlock parse(HFileContext context, byte[] byteBuff, int 
startOffsetInBuff)
+      throws IOException {
+    HFileBlockType blockType = HFileBlockType.parse(byteBuff, 
startOffsetInBuff);
+    switch (blockType) {
+      case ROOT_INDEX:
+        return new HFileRootIndexBlock(context, byteBuff, startOffsetInBuff);
+      case FILE_INFO:
+        return new HFileFileInfoBlock(context, byteBuff, startOffsetInBuff);
+      case DATA:
+        return new HFileDataBlock(context, byteBuff, startOffsetInBuff);
+      default:
+        throw new IOException(
+            "Parsing of the HFile block type " + blockType + " is not 
supported");
+    }
+  }
+
+  /**
+   * Returns the number of bytes needed to store the checksums for
+   * a specified data size.
+   *
+   * @param numBytes number of bytes of data.
+   * @return The number of bytes needed to store the checksum values.
+   */
+  static int numBytes(long numBytes) {

Review Comment:
   rename: numChecksumBytes



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockReader.java:
##########
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+/**
+ * A reader to read one or more HFile blocks based on the start and end 
offsets.
+ */
+public class HFileBlockReader {
+  private final HFileContext context;
+  private final byte[] byteBuff;
+  private int offset;
+
+  /**
+   * Instantiates the {@link HFileBlockReader}.
+   *
+   * @param context     HFile context.
+   * @param stream      Input data.
+   * @param startOffset Start offset to read from.
+   * @param endOffset   End offset to stop at.
+   * @throws IOException
+   */
+  public HFileBlockReader(HFileContext context,

Review Comment:
   does this class read just a single block? 



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockReader.java:
##########
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+/**
+ * A reader to read one or more HFile blocks based on the start and end 
offsets.
+ */
+public class HFileBlockReader {
+  private final HFileContext context;
+  private final byte[] byteBuff;
+  private int offset;
+
+  /**
+   * Instantiates the {@link HFileBlockReader}.
+   *
+   * @param context     HFile context.
+   * @param stream      Input data.
+   * @param startOffset Start offset to read from.
+   * @param endOffset   End offset to stop at.
+   * @throws IOException
+   */
+  public HFileBlockReader(HFileContext context,
+                          FSDataInputStream stream,
+                          long startOffset,
+                          long endOffset) throws IOException {
+    this.context = context;
+    this.offset = 0;
+    stream.seek(startOffset);
+    long length = endOffset - startOffset;
+    if (length >= 0 && length <= Integer.MAX_VALUE) {
+      this.byteBuff = new byte[(int) length];
+    } else {
+      throw new IllegalArgumentException(
+          "The range of bytes is too large or invalid: ["
+              + startOffset + ", " + endOffset + "], length=" + length);
+    }
+    stream.readFully(byteBuff);

Review Comment:
   Again, if this is reading more than one block, can you move all reading logic 
to the nextBlock() method?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.io.util.IOUtils;
+
+import java.util.Optional;
+
+import static org.apache.hudi.io.hfile.KeyValue.KEY_OFFSET;
+
+/**
+ * Represents a {@link HFileBlockType#DATA} block in the "Scanned block" 
section.
+ */
+public class HFileDataBlock extends HFileBlock {
+  protected HFileDataBlock(HFileContext context,
+                           byte[] byteBuff,
+                           int startOffsetInBuff) {
+    super(context, HFileBlockType.DATA, byteBuff, startOffsetInBuff);
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   */
+  public Optional<KeyValue> seekTo(Key key) {
+    int offset = startOffsetInBuff + HFILEBLOCK_HEADER_SIZE;
+    int endOffset = offset + onDiskSizeWithoutHeader;
+    // TODO: check last 4 bytes in the data block
+    while (offset + HFILEBLOCK_HEADER_SIZE < endOffset) {
+      // Full length is not known yet until parsing
+      KeyValue kv = new KeyValue(byteBuff, offset, -1);

Review Comment:
   Pull `-1` into a named `static final` constant?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.io.util.IOUtils;
+
+import java.util.Optional;
+
+import static org.apache.hudi.io.hfile.KeyValue.KEY_OFFSET;
+
+/**
+ * Represents a {@link HFileBlockType#DATA} block in the "Scanned block" 
section.
+ */
+public class HFileDataBlock extends HFileBlock {
+  protected HFileDataBlock(HFileContext context,
+                           byte[] byteBuff,
+                           int startOffsetInBuff) {
+    super(context, HFileBlockType.DATA, byteBuff, startOffsetInBuff);
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   */
+  public Optional<KeyValue> seekTo(Key key) {
+    int offset = startOffsetInBuff + HFILEBLOCK_HEADER_SIZE;
+    int endOffset = offset + onDiskSizeWithoutHeader;
+    // TODO: check last 4 bytes in the data block
+    while (offset + HFILEBLOCK_HEADER_SIZE < endOffset) {
+      // Full length is not known yet until parsing
+      KeyValue kv = new KeyValue(byteBuff, offset, -1);

Review Comment:
   Since you hold the buffer in memory anyway, instead of iterating over the keys 
on each seekTo() call, can you use a MultiMap structure to look the keys up? 
When this comparison is done for lots of keys, the linear search cost will add up.



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java:
##########
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A reader reading a HFile.
+ */
+public class HFileReader {
+  private final FSDataInputStream stream;
+  private final long fileSize;
+  private boolean isMetadataInitialized = false;
+  private HFileContext context;
+  private List<BlockIndexEntry> blockIndexEntryList;
+  private HFileBlock metaIndexBlock;
+  private HFileBlock fileInfoBlock;
+
+  public HFileReader(FSDataInputStream stream, long fileSize) {
+    this.stream = stream;
+    this.fileSize = fileSize;
+  }
+
+  /**
+   * Initializes the metadata by reading the "Load-on-open" section.
+   *
+   * @throws IOException upon error.
+   */
+  public void initializeMetadata() throws IOException {
+    assert !this.isMetadataInitialized;
+
+    // Read Trailer (serialized in Proto)
+    HFileTrailer trailer = readTrailer(stream, fileSize);
+    this.context = HFileContext.builder()
+        .compressAlgo(trailer.getCompressionCodec())
+        .build();
+    HFileBlockReader blockReader = new HFileBlockReader(
+        context, stream, trailer.getLoadOnOpenDataOffset(), fileSize - 
HFileTrailer.getTrailerSize());
+    HFileRootIndexBlock dataIndexBlock =
+        (HFileRootIndexBlock) blockReader.nextBlock(HFileBlockType.ROOT_INDEX);
+    this.blockIndexEntryList = 
dataIndexBlock.readDataIndex(trailer.getDataIndexCount());
+    this.metaIndexBlock = blockReader.nextBlock(HFileBlockType.ROOT_INDEX);
+    this.fileInfoBlock = blockReader.nextBlock(HFileBlockType.FILE_INFO);
+
+    this.isMetadataInitialized = true;
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   * @throws IOException upon error.
+   */
+  public Optional<KeyValue> seekTo(Key key) throws IOException {
+    BlockIndexEntry lookUpKey = new BlockIndexEntry(key, -1, -1);
+    int rootLevelBlockIndex = searchBlockByKey(lookUpKey);
+    if (rootLevelBlockIndex < 0) {
+      // Key smaller than the start key of the first block
+      return Optional.empty();
+    }
+    BlockIndexEntry blockToRead = blockIndexEntryList.get(rootLevelBlockIndex);
+    HFileBlockReader blockReader = new HFileBlockReader(
+        context, stream, blockToRead.getOffset(), blockToRead.getOffset() + 
(long) blockToRead.getSize());
+    HFileDataBlock dataBlock = (HFileDataBlock) 
blockReader.nextBlock(HFileBlockType.DATA);
+    return seekToKeyInBlock(dataBlock, key);
+  }
+
+  /**
+   * Reads the HFile major version from the input.
+   *
+   * @param bytes  Input data.
+   * @param offset Offset to start reading.
+   * @return Major version of the file.
+   */
+  public static int readMajorVersion(byte[] bytes, int offset) {
+    int ch1 = bytes[offset] & 0xFF;
+    int ch2 = bytes[offset + 1] & 0xFF;
+    int ch3 = bytes[offset + 2] & 0xFF;
+    return ((ch1 << 16) + (ch2 << 8) + ch3);
+  }
+
+  /**
+   * Reads and parses the HFile trailer.
+   *
+   * @param stream   HFile input.
+   * @param fileSize HFile size.
+   * @return {@link HFileTrailer} instance.
+   * @throws IOException upon error.
+   */
+  private static HFileTrailer readTrailer(FSDataInputStream stream,
+                                          long fileSize) throws IOException {
+    int bufferSize = HFileTrailer.getTrailerSize();
+    long seekPos = fileSize - bufferSize;
+    if (seekPos < 0) {
+      // It is hard to imagine such a small HFile.
+      seekPos = 0;
+      bufferSize = (int) fileSize;
+    }
+    stream.seek(seekPos);
+
+    byte[] byteBuff = new byte[bufferSize];
+    stream.readFully(byteBuff);
+
+    int majorVersion = readMajorVersion(byteBuff, bufferSize - 3);
+    int minorVersion = byteBuff[bufferSize - 3];
+
+    HFileTrailer trailer = new HFileTrailer(majorVersion, minorVersion);
+    trailer.deserialize(new DataInputStream(new 
ByteArrayInputStream(byteBuff)));
+    return trailer;
+  }
+
+  /**
+   * Searches the block that may contain the lookup key based the starting keys
+   * of all blocks (sorted in the input list), using binary search.
+   *
+   * @param lookUpKey The key to lookup.
+   * @return Block index in the input. An index outside the range of input 
means the key does not
+   * exist in the HFile.
+   */
+  private int searchBlockByKey(BlockIndexEntry lookUpKey) {
+    int pos = Collections.binarySearch(blockIndexEntryList, lookUpKey);

Review Comment:
   can we use a TreeMap and write this without all the index manipulation? 
(thinking about maintainability)



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java:
##########
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A reader reading a HFile.
+ */
+public class HFileReader {
+  private final FSDataInputStream stream;
+  private final long fileSize;
+  private boolean isMetadataInitialized = false;
+  private HFileContext context;
+  private List<BlockIndexEntry> blockIndexEntryList;
+  private HFileBlock metaIndexBlock;
+  private HFileBlock fileInfoBlock;
+
+  public HFileReader(FSDataInputStream stream, long fileSize) {
+    this.stream = stream;
+    this.fileSize = fileSize;
+  }
+
+  /**
+   * Initializes the metadata by reading the "Load-on-open" section.
+   *
+   * @throws IOException upon error.
+   */
+  public void initializeMetadata() throws IOException {
+    assert !this.isMetadataInitialized;
+
+    // Read Trailer (serialized in Proto)
+    HFileTrailer trailer = readTrailer(stream, fileSize);
+    this.context = HFileContext.builder()
+        .compressAlgo(trailer.getCompressionCodec())
+        .build();
+    HFileBlockReader blockReader = new HFileBlockReader(
+        context, stream, trailer.getLoadOnOpenDataOffset(), fileSize - 
HFileTrailer.getTrailerSize());
+    HFileRootIndexBlock dataIndexBlock =
+        (HFileRootIndexBlock) blockReader.nextBlock(HFileBlockType.ROOT_INDEX);
+    this.blockIndexEntryList = 
dataIndexBlock.readDataIndex(trailer.getDataIndexCount());
+    this.metaIndexBlock = blockReader.nextBlock(HFileBlockType.ROOT_INDEX);
+    this.fileInfoBlock = blockReader.nextBlock(HFileBlockType.FILE_INFO);
+
+    this.isMetadataInitialized = true;
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   * @throws IOException upon error.
+   */
+  public Optional<KeyValue> seekTo(Key key) throws IOException {
+    BlockIndexEntry lookUpKey = new BlockIndexEntry(key, -1, -1);
+    int rootLevelBlockIndex = searchBlockByKey(lookUpKey);
+    if (rootLevelBlockIndex < 0) {
+      // Key smaller than the start key of the first block
+      return Optional.empty();
+    }
+    BlockIndexEntry blockToRead = blockIndexEntryList.get(rootLevelBlockIndex);
+    HFileBlockReader blockReader = new HFileBlockReader(
+        context, stream, blockToRead.getOffset(), blockToRead.getOffset() + 
(long) blockToRead.getSize());
+    HFileDataBlock dataBlock = (HFileDataBlock) 
blockReader.nextBlock(HFileBlockType.DATA);
+    return seekToKeyInBlock(dataBlock, key);
+  }
+
+  /**
+   * Reads the HFile major version from the input.
+   *
+   * @param bytes  Input data.
+   * @param offset Offset to start reading.
+   * @return Major version of the file.
+   */
+  public static int readMajorVersion(byte[] bytes, int offset) {

Review Comment:
   Any way to move this to IOUtils as a generic method and then invoke it here?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/StringKey.java:
##########
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Represent a String key only, with no length information encoded.
+ */
+public class StringKey extends Key {

Review Comment:
   rename: UTF8StringKey



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockReader.java:
##########
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+/**
+ * A reader to read one or more HFile blocks based on the start and end 
offsets.
+ */
+public class HFileBlockReader {
+  private final HFileContext context;
+  private final byte[] byteBuff;
+  private int offset;
+
+  /**
+   * Instantiates the {@link HFileBlockReader}.
+   *
+   * @param context     HFile context.
+   * @param stream      Input data.
+   * @param startOffset Start offset to read from.
+   * @param endOffset   End offset to stop at.
+   * @throws IOException
+   */
+  public HFileBlockReader(HFileContext context,
+                          FSDataInputStream stream,
+                          long startOffset,
+                          long endOffset) throws IOException {

Review Comment:
   if yes - then rename `nextBlock()` to just `readBlock`
   if no - then clarify that the endOffset is just for the first block?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.io.util.IOUtils;
+
+import java.util.Optional;
+
+import static org.apache.hudi.io.hfile.KeyValue.KEY_OFFSET;
+
+/**
+ * Represents a {@link HFileBlockType#DATA} block in the "Scanned block" 
section.
+ */
+public class HFileDataBlock extends HFileBlock {
+  protected HFileDataBlock(HFileContext context,
+                           byte[] byteBuff,
+                           int startOffsetInBuff) {
+    super(context, HFileBlockType.DATA, byteBuff, startOffsetInBuff);
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   */
+  public Optional<KeyValue> seekTo(Key key) {
+    int offset = startOffsetInBuff + HFILEBLOCK_HEADER_SIZE;
+    int endOffset = offset + onDiskSizeWithoutHeader;
+    // TODO: check last 4 bytes in the data block
+    while (offset + HFILEBLOCK_HEADER_SIZE < endOffset) {
+      // Full length is not known yet until parsing
+      KeyValue kv = new KeyValue(byteBuff, offset, -1);
+      // TODO: Reading long instead of two integers per HBase

Review Comment:
   What's this?



##########
hudi-io/src/main/java/org/apache/hudi/io/compress/builtin/HoodieBuiltInNoneDecompressor.java:
##########
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.compress.builtin;
+
+import org.apache.hudi.io.compress.CompressionCodec;
+import org.apache.hudi.io.compress.HoodieDecompressor;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Implementation of {@link HoodieDecompressor} for {@link 
CompressionCodec#NONE} compression
+ * codec (no compression) by directly reading the input stream.
+ */
+public class HoodieBuiltInNoneDecompressor implements HoodieDecompressor {

Review Comment:
   rename: HoodieNoneDecompressor ? (if you expect only tests to use it, move 
to src/test)



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockType.java:
##########
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH;
+
+/**
+ * Represents the HFile block type.
+ */
+public enum HFileBlockType {
+  /**
+   * Data block, both versions
+   */
+  DATA("DATABLK*", BlockCategory.DATA),
+
+  /**
+   * An encoded data block (e.g. with prefix compression), version 2
+   */
+  ENCODED_DATA("DATABLKE", BlockCategory.DATA) {
+    @Override
+    public int getId() {
+      return DATA.ordinal();
+    }
+  },
+
+  /**
+   * Version 2 leaf index block. Appears in the data block section
+   */
+  LEAF_INDEX("IDXLEAF2", BlockCategory.INDEX),
+
+  /**
+   * Bloom filter block, version 2
+   */
+  BLOOM_CHUNK("BLMFBLK2", BlockCategory.BLOOM),
+
+  // Non-scanned block section
+
+  /**
+   * Meta blocks
+   */
+  META("METABLKc", BlockCategory.META),
+
+  /**
+   * Intermediate-level version 2 index in the non-data block section
+   */
+  INTERMEDIATE_INDEX("IDXINTE2", BlockCategory.INDEX),
+
+  // Load-on-open section.
+
+  /**
+   * Root index block, also used for the single-level meta index, version 2
+   */
+  ROOT_INDEX("IDXROOT2", BlockCategory.INDEX),
+
+  /**
+   * File info, version 2
+   */
+  FILE_INFO("FILEINF2", BlockCategory.META),
+
+  /**
+   * General Bloom filter metadata, version 2
+   */
+  GENERAL_BLOOM_META("BLMFMET2", BlockCategory.BLOOM),
+
+  /**
+   * Delete Family Bloom filter metadata, version 2
+   */
+  DELETE_FAMILY_BLOOM_META("DFBLMET2", BlockCategory.BLOOM),
+
+  // Trailer
+
+  /**
+   * Fixed file trailer, both versions (always just a magic string)
+   */
+  TRAILER("TRABLK\"$", BlockCategory.META),
+
+  // Legacy blocks
+
+  /**
+   * Block index magic string in version 1
+   */
+  INDEX_V1("IDXBLK)+", BlockCategory.INDEX);
+
+  public enum BlockCategory {
+    DATA, META, INDEX, BLOOM, ALL_CATEGORIES, UNKNOWN;
+  }
+
+  private final byte[] magic;
+  private final BlockCategory metricCat;
+
+  HFileBlockType(String magicStr, BlockCategory metricCat) {
+    magic = magicStr.getBytes(UTF_8);
+    this.metricCat = metricCat;
+    assert magic.length == MAGIC_LENGTH;
+  }
+
+  /**
+   * Parses the block type from the block magic.
+   *
+   * @param buf    Input data.
+   * @param offset Offset to start reading.
+   * @return The block type.
+   * @throws IOException if the block magic is invalid.
+   */
+  public static HFileBlockType parse(byte[] buf, int offset)
+      throws IOException {
+    for (HFileBlockType blockType : values()) {
+      if (compareByteArray(MAGIC_LENGTH, blockType.magic, 0, buf, offset)) {
+        return blockType;
+      }
+    }
+
+    throw new IOException("Invalid HFile block magic: "
+        + bytesToString(buf, offset, MAGIC_LENGTH));
+  }
+
+  /**
+   * Use this instead of {@link #ordinal()}. They work exactly the same, except
+   * DATA and ENCODED_DATA get the same id using this method (overridden for
+   * {@link #ENCODED_DATA}).
+   *
+   * @return block type id from 0 to the number of block types - 1
+   */
+  public int getId() {
+    // Default implementation, can be overridden for individual enum members.
+    return ordinal();
+  }
+
+  /**
+   * Reads a magic record of the length {@link DataSize#MAGIC_LENGTH} from the 
given
+   * stream and expects it to match this block type.
+   */
+  public void readAndCheck(DataInputStream in) throws IOException {
+    byte[] buf = new byte[MAGIC_LENGTH];
+    in.readFully(buf);
+    if (!compareByteArray(buf, magic)) {
+      throw new IOException("Invalid magic: expected "
+          + new String(magic) + ", got " + new String(buf));
+    }
+  }
+
+  private static boolean compareByteArray(byte[] bytes1, byte[] bytes2) {

Review Comment:
   same. IOUtils?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockType.java:
##########
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH;
+
+/**
+ * Represents the HFile block type.
+ */
+public enum HFileBlockType {
+  /**
+   * Data block, both versions
+   */
+  DATA("DATABLK*", BlockCategory.DATA),
+
+  /**
+   * An encoded data block (e.g. with prefix compression), version 2
+   */
+  ENCODED_DATA("DATABLKE", BlockCategory.DATA) {
+    @Override
+    public int getId() {
+      return DATA.ordinal();
+    }
+  },
+
+  /**
+   * Version 2 leaf index block. Appears in the data block section
+   */
+  LEAF_INDEX("IDXLEAF2", BlockCategory.INDEX),
+
+  /**
+   * Bloom filter block, version 2
+   */
+  BLOOM_CHUNK("BLMFBLK2", BlockCategory.BLOOM),
+
+  // Non-scanned block section
+
+  /**
+   * Meta blocks
+   */
+  META("METABLKc", BlockCategory.META),
+
+  /**
+   * Intermediate-level version 2 index in the non-data block section
+   */
+  INTERMEDIATE_INDEX("IDXINTE2", BlockCategory.INDEX),
+
+  // Load-on-open section.
+
+  /**
+   * Root index block, also used for the single-level meta index, version 2
+   */
+  ROOT_INDEX("IDXROOT2", BlockCategory.INDEX),
+
+  /**
+   * File info, version 2
+   */
+  FILE_INFO("FILEINF2", BlockCategory.META),
+
+  /**
+   * General Bloom filter metadata, version 2
+   */
+  GENERAL_BLOOM_META("BLMFMET2", BlockCategory.BLOOM),
+
+  /**
+   * Delete Family Bloom filter metadata, version 2
+   */
+  DELETE_FAMILY_BLOOM_META("DFBLMET2", BlockCategory.BLOOM),
+
+  // Trailer
+
+  /**
+   * Fixed file trailer, both versions (always just a magic string)
+   */
+  TRAILER("TRABLK\"$", BlockCategory.META),
+
+  // Legacy blocks
+
+  /**
+   * Block index magic string in version 1
+   */
+  INDEX_V1("IDXBLK)+", BlockCategory.INDEX);
+
+  public enum BlockCategory {
+    DATA, META, INDEX, BLOOM, ALL_CATEGORIES, UNKNOWN;
+  }
+
+  private final byte[] magic;
+  private final BlockCategory metricCat;
+
+  HFileBlockType(String magicStr, BlockCategory metricCat) {
+    magic = magicStr.getBytes(UTF_8);
+    this.metricCat = metricCat;
+    assert magic.length == MAGIC_LENGTH;
+  }
+
+  /**
+   * Parses the block type from the block magic.
+   *
+   * @param buf    Input data.
+   * @param offset Offset to start reading.
+   * @return The block type.
+   * @throws IOException if the block magic is invalid.
+   */
+  public static HFileBlockType parse(byte[] buf, int offset)
+      throws IOException {
+    for (HFileBlockType blockType : values()) {
+      if (compareByteArray(MAGIC_LENGTH, blockType.magic, 0, buf, offset)) {
+        return blockType;
+      }
+    }
+
+    throw new IOException("Invalid HFile block magic: "
+        + bytesToString(buf, offset, MAGIC_LENGTH));
+  }
+
+  /**
+   * Use this instead of {@link #ordinal()}. They work exactly the same, except
+   * DATA and ENCODED_DATA get the same id using this method (overridden for
+   * {@link #ENCODED_DATA}).
+   *
+   * @return block type id from 0 to the number of block types - 1
+   */
+  public int getId() {
+    // Default implementation, can be overridden for individual enum members.
+    return ordinal();
+  }
+
+  /**
+   * Reads a magic record of the length {@link DataSize#MAGIC_LENGTH} from the 
given
+   * stream and expects it to match this block type.
+   */
+  public void readAndCheck(DataInputStream in) throws IOException {
+    byte[] buf = new byte[MAGIC_LENGTH];
+    in.readFully(buf);
+    if (!compareByteArray(buf, magic)) {
+      throw new IOException("Invalid magic: expected "
+          + new String(magic) + ", got " + new String(buf));
+    }
+  }
+
+  private static boolean compareByteArray(byte[] bytes1, byte[] bytes2) {
+    if (bytes1.length != bytes2.length) {
+      return false;
+    }
+    return compareByteArray(bytes1.length, bytes1, 0, bytes2, 0);
+  }
+
+  private static boolean compareByteArray(int length, byte[] bytes1, int 
offset1, byte[] bytes2,
+                                          int offset2) {
+    for (int i = 0; i < length; i++) {
+      if (bytes1[offset1 + i] != bytes2[offset2 + i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  private static String bytesToString(byte[] bytes, int offset, int length) {

Review Comment:
   same.



##########
hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java:
##########
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.util;
+
+/**
+ * Util methods on I/O.
+ */
+public class IOUtils {

Review Comment:
   is this moved over from hudi-common?



##########
hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java:
##########
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.io.IOException;
+import java.util.Optional;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+public class TestHFileReader {

Review Comment:
   Would love to see more cases tested: keys being present, keys being absent, 
adjacent keys, multiple values for the same key, etc.



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockType.java:
##########
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH;
+
+/**
+ * Represents the HFile block type.
+ */
+public enum HFileBlockType {
+  /**
+   * Data block, both versions
+   */
+  DATA("DATABLK*", BlockCategory.DATA),
+
+  /**
+   * An encoded data block (e.g. with prefix compression), version 2
+   */
+  ENCODED_DATA("DATABLKE", BlockCategory.DATA) {
+    @Override
+    public int getId() {
+      return DATA.ordinal();
+    }
+  },
+
+  /**
+   * Version 2 leaf index block. Appears in the data block section
+   */
+  LEAF_INDEX("IDXLEAF2", BlockCategory.INDEX),
+
+  /**
+   * Bloom filter block, version 2
+   */
+  BLOOM_CHUNK("BLMFBLK2", BlockCategory.BLOOM),
+
+  // Non-scanned block section
+
+  /**
+   * Meta blocks
+   */
+  META("METABLKc", BlockCategory.META),
+
+  /**
+   * Intermediate-level version 2 index in the non-data block section
+   */
+  INTERMEDIATE_INDEX("IDXINTE2", BlockCategory.INDEX),
+
+  // Load-on-open section.
+
+  /**
+   * Root index block, also used for the single-level meta index, version 2
+   */
+  ROOT_INDEX("IDXROOT2", BlockCategory.INDEX),
+
+  /**
+   * File info, version 2
+   */
+  FILE_INFO("FILEINF2", BlockCategory.META),
+
+  /**
+   * General Bloom filter metadata, version 2
+   */
+  GENERAL_BLOOM_META("BLMFMET2", BlockCategory.BLOOM),
+
+  /**
+   * Delete Family Bloom filter metadata, version 2
+   */
+  DELETE_FAMILY_BLOOM_META("DFBLMET2", BlockCategory.BLOOM),
+
+  // Trailer
+
+  /**
+   * Fixed file trailer, both versions (always just a magic string)
+   */
+  TRAILER("TRABLK\"$", BlockCategory.META),
+
+  // Legacy blocks
+
+  /**
+   * Block index magic string in version 1
+   */
+  INDEX_V1("IDXBLK)+", BlockCategory.INDEX);
+
+  public enum BlockCategory {
+    DATA, META, INDEX, BLOOM, ALL_CATEGORIES, UNKNOWN;
+  }
+
+  private final byte[] magic;
+  private final BlockCategory metricCat;
+
+  HFileBlockType(String magicStr, BlockCategory metricCat) {
+    magic = magicStr.getBytes(UTF_8);
+    this.metricCat = metricCat;
+    assert magic.length == MAGIC_LENGTH;
+  }
+
+  /**
+   * Parses the block type from the block magic.
+   *
+   * @param buf    Input data.
+   * @param offset Offset to start reading.
+   * @return The block type.
+   * @throws IOException if the block magic is invalid.
+   */
+  public static HFileBlockType parse(byte[] buf, int offset)
+      throws IOException {
+    for (HFileBlockType blockType : values()) {
+      if (compareByteArray(MAGIC_LENGTH, blockType.magic, 0, buf, offset)) {
+        return blockType;
+      }
+    }
+
+    throw new IOException("Invalid HFile block magic: "
+        + bytesToString(buf, offset, MAGIC_LENGTH));
+  }
+
+  /**
+   * Use this instead of {@link #ordinal()}. They work exactly the same, except
+   * DATA and ENCODED_DATA get the same id using this method (overridden for
+   * {@link #ENCODED_DATA}).
+   *
+   * @return block type id from 0 to the number of block types - 1
+   */
+  public int getId() {
+    // Default implementation, can be overridden for individual enum members.
+    return ordinal();
+  }
+
+  /**
+   * Reads a magic record of the length {@link DataSize#MAGIC_LENGTH} from the 
given
+   * stream and expects it to match this block type.
+   */
+  public void readAndCheck(DataInputStream in) throws IOException {
+    byte[] buf = new byte[MAGIC_LENGTH];
+    in.readFully(buf);
+    if (!compareByteArray(buf, magic)) {
+      throw new IOException("Invalid magic: expected "
+          + new String(magic) + ", got " + new String(buf));
+    }
+  }
+
+  private static boolean compareByteArray(byte[] bytes1, byte[] bytes2) {
+    if (bytes1.length != bytes2.length) {
+      return false;
+    }
+    return compareByteArray(bytes1.length, bytes1, 0, bytes2, 0);
+  }
+
+  private static boolean compareByteArray(int length, byte[] bytes1, int 
offset1, byte[] bytes2,

Review Comment:
   move to IOUtils?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java:
##########
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.io.compress.CompressionCodec;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_BYTE;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT32;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT64;
+import static org.apache.hudi.io.util.IOUtils.readInt;
+
+/**
+ * Represents a block in a HFile. The types of blocks are defined in {@link 
HFileBlockType}.
+ */
+public abstract class HFileBlock {
+  // The HFile block header size without checksum
+  public static final int HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM =
+      MAGIC_LENGTH + 2 * SIZEOF_INT32 + SIZEOF_INT64;
+  // The HFile block header size with checksum
+  // There is a 1 byte checksum type, followed by a 4 byte bytesPerChecksum
+  // followed by another 4 byte value to store sizeofDataOnDisk.
+  public static final int HFILEBLOCK_HEADER_SIZE =
+      HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM + SIZEOF_BYTE + 2 * SIZEOF_INT32;
+
+  /**
+   * Each checksum value is an integer that can be stored in 4 bytes.
+   */
+  static final int CHECKSUM_SIZE = SIZEOF_INT32;
+  static final int BYTES_PER_CHECKSUM = 16384;
+
+  static class Header {
+    // Format of header is:
+    // 8 bytes - block magic
+    // 4 bytes int - onDiskSizeWithoutHeader
+    // 4 bytes int - uncompressedSizeWithoutHeader
+    // 8 bytes long - prevBlockOffset
+    // The following 3 are only present if header contains checksum information
+    // 1 byte - checksum type
+    // 4 byte int - bytes per checksum
+    // 4 byte int - onDiskDataSizeWithHeader
+    static int BLOCK_MAGIC_INDEX = 0;
+    static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
+    static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
+    static int PREV_BLOCK_OFFSET_INDEX = 16;
+    static int CHECKSUM_TYPE_INDEX = 24;
+    static int BYTES_PER_CHECKSUM_INDEX = 25;
+    static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
+  }
+
+  protected final HFileContext context;
+  protected final byte[] byteBuff;
+  protected final int startOffsetInBuff;
+  private final HFileBlockType blockType;
+  protected int onDiskSizeWithoutHeader;
+  protected int uncompressedSizeWithoutHeader;
+
+  protected HFileBlock(HFileContext context,
+                       HFileBlockType blockType,
+                       byte[] byteBuff,
+                       int startOffsetInBuff) {
+    this.context = context;
+    this.byteBuff = byteBuff;
+    this.startOffsetInBuff = startOffsetInBuff;
+    this.blockType = blockType;
+    this.onDiskSizeWithoutHeader = readInt(
+        byteBuff, startOffsetInBuff + 
Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+    this.uncompressedSizeWithoutHeader = readInt(
+        byteBuff, startOffsetInBuff + 
Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
+  }
+
+  /**
+   * Parses the HFile block header and returns the {@link HFileBlock} instance 
based on the input.
+   *
+   * @param context           HFile context.
+   * @param byteBuff          Input data.
+   * @param startOffsetInBuff Offset to start parsing.
+   * @return The {@link HFileBlock} instance based on the input.
+   * @throws IOException if the block cannot be parsed.
+   */
+  public static HFileBlock parse(HFileContext context, byte[] byteBuff, int 
startOffsetInBuff)
+      throws IOException {
+    HFileBlockType blockType = HFileBlockType.parse(byteBuff, 
startOffsetInBuff);
+    switch (blockType) {
+      case ROOT_INDEX:
+        return new HFileRootIndexBlock(context, byteBuff, startOffsetInBuff);
+      case FILE_INFO:
+        return new HFileFileInfoBlock(context, byteBuff, startOffsetInBuff);
+      case DATA:
+        return new HFileDataBlock(context, byteBuff, startOffsetInBuff);
+      default:
+        throw new IOException(
+            "Parsing of the HFile block type " + blockType + " is not 
supported");
+    }
+  }
+
+  /**
+   * Returns the number of bytes needed to store the checksums for
+   * a specified data size.
+   *
+   * @param numBytes number of bytes of data.
+   * @return The number of bytes needed to store the checksum values.
+   */
+  static int numBytes(long numBytes) {
+    return numChunks(numBytes, BYTES_PER_CHECKSUM) * HFileBlock.CHECKSUM_SIZE;
+  }
+
+  /**
+   * Returns the number of checksum chunks needed to store the checksums for
+   * a specified data size.
+   *
+   * @param numBytes         number of bytes of data
+   * @param bytesPerChecksum number of bytes in a checksum chunk
+   * @return The number of checksum chunks
+   */
+  static int numChunks(long numBytes, int bytesPerChecksum) {
+    long numChunks = numBytes / bytesPerChecksum;
+    if (numBytes % bytesPerChecksum != 0) {
+      numChunks++;
+    }
+    if (numChunks > Integer.MAX_VALUE / HFileBlock.CHECKSUM_SIZE) {
+      throw new IllegalArgumentException("The number of chunks is too large: " 
+ numChunks);
+    }
+    return (int) numChunks;
+  }
+
+  /**
+   * Allocates a new byte buffer for uncompressed data and returns a new 
{@link HFileBlock}
+   * instance for the decompressed content.
+   *
+   * @return
+   */
+  public abstract HFileBlock cloneForUnpack();
+
+  public HFileBlockType getBlockType() {
+    return blockType;
+  }
+
+  public byte[] getByteBuff() {
+    return byteBuff;
+  }
+
+  public int getOnDiskSizeWithHeader() {
+    return onDiskSizeWithoutHeader + HFILEBLOCK_HEADER_SIZE;
+  }
+
+  /**
+   * Decodes and decompresses the block content if the block content is 
compressed.
+   *
+   * @return {@link HFileBlock} instance
+   * @throws IOException upon decoding and decompression error.
+   */
+  public HFileBlock unpack() throws IOException {

Review Comment:
   Is it cleaner to just have a `void` return type and for the caller to work 
with the same block object? A class returning a new instance of that class 
feels a little awkward. Do you need a builder here? (Or alternatively, should we 
return a new object for the no-compression case too?)



##########
pom.xml:
##########
@@ -929,6 +930,13 @@
         <scope>provided</scope>
       </dependency>
 
+      <!-- airlift -->
+      <dependency>

Review Comment:
   should this be part of some bundles?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.io.util.IOUtils;
+
+import java.util.Optional;
+
+import static org.apache.hudi.io.hfile.KeyValue.KEY_OFFSET;
+
+/**
+ * Represents a {@link HFileBlockType#DATA} block in the "Scanned block" 
section.
+ */
+public class HFileDataBlock extends HFileBlock {
+  protected HFileDataBlock(HFileContext context,
+                           byte[] byteBuff,
+                           int startOffsetInBuff) {
+    super(context, HFileBlockType.DATA, byteBuff, startOffsetInBuff);
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   */
+  public Optional<KeyValue> seekTo(Key key) {
+    int offset = startOffsetInBuff + HFILEBLOCK_HEADER_SIZE;
+    int endOffset = offset + onDiskSizeWithoutHeader;
+    // TODO: check last 4 bytes in the data block

Review Comment:
   Are these `TODO:`s WIP, or do they need to be resolved?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.io.util.IOUtils;
+
+import java.util.Optional;
+
+import static org.apache.hudi.io.hfile.KeyValue.KEY_OFFSET;
+
+/**
+ * Represents a {@link HFileBlockType#DATA} block in the "Scanned block" 
section.
+ */
+public class HFileDataBlock extends HFileBlock {
+  protected HFileDataBlock(HFileContext context,
+                           byte[] byteBuff,
+                           int startOffsetInBuff) {
+    super(context, HFileBlockType.DATA, byteBuff, startOffsetInBuff);
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   */
+  public Optional<KeyValue> seekTo(Key key) {
+    int offset = startOffsetInBuff + HFILEBLOCK_HEADER_SIZE;
+    int endOffset = offset + onDiskSizeWithoutHeader;
+    // TODO: check last 4 bytes in the data block
+    while (offset + HFILEBLOCK_HEADER_SIZE < endOffset) {
+      // Full length is not known yet until parsing
+      KeyValue kv = new KeyValue(byteBuff, offset, -1);
+      // TODO: Reading long instead of two integers per HBase
+      int comp =
+          IOUtils.compareTo(kv.getBytes(), kv.getKeyContentOffset(), 
kv.getKeyContentLength(),
+              key.getBytes(), key.getContentOffset(), key.getContentLength());
+      if (comp == 0) {
+        return Optional.of(kv);
+      } else if (comp > 0) {
+        return Optional.empty();
+      }
+      // TODO: check what's the extra byte

Review Comment:
   So, you always need to read an extra byte? Is it some kind of type/enum? 
It would be good to understand what this is.



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java:
##########
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A reader reading a HFile.
+ */
+public class HFileReader {
+  private final FSDataInputStream stream;
+  private final long fileSize;
+  private boolean isMetadataInitialized = false;
+  private HFileContext context;
+  private List<BlockIndexEntry> blockIndexEntryList;
+  private HFileBlock metaIndexBlock;
+  private HFileBlock fileInfoBlock;
+
+  public HFileReader(FSDataInputStream stream, long fileSize) {
+    this.stream = stream;
+    this.fileSize = fileSize;
+  }
+
+  /**
+   * Initializes the metadata by reading the "Load-on-open" section.
+   *
+   * @throws IOException upon error.
+   */
+  public void initializeMetadata() throws IOException {
+    assert !this.isMetadataInitialized;
+
+    // Read Trailer (serialized in Proto)
+    HFileTrailer trailer = readTrailer(stream, fileSize);
+    this.context = HFileContext.builder()
+        .compressAlgo(trailer.getCompressionCodec())
+        .build();
+    HFileBlockReader blockReader = new HFileBlockReader(
+        context, stream, trailer.getLoadOnOpenDataOffset(), fileSize - 
HFileTrailer.getTrailerSize());
+    HFileRootIndexBlock dataIndexBlock =
+        (HFileRootIndexBlock) blockReader.nextBlock(HFileBlockType.ROOT_INDEX);
+    this.blockIndexEntryList = 
dataIndexBlock.readDataIndex(trailer.getDataIndexCount());
+    this.metaIndexBlock = blockReader.nextBlock(HFileBlockType.ROOT_INDEX);
+    this.fileInfoBlock = blockReader.nextBlock(HFileBlockType.FILE_INFO);
+
+    this.isMetadataInitialized = true;
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   * @throws IOException upon error.
+   */
+  public Optional<KeyValue> seekTo(Key key) throws IOException {
+    BlockIndexEntry lookUpKey = new BlockIndexEntry(key, -1, -1);
+    int rootLevelBlockIndex = searchBlockByKey(lookUpKey);
+    if (rootLevelBlockIndex < 0) {
+      // Key smaller than the start key of the first block
+      return Optional.empty();
+    }
+    BlockIndexEntry blockToRead = blockIndexEntryList.get(rootLevelBlockIndex);
+    HFileBlockReader blockReader = new HFileBlockReader(
+        context, stream, blockToRead.getOffset(), blockToRead.getOffset() + 
(long) blockToRead.getSize());
+    HFileDataBlock dataBlock = (HFileDataBlock) 
blockReader.nextBlock(HFileBlockType.DATA);

Review Comment:
   Should we return multiple values per search? E.g., for col stats, we can do a 
prefix search, and it can go over several data blocks.



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java:
##########
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A reader reading a HFile.
+ */
+public class HFileReader {
+  private final FSDataInputStream stream;
+  private final long fileSize;
+  private boolean isMetadataInitialized = false;
+  private HFileContext context;
+  private List<BlockIndexEntry> blockIndexEntryList;
+  private HFileBlock metaIndexBlock;
+  private HFileBlock fileInfoBlock;
+
+  public HFileReader(FSDataInputStream stream, long fileSize) {
+    this.stream = stream;
+    this.fileSize = fileSize;
+  }
+
+  /**
+   * Initializes the metadata by reading the "Load-on-open" section.
+   *
+   * @throws IOException upon error.
+   */
+  public void initializeMetadata() throws IOException {
+    assert !this.isMetadataInitialized;
+
+    // Read Trailer (serialized in Proto)
+    HFileTrailer trailer = readTrailer(stream, fileSize);
+    this.context = HFileContext.builder()
+        .compressAlgo(trailer.getCompressionCodec())
+        .build();
+    HFileBlockReader blockReader = new HFileBlockReader(
+        context, stream, trailer.getLoadOnOpenDataOffset(), fileSize - 
HFileTrailer.getTrailerSize());
+    HFileRootIndexBlock dataIndexBlock =
+        (HFileRootIndexBlock) blockReader.nextBlock(HFileBlockType.ROOT_INDEX);
+    this.blockIndexEntryList = 
dataIndexBlock.readDataIndex(trailer.getDataIndexCount());
+    this.metaIndexBlock = blockReader.nextBlock(HFileBlockType.ROOT_INDEX);
+    this.fileInfoBlock = blockReader.nextBlock(HFileBlockType.FILE_INFO);
+
+    this.isMetadataInitialized = true;
+  }
+
+  /**
+   * Seeks to the key to look up.
+   *
+   * @param key Key to look up.
+   * @return The {@link KeyValue} instance in the block that contains the 
exact same key as the
+   * lookup key; or empty {@link Optional} if the lookup key does not exist.
+   * @throws IOException upon error.
+   */
+  public Optional<KeyValue> seekTo(Key key) throws IOException {
+    BlockIndexEntry lookUpKey = new BlockIndexEntry(key, -1, -1);
+    int rootLevelBlockIndex = searchBlockByKey(lookUpKey);
+    if (rootLevelBlockIndex < 0) {
+      // Key smaller than the start key of the first block
+      return Optional.empty();
+    }
+    BlockIndexEntry blockToRead = blockIndexEntryList.get(rootLevelBlockIndex);
+    HFileBlockReader blockReader = new HFileBlockReader(
+        context, stream, blockToRead.getOffset(), blockToRead.getOffset() + 
(long) blockToRead.getSize());
+    HFileDataBlock dataBlock = (HFileDataBlock) 
blockReader.nextBlock(HFileBlockType.DATA);
+    return seekToKeyInBlock(dataBlock, key);
+  }
+
+  /**
+   * Reads the HFile major version from the input.
+   *
+   * @param bytes  Input data.
+   * @param offset Offset to start reading.
+   * @return Major version of the file.
+   */
+  public static int readMajorVersion(byte[] bytes, int offset) {
+    int ch1 = bytes[offset] & 0xFF;
+    int ch2 = bytes[offset + 1] & 0xFF;
+    int ch3 = bytes[offset + 2] & 0xFF;
+    return ((ch1 << 16) + (ch2 << 8) + ch3);
+  }
+
+  /**
+   * Reads and parses the HFile trailer.
+   *
+   * @param stream   HFile input.
+   * @param fileSize HFile size.
+   * @return {@link HFileTrailer} instance.
+   * @throws IOException upon error.
+   */
+  private static HFileTrailer readTrailer(FSDataInputStream stream,
+                                          long fileSize) throws IOException {
+    int bufferSize = HFileTrailer.getTrailerSize();
+    long seekPos = fileSize - bufferSize;
+    if (seekPos < 0) {
+      // It is hard to imagine such a small HFile.
+      seekPos = 0;
+      bufferSize = (int) fileSize;
+    }
+    stream.seek(seekPos);
+
+    byte[] byteBuff = new byte[bufferSize];
+    stream.readFully(byteBuff);
+
+    int majorVersion = readMajorVersion(byteBuff, bufferSize - 3);
+    int minorVersion = byteBuff[bufferSize - 3];
+
+    HFileTrailer trailer = new HFileTrailer(majorVersion, minorVersion);
+    trailer.deserialize(new DataInputStream(new 
ByteArrayInputStream(byteBuff)));
+    return trailer;
+  }
+
+  /**
+   * Searches the block that may contain the lookup key based the starting keys
+   * of all blocks (sorted in the input list), using binary search.
+   *
+   * @param lookUpKey The key to lookup.
+   * @return Block index in the input. An index outside the range of input 
means the key does not
+   * exist in the HFile.
+   */
+  private int searchBlockByKey(BlockIndexEntry lookUpKey) {
+    int pos = Collections.binarySearch(blockIndexEntryList, lookUpKey);
+    // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
+    // binarySearch's javadoc.
+
+    if (pos >= 0) {
+      // This means this is an exact match with an element of blockKeys.
+      assert pos < blockIndexEntryList.size();
+      return pos;
+    }
+
+    // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
+    // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
+    // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
+    // key < blockKeys[0], meaning the file does not contain the given key.
+
+    int i = -pos - 1;

Review Comment:
   This kind of stuff seems correct, but when we get down to debugging this 
day-to-day, a higher-level data structure would probably help.



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockType.java:
##########
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH;
+
+/**
+ * Represents the HFile block type.
+ */
+public enum HFileBlockType {
+  /**
+   * Data block, both versions
+   */
+  DATA("DATABLK*", BlockCategory.DATA),
+
+  /**
+   * An encoded data block (e.g. with prefix compression), version 2
+   */
+  ENCODED_DATA("DATABLKE", BlockCategory.DATA) {
+    @Override
+    public int getId() {
+      return DATA.ordinal();
+    }
+  },
+
+  /**
+   * Version 2 leaf index block. Appears in the data block section
+   */
+  LEAF_INDEX("IDXLEAF2", BlockCategory.INDEX),
+
+  /**
+   * Bloom filter block, version 2
+   */
+  BLOOM_CHUNK("BLMFBLK2", BlockCategory.BLOOM),

Review Comment:
   Let's do a follow-on JIRA to fix the bloom filters as well.



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java:
##########
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A reader reading a HFile.
+ */
+public class HFileReader {
+  private final FSDataInputStream stream;
+  private final long fileSize;
+  private boolean isMetadataInitialized = false;
+  private HFileContext context;
+  private List<BlockIndexEntry> blockIndexEntryList;
+  private HFileBlock metaIndexBlock;
+  private HFileBlock fileInfoBlock;
+
+  public HFileReader(FSDataInputStream stream, long fileSize) {
+    this.stream = stream;
+    this.fileSize = fileSize;
+  }
+
+  /**
+   * Initializes the metadata by reading the "Load-on-open" section.
+   *
+   * @throws IOException upon error.
+   */
+  public void initializeMetadata() throws IOException {
+    assert !this.isMetadataInitialized;
+
+    // Read Trailer (serialized in Proto)
+    HFileTrailer trailer = readTrailer(stream, fileSize);
+    this.context = HFileContext.builder()
+        .compressAlgo(trailer.getCompressionCodec())

Review Comment:
   Rename to `compression()`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [HUDI-7170][WIP] Implement HFile reader independent of HBase [hudi]

Reply via email to