This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ca72f9d932b [HUDI-9791] Fix native HFile writer to be compatible with 
HBase HFile reader (#13873)
9ca72f9d932b is described below

commit 9ca72f9d932b14b414ca605560d1145de7e525a6
Author: Lin Liu <[email protected]>
AuthorDate: Fri Sep 12 08:11:50 2025 -0700

    [HUDI-9791] Fix native HFile writer to be compatible with HBase HFile 
reader (#13873)
    
    Co-authored-by: Y Ethan Guo <[email protected]>
---
 hudi-io/pom.xml                                    |  36 +++
 .../java/org/apache/hudi/io/hfile/HFileBlock.java  |   9 +-
 .../org/apache/hudi/io/hfile/HFileContext.java     |  19 +-
 .../org/apache/hudi/io/hfile/HFileDataBlock.java   |  24 +-
 .../apache/hudi/io/hfile/HFileFileInfoBlock.java   |   4 +
 .../java/org/apache/hudi/io/hfile/HFileInfo.java   |  17 +-
 .../org/apache/hudi/io/hfile/HFileWriterImpl.java  |  47 ++-
 .../hudi/io/hfile/TestHFileCompatibility.java      | 311 ++++++++++++++++++++
 .../hudi/io/hfile/TestHFileReadCompatibility.java  | 315 +++++++++++++++++++++
 .../org/apache/hudi/io/hfile/TestHFileWriter.java  |  23 +-
 .../org/apache/hudi/io/hfile/TestHfileBlock.java   |  60 ++++
 .../hudi/io/storage/HoodieHBaseKVComparator.java   |  29 ++
 .../src/test/resources/hfile/hbase-generated.hfile | Bin 0 -> 4619 bytes
 .../src/test/resources/hfile/hudi-generated.hfile  | Bin 0 -> 4654 bytes
 pom.xml                                            |   6 +-
 15 files changed, 868 insertions(+), 32 deletions(-)

diff --git a/hudi-io/pom.xml b/hudi-io/pom.xml
index 94b05512575d..c2a08e7e22fb 100644
--- a/hudi-io/pom.xml
+++ b/hudi-io/pom.xml
@@ -173,5 +173,41 @@
       <version>${slf4j.version}</version>
       <scope>provided</scope>
     </dependency>
+
+    <!-- HBase dependencies for testing HFile compatibility -->
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <version>2.4.13</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+      <version>2.4.13</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-server</artifactId>
+      <version>2.4.13</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-testing-util</artifactId>
+      <version>2.4.13</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
   </dependencies>
 </project>
diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java 
b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java
index 14290362e48e..4c61e6b3b8be 100644
--- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java
+++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java
@@ -189,6 +189,9 @@ public abstract class HFileBlock {
    * @return the number of checksum chunks.
    */
   static int numChecksumChunks(long numBytes, int bytesPerChecksum) {
+    if (bytesPerChecksum == 0) {
+      return 0;
+    }
     long numChunks = numBytes / bytesPerChecksum;
     if (numBytes % bytesPerChecksum != 0) {
       numChunks++;
@@ -283,7 +286,11 @@ public abstract class HFileBlock {
     // 5. Checksum type.
     buf.put(context.getChecksumType().getCode());
     // 6. Bytes covered per checksum.
-    buf.putInt(DEFAULT_BYTES_PER_CHECKSUM);
+    // Note: the default value is 16K. There is a check on
+    // onDiskSizeWithoutHeader = uncompressedSizeWithoutHeader + Checksum.
+    // To pass this check, we either make isUseHBaseChecksum false in 
HFileContext (hbase),
+    // or set this value to zero.
+    buf.putInt(0);
     // 7. onDiskDataSizeWithHeader
     int onDiskDataSizeWithHeader =
         HFileBlock.HFILEBLOCK_HEADER_SIZE + compressedBlockData.limit();
diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileContext.java 
b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileContext.java
index d2864e21ac50..8523e8cddc61 100644
--- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileContext.java
+++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileContext.java
@@ -31,12 +31,17 @@ public class HFileContext {
   private final HoodieCompressor compressor;
   private final ChecksumType checksumType;
   private final int blockSize;
+  private final long fileCreationTime;
 
-  private HFileContext(CompressionCodec compressionCodec, int blockSize, 
ChecksumType checksumType) {
+  private HFileContext(CompressionCodec compressionCodec,
+                       int blockSize,
+                       ChecksumType checksumType,
+                       long fileCreationTime) {
     this.compressionCodec = compressionCodec;
     this.compressor = HoodieCompressorFactory.getCompressor(compressionCodec);
     this.blockSize = blockSize;
     this.checksumType = checksumType;
+    this.fileCreationTime = fileCreationTime;
   }
 
   CompressionCodec getCompressionCodec() {
@@ -55,6 +60,10 @@ public class HFileContext {
     return checksumType;
   }
 
+  long getFileCreationTime() {
+    return fileCreationTime;
+  }
+
   public static Builder builder() {
     return new Builder();
   }
@@ -63,6 +72,7 @@ public class HFileContext {
     private CompressionCodec compressionCodec = CompressionCodec.NONE;
     private int blockSize = 1024 * 1024;
     private ChecksumType checksumType = ChecksumType.NULL;
+    private long fileCreationTime = System.currentTimeMillis();
 
     public Builder blockSize(int blockSize) {
       this.blockSize = blockSize;
@@ -79,8 +89,13 @@ public class HFileContext {
       return this;
     }
 
+    public Builder fileCreationTime(long fileCreationTime) {
+      this.fileCreationTime = fileCreationTime;
+      return this;
+    }
+
     public HFileContext build() {
-      return new HFileContext(compressionCodec, blockSize, checksumType);
+      return new HFileContext(compressionCodec, blockSize, checksumType, 
fileCreationTime);
     }
   }
 }
diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java 
b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java
index 7f62b1c54162..230c9c686000 100644
--- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java
+++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java
@@ -26,7 +26,9 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
 
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_BYTE;
 import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT16;
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT64;
 import static 
org.apache.hudi.io.hfile.HFileReader.SEEK_TO_BEFORE_BLOCK_FIRST_KEY;
 import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_FOUND;
 import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_IN_RANGE;
@@ -36,11 +38,18 @@ import static org.apache.hudi.io.hfile.KeyValue.KEY_OFFSET;
  * Represents a {@link HFileBlockType#DATA} block.
  */
 public class HFileDataBlock extends HFileBlock {
+  private static final int KEY_LENGTH_LENGTH = SIZEOF_INT16;
+  private static final int COLUMN_FAMILY_LENGTH = SIZEOF_BYTE;
+  private static final int VERSION_TIMESTAMP_LENGTH = SIZEOF_INT64;
+  private static final int KEY_TYPE_LENGTH = SIZEOF_BYTE;
   // Hudi does not use HFile MVCC timestamp version so the version
   // is always 0, thus the byte length of the version is always 1.
   // This assumption is also validated when parsing {@link HFileInfo},
   // i.e., the maximum MVCC timestamp in a HFile must be 0.
   private static final long ZERO_TS_VERSION_BYTE_LENGTH = 1;
+  // Hudi does not set version timestamp for key value pairs,
+  // so the latest timestamp is used.
+  private static final long LATEST_TIMESTAMP = Long.MAX_VALUE;
 
  // End offset of content in the block, relative to the start of the block
   protected final int uncompressedContentEndRelativeOffset;
@@ -203,17 +212,30 @@ public class HFileDataBlock extends HFileBlock {
     ByteBuffer dataBuf = ByteBuffer.allocate(context.getBlockSize());
     for (KeyValueEntry kv : entriesToWrite) {
       // Length of key + length of a short variable indicating length of key.
-      dataBuf.putInt(kv.key.length + SIZEOF_INT16);
+      // Note that 10 extra bytes are required by the HBase reader.
+      // That is: 1 byte for column family length, 8 bytes for timestamp, and 
1 byte for key type.
+      dataBuf.putInt(kv.key.length + KEY_LENGTH_LENGTH + COLUMN_FAMILY_LENGTH 
+ VERSION_TIMESTAMP_LENGTH + KEY_TYPE_LENGTH);
       // Length of value.
       dataBuf.putInt(kv.value.length);
       // Key content length.
       dataBuf.putShort((short)kv.key.length);
       // Key.
       dataBuf.put(kv.key);
+      // Column family length: constant 0.
+      dataBuf.put((byte)0);
+      // Column qualifier: assumed empty (0 bytes).
+      // Timestamp: using the latest.
+      dataBuf.putLong(LATEST_TIMESTAMP);
+      // Key type: constant Put (4) in Hudi.
+      // Minimum((byte) 0), Put((byte) 4), Delete((byte) 8),
+      // DeleteFamilyVersion((byte) 10), DeleteColumn((byte) 12),
+      // DeleteFamily((byte) 14), Maximum((byte) 255).
+      dataBuf.put((byte)4);
       // Value.
       dataBuf.put(kv.value);
       // MVCC.
       dataBuf.put((byte)0);
+
       // Copy to output stream.
       baos.write(dataBuf.array(), 0, dataBuf.position());
       // Clear the buffer.
diff --git 
a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java 
b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java
index 2f9918f6d07a..e6387e804e8d 100644
--- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java
+++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java
@@ -82,6 +82,10 @@ public class HFileFileInfoBlock extends HFileBlock {
     fileInfoToWrite.put(name, value);
   }
 
+  public boolean containsKey(String name) {
+    return fileInfoToWrite.containsKey(name);
+  }
+
   @Override
   public ByteBuffer getUncompressedBlockDataToWrite() {
     ByteBuffer buff = ByteBuffer.allocate(context.getBlockSize() * 2);
diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java 
b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java
index b2c8e141675b..824ec1328e6c 100644
--- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java
+++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java
@@ -31,28 +31,27 @@ public class HFileInfo {
   private static final String RESERVED_PREFIX = "hfile.";
   static final UTF8StringKey LAST_KEY =
       new UTF8StringKey(RESERVED_PREFIX + "LASTKEY");
-  private static final UTF8StringKey FILE_CREATION_TIME_TS =
+  static final UTF8StringKey FILE_CREATION_TIME_TS =
       new UTF8StringKey(RESERVED_PREFIX + "CREATE_TIME_TS");
-  private static final UTF8StringKey KEY_VALUE_VERSION =
+  static final UTF8StringKey KEY_VALUE_VERSION =
       new UTF8StringKey("KEY_VALUE_VERSION");
   static final UTF8StringKey MAX_MVCC_TS_KEY =
       new UTF8StringKey("MAX_MEMSTORE_TS_KEY");
-
-  private static final int KEY_VALUE_VERSION_WITH_MVCC_TS = 1;
+  static final UTF8StringKey AVG_KEY_LEN =
+      new UTF8StringKey(RESERVED_PREFIX + "AVG_KEY_LEN");
+  static final UTF8StringKey AVG_VALUE_LEN =
+      new UTF8StringKey(RESERVED_PREFIX + "AVG_VALUE_LEN");
+  static final int KEY_VALUE_VERSION_WITH_MVCC_TS = 1;
 
   private final Map<UTF8StringKey, byte[]> infoMap;
   private final long fileCreationTime;
   private final Option<Key> lastKey;
-  private final long maxMvccTs;
-  private final boolean containsMvccTs;
 
   public HFileInfo(Map<UTF8StringKey, byte[]> infoMap) {
     this.infoMap = infoMap;
     this.fileCreationTime = parseFileCreationTime();
     this.lastKey = parseLastKey();
-    this.maxMvccTs = parseMaxMvccTs();
-    this.containsMvccTs = maxMvccTs > 0;
-    if (containsMvccTs) {
+    if (parseMaxMvccTs() > 0) {
       // The HFile written by Hudi does not contain MVCC timestamps.
       // Parsing MVCC timestamps is not supported.
       throw new UnsupportedOperationException("HFiles with MVCC timestamps are 
not supported");
diff --git 
a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileWriterImpl.java 
b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileWriterImpl.java
index c8153328ca7d..b9c835d06186 100644
--- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileWriterImpl.java
+++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileWriterImpl.java
@@ -22,8 +22,6 @@ package org.apache.hudi.io.hfile;
 import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.io.hfile.protobuf.generated.HFileProtos;
 
-import com.google.protobuf.ByteString;
-
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
@@ -35,9 +33,11 @@ import java.util.Map;
 import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT16;
 import static 
org.apache.hudi.io.hfile.HFileBlock.getVariableLengthEncodedBytes;
 import static org.apache.hudi.io.hfile.HFileBlockType.TRAILER;
+import static 
org.apache.hudi.io.hfile.HFileInfo.KEY_VALUE_VERSION_WITH_MVCC_TS;
 import static org.apache.hudi.io.hfile.HFileInfo.LAST_KEY;
 import static org.apache.hudi.io.hfile.HFileInfo.MAX_MVCC_TS_KEY;
 import static org.apache.hudi.io.hfile.HFileTrailer.TRAILER_SIZE;
+import static org.apache.hudi.io.util.IOUtils.toBytes;
 
 /**
  * Pure Java implementation of HFile writer (HFile v3 format) for Hudi.
@@ -68,6 +68,8 @@ public class HFileWriterImpl implements HFileWriter {
   private long firstDataBlockOffset = -1;
   private long lastDataBlockOffset;
   private long totalNumberOfRecords = 0;
+  private long totalKeyLength = 0;
+  private long totalValueLength = 0;
 
   public HFileWriterImpl(HFileContext context, OutputStream outputStream) {
     this.outputStream = outputStream;
@@ -87,6 +89,8 @@ public class HFileWriterImpl implements HFileWriter {
   public void append(String key, byte[] value) throws IOException {
     byte[] keyBytes = StringUtils.getUTF8Bytes(key);
     lastKey = keyBytes;
+    totalKeyLength += keyBytes.length;
+    totalValueLength += value.length;
     // Records with the same key must be put into the same block.
     // Here 9 = 4 bytes of key length + 4 bytes of value length + 1 byte MVCC.
     if (!Arrays.equals(currentDataBlock.getLastKeyContent(), keyBytes)
@@ -170,10 +174,7 @@ public class HFileWriterImpl implements HFileWriter {
     metaIndexBlock.setStartOffsetInBuffForWrite(currentOffset);
     writeBuffer(metaIndexBuffer);
     // Write File Info.
-    fileInfoBlock.add(
-        new String(LAST_KEY.getBytes(), StandardCharsets.UTF_8),
-        addKeyLength(lastKey));
-    fileInfoBlock.setStartOffsetInBuffForWrite(currentOffset);
+    finishFileInfo();
     writeBuffer(fileInfoBlock.serialize());
   }
 
@@ -191,7 +192,6 @@ public class HFileWriterImpl implements HFileWriter {
     builder.setLastDataBlockOffset(lastDataBlockOffset);
     builder.setComparatorClassName(COMPARATOR_CLASS_NAME);
     builder.setCompressionCodec(context.getCompressionCodec().getId());
-    builder.setEncryptionKey(ByteString.EMPTY);
     HFileProtos.TrailerProto trailerProto = builder.build();
 
     ByteBuffer trailer = ByteBuffer.allocate(TRAILER_SIZE);
@@ -216,7 +216,38 @@ public class HFileWriterImpl implements HFileWriter {
   private void initFileInfo() {
     fileInfoBlock.add(
         new String(MAX_MVCC_TS_KEY.getBytes(), StandardCharsets.UTF_8),
-        new byte[]{0});
+        toBytes(0L));
+  }
+
+  protected void finishFileInfo() {
+    // Record last key.
+    fileInfoBlock.add(
+        new String(LAST_KEY.getBytes(), StandardCharsets.UTF_8),
+        addKeyLength(lastKey));
+    fileInfoBlock.setStartOffsetInBuffForWrite(currentOffset);
+
+    // Average key length.
+    int avgKeyLen = totalNumberOfRecords == 0
+        ? 0 : (int) (totalKeyLength / totalNumberOfRecords);
+    fileInfoBlock.add(
+        new String(HFileInfo.AVG_KEY_LEN.getBytes(), StandardCharsets.UTF_8),
+        toBytes(avgKeyLen));
+    fileInfoBlock.add(
+        new String(HFileInfo.FILE_CREATION_TIME_TS.getBytes(), 
StandardCharsets.UTF_8),
+        toBytes(context.getFileCreationTime()));
+
+    // Average value length.
+    int avgValueLen = totalNumberOfRecords == 0
+        ? 0 : (int) (totalValueLength / totalNumberOfRecords);
+    fileInfoBlock.add(
+        new String(HFileInfo.AVG_VALUE_LEN.getBytes(), StandardCharsets.UTF_8),
+        toBytes(avgValueLen));
+
+    // NOTE: Set this property to make sure the key value and MVCC timestamp
+    // pairs are properly decoded
+    appendFileInfo(
+        new String(HFileInfo.KEY_VALUE_VERSION.getBytes(), 
StandardCharsets.UTF_8),
+        toBytes(KEY_VALUE_VERSION_WITH_MVCC_TS));
   }
 
   // Note: HFileReaderImpl assumes that:
diff --git 
a/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileCompatibility.java 
b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileCompatibility.java
new file mode 100644
index 000000000000..97dc821cb79d
--- /dev/null
+++ b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileCompatibility.java
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.common.util.io.ByteBufferBackedInputStream;
+import org.apache.hudi.io.ByteArraySeekableDataInputStream;
+import org.apache.hudi.io.SeekableDataInputStream;
+import org.apache.hudi.io.util.IOUtils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparatorImpl;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+import static org.apache.hudi.common.util.FileIOUtils.readAsByteArray;
+import static org.apache.hudi.io.hfile.HFileInfo.KEY_VALUE_VERSION;
+import static org.apache.hudi.io.util.IOUtils.readInt;
+import static org.apache.hudi.io.util.IOUtils.toBytes;
+
+class TestHFileCompatibility {
+  // Test data - simple key-value pairs
+  private static final List<TestRecord> TEST_RECORDS = Arrays.asList(
+      new TestRecord("row1", "value1"),
+      new TestRecord("row2", "value2"),
+      new TestRecord("row3", "value3"),
+      new TestRecord("row4", "value4"),
+      new TestRecord("row5", "value5")
+  );
+
+  @ParameterizedTest
+  @CsvSource({
+      "/hfile/hudi-generated.hfile,/hfile/hbase-generated.hfile",
+      "/hfile/hbase-generated.hfile,/hfile/hudi-generated.hfile"
+  })
+  void testHFileReadCompatibility(String hudiFilePath, String hbaseFilePath) 
throws Exception {
+    try (HFileReader hudiReader = createHFileReaderFromResource(hudiFilePath);
+        org.apache.hadoop.hbase.io.hfile.HFile.Reader hbaseReader =
+            createHBaseHFileReaderFromResource(hbaseFilePath)) {
+      // Validate number of entries.
+      Assertions.assertEquals(5, hudiReader.getNumKeyValueEntries());
+      Assertions.assertEquals(5, hbaseReader.getEntries());
+      // Validate data block content.
+      hudiReader.seekTo();
+      HFileScanner scanner = hbaseReader.getScanner(true, true);
+      scanner.seekTo();
+      int i = 0;
+      do {
+        org.apache.hudi.io.hfile.KeyValue keyValue = 
hudiReader.getKeyValue().get();
+        Cell cell = scanner.getCell();
+        // Ensure Hudi record is correct.
+        Assertions.assertEquals(TEST_RECORDS.get(i).key, 
keyValue.getKey().getContentInString());
+        byte[] value = Arrays.copyOfRange(
+            keyValue.getBytes(),
+            keyValue.getValueOffset(),
+            keyValue.getValueOffset() + keyValue.getValueLength());
+        Assertions.assertArrayEquals(value, 
TEST_RECORDS.get(i).value.getBytes());
+        // Ensure Hbase record is correct.
+        byte[] key = Arrays.copyOfRange(
+            cell.getRowArray(),
+            cell.getRowOffset(),
+            cell.getRowOffset() + cell.getRowLength());
+        Assertions.assertArrayEquals(TEST_RECORDS.get(i).key.getBytes(), key);
+        value = Arrays.copyOfRange(
+            cell.getValueArray(),
+            cell.getValueOffset(),
+            cell.getValueOffset() + cell.getValueLength());
+        Assertions.assertArrayEquals(value, 
TEST_RECORDS.get(i).value.getBytes());
+        i++;
+      } while (hudiReader.next() && scanner.next());
+
+      // Compare some meta information.
+      // LAST KEY.
+      
Assertions.assertTrue(hbaseReader.getHFileInfo().containsKey(HFileInfo.LAST_KEY.getBytes()));
+      
Assertions.assertTrue(hudiReader.getMetaInfo(HFileInfo.LAST_KEY).isPresent());
+      if (hudiReader.getMetaInfo(HFileInfo.LAST_KEY).get().length
+          < 
hbaseReader.getHFileInfo().get(HFileInfo.LAST_KEY.getBytes()).length) {
+        Assertions.assertTrue(isPrefix(
+            hudiReader.getMetaInfo(HFileInfo.LAST_KEY).get(),
+            hbaseReader.getHFileInfo().get(HFileInfo.LAST_KEY.getBytes())));
+      } else {
+        Assertions.assertTrue(isPrefix(
+            hbaseReader.getHFileInfo().get(HFileInfo.LAST_KEY.getBytes()),
+            hudiReader.getMetaInfo(HFileInfo.LAST_KEY).get()));
+      }
+      // Average key length.
+      
Assertions.assertTrue(hbaseReader.getHFileInfo().containsKey(HFileInfo.AVG_KEY_LEN.getBytes()));
+      
Assertions.assertTrue(hudiReader.getMetaInfo(HFileInfo.AVG_KEY_LEN).isPresent());
+      // Average value length.
+      
Assertions.assertTrue(hbaseReader.getHFileInfo().containsKey(HFileInfo.AVG_VALUE_LEN.getBytes()));
+      
Assertions.assertTrue(hudiReader.getMetaInfo(HFileInfo.AVG_VALUE_LEN).isPresent());
+      Assertions.assertTrue(
+          hbaseReader.getHFileInfo().getAvgValueLen()
+              >= 
readInt(hudiReader.getMetaInfo(HFileInfo.AVG_VALUE_LEN).get(), 0));
+      // MVCC.
+      
Assertions.assertTrue(hbaseReader.getHFileInfo().shouldIncludeMemStoreTS());
+      // Note that MemStoreTS is not set.
+      Assertions.assertFalse(hbaseReader.getHFileInfo().isDecodeMemstoreTS());
+      
Assertions.assertTrue(hudiReader.getMetaInfo(KEY_VALUE_VERSION).isPresent());
+      
Assertions.assertTrue(hudiReader.getMetaInfo(HFileInfo.MAX_MVCC_TS_KEY).isPresent());
+      Assertions.assertEquals(0L,
+          IOUtils.readLong(
+          hudiReader.getMetaInfo(HFileInfo.MAX_MVCC_TS_KEY).get(), 0));
+    }
+  }
+
+  @Test
+  void testHbaseReaderSucceedsWhenKeyValueVersionIsSetTo1() throws IOException 
{
+    String fileName = "hudi-generated-for-keyvalue-versions";
+    Path tempFile = new Path(Files.createTempFile(fileName, 
".hfile").toString());
+    // By default this value is set to 1. Here we explicitly set it to 1 for 
test purposes.
+    writeHFileWithHudi(tempFile, 1);
+
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    // Create HBase HFile.Reader from the temporary file
+    HFile.Reader reader = HFile.createReader(fs, new 
Path(tempFile.toString()), conf);
+    byte[] keyValueVersion = 
reader.getHFileInfo().get(KEY_VALUE_VERSION.getBytes());
+    Assertions.assertEquals(1, IOUtils.readInt(keyValueVersion, 0));
+    // Values from the trailer still work.
+    Assertions.assertEquals(5, reader.getEntries());
+    // Scanning the file succeeds.
+    HFileScanner scanner = reader.getScanner(true, true);
+    scanner.seekTo();
+    Assertions.assertDoesNotThrow(() -> {
+      int i = 0;
+      do {
+        Cell cell = scanner.getCell();
+        byte[] key = Arrays.copyOfRange(
+            cell.getRowArray(),
+            cell.getRowOffset(),
+            cell.getRowOffset() + cell.getRowLength());
+        Assertions.assertArrayEquals(TEST_RECORDS.get(i).key.getBytes(), key);
+        i++;
+      } while (scanner.next());
+    });
+  }
+
+  static boolean isPrefix(byte[] prefix, byte[] array) {
+    if (prefix.length > array.length) {
+      return false; // can't be prefix if longer
+    }
+    for (int i = 0; i < prefix.length; i++) {
+      if (prefix[i] != array[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  static HFileReader createHFileReaderFromResource(String fileName) throws 
IOException {
+    // Read HFile data from resources
+    byte[] hfileData = readHFileFromResources(fileName);
+    // Convert to ByteBuffer
+    ByteBuffer buffer = ByteBuffer.wrap(hfileData);
+    // Create SeekableDataInputStream
+    SeekableDataInputStream inputStream = new ByteArraySeekableDataInputStream(
+        new ByteBufferBackedInputStream(buffer)
+    );
+    // Create and return HFileReaderImpl
+    return new HFileReaderImpl(inputStream, hfileData.length);
+  }
+
+  static HFile.Reader createHBaseHFileReaderFromResource(String fileName) 
throws IOException {
+    // Read HFile data from resources
+    byte[] hfileData = readHFileFromResources(fileName);
+    // Create a temporary file to write the HFile data
+    Path tempFile = new Path(Files.createTempFile("hbase_hfile_", 
".hfile").toString());
+    try {
+      // Write the byte array to temporary file
+      Files.write(Paths.get(tempFile.toString()), hfileData);
+      // Create Hadoop Configuration and FileSystem
+      Configuration conf = new Configuration();
+      FileSystem fs = FileSystem.get(conf);
+      // Create HBase HFile.Reader from the temporary file
+      HFile.Reader reader = HFile.createReader(fs, new 
Path(tempFile.toString()), conf);
+      // Note: The temporary file will be cleaned up when the reader is closed
+      // or you can manually delete it after use
+      return reader;
+    } catch (IOException e) {
+      // Clean up temp file if creation fails
+      Files.deleteIfExists(Paths.get(tempFile.toString()));
+      throw e;
+    }
+  }
+
+  private static byte[] readHFileFromResources(String filename) throws 
IOException {
+    long size = 
Objects.requireNonNull(TestHFileCompatibility.class.getResource(filename))
+        .openConnection().getContentLength();
+    return readAsByteArray(
+        TestHFileReader.class.getResourceAsStream(filename), (int) size);
+  }
+
+  /**
+   * The following functions are used to generate the HFiles used in the tests.
+   */
+  void testWriteHFiles() throws IOException, URISyntaxException {
+    String hbaseFile = 
Paths.get("src/test/resources/hfile/hbase-generated.hfile").toAbsolutePath().toString();
+    String hudiFile = 
Paths.get("src/test/resources/hfile/hudi-generated.hfile").toAbsolutePath().toString();
+    writeHFileWithHbase(new Path(hbaseFile));
+    writeHFileWithHudi(new Path(hudiFile));
+  }
+
+  private void writeHFileWithHbase(Path filePath) throws IOException {
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+
+    // Create HFile context with appropriate settings
+    HFileContext context = new HFileContextBuilder()
+        .withBlockSize(64 * 1024)
+        .withCompression(Compression.Algorithm.NONE)
+        .withCellComparator(CellComparatorImpl.COMPARATOR)
+        .withIncludesMvcc(true)
+        .build();
+
+    // Create HBase HFile writer
+    try (HFile.Writer writer = HFile.getWriterFactory(conf, new 
CacheConfig(conf))
+        .withPath(fs, filePath).withFileContext(context).create()) {
+      // Write test records as HBase KeyValue objects
+      for (TestRecord record : TEST_RECORDS) {
+        KeyValue kv = new KeyValue(
+            Bytes.toBytes(record.key),           // row
+            new byte[0],                         // family
+            new byte[0],                         // qualifier
+            0L,                                  // timestamp
+            Bytes.toBytes(record.value)          // value
+        );
+        writer.append(kv);
+      }
+    }
+  }
+
+  private void writeHFileWithHudi(Path filePath) throws IOException {
+    writeHFileWithHudi(filePath, 1);
+  }
+
+  private void writeHFileWithHudi(Path filePath, int keyValueVersion) throws 
IOException {
+    org.apache.hudi.io.hfile.HFileContext context = 
org.apache.hudi.io.hfile.HFileContext.builder()
+        .blockSize(64 * 1024)
+        .build();
+    try (DataOutputStream outputStream = new DataOutputStream(
+        Files.newOutputStream(Paths.get(filePath.toString())));
+         HFileWriter writer = new HFileWriterImpl(context, outputStream)) {
+      for (TestRecord record : TEST_RECORDS) {
+        writer.append(record.key, record.value.getBytes("UTF-8"));
+      }
+      writer.appendMetaInfo("bloom_filter", "random_string".getBytes());
+      // To validate if a specific KEY_VALUE_VERSION value should be set.
+      if (keyValueVersion != 1) {
+        writer.appendFileInfo(
+            new String(KEY_VALUE_VERSION.getBytes(), StandardCharsets.UTF_8), 
toBytes(keyValueVersion));
+      }
+    }
+  }
+
+  // Simple test record class
+  private static class TestRecord {
+    final String key;
+    final String value;
+    
+    TestRecord(String key, String value) {
+      this.key = key;
+      this.value = value;
+    }
+    
+    @Override
+    public String toString() {
+      return "TestRecord{key='" + key + "', value='" + value + "'}";
+    }
+  }
+}
diff --git 
a/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReadCompatibility.java
 
b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReadCompatibility.java
new file mode 100644
index 000000000000..45f1dab9c931
--- /dev/null
+++ 
b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReadCompatibility.java
@@ -0,0 +1,315 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.common.util.io.ByteBufferBackedInputStream;
+import org.apache.hudi.io.ByteArraySeekableDataInputStream;
+import org.apache.hudi.io.SeekableDataInputStream;
+import org.apache.hudi.io.util.IOUtils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparatorImpl;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+import static org.apache.hudi.common.util.FileIOUtils.readAsByteArray;
+import static org.apache.hudi.io.hfile.HFileInfo.KEY_VALUE_VERSION;
+import static org.apache.hudi.io.util.IOUtils.readInt;
+import static org.apache.hudi.io.util.IOUtils.toBytes;
+
+class TestHFileReadCompatibility {
+  // Test data - simple key-value pairs
+  private static final List<TestRecord> TEST_RECORDS = Arrays.asList(
+      new TestRecord("row1", "value1"),
+      new TestRecord("row2", "value2"),
+      new TestRecord("row3", "value3"),
+      new TestRecord("row4", "value4"),
+      new TestRecord("row5", "value5")
+  );
+
+  @ParameterizedTest
+  @CsvSource({
+      "/hfile/hudi-generated.hfile,/hfile/hbase-generated.hfile",
+      "/hfile/hbase-generated.hfile,/hfile/hudi-generated.hfile"
+  })
+  void testHFileReadCompatibility(String hudiFilePath, String hbaseFilePath) 
throws Exception {
+    try (HFileReader hudiReader = createHFileReaderFromResource(hudiFilePath);
+        org.apache.hadoop.hbase.io.hfile.HFile.Reader hbaseReader =
+            createHBaseHFileReaderFromResource(hbaseFilePath)) {
+      // Validate number of entries.
+      Assertions.assertEquals(5, hudiReader.getNumKeyValueEntries());
+      Assertions.assertEquals(5, hbaseReader.getEntries());
+      // Validate data block content.
+      hudiReader.seekTo();
+      HFileScanner scanner = hbaseReader.getScanner(true, true);
+      scanner.seekTo();
+      int i = 0;
+      do {
+        org.apache.hudi.io.hfile.KeyValue keyValue = 
hudiReader.getKeyValue().get();
+        Cell cell = scanner.getCell();
+        // Ensure Hudi record is correct.
+        Assertions.assertEquals(TEST_RECORDS.get(i).key, 
keyValue.getKey().getContentInString());
+        byte[] value = Arrays.copyOfRange(
+            keyValue.getBytes(),
+            keyValue.getValueOffset(),
+            keyValue.getValueOffset() + keyValue.getValueLength());
+        Assertions.assertArrayEquals(value, 
TEST_RECORDS.get(i).value.getBytes());
+        // Ensure Hbase record is correct.
+        byte[] key = Arrays.copyOfRange(
+            cell.getRowArray(),
+            cell.getRowOffset(),
+            cell.getRowOffset() + cell.getRowLength());
+        Assertions.assertArrayEquals(TEST_RECORDS.get(i).key.getBytes(), key);
+        value = Arrays.copyOfRange(
+            cell.getValueArray(),
+            cell.getValueOffset(),
+            cell.getValueOffset() + cell.getValueLength());
+        Assertions.assertArrayEquals(value, 
TEST_RECORDS.get(i).value.getBytes());
+        i++;
+      } while (hudiReader.next() && scanner.next());
+
+      // Compare some meta information.
+      // LAST KEY.
+      
Assertions.assertTrue(hbaseReader.getHFileInfo().containsKey(HFileInfo.LAST_KEY.getBytes()));
+      
Assertions.assertTrue(hudiReader.getMetaInfo(HFileInfo.LAST_KEY).isPresent());
+      // The last key value returned from hbase contains the extra fields,
+      // e.g., column family, column qualifier, timestamp, key type, which is 
10 more bytes.
+      // Therefore, the last key value from hudi should be the prefix since 
hudi does not use these
+      // extra fields.
+      if (hudiReader.getMetaInfo(HFileInfo.LAST_KEY).get().length
+          < 
hbaseReader.getHFileInfo().get(HFileInfo.LAST_KEY.getBytes()).length) {
+        Assertions.assertTrue(isPrefix(
+            hudiReader.getMetaInfo(HFileInfo.LAST_KEY).get(),
+            hbaseReader.getHFileInfo().get(HFileInfo.LAST_KEY.getBytes())));
+      } else {
+        Assertions.assertTrue(isPrefix(
+            hbaseReader.getHFileInfo().get(HFileInfo.LAST_KEY.getBytes()),
+            hudiReader.getMetaInfo(HFileInfo.LAST_KEY).get()));
+      }
+      // Average key length.
+      
Assertions.assertTrue(hbaseReader.getHFileInfo().containsKey(HFileInfo.AVG_KEY_LEN.getBytes()));
+      
Assertions.assertTrue(hudiReader.getMetaInfo(HFileInfo.AVG_KEY_LEN).isPresent());
+      // Average value length.
+      
Assertions.assertTrue(hbaseReader.getHFileInfo().containsKey(HFileInfo.AVG_VALUE_LEN.getBytes()));
+      
Assertions.assertTrue(hudiReader.getMetaInfo(HFileInfo.AVG_VALUE_LEN).isPresent());
+      Assertions.assertTrue(
+          hbaseReader.getHFileInfo().getAvgValueLen()
+              >= 
readInt(hudiReader.getMetaInfo(HFileInfo.AVG_VALUE_LEN).get(), 0));
+      // MVCC.
+      
Assertions.assertTrue(hbaseReader.getHFileInfo().shouldIncludeMemStoreTS());
+      // Note that MemStoreTS is not set.
+      Assertions.assertFalse(hbaseReader.getHFileInfo().isDecodeMemstoreTS());
+      
Assertions.assertTrue(hudiReader.getMetaInfo(KEY_VALUE_VERSION).isPresent());
+      
Assertions.assertTrue(hudiReader.getMetaInfo(HFileInfo.MAX_MVCC_TS_KEY).isPresent());
+      Assertions.assertEquals(0L,
+          IOUtils.readLong(
+          hudiReader.getMetaInfo(HFileInfo.MAX_MVCC_TS_KEY).get(), 0));
+    }
+  }
+
+  @Test
+  void testHbaseReaderSucceedsWhenKeyValueVersionIsSetTo1() throws IOException 
{
+    String fileName = "hudi-generated-for-keyvalue-versions";
+    Path tempFile = new Path(Files.createTempFile(fileName, 
".hfile").toString());
+    // By default this value is set to 1. Here we explicitly set it to 1 for 
test purpose.
+    writeHFileWithHudi(tempFile, 1);
+
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    // Create HBase HFile.Reader from the temporary file
+    HFile.Reader reader = HFile.createReader(fs, new 
Path(tempFile.toString()), conf);
+    byte[] keyValueVersion = 
reader.getHFileInfo().get(KEY_VALUE_VERSION.getBytes());
+    Assertions.assertEquals(1, IOUtils.readInt(keyValueVersion, 0));
+    // Values from trailer still works.
+    Assertions.assertEquals(5, reader.getEntries());
+    // Scanning the file succeeds.
+    HFileScanner scanner = reader.getScanner(true, true);
+    scanner.seekTo();
+    Assertions.assertDoesNotThrow(() -> {
+      int i = 0;
+      do {
+        Cell cell = scanner.getCell();
+        byte[] key = Arrays.copyOfRange(
+            cell.getRowArray(),
+            cell.getRowOffset(),
+            cell.getRowOffset() + cell.getRowLength());
+        Assertions.assertArrayEquals(TEST_RECORDS.get(i).key.getBytes(), key);
+        i++;
+      } while (scanner.next());
+    });
+  }
+
+  static boolean isPrefix(byte[] prefix, byte[] array) {
+    if (prefix.length > array.length) {
+      return false; // can't be prefix if longer
+    }
+    for (int i = 0; i < prefix.length; i++) {
+      if (prefix[i] != array[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  static HFileReader createHFileReaderFromResource(String fileName) throws 
IOException {
+    // Read HFile data from resources
+    byte[] hfileData = readHFileFromResources(fileName);
+    // Convert to ByteBuffer
+    ByteBuffer buffer = ByteBuffer.wrap(hfileData);
+    // Create SeekableDataInputStream
+    SeekableDataInputStream inputStream = new ByteArraySeekableDataInputStream(
+        new ByteBufferBackedInputStream(buffer)
+    );
+    // Create and return HFileReaderImpl
+    return new HFileReaderImpl(inputStream, hfileData.length);
+  }
+
+  static HFile.Reader createHBaseHFileReaderFromResource(String fileName) 
throws IOException {
+    // Read HFile data from resources
+    byte[] hfileData = readHFileFromResources(fileName);
+    // Create a temporary file to write the HFile data
+    Path tempFile = new Path(Files.createTempFile("hbase_hfile_", 
".hfile").toString());
+    try {
+      // Write the byte array to temporary file
+      Files.write(Paths.get(tempFile.toString()), hfileData);
+      // Create Hadoop Configuration and FileSystem
+      Configuration conf = new Configuration();
+      FileSystem fs = FileSystem.get(conf);
+      // Create HBase HFile.Reader from the temporary file
+      HFile.Reader reader = HFile.createReader(fs, new 
Path(tempFile.toString()), conf);
+      // Note: The temporary file will be cleaned up when the reader is closed
+      // or you can manually delete it after use
+      return reader;
+    } catch (IOException e) {
+      // Clean up temp file if creation fails
+      Files.deleteIfExists(Paths.get(tempFile.toString()));
+      throw e;
+    }
+  }
+
+  private static byte[] readHFileFromResources(String filename) throws 
IOException {
+    long size = 
Objects.requireNonNull(TestHFileReadCompatibility.class.getResource(filename))
+        .openConnection().getContentLength();
+    return readAsByteArray(
+        TestHFileReader.class.getResourceAsStream(filename), (int) size);
+  }
+
+  /**
+   * The following are the functions used to generate hfile used in the tests.
+   */
+  void testWriteHFiles() throws IOException, URISyntaxException {
+    String hbaseFile = 
Paths.get("src/test/resources/hfile/hbase-generated.hfile").toAbsolutePath().toString();
+    String hudiFile = 
Paths.get("src/test/resources/hfile/hudi-generated.hfile").toAbsolutePath().toString();
+    writeHFileWithHbase(new Path(hbaseFile));
+    writeHFileWithHudi(new Path(hudiFile));
+  }
+
+  private void writeHFileWithHbase(Path filePath) throws IOException {
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+
+    // Create HFile context with appropriate settings
+    HFileContext context = new HFileContextBuilder()
+        .withBlockSize(64 * 1024)
+        .withCompression(Compression.Algorithm.NONE)
+        .withCellComparator(CellComparatorImpl.COMPARATOR)
+        .withIncludesMvcc(true)
+        .build();
+
+    // Create HBase HFile writer
+    try (HFile.Writer writer = HFile.getWriterFactory(conf, new 
CacheConfig(conf))
+        .withPath(fs, filePath).withFileContext(context).create()) {
+      // Write test records as HBase KeyValue objects
+      for (TestRecord record : TEST_RECORDS) {
+        KeyValue kv = new KeyValue(
+            Bytes.toBytes(record.key),           // row
+            new byte[0],                         // family
+            new byte[0],                         // qualifier
+            0L,                                  // timestamp
+            Bytes.toBytes(record.value)          // value
+        );
+        writer.append(kv);
+      }
+    }
+  }
+
+  private void writeHFileWithHudi(Path filePath) throws IOException {
+    writeHFileWithHudi(filePath, 1);
+  }
+
+  private void writeHFileWithHudi(Path filePath, int keyValueVersion) throws 
IOException {
+    org.apache.hudi.io.hfile.HFileContext context = 
org.apache.hudi.io.hfile.HFileContext.builder()
+        .blockSize(64 * 1024)
+        .build();
+    try (DataOutputStream outputStream = new DataOutputStream(
+        Files.newOutputStream(Paths.get(filePath.toString())));
+         HFileWriter writer = new HFileWriterImpl(context, outputStream)) {
+      for (TestRecord record : TEST_RECORDS) {
+        writer.append(record.key, record.value.getBytes("UTF-8"));
+      }
+      writer.appendMetaInfo("bloom_filter", "random_string".getBytes());
+      // To validate if a specific KEY_VALUE_VERSION value should be set.
+      if (keyValueVersion != 1) {
+        writer.appendFileInfo(
+            new String(KEY_VALUE_VERSION.getBytes(), StandardCharsets.UTF_8), 
toBytes(keyValueVersion));
+      }
+    }
+  }
+
+  // Simple test record class
+  private static class TestRecord {
+    final String key;
+    final String value;
+    
+    TestRecord(String key, String value) {
+      this.key = key;
+      this.value = value;
+    }
+    
+    @Override
+    public String toString() {
+      return "TestRecord{key='" + key + "', value='" + value + "'}";
+    }
+  }
+}
diff --git 
a/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileWriter.java 
b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileWriter.java
index fe9cf62dfbb0..da87dddf1ae1 100644
--- a/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileWriter.java
+++ b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileWriter.java
@@ -41,6 +41,9 @@ import java.util.Arrays;
 
 import static org.apache.hudi.io.hfile.HFileBlockType.DATA;
 import static org.apache.hudi.io.hfile.HFileBlockType.TRAILER;
+import static org.apache.hudi.io.hfile.HFileInfo.AVG_KEY_LEN;
+import static org.apache.hudi.io.hfile.HFileInfo.AVG_VALUE_LEN;
+import static org.apache.hudi.io.hfile.HFileInfo.KEY_VALUE_VERSION;
 import static org.apache.hudi.io.hfile.HFileInfo.LAST_KEY;
 import static org.apache.hudi.io.hfile.HFileInfo.MAX_MVCC_TS_KEY;
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -72,7 +75,7 @@ class TestHFileWriter {
   @Test
   void testSameKeyLocation() throws IOException {
     // 50 bytes for data part limit.
-    HFileContext context = new HFileContext.Builder().blockSize(50).build();
+    HFileContext context = new HFileContext.Builder().blockSize(100).build();
     String testFile = TEST_FILE;
     try (DataOutputStream outputStream =
              new DataOutputStream(Files.newOutputStream(Paths.get(testFile)));
@@ -98,7 +101,7 @@ class TestHFileWriter {
       reader.initializeMetadata();
       assertEquals(20, reader.getNumKeyValueEntries());
       HFileTrailer trailer = reader.getTrailer();
-      assertEquals(6, trailer.getDataIndexCount());
+      assertEquals(4, trailer.getDataIndexCount());
       int i = 0;
       for (Key key : reader.getDataBlockIndexMap().keySet()) {
         assertArrayEquals(
@@ -107,7 +110,7 @@ class TestHFileWriter {
         if (i == 0) {
           i++;
         } else {
-          i += 2;
+          i += 4;
         }
       }
     }
@@ -116,7 +119,7 @@ class TestHFileWriter {
   @Test
   void testUniqueKeyLocation() throws IOException {
     // 50 bytes for data part limit.
-    HFileContext context = new HFileContext.Builder().blockSize(50).build();
+    HFileContext context = new HFileContext.Builder().blockSize(100).build();
     String testFile = TEST_FILE;
     try (DataOutputStream outputStream =
              new DataOutputStream(Files.newOutputStream(Paths.get(testFile)));
@@ -138,7 +141,7 @@ class TestHFileWriter {
       reader.initializeMetadata();
       assertEquals(50, reader.getNumKeyValueEntries());
       HFileTrailer trailer = reader.getTrailer();
-      assertEquals(25, trailer.getDataIndexCount());
+      assertEquals(13, trailer.getDataIndexCount());
       reader.seekTo();
       for (int i = 0; i < 50; i++) {
         KeyValue kv = reader.getKeyValue().get();
@@ -171,7 +174,7 @@ class TestHFileWriter {
   private static void validateHFileSize() throws IOException {
     Path path = Paths.get(TEST_FILE);
     long actualSize = Files.size(path);
-    long expectedSize = 4366L;
+    long expectedSize = 4521;
     assertEquals(expectedSize, actualSize);
   }
 
@@ -193,7 +196,11 @@ class TestHFileWriter {
       reader.initializeMetadata();
       assertEquals(3, reader.getNumKeyValueEntries());
       assertTrue(reader.getMetaInfo(LAST_KEY).isPresent());
-      assertEquals(1, reader.getMetaInfo(MAX_MVCC_TS_KEY).get().length);
+      assertEquals(4, reader.getMetaInfo(AVG_KEY_LEN).get().length);
+      assertEquals(4, reader.getMetaInfo(AVG_VALUE_LEN).get().length);
+      assertEquals(8, reader.getMetaInfo(MAX_MVCC_TS_KEY).get().length);
+      assertEquals(1,
+          
ByteBuffer.wrap(reader.getMetaInfo(KEY_VALUE_VERSION).get()).getInt());
     }
   }
 
@@ -244,7 +251,7 @@ class TestHFileWriter {
 
     byte[] key = new byte[keyLen];
     buf.get(key);
-    byte[] keyContent = Arrays.copyOfRange(key, 2, key.length);
+    byte[] keyContent = Arrays.copyOfRange(key, 2, key.length - 10);
     assertArrayEquals(expectedKey.getBytes(StandardCharsets.UTF_8), 
keyContent);
 
     byte[] value = new byte[valLen];
diff --git a/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHfileBlock.java 
b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHfileBlock.java
new file mode 100644
index 000000000000..93f90141b155
--- /dev/null
+++ b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHfileBlock.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+class TestHfileBlock {
+  @Test
+  void testNumChecksumChunksZeroBytes() {
+    Assertions.assertEquals(0, HFileBlock.numChecksumChunks(0L, 512));
+  }
+
+  @Test
+  void testNumChecksumChunksZeroBytesPerChecksum() {
+    Assertions.assertEquals(0, HFileBlock.numChecksumChunks(100L, 0));
+  }
+
+  @Test
+  void testNumChecksumChunksExactDivision() {
+    Assertions.assertEquals(2, HFileBlock.numChecksumChunks(1024L, 512));
+  }
+
+  @Test
+  void testNumChecksumChunksWithRemainder() {
+    Assertions.assertEquals(3, HFileBlock.numChecksumChunks(1200L, 512));
+  }
+
+  @Test
+  void testNumChecksumChunksSingleChunk() {
+    Assertions.assertEquals(1, HFileBlock.numChecksumChunks(200L, 512));
+  }
+
+  @Test
+  void testNumChecksumChunksOverflowThrows() {
+    long numBytes = ((long) Integer.MAX_VALUE / HFileBlock.CHECKSUM_SIZE + 1)
+        * 1024; // force too many chunks
+    assertThrows(IllegalArgumentException.class,
+        () -> HFileBlock.numChecksumChunks(numBytes, 1024));
+  }
+}
diff --git 
a/hudi-io/src/test/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java 
b/hudi-io/src/test/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
new file mode 100644
index 000000000000..c6d66fb10e02
--- /dev/null
+++ 
b/hudi-io/src/test/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.storage;
+
+import org.apache.hadoop.hbase.CellComparatorImpl;
+
/**
 * Comparator used when HFiles written by Hudi are read back with the HBase HFile
 * reader. It adds no behavior of its own and inherits all cell-ordering semantics
 * from {@link CellComparatorImpl}.
 *
 * <p>NOTE(review): the class presumably exists so Hudi-written files can name a
 * comparator resolvable on the test classpath — confirm against the HFile writer's
 * trailer handling.
 */
public class HoodieHBaseKVComparator extends CellComparatorImpl {
}
diff --git a/hudi-io/src/test/resources/hfile/hbase-generated.hfile 
b/hudi-io/src/test/resources/hfile/hbase-generated.hfile
new file mode 100644
index 000000000000..b1ea85fa3648
Binary files /dev/null and 
b/hudi-io/src/test/resources/hfile/hbase-generated.hfile differ
diff --git a/hudi-io/src/test/resources/hfile/hudi-generated.hfile 
b/hudi-io/src/test/resources/hfile/hudi-generated.hfile
new file mode 100644
index 000000000000..27d5f9af1345
Binary files /dev/null and 
b/hudi-io/src/test/resources/hfile/hudi-generated.hfile differ
diff --git a/pom.xml b/pom.xml
index 6fc24473c8a1..984bff9dd673 100644
--- a/pom.xml
+++ b/pom.xml
@@ -415,9 +415,9 @@
                     <exclude>log4j:log4j</exclude>
                     <exclude>ch.qos.logback:logback-classic</exclude>
                     <!-- NOTE: We're banning any HBase deps -->
-                    <exclude>org.apache.hbase:hbase-common:*</exclude>
-                    <exclude>org.apache.hbase:hbase-client:*</exclude>
-                    <exclude>org.apache.hbase:hbase-server:*</exclude>
+                    
<exclude>org.apache.hbase:hbase-common:*:*:compile</exclude>
+                    
<exclude>org.apache.hbase:hbase-client:*:*:compile</exclude>
+                    
<exclude>org.apache.hbase:hbase-server:*:*:compile</exclude>
                     <!--To upgrade snappy because pre 1.1.8.2 does not work on 
m1 mac-->
                     <exclude>org.xerial.snappy:snappy-java:*</exclude>
                   </excludes>

Reply via email to