linliu-code commented on code in PR #13977:
URL: https://github.com/apache/hudi/pull/13977#discussion_r2376847497
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileIndexBlock.java:
##########
@@ -43,6 +43,8 @@ protected HFileIndexBlock(HFileContext context,
public void add(byte[] firstKey, long offset, int size) {
Key key = new Key(firstKey);
entries.add(new BlockIndexEntry(key, Option.empty(), offset, size));
+ // 8 bytes for offset, 4 bytes for size.
+ longestEntrySize = Math.max(longestEntrySize, key.getContentLength() + 12);
Review Comment:
Remove the ByteBuffer now.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java:
##########
@@ -267,40 +267,40 @@ public ByteBuffer serialize() throws IOException {
// Compress if specified.
ByteBuffer compressedBlockData =
context.getCompressor().compress(uncompressedBlockData);
// Buffer for building block.
- ByteBuffer buf = ByteBuffer.allocate(Math.max(
- context.getBlockSize() * 2,
+ ByteArrayOutputStream buf = new ByteArrayOutputStream(Math.max(
+ context.getBlockSize(),
compressedBlockData.limit() + HFILEBLOCK_HEADER_SIZE * 2));
// Block header
// 1. Magic is always 8 bytes.
- buf.put(blockType.getMagic(), 0, 8);
+ buf.write(blockType.getMagic(), 0, 8);
// 2. onDiskSizeWithoutHeader.
int compressedDataSize = compressedBlockData.limit();
int onDiskDataSizeWithHeader = HFileBlock.HFILEBLOCK_HEADER_SIZE +
compressedDataSize;
int numChecksumBytes = numChecksumBytes(onDiskDataSizeWithHeader,
DEFAULT_BYTES_PER_CHECKSUM);
- buf.putInt(compressedDataSize + numChecksumBytes);
+ HFileUtils.writeInt(buf, compressedDataSize + numChecksumBytes);
Review Comment:
Done.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaIndexBlock.java:
##########
@@ -32,9 +32,15 @@ public static HFileMetaIndexBlock
createMetaIndexBlockToWrite(HFileContext conte
return new HFileMetaIndexBlock(context);
}
+ @Override
+ protected int calculateBufferCapacity() {
+ // Use 5 since the keyLength could use 5 bytes maximally.
+ return longestEntrySize + 5;
Review Comment:
This variable is removed.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -206,10 +207,22 @@ byte[] getLastKeyContent() {
return entriesToWrite.get(entriesToWrite.size() - 1).key;
}
+ @Override
+ protected int calculateBufferCapacity() {
+ // Key length = 4,
+ // value length = 4,
+ // key length length = 2,
+ // 10 bytes for column family, timestamp, and key type,
+ // 1 byte for MVCC.
+ // Sum is 21 bytes.
+ // So the capacity of the buffer should be: longestEntrySize + 21.
+ return longestEntrySize + 21;
+ }
+
@Override
protected ByteBuffer getUncompressedBlockDataToWrite() {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ByteBuffer dataBuf = ByteBuffer.allocate(context.getBlockSize());
+ ByteBuffer dataBuf = ByteBuffer.allocate(calculateBufferCapacity());
Review Comment:
ByteBuffer is removed during the write; we only use it as the return type.
There is no need to calculate the capacity of the buffer.
##########
hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileWriter.java:
##########
@@ -74,15 +75,17 @@ void testOverflow() throws Exception {
@Test
void testSameKeyLocation() throws IOException {
- // 50 bytes for data part limit.
- HFileContext context = new HFileContext.Builder().blockSize(100).build();
+ // 1 bytes for data part limit.
+ HFileContext context = new HFileContext.Builder().blockSize(1).build();
Review Comment:
We can do that. Let me tune the parameters.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaIndexBlock.java:
##########
@@ -33,27 +34,27 @@ public static HFileMetaIndexBlock
createMetaIndexBlockToWrite(HFileContext conte
}
@Override
- public ByteBuffer getUncompressedBlockDataToWrite() {
- ByteBuffer buf = ByteBuffer.allocate(context.getBlockSize() * 2);
+ public ByteBuffer getUncompressedBlockDataToWrite() throws IOException {
+ ByteArrayOutputStream buf = new ByteArrayOutputStream();
Review Comment:
Done.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java:
##########
@@ -56,12 +58,11 @@ public byte[] getFirstKey() {
}
@Override
- public ByteBuffer getUncompressedBlockDataToWrite() {
- ByteBuffer dataBuf = ByteBuffer.allocate(context.getBlockSize());
+ public ByteBuffer getUncompressedBlockDataToWrite() throws IOException {
+ ByteArrayOutputStream dataBuf = new ByteArrayOutputStream();
Review Comment:
We can.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileWriterImpl.java:
##########
@@ -92,16 +92,17 @@ public void append(String key, byte[] value) throws
IOException {
totalKeyLength += keyBytes.length;
totalValueLength += value.length;
// Records with the same key must be put into the same block.
- // Here 9 = 4 bytes of key length + 4 bytes of value length + 1 byte MVCC.
+ // Here 9 = 4 bytes of key length + 4 bytes of value length + 1 byte MVCC
+ // + 10 bytes (check sum, timestamp, key type).
if (!Arrays.equals(currentDataBlock.getLastKeyContent(), keyBytes)
- && uncompressedDataBlockBytes + keyBytes.length + value.length + 9 >
blockSize) {
+ && uncompressedDataBlockBytes + keyBytes.length + value.length + 19 >
blockSize) {
flushCurrentDataBlock();
uncompressedDataBlockBytes = 0;
}
currentDataBlock.add(keyBytes, value);
int uncompressedKeyValueSize = keyBytes.length + value.length;
- uncompressedDataBlockBytes += uncompressedKeyValueSize + 9;
- totalUncompressedDataBlockBytes += uncompressedKeyValueSize + 9;
+ uncompressedDataBlockBytes += uncompressedKeyValueSize + 19;
Review Comment:
Done.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileWriterImpl.java:
##########
@@ -92,16 +92,17 @@ public void append(String key, byte[] value) throws
IOException {
totalKeyLength += keyBytes.length;
totalValueLength += value.length;
// Records with the same key must be put into the same block.
- // Here 9 = 4 bytes of key length + 4 bytes of value length + 1 byte MVCC.
+ // Here 9 = 4 bytes of key length + 4 bytes of value length + 1 byte MVCC
Review Comment:
Done.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -207,40 +208,35 @@ byte[] getLastKeyContent() {
}
@Override
- protected ByteBuffer getUncompressedBlockDataToWrite() {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ByteBuffer dataBuf = ByteBuffer.allocate(context.getBlockSize());
+ protected ByteBuffer getUncompressedBlockDataToWrite() throws IOException {
+ ByteArrayOutputStream dataBuf = new ByteArrayOutputStream();
Review Comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]