nsivabalan commented on code in PR #13977:
URL: https://github.com/apache/hudi/pull/13977#discussion_r2380302408
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -207,39 +209,35 @@ byte[] getLastKeyContent() {
}
@Override
- protected ByteBuffer getUncompressedBlockDataToWrite() {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ByteBuffer dataBuf = ByteBuffer.allocate(context.getBlockSize());
- for (KeyValueEntry kv : entriesToWrite) {
- // Length of key + length of a short variable indicating length of key.
- // Note that 10 extra bytes are required by hbase reader.
- // That is: 1 byte for column family length, 8 bytes for timestamp, 1
bytes for key type.
- dataBuf.putInt(kv.key.length + KEY_LENGTH_LENGTH + COLUMN_FAMILY_LENGTH
+ VERSION_TIMESTAMP_LENGTH + KEY_TYPE_LENGTH);
- // Length of value.
- dataBuf.putInt(kv.value.length);
- // Key content length.
- dataBuf.putShort((short)kv.key.length);
- // Key.
- dataBuf.put(kv.key);
- // Column family length: constant 0.
- dataBuf.put((byte)0);
- // Column qualifier: assume 0 bits.
- // Timestamp: using the latest.
- dataBuf.putLong(LATEST_TIMESTAMP);
- // Key type: constant Put (4) in Hudi.
- // Minimum((byte) 0), Put((byte) 4), Delete((byte) 8),
- // DeleteFamilyVersion((byte) 10), DeleteColumn((byte) 12),
- // DeleteFamily((byte) 14), Maximum((byte) 255).
- dataBuf.put((byte)4);
- // Value.
- dataBuf.put(kv.value);
- // MVCC.
- dataBuf.put((byte)0);
-
- // Copy to output stream.
- baos.write(dataBuf.array(), 0, dataBuf.position());
- // Clear the buffer.
- dataBuf.clear();
+ protected ByteBuffer getUncompressedBlockDataToWrite() throws IOException {
+ ByteArrayOutputStream baos = new
ByteArrayOutputStream(context.getBlockSize());
+ try (DataOutputStream dataBuf = new DataOutputStream(baos)) {
Review Comment:
minor: rename this to `dataOutStream`.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaIndexBlock.java:
##########
@@ -33,27 +35,29 @@ public static HFileMetaIndexBlock
createMetaIndexBlockToWrite(HFileContext conte
}
@Override
- public ByteBuffer getUncompressedBlockDataToWrite() {
- ByteBuffer buf = ByteBuffer.allocate(context.getBlockSize() * 2);
- for (BlockIndexEntry entry : entries) {
- buf.putLong(entry.getOffset());
- buf.putInt(entry.getSize());
- // Key length.
- try {
- byte[] keyLength =
getVariableLengthEncodedBytes(entry.getFirstKey().getLength());
- buf.put(keyLength);
- } catch (IOException e) {
- throw new RuntimeException(
- "Failed to serialize number: " + entry.getFirstKey().getLength());
+ public ByteBuffer getUncompressedBlockDataToWrite() throws IOException {
+ ByteArrayOutputStream baos = new
ByteArrayOutputStream(context.getBlockSize());
+ try (DataOutputStream buf = new DataOutputStream(baos)) {
Review Comment:
Let's fix the naming of `buf` (e.g. rename it to `dataOutStream`).
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java:
##########
@@ -267,40 +268,41 @@ public ByteBuffer serialize() throws IOException {
// Compress if specified.
ByteBuffer compressedBlockData =
context.getCompressor().compress(uncompressedBlockData);
// Buffer for building block.
- ByteBuffer buf = ByteBuffer.allocate(Math.max(
- context.getBlockSize() * 2,
+ ByteArrayOutputStream baos = new ByteArrayOutputStream(Math.max(
+ context.getBlockSize(),
compressedBlockData.limit() + HFILEBLOCK_HEADER_SIZE * 2));
+ try (DataOutputStream buf = new DataOutputStream(baos)) {
Review Comment:
minor: rename `buf` to `dataOutStream`.
##########
hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileWriter.java:
##########
@@ -99,18 +102,22 @@ void testSameKeyLocation() throws IOException {
new ByteArraySeekableDataInputStream(new
ByteBufferBackedInputStream(buf));
HFileReaderImpl reader = new HFileReaderImpl(inputStream,
channel.size());
reader.initializeMetadata();
- assertEquals(20, reader.getNumKeyValueEntries());
+ // Totally 110 records.
+ assertEquals(110, reader.getNumKeyValueEntries());
HFileTrailer trailer = reader.getTrailer();
- assertEquals(4, trailer.getDataIndexCount());
+ // Totally 11 blocks.
+ assertEquals(3, trailer.getDataIndexCount());
int i = 0;
- for (Key key : reader.getDataBlockIndexMap().keySet()) {
- assertArrayEquals(
- String.format("key%02d", i).getBytes(),
- key.getContentInString().getBytes());
+ for (Map.Entry<Key, BlockIndexEntry> entry :
reader.getDataBlockIndexMap().entrySet()) {
+ System.out.println(String.format("key%02d", i) + " vs " +
entry.getKey().getContentInString());
if (i == 0) {
+ // first block: 100 records * 33 bytes + 37 bytes for header and
checksum = 3337.
Review Comment:
Why was the per-key data validation (the `assertArrayEquals` on the key content) removed?
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileRootIndexBlock.java:
##########
@@ -100,30 +101,27 @@ public List<BlockIndexEntry> readBlockIndexEntry(int
numEntries,
}
@Override
- public ByteBuffer getUncompressedBlockDataToWrite() {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ByteBuffer buf = ByteBuffer.allocate(context.getBlockSize());
- for (BlockIndexEntry entry : entries) {
- buf.putLong(entry.getOffset());
- buf.putInt(entry.getSize());
+ public ByteBuffer getUncompressedBlockDataToWrite() throws IOException {
+ ByteArrayOutputStream baos = new
ByteArrayOutputStream(context.getBlockSize());
+ try (DataOutputStream buf = new DataOutputStream(baos)) {
Review Comment:
same here
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java:
##########
@@ -87,8 +88,8 @@ public boolean containsKey(String name) {
}
@Override
- public ByteBuffer getUncompressedBlockDataToWrite() {
- ByteBuffer buff = ByteBuffer.allocate(context.getBlockSize() * 2);
+ public ByteBuffer getUncompressedBlockDataToWrite() throws IOException {
+ ByteArrayOutputStream buff = new
ByteArrayOutputStream(context.getBlockSize());
Review Comment:
minor: fix the naming of `buff`.
##########
hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileWriter.java:
##########
@@ -99,18 +102,22 @@ void testSameKeyLocation() throws IOException {
new ByteArraySeekableDataInputStream(new
ByteBufferBackedInputStream(buf));
HFileReaderImpl reader = new HFileReaderImpl(inputStream,
channel.size());
reader.initializeMetadata();
- assertEquals(20, reader.getNumKeyValueEntries());
+ // Totally 110 records.
+ assertEquals(110, reader.getNumKeyValueEntries());
HFileTrailer trailer = reader.getTrailer();
- assertEquals(4, trailer.getDataIndexCount());
+ // Totally 11 blocks.
+ assertEquals(3, trailer.getDataIndexCount());
int i = 0;
- for (Key key : reader.getDataBlockIndexMap().keySet()) {
- assertArrayEquals(
- String.format("key%02d", i).getBytes(),
- key.getContentInString().getBytes());
+ for (Map.Entry<Key, BlockIndexEntry> entry :
reader.getDataBlockIndexMap().entrySet()) {
+ System.out.println(String.format("key%02d", i) + " vs " +
entry.getKey().getContentInString());
Review Comment:
What's the need for this `System.out.println` debug output? Please remove it before merging.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]