yihua commented on code in PR #12866:
URL: https://github.com/apache/hudi/pull/12866#discussion_r2105531469
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java:
##########
@@ -34,6 +36,44 @@ protected HFileMetaBlock(HFileContext context,
public ByteBuffer readContent() {
return ByteBuffer.wrap(
getByteBuff(),
- startOffsetInBuff + HFILEBLOCK_HEADER_SIZE,
uncompressedSizeWithoutHeader);
+ readAttributesOpt.get().startOffsetInBuff + HFILEBLOCK_HEADER_SIZE,
+ readAttributesOpt.get().uncompressedSizeWithoutHeader);
+ }
+
+ // ================ Below are for Write ================
+
+ protected final List<KeyValueEntry> entries = new ArrayList<>();
+
+ public HFileMetaBlock(HFileContext context) {
+ super(context, HFileBlockType.META, -1L);
+ }
Review Comment:
Make the meta block take the single entry upon instantiation? There is no
need to have a list or do `add`.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileIndexBlock.java:
##########
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.common.util.Option;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT16;
+
+public class HFileIndexBlock extends HFileBlock {
+ protected final List<BlockIndexEntry> entries = new ArrayList<>();
+ protected long payloadSize = -1L;
+
+ public HFileIndexBlock(HFileContext context,
+ HFileBlockType blockType,
+ byte[] byteBuff,
+ int startOffsetInBuff) {
+ super(context, blockType, byteBuff, startOffsetInBuff);
+ }
+
+ public HFileIndexBlock(HFileContext context,
+ HFileBlockType blockType) {
+ super(context, blockType, -1L);
+ }
+
+ public void add(byte[] firstKey, long offset, int size) {
+ Key key = new Key(firstKey);
+ entries.add(new BlockIndexEntry(key, Option.empty(), offset, size));
+ }
+
+ public int getNumOfEntries() {
+ return entries.size();
+ }
+
+ public long getPayloadSize() {
+ return payloadSize;
+ }
+
+ public boolean isEmpty() {
+ return entries.isEmpty();
+ }
+
+ @Override
+ public ByteBuffer getPayload() {
+ ByteBuffer buf = ByteBuffer.allocate(context.getBlockSize() * 2);
+ for (BlockIndexEntry entry : entries) {
+ buf.putLong(entry.getOffset());
+ buf.putInt(entry.getSize());
+ // Key length + 2.
+ try {
+ byte[] keyLength = getVariableLengthEncodes(
+ entry.getFirstKey().getLength() + SIZEOF_INT16);
Review Comment:
For all the places using varint, let's update the HFile format spec
`https://github.com/apache/hudi/blob/master/hudi-io/hfile_format.md` if such
information is not already in the spec.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileIndexBlock.java:
##########
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.io.hfile;
+
+import org.apache.hudi.common.util.Option;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT16;
+
+public class HFileIndexBlock extends HFileBlock {
+ protected final List<BlockIndexEntry> entries = new ArrayList<>();
+ protected long payloadSize = -1L;
+
+ public HFileIndexBlock(HFileContext context,
+ HFileBlockType blockType,
+ byte[] byteBuff,
+ int startOffsetInBuff) {
+ super(context, blockType, byteBuff, startOffsetInBuff);
+ }
+
+ public HFileIndexBlock(HFileContext context,
+ HFileBlockType blockType) {
+ super(context, blockType, -1L);
+ }
+
+ public void add(byte[] firstKey, long offset, int size) {
+ Key key = new Key(firstKey);
+ entries.add(new BlockIndexEntry(key, Option.empty(), offset, size));
+ }
+
+ public int getNumOfEntries() {
+ return entries.size();
+ }
+
+ public long getPayloadSize() {
+ return payloadSize;
+ }
+
+ public boolean isEmpty() {
+ return entries.isEmpty();
+ }
+
+ @Override
+ public ByteBuffer getPayload() {
Review Comment:
Should this logic be put into the `HFileRootIndexBlock` instead? It's not
shared between `HFileMetaIndexBlock` and `HFileRootIndexBlock` anyway.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java:
##########
@@ -34,6 +36,44 @@ protected HFileMetaBlock(HFileContext context,
public ByteBuffer readContent() {
return ByteBuffer.wrap(
getByteBuff(),
- startOffsetInBuff + HFILEBLOCK_HEADER_SIZE,
uncompressedSizeWithoutHeader);
+ readAttributesOpt.get().startOffsetInBuff + HFILEBLOCK_HEADER_SIZE,
+ readAttributesOpt.get().uncompressedSizeWithoutHeader);
+ }
+
+ // ================ Below are for Write ================
+
+ protected final List<KeyValueEntry> entries = new ArrayList<>();
+
+ public HFileMetaBlock(HFileContext context) {
+ super(context, HFileBlockType.META, -1L);
+ }
+
+ public byte[] getFirstKey() {
+ return entries.get(0).key;
+ }
+
+ public void add(byte[] key, byte[] value) {
+ KeyValueEntry kv = new KeyValueEntry(key, value);
+ add(kv, false);
+ }
+
+ protected void add(KeyValueEntry kv, boolean sorted) {
+ entries.add(kv);
+ if (sorted) {
+ entries.sort(KeyValueEntry::compareTo);
+ }
+ }
+
+ @Override
+ public ByteBuffer getPayload() {
+ ByteBuffer dataBuf = ByteBuffer.allocate(context.getBlockSize());
+ // Rule 1: there must be only one key-value entry.
+ assert (1 == entries.size())
+ : "only 1 value is allowed in meta block";
+ // Rule 2: only value should be store in the block.
+ // The key is stored in the meta index block.
+ dataBuf.put(entries.get(0).value);
Review Comment:
If we follow the constructor change, there is no need to do this additional
validation.
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java:
##########
@@ -34,6 +36,44 @@ protected HFileMetaBlock(HFileContext context,
public ByteBuffer readContent() {
return ByteBuffer.wrap(
getByteBuff(),
- startOffsetInBuff + HFILEBLOCK_HEADER_SIZE,
uncompressedSizeWithoutHeader);
+ readAttributesOpt.get().startOffsetInBuff + HFILEBLOCK_HEADER_SIZE,
+ readAttributesOpt.get().uncompressedSizeWithoutHeader);
+ }
+
+ // ================ Below are for Write ================
+
+ protected final List<KeyValueEntry> entries = new ArrayList<>();
+
+ public HFileMetaBlock(HFileContext context) {
+ super(context, HFileBlockType.META, -1L);
+ }
+
+ public byte[] getFirstKey() {
+ return entries.get(0).key;
+ }
+
+ public void add(byte[] key, byte[] value) {
+ KeyValueEntry kv = new KeyValueEntry(key, value);
+ add(kv, false);
+ }
+
+ protected void add(KeyValueEntry kv, boolean sorted) {
+ entries.add(kv);
+ if (sorted) {
+ entries.sort(KeyValueEntry::compareTo);
+ }
+ }
Review Comment:
No need to sort as there is one entry?
##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java:
##########
@@ -34,6 +36,44 @@ protected HFileMetaBlock(HFileContext context,
public ByteBuffer readContent() {
return ByteBuffer.wrap(
getByteBuff(),
- startOffsetInBuff + HFILEBLOCK_HEADER_SIZE,
uncompressedSizeWithoutHeader);
+ readAttributesOpt.get().startOffsetInBuff + HFILEBLOCK_HEADER_SIZE,
+ readAttributesOpt.get().uncompressedSizeWithoutHeader);
+ }
+
+ // ================ Below are for Write ================
+
+ protected final List<KeyValueEntry> entries = new ArrayList<>();
+
+ public HFileMetaBlock(HFileContext context) {
+ super(context, HFileBlockType.META, -1L);
+ }
+
+ public byte[] getFirstKey() {
+ return entries.get(0).key;
+ }
+
+ public void add(byte[] key, byte[] value) {
+ KeyValueEntry kv = new KeyValueEntry(key, value);
+ add(kv, false);
+ }
+
+ protected void add(KeyValueEntry kv, boolean sorted) {
+ entries.add(kv);
+ if (sorted) {
+ entries.sort(KeyValueEntry::compareTo);
+ }
+ }
Review Comment:
Do meta block entries have to be sorted?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]