yihua commented on code in PR #12866:
URL: https://github.com/apache/hudi/pull/12866#discussion_r2105508903


##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -149,4 +155,74 @@ public boolean next(HFileCursor cursor, int 
blockStartOffsetInFile) {
   private boolean isAtFirstKey(int relativeOffset) {
     return relativeOffset == HFILEBLOCK_HEADER_SIZE;
   }
+
+  // ================ Below are for Write ================
+  protected final List<KeyValueEntry> entries = new ArrayList<>();
+
+  public HFileDataBlock(HFileContext context) {
+    this(context,-1L);
+  }
+
+  public HFileDataBlock(HFileContext context, long previousBlockOffset) {
+    super(context, HFileBlockType.DATA, previousBlockOffset);
+    // This is not used for write.
+    uncompressedContentEndRelativeOffset = -1;
+  }
+
+  public List<KeyValueEntry> getEntries() {
+    return entries;
+  }
+
+  public boolean isEmpty() {
+    return entries.isEmpty();
+  }
+
+  public void add(byte[] key, byte[] value) {
+    KeyValueEntry kv = new KeyValueEntry(key, value);
+    // Assume all entries are sorted before write.
+    add(kv, false);
+  }
+
+  public int getNumOfEntries() {
+    return entries.size();
+  }
+
+  protected void add(KeyValueEntry kv, boolean sorted) {
+    entries.add(kv);
+    if (sorted) {
+      entries.sort(KeyValueEntry::compareTo);
+    }
+  }
+
+  public byte[] getFirstKey() {
+    return entries.get(0).key;
+  }
+
+  public byte[] getLastKeyContent() {
+    if (entries.isEmpty()) {
+      return new byte[0];
+    }
+    return entries.get(entries.size() - 1).key;
+  }
+
+  @Override
+  public ByteBuffer getPayload() {
+    ByteBuffer dataBuf = ByteBuffer.allocate(context.getBlockSize() * 2);
+    for (KeyValueEntry kv : entries) {
+      // Length of key + length of a short variable indicating length of key;
+      dataBuf.putInt(kv.key.length + SIZEOF_INT16);
+      // Length of value;
+      dataBuf.putInt(kv.value.length);
+      // Key content length;
+      dataBuf.putShort((short)kv.key.length);

Review Comment:
   Avoid using member variables directly?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java:
##########
@@ -61,4 +69,39 @@ public HFileInfo readFileInfo() throws IOException {
     }
     return new HFileInfo(fileInfoMap);
   }
+
+  // ================ Below are for Write ================
+
+  public HFileFileInfoBlock(HFileContext context) {
+    super(context, HFileBlockType.FILE_INFO, -1L);
+  }

Review Comment:
   Similar comment on moving and wrapping the constructor



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java:
##########
@@ -36,6 +40,8 @@ public class HFileFileInfoBlock extends HFileBlock {
   // Magic we put ahead of a serialized protobuf message
   public static final byte[] PB_MAGIC = new byte[] {'P', 'B', 'U', 'F'};
 
+  private final Map<String, byte[]> entries = new HashMap<>();

Review Comment:
   ```suggestion
     private final Map<String, byte[]> fileInfoToWrite = new HashMap<>();
   ```



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java:
##########
@@ -149,4 +155,74 @@ public boolean next(HFileCursor cursor, int 
blockStartOffsetInFile) {
   private boolean isAtFirstKey(int relativeOffset) {
     return relativeOffset == HFILEBLOCK_HEADER_SIZE;
   }
+
+  // ================ Below are for Write ================
+  protected final List<KeyValueEntry> entries = new ArrayList<>();
+
+  public HFileDataBlock(HFileContext context) {
+    this(context,-1L);
+  }
+
+  public HFileDataBlock(HFileContext context, long previousBlockOffset) {
+    super(context, HFileBlockType.DATA, previousBlockOffset);
+    // This is not used for write.
+    uncompressedContentEndRelativeOffset = -1;
+  }
+
+  public List<KeyValueEntry> getEntries() {
+    return entries;
+  }
+
+  public boolean isEmpty() {
+    return entries.isEmpty();
+  }
+
+  public void add(byte[] key, byte[] value) {
+    KeyValueEntry kv = new KeyValueEntry(key, value);
+    // Assume all entries are sorted before write.
+    add(kv, false);
+  }
+
+  public int getNumOfEntries() {
+    return entries.size();
+  }
+
+  protected void add(KeyValueEntry kv, boolean sorted) {
+    entries.add(kv);
+    if (sorted) {
+      entries.sort(KeyValueEntry::compareTo);
+    }
+  }
+
+  public byte[] getFirstKey() {
+    return entries.get(0).key;
+  }
+
+  public byte[] getLastKeyContent() {
+    if (entries.isEmpty()) {
+      return new byte[0];
+    }
+    return entries.get(entries.size() - 1).key;
+  }
+
+  @Override
+  public ByteBuffer getPayload() {
+    ByteBuffer dataBuf = ByteBuffer.allocate(context.getBlockSize() * 2);
+    for (KeyValueEntry kv : entries) {
+      // Length of key + length of a short variable indicating length of key;
+      dataBuf.putInt(kv.key.length + SIZEOF_INT16);
+      // Length of value;
+      dataBuf.putInt(kv.value.length);
+      // Key content length;
+      dataBuf.putShort((short)kv.key.length);

Review Comment:
   Should the value be unsigned?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java:
##########
@@ -61,4 +69,39 @@ public HFileInfo readFileInfo() throws IOException {
     }
     return new HFileInfo(fileInfoMap);
   }
+
+  // ================ Below are for Write ================
+
+  public HFileFileInfoBlock(HFileContext context) {
+    super(context, HFileBlockType.FILE_INFO, -1L);
+  }
+
+  public void add(String name, byte[] value) {
+    entries.put(name, value);
+  }
+
+  @Override
+  public ByteBuffer getPayload() {
+    ByteBuffer buff = ByteBuffer.allocate(context.getBlockSize() * 2);

Review Comment:
   The block size should only control data block size correct?



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java:
##########
@@ -61,4 +69,39 @@ public HFileInfo readFileInfo() throws IOException {
     }
     return new HFileInfo(fileInfoMap);
   }
+
+  // ================ Below are for Write ================
+
+  public HFileFileInfoBlock(HFileContext context) {
+    super(context, HFileBlockType.FILE_INFO, -1L);
+  }
+
+  public void add(String name, byte[] value) {
+    entries.put(name, value);
+  }
+
+  @Override
+  public ByteBuffer getPayload() {
+    ByteBuffer buff = ByteBuffer.allocate(context.getBlockSize() * 2);
+    HFileProtos.InfoProto.Builder builder =
+        HFileProtos.InfoProto.newBuilder();
+    for (Map.Entry<String, byte[]> e : entries.entrySet()) {
+      HFileProtos.BytesBytesPair bbp = HFileProtos.BytesBytesPair
+          .newBuilder()
+          .setFirst(ByteString.copyFrom(getUTF8Bytes(e.getKey())))
+          .setSecond(ByteString.copyFrom(e.getValue()))

Review Comment:
   Could we call `wrap` instead of `copy`?
   ```suggestion
             .setFirst(ByteString.wrap(getUTF8Bytes(e.getKey())))
             .setSecond(ByteString.wrap(e.getValue()))
   ```



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java:
##########
@@ -61,4 +69,39 @@ public HFileInfo readFileInfo() throws IOException {
     }
     return new HFileInfo(fileInfoMap);
   }
+
+  // ================ Below are for Write ================
+
+  public HFileFileInfoBlock(HFileContext context) {
+    super(context, HFileBlockType.FILE_INFO, -1L);
+  }
+
+  public void add(String name, byte[] value) {
+    entries.put(name, value);
+  }
+
+  @Override
+  public ByteBuffer getPayload() {
+    ByteBuffer buff = ByteBuffer.allocate(context.getBlockSize() * 2);
+    HFileProtos.InfoProto.Builder builder =
+        HFileProtos.InfoProto.newBuilder();
+    for (Map.Entry<String, byte[]> e : entries.entrySet()) {
+      HFileProtos.BytesBytesPair bbp = HFileProtos.BytesBytesPair
+          .newBuilder()
+          .setFirst(ByteString.copyFrom(getUTF8Bytes(e.getKey())))
+          .setSecond(ByteString.copyFrom(e.getValue()))
+          .build();
+      builder.addMapEntry(bbp);
+    }
+    buff.put(PB_MAGIC);
+    byte[] payload = builder.build().toByteArray();
+    try {
+      buff.put(getVariableLengthEncodes(payload.length));

Review Comment:
   Is this supposed to be `varint` or `uint32`? 



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java:
##########
@@ -61,4 +69,39 @@ public HFileInfo readFileInfo() throws IOException {
     }
     return new HFileInfo(fileInfoMap);
   }
+
+  // ================ Below are for Write ================
+
+  public HFileFileInfoBlock(HFileContext context) {
+    super(context, HFileBlockType.FILE_INFO, -1L);
+  }
+
+  public void add(String name, byte[] value) {
+    entries.put(name, value);

Review Comment:
   Have you checked all file info keys are covered?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to