linliu-code commented on code in PR #13873:
URL: https://github.com/apache/hudi/pull/13873#discussion_r2335899617


##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java:
##########
@@ -31,32 +31,31 @@ public class HFileInfo {
   private static final String RESERVED_PREFIX = "hfile.";
   static final UTF8StringKey LAST_KEY =
       new UTF8StringKey(RESERVED_PREFIX + "LASTKEY");
-  private static final UTF8StringKey FILE_CREATION_TIME_TS =
+  static final UTF8StringKey FILE_CREATION_TIME_TS =
       new UTF8StringKey(RESERVED_PREFIX + "CREATE_TIME_TS");
-  private static final UTF8StringKey KEY_VALUE_VERSION =
+  static final UTF8StringKey KEY_VALUE_VERSION =
       new UTF8StringKey("KEY_VALUE_VERSION");
   static final UTF8StringKey MAX_MVCC_TS_KEY =
       new UTF8StringKey("MAX_MEMSTORE_TS_KEY");
-
-  private static final int KEY_VALUE_VERSION_WITH_MVCC_TS = 1;
+  static final UTF8StringKey AVG_KEY_LEN =
+      new UTF8StringKey(RESERVED_PREFIX + "AVG_KEY_LEN");
+  static final UTF8StringKey AVG_VALUE_LEN =
+      new UTF8StringKey(RESERVED_PREFIX + "AVG_VALUE_LEN");
+  static final int KEY_VALUE_VERSION_WITH_MVCC_TS = 1;
 
   private final Map<UTF8StringKey, byte[]> infoMap;
   private final long fileCreationTime;
   private final Option<Key> lastKey;
+  // This is set to trigger the constant MVCC 0,
+  // such that table version 6 and >=8 can have
+  // the same behavior.
   private final long maxMvccTs;
-  private final boolean containsMvccTs;
 
   public HFileInfo(Map<UTF8StringKey, byte[]> infoMap) {
     this.infoMap = infoMap;
     this.fileCreationTime = parseFileCreationTime();
     this.lastKey = parseLastKey();
     this.maxMvccTs = parseMaxMvccTs();
-    this.containsMvccTs = maxMvccTs > 0;
-    if (containsMvccTs) {
-      // The HFile written by Hudi does not contain MVCC timestamps.
-      // Parsing MVCC timestamps is not supported.
-      throw new UnsupportedOperationException("HFiles with MVCC timestamps are 
not supported");
-    }

Review Comment:
   Your right. i have removed the MVCC setting and reran the Jon's test and it 
worked. 
   So MVCC is not necessarily needed to change.



##########
hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileWriterImpl.java:
##########
@@ -219,6 +219,41 @@ private void initFileInfo() {
         new byte[]{0});
   }
 
+  protected void finishFileInfo() {
+    // Record last key.
+    fileInfoBlock.add(
+        new String(LAST_KEY.getBytes(), StandardCharsets.UTF_8),
+        addKeyLength(lastKey));
+    fileInfoBlock.setStartOffsetInBuffForWrite(currentOffset);
+
+    // Average key length.
+    int avgKeyLen = totalNumberOfRecords == 0
+        ? 0 : (int) (totalKeyLength / totalNumberOfRecords);
+    fileInfoBlock.add(
+        new String(HFileInfo.AVG_KEY_LEN.getBytes(), StandardCharsets.UTF_8),
+        toBytes(avgKeyLen));
+    fileInfoBlock.add(
+        new String(HFileInfo.FILE_CREATION_TIME_TS.getBytes(), 
StandardCharsets.UTF_8),
+        toBytes(context.getFileCreateTime()));
+
+    // Average value length.
+    int avgValueLen = totalNumberOfRecords == 0
+        ? 0 : (int) (totalValueLength / totalNumberOfRecords);
+    fileInfoBlock.add(
+        new String(HFileInfo.AVG_VALUE_LEN.getBytes(), StandardCharsets.UTF_8),
+        toBytes(avgValueLen));
+
+    // NOTE: To make MVCC usage consistent cross different table versions,
+    // we should set following properties.
+    // After table versions <= 8 are deprecated, MVCC byte can be removed from 
key-value pair.
+    fileInfoBlock.add(
+        new String(HFileInfo.KEY_VALUE_VERSION.getBytes(), 
StandardCharsets.UTF_8),
+        toBytes(KEY_VALUE_VERSION_WITH_MVCC_TS));
+    fileInfoBlock.add(
+        new String(MAX_MVCC_TS_KEY.getBytes(), StandardCharsets.UTF_8),
+        toBytes(1L));

Review Comment:
   Removed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to