Re: [PR] [HUDI-7692] Extract metadata record type to MetadataPartitionType enum [hudi]

via GitHub Thu, 25 Jul 2024 10:47:54 -0700


yihua commented on code in PR #11597:
URL: https://github.com/apache/hudi/pull/11597#discussion_r1691884566



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java:
##########
@@ -288,19 +277,19 @@ private HoodieMetadataPayload(String key, int type, 
Map<String, HoodieMetadataFi
   }
 
   private HoodieMetadataPayload(String key, HoodieMetadataBloomFilter 
metadataBloomFilter) {
-    this(key, METADATA_TYPE_BLOOM_FILTER, null, metadataBloomFilter, null, 
null, null);
+    this(key, MetadataPartitionType.BLOOM_FILTERS.getRecordType(), null, 
metadataBloomFilter, null, null, null);
   }
 
   private HoodieMetadataPayload(String key, HoodieMetadataColumnStats 
columnStats) {
-    this(key, METADATA_TYPE_COLUMN_STATS, null, null, columnStats, null, null);
+    this(key, MetadataPartitionType.COLUMN_STATS.getRecordType(), null, null, 
columnStats, null, null);
   }
 
   private HoodieMetadataPayload(String key, HoodieRecordIndexInfo 
recordIndexMetadata) {
-    this(key, METADATA_TYPE_RECORD_INDEX, null, null, null, 
recordIndexMetadata, null);
+    this(key, MetadataPartitionType.RECORD_INDEX.getRecordType(), null, null, 
null, recordIndexMetadata, null);
   }
 
   private HoodieMetadataPayload(String key, HoodieSecondaryIndexInfo 
secondaryIndexMetadata) {
-    this(key, METADATA_TYPE_SECONDARY_INDEX, null, null, null, null, 
secondaryIndexMetadata);
+    this(key, MetadataPartitionType.SECONDARY_INDEX.getRecordType(), null, 
null, null, null, secondaryIndexMetadata);

Review Comment:
   I think we should keep using the `MetadataPartitionType` enum instead of the 
type number.  We can introduce a new type named `PARTITIONS` with the same 
partition path and different record key.  We may want to rename 
`MetadataPartitionType` to `MetadataIndexType` if the new name fits better into 
the code structure.



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java:
##########
@@ -890,45 +877,35 @@ public String toString() {
     sb.append(KEY_FIELD_NAME + "=").append(key).append(", ");
     sb.append(SCHEMA_FIELD_NAME_TYPE + "=").append(type).append(", ");
 
-    switch (type) {
-      case METADATA_TYPE_PARTITION_LIST:
-      case METADATA_TYPE_FILE_LIST:
-        sb.append("Files: {");
-        
sb.append("creations=").append(Arrays.toString(getFilenames().toArray())).append(",
 ");
-        
sb.append("deletions=").append(Arrays.toString(getDeletions().toArray())).append(",
 ");
-        sb.append("}");
-        break;
-      case METADATA_TYPE_BLOOM_FILTER:
-        checkState(getBloomFilterMetadata().isPresent());
-        sb.append("BloomFilter: {");
-        sb.append("bloom size: 
").append(getBloomFilterMetadata().get().getBloomFilter().array().length).append(",
 ");
-        sb.append("timestamp: 
").append(getBloomFilterMetadata().get().getTimestamp()).append(", ");
-        sb.append("deleted: 
").append(getBloomFilterMetadata().get().getIsDeleted());
-        sb.append("}");
-        break;
-      case METADATA_TYPE_COLUMN_STATS:
-        checkState(getColumnStatMetadata().isPresent());
-        sb.append("ColStats: {");
-        sb.append(getColumnStatMetadata().get());
-        sb.append("}");
-        break;
-      case METADATA_TYPE_RECORD_INDEX:
-        sb.append("RecordIndex: {");
-        sb.append("location=").append(getRecordGlobalLocation());
-        sb.append("}");
-        break;
-      default:
-        break;
+    if (type == MetadataPartitionType.FILES.getRecordType() || type == 
MetadataPartitionType.FILES.getRecordType(RECORDKEY_PARTITION_LIST)) {
+      sb.append("Files: {");

Review Comment:
   Similar here.



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java:
##########
@@ -409,31 +398,29 @@ public HoodieMetadataPayload 
preCombine(HoodieMetadataPayload previousRecord) {
     checkArgument(previousRecord.key.equals(key),
         "Cannot combine " + previousRecord.key + " with " + key + " as the 
keys differ");
 
-    switch (type) {
-      case METADATA_TYPE_PARTITION_LIST:
-      case METADATA_TYPE_FILE_LIST:
-        Map<String, HoodieMetadataFileInfo> combinedFileInfo = 
combineFileSystemMetadata(previousRecord);
-        return new HoodieMetadataPayload(key, type, combinedFileInfo);
-      case METADATA_TYPE_BLOOM_FILTER:
-        HoodieMetadataBloomFilter combineBloomFilterMetadata = 
combineBloomFilterMetadata(previousRecord);
-        return new HoodieMetadataPayload(key, combineBloomFilterMetadata);
-      case METADATA_TYPE_COLUMN_STATS:
-        return new HoodieMetadataPayload(key, 
combineColumnStatsMetadata(previousRecord));
-      case METADATA_TYPE_RECORD_INDEX:
-        // There is always a single mapping and the latest mapping is 
maintained.
-        // Mappings in record index can change in two scenarios:
-        // 1. A key deleted from dataset and then added again (new filedID)
-        // 2. A key moved to a different file due to clustering
-
-        // No need to merge with previous record index, always pick the latest 
payload.
-        return this;
-      case METADATA_TYPE_SECONDARY_INDEX:
-        // Secondary Index combine()/merge() always returns the current 
(*this*)
-        // record and discards the prevRecord. Based on the 'isDeleted' marker 
in the payload,
-        // the merger running on top takes the right action (discard current 
or retain current record).
-        return this;
-      default:
-        throw new HoodieMetadataException("Unknown type of 
HoodieMetadataPayload: " + type);
+    if (type == MetadataPartitionType.FILES.getRecordType() || type == 
MetadataPartitionType.FILES.getRecordType(RECORDKEY_PARTITION_LIST)) {
+      Map<String, HoodieMetadataFileInfo> combinedFileInfo = 
combineFileSystemMetadata(previousRecord);

Review Comment:
   Similar here on removing the if-else logic by replacing it with logic in 
each enum in `MetadataPartitionType`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [HUDI-7692] Extract metadata record type to MetadataPartitionType enum [hudi]

Reply via email to