This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 9d689fd68f5 [HUDI-7648] Refactor MetadataPartitionType so as to 
enahance reuse (#11067)
9d689fd68f5 is described below

commit 9d689fd68f544cbd2424790770cc2673701be928
Author: Sagar Sumit <[email protected]>
AuthorDate: Thu Apr 25 10:52:53 2024 +0530

    [HUDI-7648] Refactor MetadataPartitionType so as to enahance reuse (#11067)
---
 .../metadata/HoodieBackedTableMetadataWriter.java  |  36 +----
 .../hudi/metadata/MetadataPartitionType.java       |  72 +++++++++-
 .../hudi/metadata/TestMetadataPartitionType.java   | 152 +++++++++++++++++++++
 3 files changed, 223 insertions(+), 37 deletions(-)

diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index f20643a9e95..3064f4073f8 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -103,17 +103,17 @@ import static 
org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION
 import static 
org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;
 import static 
org.apache.hudi.common.table.timeline.HoodieTimeline.getIndexInflightInstant;
 import static 
org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeIndexPlan;
+import static 
org.apache.hudi.metadata.HoodieMetadataWriteUtils.createMetadataWriteConfig;
 import static 
org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX;
 import static 
org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP;
 import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions;
 import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getPartitionLatestFileSlicesIncludingInflight;
 import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getProjectedSchemaForFunctionalIndex;
 import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.readRecordKeysFromBaseFiles;
-import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;
-import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS;
 import static org.apache.hudi.metadata.MetadataPartitionType.FILES;
 import static org.apache.hudi.metadata.MetadataPartitionType.FUNCTIONAL_INDEX;
 import static org.apache.hudi.metadata.MetadataPartitionType.RECORD_INDEX;
+import static 
org.apache.hudi.metadata.MetadataPartitionType.getEnabledPartitions;
 
 /**
  * Writer implementation backed by an internal hudi table. Partition and file 
listing are saved within an internal MOR table
@@ -167,21 +167,15 @@ public abstract class HoodieBackedTableMetadataWriter<I> 
implements HoodieTableM
     this.engineContext = engineContext;
     this.hadoopConf = new SerializableConfiguration(hadoopConf);
     this.metrics = Option.empty();
-    this.enabledPartitionTypes = new ArrayList<>(4);
-
     this.dataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf)
         .setBasePath(dataWriteConfig.getBasePath())
         
.setTimeGeneratorConfig(dataWriteConfig.getTimeGeneratorConfig()).build();
-
+    this.enabledPartitionTypes = 
getEnabledPartitions(dataWriteConfig.getProps(), dataMetaClient);
     if (writeConfig.isMetadataTableEnabled()) {
-      this.metadataWriteConfig = 
HoodieMetadataWriteUtils.createMetadataWriteConfig(writeConfig, 
failedWritesCleaningPolicy);
-
+      this.metadataWriteConfig = createMetadataWriteConfig(writeConfig, 
failedWritesCleaningPolicy);
       try {
-        enablePartitions();
         initRegistry();
-
         initialized = initializeIfNeeded(dataMetaClient, 
inflightInstantTimestamp);
-
       } catch (IOException e) {
         LOG.error("Failed to initialize metadata table", e);
       }
@@ -202,28 +196,6 @@ public abstract class HoodieBackedTableMetadataWriter<I> 
implements HoodieTableM
     }
   }
 
-  /**
-   * Enable metadata table partitions based on config.
-   */
-  private void enablePartitions() {
-    final HoodieMetadataConfig metadataConfig = 
dataWriteConfig.getMetadataConfig();
-    if (dataWriteConfig.isMetadataTableEnabled() || 
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(FILES)) {
-      this.enabledPartitionTypes.add(FILES);
-    }
-    if (metadataConfig.isBloomFilterIndexEnabled() || 
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(BLOOM_FILTERS)) {
-      this.enabledPartitionTypes.add(BLOOM_FILTERS);
-    }
-    if (metadataConfig.isColumnStatsIndexEnabled() || 
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(COLUMN_STATS)) {
-      this.enabledPartitionTypes.add(COLUMN_STATS);
-    }
-    if (dataWriteConfig.isRecordIndexEnabled() || 
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(RECORD_INDEX)) {
-      this.enabledPartitionTypes.add(RECORD_INDEX);
-    }
-    if (dataMetaClient.getFunctionalIndexMetadata().isPresent()) {
-      this.enabledPartitionTypes.add(FUNCTIONAL_INDEX);
-    }
-  }
-
   protected abstract void initRegistry();
 
   public HoodieWriteConfig getWriteConfig() {
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java 
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
index 91114067051..8c9a06a9df2 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
@@ -18,27 +18,80 @@
 
 package org.apache.hudi.metadata;
 
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;
 
+import static org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE;
+import static 
org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE_METADATA_INDEX_BLOOM_FILTER;
+import static 
org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS;
+import static 
org.apache.hudi.common.config.HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP;
+import static org.apache.hudi.common.util.ConfigUtils.getBooleanWithAltKeys;
+
 /**
  * Partition types for metadata table.
  */
 public enum MetadataPartitionType {
-  FILES(HoodieTableMetadataUtil.PARTITION_NAME_FILES, "files-"),
-  COLUMN_STATS(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS, 
"col-stats-"),
-  BLOOM_FILTERS(HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS, 
"bloom-filters-"),
-  RECORD_INDEX(HoodieTableMetadataUtil.PARTITION_NAME_RECORD_INDEX, 
"record-index-"),
-  
FUNCTIONAL_INDEX(HoodieTableMetadataUtil.PARTITION_NAME_FUNCTIONAL_INDEX_PREFIX,
 "func-index-");
+  FILES(HoodieTableMetadataUtil.PARTITION_NAME_FILES, "files-") {
+    @Override
+    public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+      return getBooleanWithAltKeys(writeConfig, ENABLE);
+    }
+  },
+  COLUMN_STATS(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS, 
"col-stats-") {
+    @Override
+    public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+      return getBooleanWithAltKeys(writeConfig, 
ENABLE_METADATA_INDEX_COLUMN_STATS);
+    }
+  },
+  BLOOM_FILTERS(HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS, 
"bloom-filters-") {
+    @Override
+    public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+      return getBooleanWithAltKeys(writeConfig, 
ENABLE_METADATA_INDEX_BLOOM_FILTER);
+    }
+  },
+  RECORD_INDEX(HoodieTableMetadataUtil.PARTITION_NAME_RECORD_INDEX, 
"record-index-") {
+    @Override
+    public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+      return getBooleanWithAltKeys(writeConfig, RECORD_INDEX_ENABLE_PROP);
+    }
+  },
+  
FUNCTIONAL_INDEX(HoodieTableMetadataUtil.PARTITION_NAME_FUNCTIONAL_INDEX_PREFIX,
 "func-index-") {
+    @Override
+    public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+      // Functional index is created via sql and not via write path.
+      // HUDI-7662 tracks adding a separate config to enable/disable 
functional index.
+      return false;
+    }
+
+    @Override
+    public boolean isMetadataPartitionAvailable(HoodieTableMetaClient 
metaClient) {
+      return metaClient.getFunctionalIndexMetadata().isPresent();
+    }
+  };
 
   // Partition path in metadata table.
   private final String partitionPath;
   // FileId prefix used for all file groups in this partition.
   private final String fileIdPrefix;
 
+  /**
+   * Check if the metadata partition is enabled based on the metadata config.
+   */
+  public abstract boolean isMetadataPartitionEnabled(TypedProperties 
writeConfig);
+
+  /**
+   * Check if the metadata partition is available based on the table config.
+   */
+  public boolean isMetadataPartitionAvailable(HoodieTableMetaClient 
metaClient) {
+    return metaClient.getTableConfig().isMetadataPartitionAvailable(this);
+  }
+
   MetadataPartitionType(final String partitionPath, final String fileIdPrefix) 
{
     this.partitionPath = partitionPath;
     this.fileIdPrefix = fileIdPrefix;
@@ -70,6 +123,15 @@ public enum MetadataPartitionType {
         .collect(Collectors.toSet());
   }
 
+  /**
+   * Returns the list of metadata partition types enabled based on the 
metadata config and table config.
+   */
+  public static List<MetadataPartitionType> 
getEnabledPartitions(TypedProperties writeConfig, HoodieTableMetaClient 
metaClient) {
+    return Arrays.stream(values())
+        .filter(partitionType -> 
partitionType.isMetadataPartitionEnabled(writeConfig) || 
partitionType.isMetadataPartitionAvailable(metaClient))
+        .collect(Collectors.toList());
+  }
+
   @Override
   public String toString() {
     return "Metadata partition {"
diff --git 
a/hudi-common/src/test/java/org/apache/hudi/metadata/TestMetadataPartitionType.java
 
b/hudi-common/src/test/java/org/apache/hudi/metadata/TestMetadataPartitionType.java
new file mode 100644
index 00000000000..927934f1335
--- /dev/null
+++ 
b/hudi-common/src/test/java/org/apache/hudi/metadata/TestMetadataPartitionType.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.metadata;
+
+import org.apache.hudi.common.config.HoodieMetadataConfig;
+import org.apache.hudi.common.model.HoodieFunctionalIndexMetadata;
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.Option;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EnumSource;
+import org.mockito.Mockito;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Tests for {@link MetadataPartitionType}.
+ */
+public class TestMetadataPartitionType {
+
+  @ParameterizedTest
+  @EnumSource(MetadataPartitionType.class)
+  public void testPartitionEnabledByConfigOnly(MetadataPartitionType 
partitionType) {
+    HoodieTableMetaClient metaClient = 
Mockito.mock(HoodieTableMetaClient.class);
+    HoodieTableConfig tableConfig = Mockito.mock(HoodieTableConfig.class);
+
+    // Simulate the configuration enabling given partition type, but the meta 
client not having it available (yet to initialize the partition)
+    Mockito.when(metaClient.getTableConfig()).thenReturn(tableConfig);
+    
Mockito.when(tableConfig.isMetadataPartitionAvailable(partitionType)).thenReturn(false);
+    
Mockito.when(metaClient.getFunctionalIndexMetadata()).thenReturn(Option.empty());
+    HoodieMetadataConfig.Builder metadataConfigBuilder = 
HoodieMetadataConfig.newBuilder();
+    int expectedEnabledPartitions;
+    switch (partitionType) {
+      case FILES:
+      case FUNCTIONAL_INDEX:
+        metadataConfigBuilder.enable(true);
+        expectedEnabledPartitions = 1;
+        break;
+      case COLUMN_STATS:
+        metadataConfigBuilder.enable(true).withMetadataIndexColumnStats(true);
+        expectedEnabledPartitions = 2;
+        break;
+      case BLOOM_FILTERS:
+        metadataConfigBuilder.enable(true).withMetadataIndexBloomFilter(true);
+        expectedEnabledPartitions = 2;
+        break;
+      case RECORD_INDEX:
+        metadataConfigBuilder.enable(true).withEnableRecordIndex(true);
+        expectedEnabledPartitions = 2;
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown partition type: " + 
partitionType);
+    }
+
+    List<MetadataPartitionType> enabledPartitions = 
MetadataPartitionType.getEnabledPartitions(metadataConfigBuilder.build().getProps(),
 metaClient);
+
+    // Verify partition type is enabled due to config
+    if (partitionType == MetadataPartitionType.FUNCTIONAL_INDEX) {
+      assertEquals(1, enabledPartitions.size(), "FUNCTIONAL_INDEX should be 
enabled by SQL, only FILES is enabled in this case.");
+      assertTrue(enabledPartitions.contains(MetadataPartitionType.FILES));
+    } else {
+      assertEquals(expectedEnabledPartitions, enabledPartitions.size());
+      assertTrue(enabledPartitions.contains(partitionType));
+    }
+  }
+
+  @Test
+  public void testPartitionAvailableByMetaClientOnly() {
+    HoodieTableMetaClient metaClient = 
Mockito.mock(HoodieTableMetaClient.class);
+    HoodieTableConfig tableConfig = Mockito.mock(HoodieTableConfig.class);
+
+    // Simulate the meta client having RECORD_INDEX available but config not 
enabling it
+    Mockito.when(metaClient.getTableConfig()).thenReturn(tableConfig);
+    
Mockito.when(tableConfig.isMetadataPartitionAvailable(MetadataPartitionType.FILES)).thenReturn(true);
+    
Mockito.when(metaClient.getFunctionalIndexMetadata()).thenReturn(Option.empty());
+    
Mockito.when(metaClient.getTableConfig().isMetadataPartitionAvailable(MetadataPartitionType.RECORD_INDEX)).thenReturn(true);
+    HoodieMetadataConfig metadataConfig = 
HoodieMetadataConfig.newBuilder().enable(true).withEnableRecordIndex(false).build();
+
+    List<MetadataPartitionType> enabledPartitions = 
MetadataPartitionType.getEnabledPartitions(metadataConfig.getProps(), 
metaClient);
+
+    // Verify RECORD_INDEX and FILES is enabled due to availability
+    assertEquals(2, enabledPartitions.size(), "RECORD_INDEX and FILES should 
be available");
+    assertTrue(enabledPartitions.contains(MetadataPartitionType.FILES), "FILES 
should be enabled by availability");
+    assertTrue(enabledPartitions.contains(MetadataPartitionType.RECORD_INDEX), 
"RECORD_INDEX should be enabled by availability");
+  }
+
+  @Test
+  public void testNoPartitionsEnabled() {
+    HoodieTableMetaClient metaClient = 
Mockito.mock(HoodieTableMetaClient.class);
+    HoodieTableConfig tableConfig = Mockito.mock(HoodieTableConfig.class);
+
+    // Neither config nor availability allows any partitions
+    Mockito.when(metaClient.getTableConfig()).thenReturn(tableConfig);
+    
Mockito.when(metaClient.getFunctionalIndexMetadata()).thenReturn(Option.empty());
+    
Mockito.when(metaClient.getTableConfig().isMetadataPartitionAvailable(Mockito.any())).thenReturn(false);
+    HoodieMetadataConfig metadataConfig = 
HoodieMetadataConfig.newBuilder().enable(false).build();
+
+    List<MetadataPartitionType> enabledPartitions = 
MetadataPartitionType.getEnabledPartitions(metadataConfig.getProps(), 
metaClient);
+
+    // Verify no partitions are enabled
+    assertTrue(enabledPartitions.isEmpty(), "No partitions should be enabled");
+  }
+
+  @Test
+  public void testFunctionalIndexPartitionEnabled() {
+    HoodieTableMetaClient metaClient = 
Mockito.mock(HoodieTableMetaClient.class);
+    HoodieTableConfig tableConfig = Mockito.mock(HoodieTableConfig.class);
+
+    // Simulate the meta client having FUNCTIONAL_INDEX available
+    Mockito.when(metaClient.getTableConfig()).thenReturn(tableConfig);
+    
Mockito.when(tableConfig.isMetadataPartitionAvailable(MetadataPartitionType.FILES)).thenReturn(true);
+    
Mockito.when(metaClient.getFunctionalIndexMetadata()).thenReturn(Option.of(Mockito.mock(HoodieFunctionalIndexMetadata.class)));
+    
Mockito.when(metaClient.getTableConfig().isMetadataPartitionAvailable(MetadataPartitionType.FUNCTIONAL_INDEX)).thenReturn(true);
+    HoodieMetadataConfig metadataConfig = 
HoodieMetadataConfig.newBuilder().enable(true).build();
+
+    List<MetadataPartitionType> enabledPartitions = 
MetadataPartitionType.getEnabledPartitions(metadataConfig.getProps(), 
metaClient);
+
+    // Verify FUNCTIONAL_INDEX and FILES is enabled due to availability
+    assertEquals(2, enabledPartitions.size(), "FUNCTIONAL_INDEX and FILES 
should be available");
+    assertTrue(enabledPartitions.contains(MetadataPartitionType.FILES), "FILES 
should be enabled by availability");
+    
assertTrue(enabledPartitions.contains(MetadataPartitionType.FUNCTIONAL_INDEX), 
"FUNCTIONAL_INDEX should be enabled by availability");
+  }
+
+  @Test
+  public void testGetMetadataPartitionsNeedingWriteStatusTracking() {
+    List<MetadataPartitionType> trackingPartitions = 
MetadataPartitionType.getMetadataPartitionsNeedingWriteStatusTracking();
+    
assertTrue(trackingPartitions.contains(MetadataPartitionType.RECORD_INDEX), 
"RECORD_INDEX should need write status tracking");
+    assertEquals(1, trackingPartitions.size(), "Only one partition should need 
write status tracking");
+  }
+}

Reply via email to