This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 9d689fd68f5 [HUDI-7648] Refactor MetadataPartitionType so as to
enahance reuse (#11067)
9d689fd68f5 is described below
commit 9d689fd68f544cbd2424790770cc2673701be928
Author: Sagar Sumit <[email protected]>
AuthorDate: Thu Apr 25 10:52:53 2024 +0530
[HUDI-7648] Refactor MetadataPartitionType so as to enahance reuse (#11067)
---
.../metadata/HoodieBackedTableMetadataWriter.java | 36 +----
.../hudi/metadata/MetadataPartitionType.java | 72 +++++++++-
.../hudi/metadata/TestMetadataPartitionType.java | 152 +++++++++++++++++++++
3 files changed, 223 insertions(+), 37 deletions(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index f20643a9e95..3064f4073f8 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -103,17 +103,17 @@ import static
org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION
import static
org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;
import static
org.apache.hudi.common.table.timeline.HoodieTimeline.getIndexInflightInstant;
import static
org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeIndexPlan;
+import static
org.apache.hudi.metadata.HoodieMetadataWriteUtils.createMetadataWriteConfig;
import static
org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX;
import static
org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP;
import static
org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions;
import static
org.apache.hudi.metadata.HoodieTableMetadataUtil.getPartitionLatestFileSlicesIncludingInflight;
import static
org.apache.hudi.metadata.HoodieTableMetadataUtil.getProjectedSchemaForFunctionalIndex;
import static
org.apache.hudi.metadata.HoodieTableMetadataUtil.readRecordKeysFromBaseFiles;
-import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;
-import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS;
import static org.apache.hudi.metadata.MetadataPartitionType.FILES;
import static org.apache.hudi.metadata.MetadataPartitionType.FUNCTIONAL_INDEX;
import static org.apache.hudi.metadata.MetadataPartitionType.RECORD_INDEX;
+import static
org.apache.hudi.metadata.MetadataPartitionType.getEnabledPartitions;
/**
* Writer implementation backed by an internal hudi table. Partition and file
listing are saved within an internal MOR table
@@ -167,21 +167,15 @@ public abstract class HoodieBackedTableMetadataWriter<I>
implements HoodieTableM
this.engineContext = engineContext;
this.hadoopConf = new SerializableConfiguration(hadoopConf);
this.metrics = Option.empty();
- this.enabledPartitionTypes = new ArrayList<>(4);
-
this.dataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf)
.setBasePath(dataWriteConfig.getBasePath())
.setTimeGeneratorConfig(dataWriteConfig.getTimeGeneratorConfig()).build();
-
+ this.enabledPartitionTypes =
getEnabledPartitions(dataWriteConfig.getProps(), dataMetaClient);
if (writeConfig.isMetadataTableEnabled()) {
- this.metadataWriteConfig =
HoodieMetadataWriteUtils.createMetadataWriteConfig(writeConfig,
failedWritesCleaningPolicy);
-
+ this.metadataWriteConfig = createMetadataWriteConfig(writeConfig,
failedWritesCleaningPolicy);
try {
- enablePartitions();
initRegistry();
-
initialized = initializeIfNeeded(dataMetaClient,
inflightInstantTimestamp);
-
} catch (IOException e) {
LOG.error("Failed to initialize metadata table", e);
}
@@ -202,28 +196,6 @@ public abstract class HoodieBackedTableMetadataWriter<I>
implements HoodieTableM
}
}
- /**
- * Enable metadata table partitions based on config.
- */
- private void enablePartitions() {
- final HoodieMetadataConfig metadataConfig =
dataWriteConfig.getMetadataConfig();
- if (dataWriteConfig.isMetadataTableEnabled() ||
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(FILES)) {
- this.enabledPartitionTypes.add(FILES);
- }
- if (metadataConfig.isBloomFilterIndexEnabled() ||
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(BLOOM_FILTERS)) {
- this.enabledPartitionTypes.add(BLOOM_FILTERS);
- }
- if (metadataConfig.isColumnStatsIndexEnabled() ||
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(COLUMN_STATS)) {
- this.enabledPartitionTypes.add(COLUMN_STATS);
- }
- if (dataWriteConfig.isRecordIndexEnabled() ||
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(RECORD_INDEX)) {
- this.enabledPartitionTypes.add(RECORD_INDEX);
- }
- if (dataMetaClient.getFunctionalIndexMetadata().isPresent()) {
- this.enabledPartitionTypes.add(FUNCTIONAL_INDEX);
- }
- }
-
protected abstract void initRegistry();
public HoodieWriteConfig getWriteConfig() {
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
index 91114067051..8c9a06a9df2 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
@@ -18,27 +18,80 @@
package org.apache.hudi.metadata;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
+import static org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE;
+import static
org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE_METADATA_INDEX_BLOOM_FILTER;
+import static
org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS;
+import static
org.apache.hudi.common.config.HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP;
+import static org.apache.hudi.common.util.ConfigUtils.getBooleanWithAltKeys;
+
/**
* Partition types for metadata table.
*/
public enum MetadataPartitionType {
- FILES(HoodieTableMetadataUtil.PARTITION_NAME_FILES, "files-"),
- COLUMN_STATS(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS,
"col-stats-"),
- BLOOM_FILTERS(HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS,
"bloom-filters-"),
- RECORD_INDEX(HoodieTableMetadataUtil.PARTITION_NAME_RECORD_INDEX,
"record-index-"),
-
FUNCTIONAL_INDEX(HoodieTableMetadataUtil.PARTITION_NAME_FUNCTIONAL_INDEX_PREFIX,
"func-index-");
+ FILES(HoodieTableMetadataUtil.PARTITION_NAME_FILES, "files-") {
+ @Override
+ public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+ return getBooleanWithAltKeys(writeConfig, ENABLE);
+ }
+ },
+ COLUMN_STATS(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS,
"col-stats-") {
+ @Override
+ public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+ return getBooleanWithAltKeys(writeConfig,
ENABLE_METADATA_INDEX_COLUMN_STATS);
+ }
+ },
+ BLOOM_FILTERS(HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS,
"bloom-filters-") {
+ @Override
+ public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+ return getBooleanWithAltKeys(writeConfig,
ENABLE_METADATA_INDEX_BLOOM_FILTER);
+ }
+ },
+ RECORD_INDEX(HoodieTableMetadataUtil.PARTITION_NAME_RECORD_INDEX,
"record-index-") {
+ @Override
+ public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+ return getBooleanWithAltKeys(writeConfig, RECORD_INDEX_ENABLE_PROP);
+ }
+ },
+
FUNCTIONAL_INDEX(HoodieTableMetadataUtil.PARTITION_NAME_FUNCTIONAL_INDEX_PREFIX,
"func-index-") {
+ @Override
+ public boolean isMetadataPartitionEnabled(TypedProperties writeConfig) {
+ // Functional index is created via sql and not via write path.
+ // HUDI-7662 tracks adding a separate config to enable/disable
functional index.
+ return false;
+ }
+
+ @Override
+ public boolean isMetadataPartitionAvailable(HoodieTableMetaClient
metaClient) {
+ return metaClient.getFunctionalIndexMetadata().isPresent();
+ }
+ };
// Partition path in metadata table.
private final String partitionPath;
// FileId prefix used for all file groups in this partition.
private final String fileIdPrefix;
+ /**
+ * Check if the metadata partition is enabled based on the metadata config.
+ */
+ public abstract boolean isMetadataPartitionEnabled(TypedProperties
writeConfig);
+
+ /**
+ * Check if the metadata partition is available based on the table config.
+ */
+ public boolean isMetadataPartitionAvailable(HoodieTableMetaClient
metaClient) {
+ return metaClient.getTableConfig().isMetadataPartitionAvailable(this);
+ }
+
MetadataPartitionType(final String partitionPath, final String fileIdPrefix)
{
this.partitionPath = partitionPath;
this.fileIdPrefix = fileIdPrefix;
@@ -70,6 +123,15 @@ public enum MetadataPartitionType {
.collect(Collectors.toSet());
}
+ /**
+ * Returns the list of metadata partition types enabled based on the
metadata config and table config.
+ */
+ public static List<MetadataPartitionType>
getEnabledPartitions(TypedProperties writeConfig, HoodieTableMetaClient
metaClient) {
+ return Arrays.stream(values())
+ .filter(partitionType ->
partitionType.isMetadataPartitionEnabled(writeConfig) ||
partitionType.isMetadataPartitionAvailable(metaClient))
+ .collect(Collectors.toList());
+ }
+
@Override
public String toString() {
return "Metadata partition {"
diff --git
a/hudi-common/src/test/java/org/apache/hudi/metadata/TestMetadataPartitionType.java
b/hudi-common/src/test/java/org/apache/hudi/metadata/TestMetadataPartitionType.java
new file mode 100644
index 00000000000..927934f1335
--- /dev/null
+++
b/hudi-common/src/test/java/org/apache/hudi/metadata/TestMetadataPartitionType.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.metadata;
+
+import org.apache.hudi.common.config.HoodieMetadataConfig;
+import org.apache.hudi.common.model.HoodieFunctionalIndexMetadata;
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.Option;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EnumSource;
+import org.mockito.Mockito;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Tests for {@link MetadataPartitionType}.
+ */
+public class TestMetadataPartitionType {
+
+ @ParameterizedTest
+ @EnumSource(MetadataPartitionType.class)
+ public void testPartitionEnabledByConfigOnly(MetadataPartitionType
partitionType) {
+ HoodieTableMetaClient metaClient =
Mockito.mock(HoodieTableMetaClient.class);
+ HoodieTableConfig tableConfig = Mockito.mock(HoodieTableConfig.class);
+
+ // Simulate the configuration enabling given partition type, but the meta
client not having it available (yet to initialize the partition)
+ Mockito.when(metaClient.getTableConfig()).thenReturn(tableConfig);
+
Mockito.when(tableConfig.isMetadataPartitionAvailable(partitionType)).thenReturn(false);
+
Mockito.when(metaClient.getFunctionalIndexMetadata()).thenReturn(Option.empty());
+ HoodieMetadataConfig.Builder metadataConfigBuilder =
HoodieMetadataConfig.newBuilder();
+ int expectedEnabledPartitions;
+ switch (partitionType) {
+ case FILES:
+ case FUNCTIONAL_INDEX:
+ metadataConfigBuilder.enable(true);
+ expectedEnabledPartitions = 1;
+ break;
+ case COLUMN_STATS:
+ metadataConfigBuilder.enable(true).withMetadataIndexColumnStats(true);
+ expectedEnabledPartitions = 2;
+ break;
+ case BLOOM_FILTERS:
+ metadataConfigBuilder.enable(true).withMetadataIndexBloomFilter(true);
+ expectedEnabledPartitions = 2;
+ break;
+ case RECORD_INDEX:
+ metadataConfigBuilder.enable(true).withEnableRecordIndex(true);
+ expectedEnabledPartitions = 2;
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown partition type: " +
partitionType);
+ }
+
+ List<MetadataPartitionType> enabledPartitions =
MetadataPartitionType.getEnabledPartitions(metadataConfigBuilder.build().getProps(),
metaClient);
+
+ // Verify partition type is enabled due to config
+ if (partitionType == MetadataPartitionType.FUNCTIONAL_INDEX) {
+ assertEquals(1, enabledPartitions.size(), "FUNCTIONAL_INDEX should be
enabled by SQL, only FILES is enabled in this case.");
+ assertTrue(enabledPartitions.contains(MetadataPartitionType.FILES));
+ } else {
+ assertEquals(expectedEnabledPartitions, enabledPartitions.size());
+ assertTrue(enabledPartitions.contains(partitionType));
+ }
+ }
+
+ @Test
+ public void testPartitionAvailableByMetaClientOnly() {
+ HoodieTableMetaClient metaClient =
Mockito.mock(HoodieTableMetaClient.class);
+ HoodieTableConfig tableConfig = Mockito.mock(HoodieTableConfig.class);
+
+ // Simulate the meta client having RECORD_INDEX available but config not
enabling it
+ Mockito.when(metaClient.getTableConfig()).thenReturn(tableConfig);
+
Mockito.when(tableConfig.isMetadataPartitionAvailable(MetadataPartitionType.FILES)).thenReturn(true);
+
Mockito.when(metaClient.getFunctionalIndexMetadata()).thenReturn(Option.empty());
+
Mockito.when(metaClient.getTableConfig().isMetadataPartitionAvailable(MetadataPartitionType.RECORD_INDEX)).thenReturn(true);
+ HoodieMetadataConfig metadataConfig =
HoodieMetadataConfig.newBuilder().enable(true).withEnableRecordIndex(false).build();
+
+ List<MetadataPartitionType> enabledPartitions =
MetadataPartitionType.getEnabledPartitions(metadataConfig.getProps(),
metaClient);
+
+ // Verify RECORD_INDEX and FILES is enabled due to availability
+ assertEquals(2, enabledPartitions.size(), "RECORD_INDEX and FILES should
be available");
+ assertTrue(enabledPartitions.contains(MetadataPartitionType.FILES), "FILES
should be enabled by availability");
+ assertTrue(enabledPartitions.contains(MetadataPartitionType.RECORD_INDEX),
"RECORD_INDEX should be enabled by availability");
+ }
+
+ @Test
+ public void testNoPartitionsEnabled() {
+ HoodieTableMetaClient metaClient =
Mockito.mock(HoodieTableMetaClient.class);
+ HoodieTableConfig tableConfig = Mockito.mock(HoodieTableConfig.class);
+
+ // Neither config nor availability allows any partitions
+ Mockito.when(metaClient.getTableConfig()).thenReturn(tableConfig);
+
Mockito.when(metaClient.getFunctionalIndexMetadata()).thenReturn(Option.empty());
+
Mockito.when(metaClient.getTableConfig().isMetadataPartitionAvailable(Mockito.any())).thenReturn(false);
+ HoodieMetadataConfig metadataConfig =
HoodieMetadataConfig.newBuilder().enable(false).build();
+
+ List<MetadataPartitionType> enabledPartitions =
MetadataPartitionType.getEnabledPartitions(metadataConfig.getProps(),
metaClient);
+
+ // Verify no partitions are enabled
+ assertTrue(enabledPartitions.isEmpty(), "No partitions should be enabled");
+ }
+
+ @Test
+ public void testFunctionalIndexPartitionEnabled() {
+ HoodieTableMetaClient metaClient =
Mockito.mock(HoodieTableMetaClient.class);
+ HoodieTableConfig tableConfig = Mockito.mock(HoodieTableConfig.class);
+
+ // Simulate the meta client having FUNCTIONAL_INDEX available
+ Mockito.when(metaClient.getTableConfig()).thenReturn(tableConfig);
+
Mockito.when(tableConfig.isMetadataPartitionAvailable(MetadataPartitionType.FILES)).thenReturn(true);
+
Mockito.when(metaClient.getFunctionalIndexMetadata()).thenReturn(Option.of(Mockito.mock(HoodieFunctionalIndexMetadata.class)));
+
Mockito.when(metaClient.getTableConfig().isMetadataPartitionAvailable(MetadataPartitionType.FUNCTIONAL_INDEX)).thenReturn(true);
+ HoodieMetadataConfig metadataConfig =
HoodieMetadataConfig.newBuilder().enable(true).build();
+
+ List<MetadataPartitionType> enabledPartitions =
MetadataPartitionType.getEnabledPartitions(metadataConfig.getProps(),
metaClient);
+
+ // Verify FUNCTIONAL_INDEX and FILES is enabled due to availability
+ assertEquals(2, enabledPartitions.size(), "FUNCTIONAL_INDEX and FILES
should be available");
+ assertTrue(enabledPartitions.contains(MetadataPartitionType.FILES), "FILES
should be enabled by availability");
+
assertTrue(enabledPartitions.contains(MetadataPartitionType.FUNCTIONAL_INDEX),
"FUNCTIONAL_INDEX should be enabled by availability");
+ }
+
+ @Test
+ public void testGetMetadataPartitionsNeedingWriteStatusTracking() {
+ List<MetadataPartitionType> trackingPartitions =
MetadataPartitionType.getMetadataPartitionsNeedingWriteStatusTracking();
+
assertTrue(trackingPartitions.contains(MetadataPartitionType.RECORD_INDEX),
"RECORD_INDEX should need write status tracking");
+ assertEquals(1, trackingPartitions.size(), "Only one partition should need
write status tracking");
+ }
+}