This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 32adbe4dfb2 [HUDI-6256] Fix the data table archiving and MDT cleaning
config conf… (#8792)
32adbe4dfb2 is described below
commit 32adbe4dfb2a0976cb312c2fa14eb49f5a29a151
Author: flashJd <[email protected]>
AuthorDate: Fri Jun 2 09:22:17 2023 +0800
[HUDI-6256] Fix the data table archiving and MDT cleaning config conf…
(#8792)
* Fix the data table archiving and MDT cleaning config conflict
* Takes the MDT cleaning num commits as min(3, num_commits_DT), while 3 is
the hardcoded max cleaning num commits for MDT
---------
Co-authored-by: Danny Chan <[email protected]>
---
.../hudi/metadata/HoodieMetadataWriteUtils.java | 2 +-
.../functional/TestHoodieBackedMetadata.java | 40 ++++++++++++++++++++++
.../client/functional/TestHoodieMetadataBase.java | 2 +-
3 files changed, 42 insertions(+), 2 deletions(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java
index 5221f6523b0..df951ff3796 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java
@@ -93,7 +93,7 @@ public class HoodieMetadataWriteUtils {
.withCleanerParallelism(parallelism)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
.withFailedWritesCleaningPolicy(failedWritesCleaningPolicy)
- .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)
+ .retainCommits(Math.min(writeConfig.getCleanerCommitsRetained(), DEFAULT_METADATA_CLEANER_COMMITS_RETAINED))
.build())
// we will trigger archive manually, to ensure only regular writer
invokes it
.withArchivalConfig(HoodieArchivalConfig.newBuilder()
diff --git
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
index 10b134887c4..b540f97d806 100644
---
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
+++
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
@@ -538,6 +538,46 @@ public class TestHoodieBackedMetadata extends
TestHoodieMetadataBase {
assertEquals("0000004",
metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp());
}
+ @ParameterizedTest
+ @EnumSource(HoodieTableType.class)
+ public void testMetadataArchivalCleanConfig(HoodieTableType tableType)
throws Exception {
+ init(tableType, false);
+ writeConfig = getWriteConfigBuilder(true, true, false)
+ .withMetadataConfig(HoodieMetadataConfig.newBuilder()
+ .enable(true)
+ .enableMetrics(false)
+ .withMaxNumDeltaCommitsBeforeCompaction(1)
+ .build())
+ .withCleanConfig(HoodieCleanConfig.newBuilder()
+ .retainCommits(1)
+ .build())
+ .withArchivalConfig(HoodieArchivalConfig.newBuilder()
+ .archiveCommitsWith(2, 3)
+ .build())
+ .build();
+ initWriteConfigAndMetatableWriter(writeConfig, true);
+
+ AtomicInteger commitTime = new AtomicInteger(1);
+ // Trigger 4 regular writes in data table.
+ for (int i = 1; i <= 4; i++) {
+ doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()),
INSERT);
+ }
+
+ // The earliest deltacommit in the metadata table should be "0000001",
+ // and the "00000000000000" init deltacommit should be archived.
+ HoodieTableMetaClient metadataMetaClient =
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataTableBasePath).build();
+ HoodieActiveTimeline metadataTimeline =
metadataMetaClient.reloadActiveTimeline();
+ assertEquals("0000001",
metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp());
+
+ getHoodieWriteClient(writeConfig);
+ // Trigger data table archive, should archive "0000001", "0000002"
+ archiveDataTable(writeConfig,
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build());
+ // Trigger a regular write operation. metadata timeline archival should
kick in and catch up with data table.
+ doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()),
INSERT);
+ metadataTimeline = metadataMetaClient.reloadActiveTimeline();
+ assertEquals("0000003",
metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp());
+ }
+
@ParameterizedTest
@EnumSource(HoodieTableType.class)
public void testMetadataInsertUpsertClean(HoodieTableType tableType) throws
Exception {
diff --git
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
index 7974d9151a2..a8cd9a37739 100644
---
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
+++
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
@@ -400,7 +400,7 @@ public class TestHoodieMetadataBase extends
HoodieClientTestHarness {
.withCleanerParallelism(parallelism)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
- .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)
+ .retainCommits(Math.min(writeConfig.getCleanerCommitsRetained(), DEFAULT_METADATA_CLEANER_COMMITS_RETAINED))
.build())
// we will trigger archival manually, to control the instant times
.withArchivalConfig(HoodieArchivalConfig.newBuilder()