This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new a5e5c7ba1e2 [HUDI-6675] Fix Clean action will delete the whole table (#9413)
a5e5c7ba1e2 is described below
commit a5e5c7ba1e2db1b3d8d8fd6da441998d54e57a80
Author: leosanqing <[email protected]>
AuthorDate: Mon Aug 14 11:28:14 2023 +0800
[HUDI-6675] Fix Clean action will delete the whole table (#9413)
The clean action mistakenly deletes the whole table when the table is
non-partitioned.
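
As the change below suggests, a non-partitioned table keeps its files directly
under the base path with an empty partition path, so concatenating the base
path with that empty entry points the recursive delete at the table root. A
minimal, self-contained sketch of that path arithmetic (hypothetical class
name and paths, not the Hudi code itself):

    // Sketch only: shows why an empty partition path collapses to the table root.
    public class CleanPathSketch {
      public static void main(String[] args) {
        String basePath = "/tmp/hoodie/test_table"; // hypothetical table location
        String partitionPath = "";                  // non-partitioned tables use the empty path
        String deleteTarget = basePath + "/" + partitionPath;
        // Prints "/tmp/hoodie/test_table/": a recursive delete of this path removes the whole table.
        System.out.println(deleteTarget);
      }
    }

The fix below therefore skips base-path deletion for non-partitioned tables
and guards the delete call with an empty-path check.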
---------
Co-authored-by: Sagar Sumit <[email protected]>
---
.../table/action/clean/CleanActionExecutor.java | 10 ++++-
.../java/org/apache/hudi/table/TestCleaner.java | 51 ++++++++++++++++++++++
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
index c04f1ba8f21..05e1056324a 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
@@ -54,6 +54,8 @@ import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
+
public class CleanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieCleanMetadata> {
private static final long serialVersionUID = 1L;
@@ -144,10 +146,14 @@ public class CleanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K,
      Map<String, PartitionCleanStat> partitionCleanStatsMap = partitionCleanStats
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
-      List<String> partitionsToBeDeleted = cleanerPlan.getPartitionsToBeDeleted() != null ? cleanerPlan.getPartitionsToBeDeleted() : new ArrayList<>();
+      List<String> partitionsToBeDeleted = table.getMetaClient().getTableConfig().isTablePartitioned() && cleanerPlan.getPartitionsToBeDeleted() != null
+          ? cleanerPlan.getPartitionsToBeDeleted()
+          : new ArrayList<>();
partitionsToBeDeleted.forEach(entry -> {
try {
-          deleteFileAndGetResult(table.getMetaClient().getFs(), table.getMetaClient().getBasePath() + "/" + entry);
+          if (!isNullOrEmpty(entry)) {
+            deleteFileAndGetResult(table.getMetaClient().getFs(), table.getMetaClient().getBasePath() + "/" + entry);
+          }
} catch (IOException e) {
LOG.warn("Partition deletion failed " + entry);
}
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
index f8d37e859d8..c2aceae0b52 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
@@ -36,6 +36,7 @@ import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
+import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieTableType;
@@ -94,6 +95,7 @@ import java.util.stream.Stream;
import scala.Tuple3;
+import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.NO_PARTITION_PATH;
import static org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime;
import static org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS;
import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors;
@@ -352,6 +354,55 @@ public class TestCleaner extends HoodieCleanerTestBase {
}
}
+  /**
+   * Test clean non-partitioned table.
+   * This test is to ensure that the clean action does not clean the whole table data.
+   */
+  @Test
+  public void testCleanNonPartitionedTable() throws IOException {
+    HoodieWriteConfig writeConfig = getConfigBuilder().withPath(basePath)
+        .withFileSystemViewConfig(new FileSystemViewStorageConfig.Builder()
+            .withEnableBackupForRemoteFileSystemView(false)
+            .build())
+        .withCleanConfig(HoodieCleanConfig.newBuilder()
+            .withAutoClean(false)
+            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
+            .retainCommits(1)
+            .build())
+        .withEmbeddedTimelineServerEnabled(false).build();
+    // datagen for non-partitioned table
+    initTestDataGenerator(new String[] {NO_PARTITION_PATH});
+    // init non-partitioned table
+    HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE, HoodieFileFormat.PARQUET,
+        true, "org.apache.hudi.keygen.NonpartitionedKeyGenerator", true);
+
+    try (SparkRDDWriteClient client = new SparkRDDWriteClient(context, writeConfig)) {
+      String instantTime;
+      for (int idx = 0; idx < 3; ++idx) {
+        instantTime = HoodieActiveTimeline.createNewInstantTime();
+        List<HoodieRecord> records = dataGen.generateInserts(instantTime, 1);
+        client.startCommitWithTime(instantTime);
+        client.insert(jsc.parallelize(records, 1), instantTime).collect();
+      }
+
+      instantTime = HoodieActiveTimeline.createNewInstantTime();
+      HoodieTable table = HoodieSparkTable.create(writeConfig, context);
+      Option<HoodieCleanerPlan> cleanPlan = table.scheduleCleaning(context, instantTime, Option.empty());
+      assertEquals(cleanPlan.get().getPartitionsToBeDeleted().size(), 0);
+      assertEquals(cleanPlan.get().getFilePathsToBeDeletedPerPartition().get(NO_PARTITION_PATH).size(), 1);
+      table.getMetaClient().reloadActiveTimeline();
+      String filePathToClean = cleanPlan.get().getFilePathsToBeDeletedPerPartition().get(NO_PARTITION_PATH).get(0).getFilePath();
+      // clean
+      HoodieCleanMetadata cleanMetadata = table.clean(context, instantTime);
+      // check the cleaned file
+      assertEquals(cleanMetadata.getPartitionMetadata().get(NO_PARTITION_PATH).getSuccessDeleteFiles().size(), 1);
+      assertTrue(filePathToClean.contains(cleanMetadata.getPartitionMetadata().get(NO_PARTITION_PATH).getSuccessDeleteFiles().get(0)));
+      // ensure table is not fully cleaned and has a file group
+      assertTrue(FSUtils.isTableExists(basePath, fs));
+      assertTrue(table.getFileSystemView().getAllFileGroups(NO_PARTITION_PATH).findAny().isPresent());
+    }
+  }
+
  /**
   * Tests no more than 1 clean is scheduled if hoodie.clean.allow.multiple config is set to false.
   */