hddong commented on a change in pull request #1558: URL: https://github.com/apache/incubator-hudi/pull/1558#discussion_r428056297
########## File path: hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java ########## @@ -263,13 +265,26 @@ private static int compact(JavaSparkContext jsc, String basePath, String tableNa } private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplicatedPartitionPath, - String repairedOutputPath, String basePath, String dryRun) { + String repairedOutputPath, String basePath, boolean dryRun, String dedupeType) { DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath, new SQLContext(jsc), - FSUtils.getFs(basePath, jsc.hadoopConfiguration())); - job.fixDuplicates(Boolean.parseBoolean(dryRun)); + FSUtils.getFs(basePath, jsc.hadoopConfiguration()), getDedupeType(dedupeType)); + job.fixDuplicates(dryRun); return 0; } + private static Enumeration.Value getDedupeType(String type) { + switch (type) { + case "insertType": + return DeDupeType.insertType(); + case "updateType": + return DeDupeType.updateType(); + case "upsertType": + return DeDupeType.upsertType(); + default: + throw new IllegalArgumentException("Please provide valid dedupe type!"); + } + } + Review comment: You can use `DeDupeType.withName("insertType")` here instead of the hand-written `switch` in `getDedupeType`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org