[GitHub] [incubator-hudi] yanghua commented on a change in pull request #1554: [HUDI-704] Add test for RepairsCommand
yanghua commented on a change in pull request #1554: URL: https://github.com/apache/incubator-hudi/pull/1554#discussion_r420784920 ## File path: hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java ## @@ -64,19 +69,35 @@ public String deduplicate( @CliOption(key = {"repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true) final String repairedOutputPath, @CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path", - mandatory = true) final String sparkPropertiesPath) + unspecifiedDefaultValue = "") String sparkPropertiesPath, + @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master, Review comment: `"Spark Master "` -> `"Spark Master"`? ## File path: hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java ## @@ -64,19 +69,35 @@ public String deduplicate( @CliOption(key = {"repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true) final String repairedOutputPath, @CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path", - mandatory = true) final String sparkPropertiesPath) + unspecifiedDefaultValue = "") String sparkPropertiesPath, + @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master, + @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", + help = "Spark executor memory") final String sparkMemory, + @CliOption(key = {"dryrun"}, + help = "Should we actually remove duplicates or just run and store result to repairedOutputPath", + unspecifiedDefaultValue = "true") final boolean dryRun) throws Exception { +if (StringUtils.isNullOrEmpty(sparkPropertiesPath)) { + sparkPropertiesPath = + Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala()); +} + SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); -sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), 
duplicatedPartitionPath, repairedOutputPath, -HoodieCLI.getTableMetaClient().getBasePath()); +sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), master, sparkMemory, Review comment: The same suggestion, we should try to define a data structure? We can refactor it later. ## File path: hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java ## @@ -73,8 +73,8 @@ public static void main(String[] args) throws Exception { returnCode = rollback(jsc, args[1], args[2]); break; case DEDUPLICATE: -assert (args.length == 4); -returnCode = deduplicatePartitionPath(jsc, args[1], args[2], args[3]); +assert (args.length == 7); +returnCode = deduplicatePartitionPath(jsc, args[3], args[4], args[5], args[6]); Review comment: IMHO, we also need to refactor the arg parse. But not in this PR. ## File path: hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java ## @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.cli.integ; + +import org.apache.avro.Schema; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.cli.AbstractShellIntegrationTest; +import org.apache.hudi.cli.HoodieCLI; +import org.apache.hudi.cli.commands.RepairsCommand; +import org.apache.hudi.cli.commands.TableCommand; +import org.apache.hudi.common.HoodieClientTestUtils; +import org.apache.hudi.common.HoodieTestDataGenerator; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; +import
[GitHub] [incubator-hudi] yanghua commented on a change in pull request #1554: [HUDI-704] Add test for RepairsCommand
yanghua commented on a change in pull request #1554: URL: https://github.com/apache/incubator-hudi/pull/1554#discussion_r418430527 ## File path: hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java ## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.cli.commands; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.cli.AbstractShellIntegrationTest; +import org.apache.hudi.cli.HoodieCLI; +import org.apache.hudi.cli.HoodiePrintHelper; +import org.apache.hudi.cli.HoodieTableHeaderFields; +import org.apache.hudi.cli.common.HoodieTestCommitMetadataGenerator; +import org.apache.hudi.common.HoodieTestDataGenerator; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.springframework.shell.core.CommandResult; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.net.URL; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test class for {@link RepairsCommand}. + */ +public class TestRepairsCommand extends AbstractShellIntegrationTest { + + private String tablePath; + + @Before + public void init() throws IOException { +String tableName = "test_table"; +tablePath = basePath + File.separator + tableName; + +// Create table and connect +new TableCommand().createTable( +tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), +"", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); + } + + /** + * Test case for dry run 'repair addpartitionmeta'. 
+ */ + @Test + public void testAddPartitionMetaWithDryRun() throws IOException { +// create commit instant +new File(tablePath + "/.hoodie/100.commit").createNewFile(); + +// create partition path +String partition1 = tablePath + File.separator + HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH; +String partition2 = tablePath + File.separator + HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH; +String partition3 = tablePath + File.separator + HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH; +fs.mkdirs(new Path(partition1)); +fs.mkdirs(new Path(partition2)); +fs.mkdirs(new Path(partition3)); + +// default is dry run. +CommandResult cr = getShell().executeCommand("repair addpartitionmeta"); +Assert.assertTrue(cr.isSuccess()); + +// expected all 'No'. +String[][] rows = FSUtils.getAllPartitionFoldersThreeLevelsDown(fs, tablePath) +.stream() +.map(partition -> new String[]{partition, "No", "None"}) +.toArray(String[][]::new); +String expected = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_PARTITION_PATH, +HoodieTableHeaderFields.HEADER_METADATA_PRESENT, HoodieTableHeaderFields.HEADER_REPAIR_ACTION}, rows); + +Assert.assertEquals(expected, cr.getResult().toString()); + } + + /** + * Test case for real run 'repair addpartitionmeta'. + */ + @Test + public void testAddPartitionMetaWithRealRun() throws IOException { +// create commit instant +new File(tablePath + "/.hoodie/100.commit").createNewFile(); Review comment: ditto ## File path: hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java ## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + *