smarthi commented on a change in pull request #1449: [HUDI-698]Add unit test for CleansCommand URL: https://github.com/apache/incubator-hudi/pull/1449#discussion_r399828686
########## File path: hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.cli.commands; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hudi.cli.AbstractShellIntegrationTest; +import org.apache.hudi.cli.HoodieCLI; +import org.apache.hudi.cli.HoodiePrintHelper; +import org.apache.hudi.cli.TableHeader; +import org.apache.hudi.cli.common.HoodieTestCommitMetadataGenerator; +import org.apache.hudi.common.model.HoodieCleaningPolicy; +import org.apache.hudi.common.model.HoodiePartitionMetadata; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.HoodieTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.parquet.Strings; +import org.junit.Before; +import org.junit.Test; +import org.springframework.shell.core.CommandResult; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import 
java.util.Map; +import java.util.Iterator; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Test Cases for {@link CleansCommand}. + */ +public class TestCleansCommand extends AbstractShellIntegrationTest { + + private String tablePath; + private String propsFilePath; + + @Before + public void init() throws IOException { + String tableName = "test_table"; + tablePath = basePath + File.separator + tableName; + String localPropsFilePath = this.getClass().getClassLoader().getResource("clean.properties").getPath(); + propsFilePath = "/tmp/clean.properties"; + initDFS(); + jsc.hadoopConfiguration().addResource(dfs.getConf()); + HoodieCLI.conf = dfs.getConf(); + + dfs.mkdir(new Path("/tmp"), FsPermission.getDefault()); + dfs.copyFromLocalFile(new Path(localPropsFilePath), new Path("/tmp")); + + // Create table and connect + new TableCommand().createTable( + tablePath, tableName, + "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload"); + + Configuration conf = HoodieCLI.conf; + + metaClient = HoodieCLI.getTableMetaClient(); + // Create four commits + for (int i = 100; i < 104; i++) { + String timestamp = String.valueOf(i); + // Requested Compaction + HoodieTestCommitMetadataGenerator.createCompactionAuxiliaryMetadata(tablePath, + new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, timestamp), conf); + // Inflight Compaction + HoodieTestCommitMetadataGenerator.createCompactionAuxiliaryMetadata(tablePath, + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, timestamp), conf); + HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, timestamp, conf); + } + + metaClient = HoodieTableMetaClient.reload(metaClient); + // reload the timeline and get all the commits before archive + metaClient.getActiveTimeline().reload(); + } + + /** + * Test case for show all cleans. 
+ */ + @Test + public void testShowCleans() throws Exception { + // First, run clean + dfs.create(new Path(tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH + + File.separator + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)); + + SparkMain.clean(jsc, HoodieCLI.basePath, "local", propsFilePath, "2G", new ArrayList<>()); + assertEquals("Loaded 1 clean and the count should match", 1, + metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count()); + + CommandResult cr = getShell().executeCommand("cleans show"); + assertTrue(cr.isSuccess()); + + HoodieInstant clean = metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().findFirst().get(); + TableHeader header = + new TableHeader().addTableHeaderField("CleanTime").addTableHeaderField("EarliestCommandRetained") + .addTableHeaderField("Total Files Deleted").addTableHeaderField("Total Time Taken"); + List<Comparable[]> rows = new ArrayList<>(); + + // EarliestCommandRetained should be 102, since hoodie.cleaner.commits.retained=2 + // Total Time Taken should be -1, since hoodie.metrics.on is false by default + rows.add(new Comparable[]{clean.getTimestamp(), "102", "0", "-1"}); + + String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows); + assertEquals(expected, cr.getResult().toString()); + } + + /** + * Test case for cleans run. + */ + @Test + public void testRunClean() throws IOException, InterruptedException, URISyntaxException { + // First, there should none of clean instant. 
+ assertEquals(0, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count()); + + // Create partition metadata + dfs.create(new Path(tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH + + File.separator + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)); + dfs.create(new Path(tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH + + File.separator + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)); + + SparkEnvCommand.env.put("SPARK_MASTER", "local"); + + List<String> configs = new ArrayList(); + Iterator<Map.Entry<String, String>> iterator = dfs.getConf().iterator(); + while (iterator.hasNext()) { + Map.Entry<String, String> e = iterator.next(); + configs.add(e.getKey() + "=" + e.getValue()); + } + String hadoopConf = Strings.join(configs, " "); Review comment: Use standard Java for this, e.g. String.join(" ", configs), instead of org.apache.parquet.Strings.join(). ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
