This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new d25e7609886 [HUDI-5924] Fixing cli clean command to trim down a subset 
based on start and end (#8169)
d25e7609886 is described below

commit d25e760988690ed9dc2ce0f74996c004940275b6
Author: Sivabalan Narayanan <[email protected]>
AuthorDate: Sat Sep 23 23:06:02 2023 -0400

    [HUDI-5924] Fixing cli clean command to trim down a subset based on start 
and end (#8169)
    
    Adds support to trim the timeline for hudi cli clean command. Also adds 
option to load from archive timeline.
    
    Co-authored-by: Y Ethan Guo <[email protected]>
---
 .../apache/hudi/cli/commands/CleansCommand.java    | 10 +++-
 .../org/apache/hudi/cli/commands/DiffCommand.java  | 41 +++-----------
 .../java/org/apache/hudi/cli/utils/CLIUtils.java   | 64 ++++++++++++++++++++++
 3 files changed, 80 insertions(+), 35 deletions(-)

diff --git 
a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java 
b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
index de0e4aa1098..c650f2ec4d7 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
@@ -24,10 +24,12 @@ import org.apache.hudi.cli.HoodieCLI;
 import org.apache.hudi.cli.HoodiePrintHelper;
 import org.apache.hudi.cli.HoodieTableHeaderFields;
 import org.apache.hudi.cli.TableHeader;
+import org.apache.hudi.cli.utils.CLIUtils;
 import org.apache.hudi.cli.utils.InputStreamConsumer;
 import org.apache.hudi.cli.utils.SparkUtil;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
@@ -57,12 +59,18 @@ public class CleansCommand {
   public String showCleans(
       @ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = 
"-1") final Integer limit,
       @ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue 
= "") final String sortByField,
+      @ShellOption(value = {"--startTs"}, help = "start time for cleans, 
default: now - 10 days",
+          defaultValue = ShellOption.NULL) String startTs,
+      @ShellOption(value = {"--endTs"}, help = "end time for clean, default: 
upto latest",
+          defaultValue = ShellOption.NULL) String endTs,
+      @ShellOption(value = {"--includeArchivedTimeline"}, help = "Include 
archived commits as well",
+          defaultValue = "false") final boolean includeArchivedTimeline,
       @ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = 
"false") final boolean descending,
       @ShellOption(value = {"--headeronly"}, help = "Print Header Only",
           defaultValue = "false") final boolean headerOnly)
       throws IOException {
 
-    HoodieActiveTimeline activeTimeline = 
HoodieCLI.getTableMetaClient().getActiveTimeline();
+    HoodieDefaultTimeline activeTimeline = 
CLIUtils.getTimelineInRange(startTs, endTs, includeArchivedTimeline);
     HoodieTimeline timeline = 
activeTimeline.getCleanerTimeline().filterCompletedInstants();
     List<HoodieInstant> cleans = 
timeline.getReverseOrderedInstants().collect(Collectors.toList());
     List<Comparable[]> rows = new ArrayList<>();
diff --git 
a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/DiffCommand.java 
b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/DiffCommand.java
index 01e6da421a6..9d0780751b4 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/DiffCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/DiffCommand.java
@@ -19,14 +19,11 @@
 
 package org.apache.hudi.cli.commands;
 
-import org.apache.hudi.cli.HoodieCLI;
 import org.apache.hudi.cli.HoodiePrintHelper;
 import org.apache.hudi.cli.HoodieTableHeaderFields;
+import org.apache.hudi.cli.utils.CLIUtils;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieWriteStat;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
 import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.util.NumericUtils;
@@ -45,11 +42,6 @@ import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
-import static org.apache.hudi.cli.utils.CommitUtil.getTimeDaysAgo;
-import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
-import static org.apache.hudi.common.util.StringUtils.nonEmpty;
-import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
-
 /**
  * Given a file id or partition value, this command line utility tracks the 
changes to the file group or partition across range of commits.
  * Usage: diff file --fileId <fileId>
@@ -64,16 +56,16 @@ public class DiffCommand {
   public String diffFile(
       @ShellOption(value = {"--fileId"}, help = "File ID to diff across range 
of commits") String fileId,
       @ShellOption(value = {"--startTs"}, help = "start time for compactions, 
default: now - 10 days",
-              defaultValue = ShellOption.NULL) String startTs,
+          defaultValue = ShellOption.NULL) String startTs,
       @ShellOption(value = {"--endTs"}, help = "end time for compactions, 
default: now - 1 day",
-              defaultValue = ShellOption.NULL) String endTs,
+          defaultValue = ShellOption.NULL) String endTs,
       @ShellOption(value = {"--limit"}, help = "Limit compactions", 
defaultValue = "-1") final Integer limit,
       @ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue 
= "") final String sortByField,
       @ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = 
"false") final boolean descending,
       @ShellOption(value = {"--headeronly"}, help = "Print Header Only", 
defaultValue = "false") final boolean headerOnly,
       @ShellOption(value = {"--includeArchivedTimeline"}, help = "Include 
archived commits as well",
           defaultValue = "false") final boolean includeArchivedTimeline) 
throws IOException {
-    HoodieDefaultTimeline timeline = getTimelineInRange(startTs, endTs, 
includeArchivedTimeline);
+    HoodieDefaultTimeline timeline = CLIUtils.getTimelineInRange(startTs, 
endTs, includeArchivedTimeline);
     return printCommitsWithMetadataForFileId(timeline, limit, sortByField, 
descending, headerOnly, "", fileId);
   }
 
@@ -81,38 +73,19 @@ public class DiffCommand {
   public String diffPartition(
       @ShellOption(value = {"--partitionPath"}, help = "Relative partition 
path to diff across range of commits") String partitionPath,
       @ShellOption(value = {"--startTs"}, help = "start time for compactions, 
default: now - 10 days",
-              defaultValue = ShellOption.NULL) String startTs,
+          defaultValue = ShellOption.NULL) String startTs,
       @ShellOption(value = {"--endTs"}, help = "end time for compactions, 
default: now - 1 day",
-              defaultValue = ShellOption.NULL) String endTs,
+          defaultValue = ShellOption.NULL) String endTs,
       @ShellOption(value = {"--limit"}, help = "Limit compactions", 
defaultValue = "-1") final Integer limit,
       @ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue 
= "") final String sortByField,
       @ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = 
"false") final boolean descending,
       @ShellOption(value = {"--headeronly"}, help = "Print Header Only", 
defaultValue = "false") final boolean headerOnly,
       @ShellOption(value = {"--includeArchivedTimeline"}, help = "Include 
archived commits as well",
           defaultValue = "false") final boolean includeArchivedTimeline) 
throws IOException {
-    HoodieDefaultTimeline timeline = getTimelineInRange(startTs, endTs, 
includeArchivedTimeline);
+    HoodieDefaultTimeline timeline = CLIUtils.getTimelineInRange(startTs, 
endTs, includeArchivedTimeline);
     return printCommitsWithMetadataForPartition(timeline, limit, sortByField, 
descending, headerOnly, "", partitionPath);
   }
 
-  private HoodieDefaultTimeline getTimelineInRange(String startTs, String 
endTs, boolean includeArchivedTimeline) {
-    if (isNullOrEmpty(startTs)) {
-      startTs = getTimeDaysAgo(10);
-    }
-    if (isNullOrEmpty(endTs)) {
-      endTs = getTimeDaysAgo(1);
-    }
-    checkArgument(nonEmpty(startTs), "startTs is null or empty");
-    checkArgument(nonEmpty(endTs), "endTs is null or empty");
-    HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
-    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
-    if (includeArchivedTimeline) {
-      HoodieArchivedTimeline archivedTimeline = 
metaClient.getArchivedTimeline();
-      archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
-      return archivedTimeline.findInstantsInRange(startTs, 
endTs).mergeTimeline(activeTimeline);
-    }
-    return activeTimeline;
-  }
-
   private String printCommitsWithMetadataForFileId(HoodieDefaultTimeline 
timeline,
                                                    final Integer limit,
                                                    final String sortByField,
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CLIUtils.java 
b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CLIUtils.java
new file mode 100644
index 00000000000..f04418e1898
--- /dev/null
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CLIUtils.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.cli.utils;
+
+import org.apache.hudi.cli.HoodieCLI;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
+import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
+
+import static org.apache.hudi.cli.utils.CommitUtil.getTimeDaysAgo;
+import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
+import static org.apache.hudi.common.util.StringUtils.nonEmpty;
+import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
+
+/**
+ * Utils class for cli commands.
+ */
+public class CLIUtils {
+  /**
+   * Gets a {@link HoodieDefaultTimeline} instance containing the instants in 
the specified range.
+   *
+   * @param startTs                 Start instant time.
+   * @param endTs                   End instant time.
+   * @param includeArchivedTimeline Whether to include instants from the 
archived timeline.
+   * @return a {@link HoodieDefaultTimeline} instance containing the instants 
in the specified range.
+   */
+  public static HoodieDefaultTimeline getTimelineInRange(String startTs, 
String endTs, boolean includeArchivedTimeline) {
+    if (isNullOrEmpty(startTs)) {
+      startTs = getTimeDaysAgo(10);
+    }
+    if (isNullOrEmpty(endTs)) {
+      endTs = getTimeDaysAgo(1);
+    }
+    checkArgument(nonEmpty(startTs), "startTs is null or empty");
+    checkArgument(nonEmpty(endTs), "endTs is null or empty");
+    HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
+    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
+    if (includeArchivedTimeline) {
+      HoodieArchivedTimeline archivedTimeline = 
metaClient.getArchivedTimeline();
+      archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
+      return archivedTimeline.findInstantsInRange(startTs, 
endTs).mergeTimeline(activeTimeline);
+    }
+    return activeTimeline;
+  }
+
+}

Reply via email to