This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new d25e7609886 [HUDI-5924] Fixing cli clean command to trim down a subset
based on start and end (#8169)
d25e7609886 is described below
commit d25e760988690ed9dc2ce0f74996c004940275b6
Author: Sivabalan Narayanan <[email protected]>
AuthorDate: Sat Sep 23 23:06:02 2023 -0400
[HUDI-5924] Fixing cli clean command to trim down a subset based on start
and end (#8169)
Adds support for trimming the timeline to a start/end time range in the Hudi
CLI clean command. Also adds an option to include instants from the archived
timeline.
Co-authored-by: Y Ethan Guo <[email protected]>
---
.../apache/hudi/cli/commands/CleansCommand.java | 10 +++-
.../org/apache/hudi/cli/commands/DiffCommand.java | 41 +++-----------
.../java/org/apache/hudi/cli/utils/CLIUtils.java | 64 ++++++++++++++++++++++
3 files changed, 80 insertions(+), 35 deletions(-)
diff --git
a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
index de0e4aa1098..c650f2ec4d7 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
@@ -24,10 +24,12 @@ import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
+import org.apache.hudi.cli.utils.CLIUtils;
import org.apache.hudi.cli.utils.InputStreamConsumer;
import org.apache.hudi.cli.utils.SparkUtil;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
@@ -57,12 +59,18 @@ public class CleansCommand {
public String showCleans(
@ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue =
"-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue
= "") final String sortByField,
+ @ShellOption(value = {"--startTs"}, help = "start time for cleans,
default: now - 10 days",
+ defaultValue = ShellOption.NULL) String startTs,
+ @ShellOption(value = {"--endTs"}, help = "end time for clean, default:
upto latest",
+ defaultValue = ShellOption.NULL) String endTs,
+ @ShellOption(value = {"--includeArchivedTimeline"}, help = "Include
archived commits as well",
+ defaultValue = "false") final boolean includeArchivedTimeline,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue =
"false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") final boolean headerOnly)
throws IOException {
- HoodieActiveTimeline activeTimeline =
HoodieCLI.getTableMetaClient().getActiveTimeline();
+ HoodieDefaultTimeline activeTimeline =
CLIUtils.getTimelineInRange(startTs, endTs, includeArchivedTimeline);
HoodieTimeline timeline =
activeTimeline.getCleanerTimeline().filterCompletedInstants();
List<HoodieInstant> cleans =
timeline.getReverseOrderedInstants().collect(Collectors.toList());
List<Comparable[]> rows = new ArrayList<>();
diff --git
a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/DiffCommand.java
b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/DiffCommand.java
index 01e6da421a6..9d0780751b4 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/DiffCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/DiffCommand.java
@@ -19,14 +19,11 @@
package org.apache.hudi.cli.commands;
-import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
+import org.apache.hudi.cli.utils.CLIUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.NumericUtils;
@@ -45,11 +42,6 @@ import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
-import static org.apache.hudi.cli.utils.CommitUtil.getTimeDaysAgo;
-import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
-import static org.apache.hudi.common.util.StringUtils.nonEmpty;
-import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
-
/**
* Given a file id or partition value, this command line utility tracks the
changes to the file group or partition across range of commits.
* Usage: diff file --fileId <fileId>
@@ -64,16 +56,16 @@ public class DiffCommand {
public String diffFile(
@ShellOption(value = {"--fileId"}, help = "File ID to diff across range
of commits") String fileId,
@ShellOption(value = {"--startTs"}, help = "start time for compactions,
default: now - 10 days",
- defaultValue = ShellOption.NULL) String startTs,
+ defaultValue = ShellOption.NULL) String startTs,
@ShellOption(value = {"--endTs"}, help = "end time for compactions,
default: now - 1 day",
- defaultValue = ShellOption.NULL) String endTs,
+ defaultValue = ShellOption.NULL) String endTs,
@ShellOption(value = {"--limit"}, help = "Limit compactions",
defaultValue = "-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue
= "") final String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue =
"false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") final boolean headerOnly,
@ShellOption(value = {"--includeArchivedTimeline"}, help = "Include
archived commits as well",
defaultValue = "false") final boolean includeArchivedTimeline)
throws IOException {
- HoodieDefaultTimeline timeline = getTimelineInRange(startTs, endTs,
includeArchivedTimeline);
+ HoodieDefaultTimeline timeline = CLIUtils.getTimelineInRange(startTs,
endTs, includeArchivedTimeline);
return printCommitsWithMetadataForFileId(timeline, limit, sortByField,
descending, headerOnly, "", fileId);
}
@@ -81,38 +73,19 @@ public class DiffCommand {
public String diffPartition(
@ShellOption(value = {"--partitionPath"}, help = "Relative partition
path to diff across range of commits") String partitionPath,
@ShellOption(value = {"--startTs"}, help = "start time for compactions,
default: now - 10 days",
- defaultValue = ShellOption.NULL) String startTs,
+ defaultValue = ShellOption.NULL) String startTs,
@ShellOption(value = {"--endTs"}, help = "end time for compactions,
default: now - 1 day",
- defaultValue = ShellOption.NULL) String endTs,
+ defaultValue = ShellOption.NULL) String endTs,
@ShellOption(value = {"--limit"}, help = "Limit compactions",
defaultValue = "-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue
= "") final String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue =
"false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") final boolean headerOnly,
@ShellOption(value = {"--includeArchivedTimeline"}, help = "Include
archived commits as well",
defaultValue = "false") final boolean includeArchivedTimeline)
throws IOException {
- HoodieDefaultTimeline timeline = getTimelineInRange(startTs, endTs,
includeArchivedTimeline);
+ HoodieDefaultTimeline timeline = CLIUtils.getTimelineInRange(startTs,
endTs, includeArchivedTimeline);
return printCommitsWithMetadataForPartition(timeline, limit, sortByField,
descending, headerOnly, "", partitionPath);
}
- private HoodieDefaultTimeline getTimelineInRange(String startTs, String
endTs, boolean includeArchivedTimeline) {
- if (isNullOrEmpty(startTs)) {
- startTs = getTimeDaysAgo(10);
- }
- if (isNullOrEmpty(endTs)) {
- endTs = getTimeDaysAgo(1);
- }
- checkArgument(nonEmpty(startTs), "startTs is null or empty");
- checkArgument(nonEmpty(endTs), "endTs is null or empty");
- HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
- HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
- if (includeArchivedTimeline) {
- HoodieArchivedTimeline archivedTimeline =
metaClient.getArchivedTimeline();
- archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
- return archivedTimeline.findInstantsInRange(startTs,
endTs).mergeTimeline(activeTimeline);
- }
- return activeTimeline;
- }
-
private String printCommitsWithMetadataForFileId(HoodieDefaultTimeline
timeline,
final Integer limit,
final String sortByField,
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CLIUtils.java
b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CLIUtils.java
new file mode 100644
index 00000000000..f04418e1898
--- /dev/null
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CLIUtils.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.cli.utils;
+
+import org.apache.hudi.cli.HoodieCLI;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
+import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
+
+import static org.apache.hudi.cli.utils.CommitUtil.getTimeDaysAgo;
+import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
+import static org.apache.hudi.common.util.StringUtils.nonEmpty;
+import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
+
+/**
+ * Utils class for cli commands.
+ */
+public class CLIUtils {
+ /**
+ * Gets a {@link HoodieDefaultTimeline} instance containing the instants in
the specified range.
+ *
+ * @param startTs Start instant time.
+ * @param endTs End instant time.
+ * @param includeArchivedTimeline Whether to include instants from the
archived timeline.
+ * @return a {@link HoodieDefaultTimeline} instance containing the instants
in the specified range.
+ */
+ public static HoodieDefaultTimeline getTimelineInRange(String startTs,
String endTs, boolean includeArchivedTimeline) {
+ if (isNullOrEmpty(startTs)) {
+ startTs = getTimeDaysAgo(10);
+ }
+ if (isNullOrEmpty(endTs)) {
+ endTs = getTimeDaysAgo(1);
+ }
+ checkArgument(nonEmpty(startTs), "startTs is null or empty");
+ checkArgument(nonEmpty(endTs), "endTs is null or empty");
+ HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
+ HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
+ if (includeArchivedTimeline) {
+ HoodieArchivedTimeline archivedTimeline =
metaClient.getArchivedTimeline();
+ archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
+ return archivedTimeline.findInstantsInRange(startTs,
endTs).mergeTimeline(activeTimeline);
+ }
+ return activeTimeline;
+ }
+
+}