[
https://issues.apache.org/jira/browse/HADOOP-19254?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17912350#comment-17912350
]
ASF GitHub Bot commented on HADOOP-19254:
-----------------------------------------
HarshitGupta11 commented on code in PR #7197:
URL: https://github.com/apache/hadoop/pull/7197#discussion_r1912702911
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java:
##########
@@ -1,84 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.hadoop.fs.shell;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.stream.Collectors;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BulkDelete;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.stream.Collectors;
-
public class BulkDeleteCommand extends FsCommand {
- public static void registerCommands(CommandFactory factory) {
- factory.addClass(BulkDeleteCommand.class, "-bulkDelete");
- }
- public static final String name = "bulkDelete";
+ public static void registerCommands(CommandFactory factory) {
+ factory.addClass(BulkDeleteCommand.class, "-bulkDelete");
+ }
- public static final String READ_FROM_FILE = "readFromFile";
+ private static final Logger LOG =
LoggerFactory.getLogger(BulkDeleteCommand.class.getName());
- public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>]
[<basePath> <paths>]";
+ public static final String NAME = "bulkDelete";
- public static final String DESCRIPTION = "Deletes the set of files under
the given path. If a list of paths " +
- "is provided then the paths are deleted directly. User can also
point to the file where the paths are" +
- "listed as full object names.";
+ /**
+ * File Name parameter to be specified at command line.
+ */
+ public static final String READ_FROM_FILE = "readFromFile";
- private String fileName;
+ /**
+ * Page size parameter specified at command line.
+ */
+ public static final String PAGE_SIZE = "pageSize";
- /*
- Making the class stateful as the PathData initialization for all args is
not needed
- */
- LinkedList<String> childArgs;
- protected BulkDeleteCommand() {
- this.childArgs = new LinkedList<>();
+ public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [" +
PAGE_SIZE
+ + "] [<pageSize>] [<basePath> <paths>]";
+
+ public static final String DESCRIPTION = "Deletes the set of files under the
given <path>.\n" +
+ "If a list of paths is provided at command line then the paths are
deleted directly.\n" +
+ "User can also point to the file where the paths are listed as full
object names using the \"fileName\"" +
+ "parameter. The presence of a file name takes precedence over the
list of objects.\n" +
+ "Page size refers to the size of each bulk delete batch." +
+ "Users can specify the page size using \"pageSize\" command
parameter." +
+ "Default value is 1.\n";
+
+ private String fileName;
+
+ private int pageSize;
+
+ /**
+ * Making the class stateful as the PathData initialization for all args is
not needed
+ */
+ LinkedList<String> childArgs;
+
+ protected BulkDeleteCommand() {
+ this.childArgs = new LinkedList<>();
+ }
+
+ protected BulkDeleteCommand(Configuration conf) {
+ super(conf);
+ this.childArgs = new LinkedList<>();
+ this.pageSize = 1;
+ }
+
+ /**
+ * Processes the command line options and initialize the variables.
+ *
+ * @param args the command line arguments
+ * @throws IOException in case of wrong arguments passed
+ */
+ @Override
+ protected void processOptions(LinkedList<String> args) throws IOException {
+ CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE);
+ cf.addOptionWithValue(READ_FROM_FILE);
+ cf.addOptionWithValue(PAGE_SIZE);
+ cf.parse(args);
+ fileName = cf.getOptValue(READ_FROM_FILE);
+ if (cf.getOptValue(PAGE_SIZE) != null) {
+ pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE));
+ } else {
+ pageSize = 1;
}
+ }
- protected BulkDeleteCommand(Configuration conf) {super(conf);}
+ /**
+ * Processes the command line arguments and stores the child arguments in a
list.
+ *
+ * @param args strings to expand into {@link PathData} objects
+ * @return the base path of the bulk delete command.
+ * @throws IOException if the wrong number of arguments specified
+ */
+ @Override
+ protected LinkedList<PathData> expandArguments(LinkedList<String> args)
throws IOException {
+ if (fileName == null && args.size() < 2) {
+ throw new IOException("Invalid Number of Arguments. Expected more");
+ }
+ LinkedList<PathData> pathData = new LinkedList<>();
+ pathData.add(new PathData(args.get(0), getConf()));
+ args.remove(0);
+ this.childArgs = args;
+ return pathData;
+ }
- @Override
- protected void processOptions(LinkedList<String> args) throws IOException {
- CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE);
- cf.addOptionWithValue(READ_FROM_FILE);
- cf.parse(args);
- fileName = cf.getOptValue(READ_FROM_FILE);
+ /**
+ * Deletes the objects using the bulk delete api.
+ *
+ * @param bulkDelete Bulkdelete object exposing the API
+ * @param paths list of paths to be deleted in the base path
+ * @throws IOException on error in execution of the delete command
+ */
+ void deleteInBatches(BulkDelete bulkDelete, List<Path> paths) throws
IOException {
+ Batch<Path> batches = new Batch<>(paths, pageSize);
+ while (batches.hasNext()) {
+ try {
+ List<Map.Entry<Path, String>> result =
bulkDelete.bulkDelete(batches.next());
+ LOG.debug("Deleted Result:{}", result.toString());
+ } catch (IllegalArgumentException e) {
+ LOG.error("Caught exception while deleting", e);
+ throw new IOException(e);
+ }
}
+ }
- @Override
- protected LinkedList<PathData> expandArguments(LinkedList<String> args)
throws IOException {
- if(fileName == null && args.size() < 2) {
- throw new IOException("Invalid Number of Arguments. Expected
more");
+ @Override
+ protected void processArguments(LinkedList<PathData> args) throws
IOException {
+ PathData basePath = args.get(0);
+ LOG.info("Deleting files under:{}", basePath);
+ List<Path> pathList = new ArrayList<>();
+ if (fileName != null) {
+ LOG.info("Reading from file:{}", fileName);
+ FileSystem localFile = FileSystem.get(getConf());
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ localFile.open(new Path(fileName)), StandardCharsets.UTF_8));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (!line.startsWith("#")) {
+ pathList.add(new Path(line));
Review Comment:
done, removed empty lines and lines that start with #
> Implement bulk delete command as hadoop fs command operation
> -------------------------------------------------------------
>
> Key: HADOOP-19254
> URL: https://issues.apache.org/jira/browse/HADOOP-19254
> Project: Hadoop Common
> Issue Type: Improvement
> Components: fs
> Affects Versions: 3.4.1
> Reporter: Mukund Thakur
> Assignee: Harshit Gupta
> Priority: Major
> Labels: pull-request-available
>
> {code}
> hadoop fs -bulkdelete <base-url> <file>
> {code}
> Key uses
> * QE: Testing from python and other scripting languages
> * cluster maintenance: actual bulk deletion operations from the store
> One thought there: we MUST qualify paths with / elements. If a passed-in path
> ends in /, it means "delete a marker", not "delete a dir"; if it doesn't
> end in /, then it's an object. This makes it possible to use the command to
> delete surplus markers, or to clean up where there is a file above another
> file (cloudstore listobjects finds this).
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]