[ 
https://issues.apache.org/jira/browse/HADOOP-19254?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17916169#comment-17916169
 ] 

ASF GitHub Bot commented on HADOOP-19254:
-----------------------------------------

HarshitGupta11 commented on code in PR #7197:
URL: https://github.com/apache/hadoop/pull/7197#discussion_r1925856394


##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java:
##########
@@ -34,161 +34,174 @@
 
 public class BulkDeleteCommand extends FsCommand {
 
-    public static void registerCommands(CommandFactory factory) {
-        factory.addClass(BulkDeleteCommand.class, "-bulkDelete");
+  public static void registerCommands(CommandFactory factory) {
+    factory.addClass(BulkDeleteCommand.class, "-bulkDelete");
+  }
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(BulkDeleteCommand.class.getName());
+
+  public static final String NAME = "bulkDelete";
+
+  /**
+   * File Name parameter to be specified at command line.
+   */
+  public static final String READ_FROM_FILE = "readFromFile";
+
+  /**
+   * Page size parameter specified at command line.
+   */
+  public static final String PAGE_SIZE = "pageSize";
+
+
+  public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [" + 
PAGE_SIZE
+          + "] [<pageSize>] [<basePath> <paths>]";
+
+  public static final String DESCRIPTION = "Deletes the set of files under the 
given <path>.\n" +
+          "If a list of paths is provided at command line then the paths are 
deleted directly.\n" +
+          "User can also point to the file where the paths are listed as full 
object names using the \"fileName\"" +
+          "parameter. The presence of a file name takes precedence over the 
list of objects.\n" +
+          "Page size refers to the size of each bulk delete batch." +
+          "Users can specify the page size using \"pageSize\" command 
parameter." +
+          "Default value is 1.\n";
+
+  private String fileName;
+
+  private int pageSize;
+
+  /**
+   * Making the class stateful as the PathData initialization for all args is 
not needed
+   */
+  LinkedList<String> childArgs;
+
+  protected BulkDeleteCommand() {
+    this.childArgs = new LinkedList<>();
+  }
+
+  protected BulkDeleteCommand(Configuration conf) {
+    super(conf);
+    this.childArgs = new LinkedList<>();
+    this.pageSize = 1;
+  }
+
+  /**
+   * Processes the command line options and initialize the variables.
+   *
+   * @param args the command line arguments
+   * @throws IOException in case of wrong arguments passed
+   */
+  @Override
+  protected void processOptions(LinkedList<String> args) throws IOException {
+    CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE);
+    cf.addOptionWithValue(READ_FROM_FILE);
+    cf.addOptionWithValue(PAGE_SIZE);
+    cf.parse(args);
+    fileName = cf.getOptValue(READ_FROM_FILE);
+    if (cf.getOptValue(PAGE_SIZE) != null) {
+      pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE));
+    } else {
+      pageSize = 1;
     }
-
-    private static final Logger LOG = 
LoggerFactory.getLogger(BulkDeleteCommand.class.getName());
-
-    public static final String name = "bulkDelete";
-
-    /**
-     * File Name parameter to be specified at command line
-     */
-    public static final String READ_FROM_FILE = "readFromFile";
-
-    /**
-     * Page size parameter specified at command line
-     */
-    public static final String PAGE_SIZE = "pageSize";
-
-
-    public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [" 
+
-        PAGE_SIZE + "] [<pageSize>] [<basePath> <paths>]";
-
-    public static final String DESCRIPTION = "Deletes the set of files under 
the given <path>.\n" +
-            "If a list of paths is provided at command line then the paths are 
deleted directly.\n" +
-            "User can also point to the file where the paths are listed as 
full object names using the \"fileName\"" +
-            "parameter. The presence of a file name takes precedence over the 
list of objects.\n" +
-            "Page size refers to the size of each bulk delete batch." +
-            "Users can specify the page size using \"pageSize\" command 
parameter." +
-            "Default value is 1.\n";
-
-    private String fileName;
-
-    private int pageSize;
-
-    /*
-    Making the class stateful as the PathData initialization for all args is 
not needed
-     */
-    LinkedList<String> childArgs;
-
-    protected BulkDeleteCommand() {
-        this.childArgs = new LinkedList<>();
+  }
+
+  /**
+   * Processes the command line arguments and stores the child arguments in a 
list.
+   *
+   * @param args strings to expand into {@link PathData} objects
+   * @return the base path of the bulk delete command.
+   * @throws IOException if the wrong number of arguments specified
+   */
+  @Override
+  protected LinkedList<PathData> expandArguments(LinkedList<String> args) 
throws IOException {
+    if (fileName == null && args.size() < 2) {
+      throw new IOException("Invalid Number of Arguments. Expected more");
     }
-
-    protected BulkDeleteCommand(Configuration conf) {super(conf);}
-
-    /**
-     * Processes the command line options and initialize the variables
-     * @param args the command line arguments
-     * @throws IOException in case of wrong arguments passed
-     */
-    @Override
-    protected void processOptions(LinkedList<String> args) throws IOException {
-        CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE);
-        cf.addOptionWithValue(READ_FROM_FILE);
-        cf.addOptionWithValue(PAGE_SIZE);
-        cf.parse(args);
-        fileName = cf.getOptValue(READ_FROM_FILE);
-        if(cf.getOptValue(PAGE_SIZE) != null) {
-            pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE));
-        } else {
-            pageSize = 1;
-        }
+    LinkedList<PathData> pathData = new LinkedList<>();
+    pathData.add(new PathData(args.get(0), getConf()));
+    args.remove(0);
+    this.childArgs = args;
+    return pathData;
+  }
+
+  /**
+   * Deletes the objects using the bulk delete api.
+   *
+   * @param bulkDelete Bulkdelete object exposing the API
+   * @param paths      list of paths to be deleted in the base path
+   * @throws IOException on error in execution of the delete command
+   */
+  void deleteInBatches(BulkDelete bulkDelete, List<Path> paths) throws 
IOException {
+    Batch<Path> batches = new Batch<>(paths, pageSize);
+    while (batches.hasNext()) {
+      try {
+        List<Map.Entry<Path, String>> result = 
bulkDelete.bulkDelete(batches.next());
+        LOG.debug("Deleted Result:{}", result.toString());
+      } catch (IllegalArgumentException e) {
+        LOG.error("Caught exception while deleting", e);
+        throw new IOException(e);

Review Comment:
   done





> Implement bulk delete command as hadoop fs command operation 
> -------------------------------------------------------------
>
>                 Key: HADOOP-19254
>                 URL: https://issues.apache.org/jira/browse/HADOOP-19254
>             Project: Hadoop Common
>          Issue Type: Improvement
>          Components: fs
>    Affects Versions: 3.4.1
>            Reporter: Mukund Thakur
>            Assignee: Harshit Gupta
>            Priority: Major
>              Labels: pull-request-available
>
> {code}
> hadoop fs -bulkdelete <base-url> <file> 
> {code}
> Key uses
> * QE: Testing from python and other scripting languages
> * cluster maintenance: actual bulk deletion operations from the store
> One thought there: we MUST qualify paths with / elements: if a passed-in path 
> ends in /, it means "delete a marker", not "delete a dir", and if it doesn't 
> have one then it's an object. This makes it possible to be used to delete 
> surplus markers or where there is a file above another file... cloudstore 
> listobjects finds this.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to