mukund-thakur commented on code in PR #7197:
URL: https://github.com/apache/hadoop/pull/7197#discussion_r1873967731
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java:
##########
@@ -73,12 +105,40 @@ protected void processArguments(LinkedList<PathData> args)
throws IOException {
BufferedReader br = new BufferedReader(new
InputStreamReader(localFile.open(new Path(fileName))));
String line;
while((line = br.readLine()) != null) {
- pathList.add(new Path(line));
+ if(!line.startsWith("#")) {
+ pathList.add(new Path(line));
+ }
}
} else {
pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList()));
}
BulkDelete bulkDelete = basePath.fs.createBulkDelete(basePath.path);
- bulkDelete.bulkDelete(pathList);
+ deleteInBatches(bulkDelete, pathList);
+ }
+
+ static class Batch<T> {
Review Comment:
Add Javadoc everywhere.
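   Roughly along these lines for the class declaration and fields, for example; the wording below is only a suggestion, so adjust it to whatever reads best:
```java
  /**
   * A simple pager over an immutable list of elements, handing out
   * successive sublists of at most {@code batchSize} elements.
   *
   * @param <T> type of the elements being batched.
   */
  static class Batch<T> {
    /** Immutable view of the full list of elements. */
    List<T> data;
    /** Maximum number of elements returned by a single call to next(). */
    int batchSize;
    /** Index of the first element of the next batch. */
    int currentLocation;

    /**
     * Create a pager over the given list.
     *
     * @param data full list of elements; stored as an unmodifiable view.
     * @param batchSize maximum number of elements per batch; must be positive.
     */
    Batch(List<T> data, int batchSize) {
      this.data = Collections.unmodifiableList(data);
      this.batchSize = batchSize;
      this.currentLocation = 0;
    }

    // ... hasNext() and next() documented the same way.
  }
```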
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java:
##########
@@ -73,12 +105,40 @@ protected void processArguments(LinkedList<PathData> args)
throws IOException {
BufferedReader br = new BufferedReader(new
InputStreamReader(localFile.open(new Path(fileName))));
String line;
while((line = br.readLine()) != null) {
- pathList.add(new Path(line));
+ if(!line.startsWith("#")) {
+ pathList.add(new Path(line));
+ }
}
} else {
pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList()));
}
BulkDelete bulkDelete = basePath.fs.createBulkDelete(basePath.path);
- bulkDelete.bulkDelete(pathList);
+ deleteInBatches(bulkDelete, pathList);
+ }
+
+ static class Batch<T> {
+ List<T> data;
+ int batchSize;
+ int currentLocation;
+
+ Batch(List<T> data, int batchSize) {
+ this.data = Collections.unmodifiableList(data);
+ this.batchSize = batchSize;
+ this.currentLocation = 0;
+ }
+
+ boolean hasNext() {
+ return this.currentLocation < this.data.size();
+ }
+
+ List<T> next() {
+ List<T> ret = new ArrayList<>();
+ int i = currentLocation;
+ for(; i < Math.min(currentLocation + batchSize, data.size()); i++)
{
Review Comment:
I think a while loop would be better suited here.
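   Untested sketch of what I have in mind; same semantics as the current loop, but the cursor is advanced inside the loop (I am assuming the current code assigns back to currentLocation after the for loop, which would then go away):
```java
    List<T> next() {
      // Stop either batchSize elements further on, or at the end of the
      // list, whichever comes first.
      int end = Math.min(currentLocation + batchSize, data.size());
      List<T> ret = new ArrayList<>(end - currentLocation);
      while (currentLocation < end) {
        ret.add(data.get(currentLocation));
        currentLocation++;
      }
      return ret;
    }
```
   Alternatively, `data.subList(currentLocation, end)` would avoid the copy altogether, if handing out a view of the unmodifiable list is acceptable.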
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java:
##########
@@ -24,14 +38,19 @@ public static void registerCommands(CommandFactory factory)
{
public static final String READ_FROM_FILE = "readFromFile";
- public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [<basePath> <paths>]";
+ public static final String PAGE_SIZE = "pageSize";
+
+ public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [" +
+ PAGE_SIZE + "] [<pageSize>] [<basePath> <paths>]";
public static final String DESCRIPTION = "Deletes the set of files under the given path. If a list of paths " +
Review Comment:
Add the description of the options as well; see Delete.java for an example.
   Does this description get printed as part of the help shown to the user?
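   Something along these lines, maybe; the wording is only a placeholder, and the '#'-comment and page size semantics are just my reading of the patch, so correct them if they are off:
```java
  public static final String DESCRIPTION =
      "Delete the files listed under the given base path.\n" +
      "-" + READ_FROM_FILE + ": read the paths to delete from the given " +
      "local file, one path per line; lines starting with # are ignored.\n" +
      "-" + PAGE_SIZE + ": maximum number of paths deleted in a single " +
      "bulk delete call.";
```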
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java:
##########
@@ -24,14 +38,19 @@ public static void registerCommands(CommandFactory factory)
{
public static final String READ_FROM_FILE = "readFromFile";
- public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [<basePath> <paths>]";
+ public static final String PAGE_SIZE = "pageSize";
+
+ public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [" +
+ PAGE_SIZE + "] [<pageSize>] [<basePath> <paths>]";
public static final String DESCRIPTION = "Deletes the set of files under the given path. If a list of paths " +
Review Comment:
Is there any documentation for all these commands? We should add it somewhere,
   at least in bulkdelete.md.
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java:
##########
@@ -63,6 +88,13 @@ protected LinkedList<PathData> expandArguments(LinkedList<String> args) throws I
return pathData;
}
+ void deleteInBatches(BulkDelete bulkDelete, List<Path> paths) throws
IOException {
+ Batch<Path> batches = new Batch<>(paths, pageSize);
+ while(batches.hasNext()) {
+ bulkDelete.bulkDelete(batches.next());
Review Comment:
Use try/catch and print the failed paths if the operation fails halfway through.
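   Rough sketch of what I mean, printing to the command's err stream (or System.err) and rethrowing so the command still fails:
```java
  void deleteInBatches(BulkDelete bulkDelete, List<Path> paths) throws IOException {
    Batch<Path> batches = new Batch<>(paths, pageSize);
    while (batches.hasNext()) {
      List<Path> batch = batches.next();
      try {
        bulkDelete.bulkDelete(batch);
      } catch (IOException e) {
        // Report the batch that failed part way so the user knows which
        // paths may not have been deleted, then rethrow.
        err.println("Bulk delete failed for batch: " + batch);
        throw e;
      }
    }
  }
```
   It is probably also worth surfacing the per-path failures that bulkDelete() returns instead of dropping the return value.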
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.