steveloughran commented on code in PR #7197: URL: https://github.com/apache/hadoop/pull/7197#discussion_r1915261962
########## hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java: ########## @@ -1,84 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.fs.shell; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.stream.Collectors; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BulkDelete; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.stream.Collectors; - public class BulkDeleteCommand extends FsCommand { - public static void registerCommands(CommandFactory factory) { - factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); - } - public static final String name = "bulkDelete"; + public static void registerCommands(CommandFactory factory) { + factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); + } - public 
static final String READ_FROM_FILE = "readFromFile"; + private static final Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); - public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [<basePath> <paths>]"; + public static final String NAME = "bulkDelete"; - public static final String DESCRIPTION = "Deletes the set of files under the given path. If a list of paths " + - "is provided then the paths are deleted directly. User can also point to the file where the paths are" + - "listed as full object names."; + /** + * File Name parameter to be specified at command line. + */ + public static final String READ_FROM_FILE = "readFromFile"; - private String fileName; + /** + * Page size parameter specified at command line. + */ + public static final String PAGE_SIZE = "pageSize"; - /* - Making the class stateful as the PathData initialization for all args is not needed - */ - LinkedList<String> childArgs; - protected BulkDeleteCommand() { - this.childArgs = new LinkedList<>(); + public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [" + PAGE_SIZE + + "] [<pageSize>] [<basePath> <paths>]"; + + public static final String DESCRIPTION = "Deletes the set of files under the given <path>.\n" + + "If a list of paths is provided at command line then the paths are deleted directly.\n" + + "User can also point to the file where the paths are listed as full object names using the \"fileName\"" + + "parameter. The presence of a file name takes precedence over the list of objects.\n" + + "Page size refers to the size of each bulk delete batch." + + "Users can specify the page size using \"pageSize\" command parameter." 
+ + "Default value is 1.\n"; + + private String fileName; + + private int pageSize; + + /** + * Making the class stateful as the PathData initialization for all args is not needed + */ + LinkedList<String> childArgs; + + protected BulkDeleteCommand() { + this.childArgs = new LinkedList<>(); + } + + protected BulkDeleteCommand(Configuration conf) { + super(conf); + this.childArgs = new LinkedList<>(); + this.pageSize = 1; + } + + /** + * Processes the command line options and initialize the variables. + * + * @param args the command line arguments + * @throws IOException in case of wrong arguments passed + */ + @Override + protected void processOptions(LinkedList<String> args) throws IOException { + CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE); + cf.addOptionWithValue(READ_FROM_FILE); + cf.addOptionWithValue(PAGE_SIZE); + cf.parse(args); + fileName = cf.getOptValue(READ_FROM_FILE); + if (cf.getOptValue(PAGE_SIZE) != null) { + pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE)); + } else { + pageSize = 1; } + } - protected BulkDeleteCommand(Configuration conf) {super(conf);} + /** + * Processes the command line arguments and stores the child arguments in a list. + * + * @param args strings to expand into {@link PathData} objects + * @return the base path of the bulk delete command. + * @throws IOException if the wrong number of arguments specified + */ + @Override + protected LinkedList<PathData> expandArguments(LinkedList<String> args) throws IOException { + if (fileName == null && args.size() < 2) { + throw new IOException("Invalid Number of Arguments. 
Expected more"); Review Comment: yes ########## hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java: ########## @@ -34,161 +34,174 @@ public class BulkDeleteCommand extends FsCommand { - public static void registerCommands(CommandFactory factory) { - factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); + public static void registerCommands(CommandFactory factory) { + factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); + } + + private static final Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); + + public static final String NAME = "bulkDelete"; + + /** + * File Name parameter to be specified at command line. + */ + public static final String READ_FROM_FILE = "readFromFile"; + + /** + * Page size parameter specified at command line. + */ + public static final String PAGE_SIZE = "pageSize"; + + + public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [" + PAGE_SIZE + + "] [<pageSize>] [<basePath> <paths>]"; + + public static final String DESCRIPTION = "Deletes the set of files under the given <path>.\n" + + "If a list of paths is provided at command line then the paths are deleted directly.\n" + + "User can also point to the file where the paths are listed as full object names using the \"fileName\"" + + "parameter. The presence of a file name takes precedence over the list of objects.\n" + + "Page size refers to the size of each bulk delete batch." + + "Users can specify the page size using \"pageSize\" command parameter." 
+ + "Default value is 1.\n"; + + private String fileName; + + private int pageSize; + + /** + * Making the class stateful as the PathData initialization for all args is not needed + */ + LinkedList<String> childArgs; + + protected BulkDeleteCommand() { + this.childArgs = new LinkedList<>(); + } + + protected BulkDeleteCommand(Configuration conf) { + super(conf); + this.childArgs = new LinkedList<>(); + this.pageSize = 1; + } + + /** + * Processes the command line options and initialize the variables. + * + * @param args the command line arguments + * @throws IOException in case of wrong arguments passed + */ + @Override + protected void processOptions(LinkedList<String> args) throws IOException { + CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE); + cf.addOptionWithValue(READ_FROM_FILE); + cf.addOptionWithValue(PAGE_SIZE); + cf.parse(args); + fileName = cf.getOptValue(READ_FROM_FILE); + if (cf.getOptValue(PAGE_SIZE) != null) { + pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE)); + } else { + pageSize = 1; } - - private static final Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); - - public static final String name = "bulkDelete"; - - /** - * File Name parameter to be specified at command line - */ - public static final String READ_FROM_FILE = "readFromFile"; - - /** - * Page size parameter specified at command line - */ - public static final String PAGE_SIZE = "pageSize"; - - - public static final String USAGE = "-[ " + READ_FROM_FILE + "] [<file>] [" + - PAGE_SIZE + "] [<pageSize>] [<basePath> <paths>]"; - - public static final String DESCRIPTION = "Deletes the set of files under the given <path>.\n" + - "If a list of paths is provided at command line then the paths are deleted directly.\n" + - "User can also point to the file where the paths are listed as full object names using the \"fileName\"" + - "parameter. 
The presence of a file name takes precedence over the list of objects.\n" + - "Page size refers to the size of each bulk delete batch." + - "Users can specify the page size using \"pageSize\" command parameter." + - "Default value is 1.\n"; - - private String fileName; - - private int pageSize; - - /* - Making the class stateful as the PathData initialization for all args is not needed - */ - LinkedList<String> childArgs; - - protected BulkDeleteCommand() { - this.childArgs = new LinkedList<>(); + } + + /** + * Processes the command line arguments and stores the child arguments in a list. + * + * @param args strings to expand into {@link PathData} objects + * @return the base path of the bulk delete command. + * @throws IOException if the wrong number of arguments specified + */ + @Override + protected LinkedList<PathData> expandArguments(LinkedList<String> args) throws IOException { + if (fileName == null && args.size() < 2) { + throw new IOException("Invalid Number of Arguments. Expected more"); } - - protected BulkDeleteCommand(Configuration conf) {super(conf);} - - /** - * Processes the command line options and initialize the variables - * @param args the command line arguments - * @throws IOException in case of wrong arguments passed - */ - @Override - protected void processOptions(LinkedList<String> args) throws IOException { - CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE); - cf.addOptionWithValue(READ_FROM_FILE); - cf.addOptionWithValue(PAGE_SIZE); - cf.parse(args); - fileName = cf.getOptValue(READ_FROM_FILE); - if(cf.getOptValue(PAGE_SIZE) != null) { - pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE)); - } else { - pageSize = 1; - } + LinkedList<PathData> pathData = new LinkedList<>(); + pathData.add(new PathData(args.get(0), getConf())); + args.remove(0); + this.childArgs = args; + return pathData; + } + + /** + * Deletes the objects using the bulk delete api. 
+ * + * @param bulkDelete Bulkdelete object exposing the API + * @param paths list of paths to be deleted in the base path + * @throws IOException on error in execution of the delete command + */ + void deleteInBatches(BulkDelete bulkDelete, List<Path> paths) throws IOException { + Batch<Path> batches = new Batch<>(paths, pageSize); + while (batches.hasNext()) { + try { + List<Map.Entry<Path, String>> result = bulkDelete.bulkDelete(batches.next()); + LOG.debug("Deleted Result:{}", result.toString()); + } catch (IllegalArgumentException e) { + LOG.error("Caught exception while deleting", e); + throw new IOException(e); Review Comment: good point. yes. Ideally quoting the path too, so if anything involving spaces are a problem then we find it ########## hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java: ########## @@ -27,103 +27,134 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.HadoopTestBase; import org.assertj.core.api.Assertions; import org.junit.BeforeClass; import org.junit.Test; -public class TestBulkDeleteCommand extends HadoopTestBase { - private static Configuration conf; - private static FsShell shell; - private static LocalFileSystem lfs; - private static Path testRootDir; +public class TestBulkDeleteCommand extends HadoopTestBase { + private static Configuration conf; + private static FsShell shell; + private static LocalFileSystem lfs; + private static Path testRootDir; - @BeforeClass - public static void setup() throws IOException { - conf = new Configuration(); - shell = new FsShell(conf); - lfs = FileSystem.getLocal(conf); - testRootDir = lfs.makeQualified(new Path(GenericTestUtils.getTempPath( - "testFsShellBulkDelete"))); - lfs.delete(testRootDir, true); - lfs.mkdirs(testRootDir); - lfs.setWorkingDirectory(testRootDir); - } + 
@BeforeClass + public static void setup() throws IOException { + conf = new Configuration(); + shell = new FsShell(conf); + lfs = FileSystem.getLocal(conf); + testRootDir = lfs.makeQualified(new Path(GenericTestUtils.getTempPath( + "testFsShellBulkDelete"))); + lfs.delete(testRootDir, true); + lfs.mkdirs(testRootDir); + lfs.setWorkingDirectory(testRootDir); + } - @Test - public void testDefaults() throws IOException { - LinkedList<String> options = new LinkedList<>(); - BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(); - bulkDeleteCommand.processOptions(options); - assertTrue(bulkDeleteCommand.childArgs.isEmpty()); - } + @Test + public void testDefaults() throws IOException { + LinkedList<String> options = new LinkedList<>(); + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); + bulkDeleteCommand.processOptions(options); + assertTrue(bulkDeleteCommand.childArgs.isEmpty()); + } - @Test - public void testArguments() throws IOException, URISyntaxException { - BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); - LinkedList<String> arguments = new LinkedList<>(); - String arg1 = "file:///file/name/1"; - String arg2 = "file:///file/name/2"; - arguments.add(arg1); - arguments.add(arg2); - LinkedList<PathData> pathData = bulkDeleteCommand.expandArguments(arguments); - Assertions.assertThat(pathData.size()). - describedAs("Only one root path must be present").isEqualTo(1); - Assertions.assertThat(pathData.get(0).path.toUri().getPath()). - describedAs("Base path of the command should match").isEqualTo(new URI(arg1).getPath()); - Assertions.assertThat(bulkDeleteCommand.childArgs.size()). - describedAs("Only one other argument was passed to the command"). - isEqualTo(1); - Assertions.assertThat(bulkDeleteCommand.childArgs.get(0)). 
- describedAs("Children arguments must match").isEqualTo(arg2); - } + @Test + public void testArguments() throws IOException, URISyntaxException { + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); + LinkedList<String> arguments = new LinkedList<>(); + String arg1 = "file:///file/name/1"; + String arg2 = "file:///file/name/1/2"; + arguments.add(arg1); + arguments.add(arg2); + LinkedList<PathData> pathData = bulkDeleteCommand.expandArguments(arguments); + Assertions.assertThat(pathData.size()). + describedAs("Only one root path must be present").isEqualTo(1); + Assertions.assertThat(pathData.get(0).path.toUri().getPath()). + describedAs("Base path of the command should match").isEqualTo(new URI(arg1).getPath()); + Assertions.assertThat(bulkDeleteCommand.childArgs.size()). + describedAs("Only one other argument was passed to the command"). + isEqualTo(1); + Assertions.assertThat(bulkDeleteCommand.childArgs.get(0)). + describedAs("Children arguments must match").isEqualTo(arg2); + } - @Test - public void testLocalFileDeletion() throws IOException { - String deletionDir = "toDelete"; - String baseFileName = "file_"; - Path baseDir = new Path(testRootDir, deletionDir); - List<String> listOfPaths = new ArrayList<>(); - for(int i = 0; i < 100; i++) { - Path p = new Path(baseDir, baseFileName + i); - lfs.create(p); - listOfPaths.add(p.toUri().toString()); - } - List<String> finalCommandList = new ArrayList<>(); - finalCommandList.add("-bulkDelete"); - finalCommandList.add(baseDir.toUri().toString()); - finalCommandList.addAll(listOfPaths); - shell.run(finalCommandList.toArray(new String[0])); - Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) - .as("All the files should have been deleted").isEqualTo(false); + @Test + public void testWrongArguments() throws IOException, URISyntaxException { + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); + LinkedList<String> arguments = new LinkedList<>(); + String arg1 = 
"file:///file/name/1"; + arguments.add(arg1); + Assertions.assertThatThrownBy(() -> bulkDeleteCommand.expandArguments(arguments)). + describedAs("No children to be deleted specified in the command."). + isInstanceOf(IOException.class); + } + @Test + public void testLocalFileDeletion() throws IOException { + String deletionDir = "toDelete"; + String baseFileName = "file_"; + Path baseDir = new Path(testRootDir, deletionDir); + List<String> listOfPaths = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + Path p = new Path(baseDir, baseFileName + i); + ContractTestUtils.touch(lfs, p); } + RemoteIterator<LocatedFileStatus> remoteIterator = lfs.listFiles(baseDir, false); + while (remoteIterator.hasNext()) { + listOfPaths.add(remoteIterator.next().getPath().toUri().toString()); + } + List<String> finalCommandList = new ArrayList<>(); + finalCommandList.add("-bulkDelete"); + finalCommandList.add(baseDir.toUri().toString()); + finalCommandList.addAll(listOfPaths); + shell.run(finalCommandList.toArray(new String[0])); + Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) + .as("All the files should have been deleted under the path:" + + baseDir).isEqualTo(false); - @Test - public void testLocalFileDeletionWithFileName() throws IOException { - String deletionDir = "toDelete"; - String baseFileName = "file_"; - Path baseDir = new Path(testRootDir, deletionDir); - Path fileWithDeletePaths = new Path(testRootDir, "fileWithDeletePaths"); - FSDataOutputStream fsDataOutputStream = lfs.create(fileWithDeletePaths, true); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream)); - for(int i = 0; i < 100; i++) { - Path p = new Path(baseDir, baseFileName + i); - lfs.create(p); - br.write(p.toUri().toString()); - br.newLine(); - } - br.flush(); // flush the file to write the contents - br.close(); // close the writer - List<String> finalCommandList = new ArrayList<>(); - finalCommandList.add("-bulkDelete"); - 
finalCommandList.add("-readFromFile"); - finalCommandList.add(fileWithDeletePaths.toUri().toString()); - finalCommandList.add(baseDir.toUri().toString()); - shell.run(finalCommandList.toArray(new String[0])); - Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) - .as("All the files should have been deleted").isEqualTo(false); + } + @Test + public void testLocalFileDeletionWithFileName() throws IOException { + String deletionDir = "toDelete"; + String baseFileName = "file_"; + Path baseDir = new Path(testRootDir, deletionDir); + Path fileWithDeletePaths = new Path(testRootDir, "fileWithDeletePaths"); + FSDataOutputStream fsDataOutputStream = lfs.create(fileWithDeletePaths, true); + BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream)); + for (int i = 0; i < 100; i++) { + Path p = new Path(baseDir, baseFileName + i); + ContractTestUtils.touch(lfs, p); + br.write(p.toUri().toString()); + br.newLine(); } + br.flush(); // flush the file to write the contents + br.close(); // close the writer + List<String> finalCommandList = new ArrayList<>(); + finalCommandList.add("-bulkDelete"); + finalCommandList.add("-readFromFile"); + finalCommandList.add(fileWithDeletePaths.toUri().toString()); + finalCommandList.add(baseDir.toUri().toString()); + shell.run(finalCommandList.toArray(new String[0])); + Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) + .as("All the files should have been deleted under the path:" + + baseDir).isEqualTo(false); + + } + + @Test + public void testWrongArgumentsWithNonChildFile() throws IOException, URISyntaxException { + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); + LinkedList<String> arguments = new LinkedList<>(); + String arg1 = "file:///file/name/1"; + String arg2 = "file:///file/name"; + arguments.add(arg1); + arguments.add(arg2); + LinkedList<PathData> pathData = bulkDeleteCommand.expandArguments(arguments); + Assertions.assertThatThrownBy(() -> 
bulkDeleteCommand.processArguments(pathData)). + describedAs("Child paths must be contained inside the base path"). + isInstanceOf(IOException.class); Review Comment: ...but there's a risk that different implementations may raise different subclasses - I don't think we are that strict in the specification about which exception to raise -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
