[
https://issues.apache.org/jira/browse/HDFS-17475?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17838890#comment-17838890
]
ASF GitHub Bot commented on HDFS-17475:
---------------------------------------
kokonguyen191 commented on code in PR #6745:
URL: https://github.com/apache/hadoop/pull/6745#discussion_r1571957646
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java:
##########
@@ -641,6 +663,207 @@ private void closeBlockReaders() {
}
+ private class VerifyReadableCommand extends DebugCommand {
+ private DistributedFileSystem dfs;
+ private boolean suppressed = false;
+
+ VerifyReadableCommand() {
+ super("verifyReadable",
+ "verifyReadable "
+ + "[-path <path> | -input <input>] "
+ + "[-output <output>] "
+ + "[-concurrency <concurrency>] "
+ + "[-suppressed]",
+ " Verify if one or multiple paths are fully readable and have no
missing blocks.");
+ }
+
+ @Override
+ int run(List<String> args) throws IOException {
+ if (args.isEmpty()) {
+ System.out.println(usageText);
+ System.out.println(helpText + System.lineSeparator());
+ return 1;
+ }
+ dfs = AdminHelper.getDFS(getConf());
+ String pathStr = StringUtils.popOptionWithArgument("-path", args);
+ String inputStr = StringUtils.popOptionWithArgument("-input", args);
+ String outputStr = StringUtils.popOptionWithArgument("-output", args);
+ String concurrencyStr =
StringUtils.popOptionWithArgument("-concurrency", args);
+ suppressed = StringUtils.popOption("-suppressed", args);
+ if (pathStr == null && inputStr == null) {
+ System.out.println("Either -path or -input must be present.");
+ System.out.println(usageText);
+ System.out.println(helpText + System.lineSeparator());
+ return 1;
+ }
+ try {
+ return handleArgs(pathStr, inputStr, outputStr, concurrencyStr);
+ } catch (Exception e) {
+ System.err.println(
+ "Got IOE: " + StringUtils.stringifyException(e) + " for command: "
+ StringUtils.join(
+ ",", args));
+ return 1;
+ }
+ }
+
+ private int handleArgs(String pathStr, String inputStr, String outputStr,
String concurrencyStr)
+ throws IOException, ExecutionException, InterruptedException {
+ BufferedWriter writer = null;
+ try {
+ if (outputStr != null) {
+ File output = new File(outputStr);
+ writer = new BufferedWriter(new
OutputStreamWriter(Files.newOutputStream(output.toPath()),
+ StandardCharsets.UTF_8));
+ }
+
+ // -path takes priority over -input
+ if (pathStr != null) {
+ int result = handlePath(new Path(pathStr));
+ writeToOutput(writer, pathStr, result);
+ return result;
+ }
+
+ // -input must be defined by this point
+ File input = new File(inputStr);
+ if (!input.exists()) {
+ return 1;
+ }
+ BufferedReader reader = new BufferedReader(
+ new InputStreamReader(Files.newInputStream(input.toPath()),
StandardCharsets.UTF_8));
+ Set<Path> paths = new HashSet<>();
+ String line;
+ while ((line = reader.readLine()) != null) {
+ paths.add(new Path(line.trim()));
+ }
+ reader.close();
+ int concurrency = concurrencyStr == null ? 1 :
Integer.parseInt(concurrencyStr);
+ return handlePaths(paths, writer, concurrency);
+ } finally {
+ if (writer != null) {
+ writer.flush();
+ writer.close();
+ }
+ }
+ }
+
+ private void writeToOutput(BufferedWriter writer, String path, int result)
throws IOException {
Review Comment:
@ZanderXu I moved all the details to a separate `PathResult` class to make
it cleaner and changed `-suppressed` to `-verbose` to include this option. Not
sure if that's too much printing.
> Add a command to check if files are readable
> --------------------------------------------
>
> Key: HDFS-17475
> URL: https://issues.apache.org/jira/browse/HDFS-17475
> Project: Hadoop HDFS
> Issue Type: Improvement
> Components: hdfs
> Reporter: Felix N
> Assignee: Felix N
> Priority: Minor
> Labels: pull-request-available
>
> Sometimes a job can fail because of a single unreadable file down the line,
> caused by missing replicas, dead DNs, or other reasons. This command should
> allow users to check whether files are readable by checking for metadata on
> DNs without executing full read pipelines of the files.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]