HADOOP-13079. Add -q option to Ls to print ? instead of non-printable characters. Contributed by John Zhuge.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0accc330 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0accc330 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0accc330 Branch: refs/heads/HDFS-7240 Commit: 0accc3306d830c3f2b16c4b8abf68729c7aba6cb Parents: 28b66ae Author: Andrew Wang <w...@apache.org> Authored: Mon Jun 13 11:43:46 2016 -0700 Committer: Andrew Wang <w...@apache.org> Committed: Mon Jun 13 11:43:46 2016 -0700 ---------------------------------------------------------------------- .../java/org/apache/hadoop/fs/shell/Ls.java | 28 +++++-- .../apache/hadoop/fs/shell/PrintableString.java | 72 ++++++++++++++++ .../src/site/markdown/FileSystemShell.md | 3 +- .../org/apache/hadoop/fs/TestFsShellList.java | 78 ++++++++++++++++++ .../hadoop/fs/shell/TestPrintableString.java | 87 ++++++++++++++++++++ .../apache/hadoop/fs/shell/package-info.java | 26 ++++++ .../src/test/resources/testConf.xml | 6 +- 7 files changed, 291 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java index 67348c6..47e87f5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java @@ -48,6 +48,7 @@ class Ls extends FsCommand { private static final String OPTION_PATHONLY = "C"; private static final String OPTION_DIRECTORY = "d"; private static final String OPTION_HUMAN = "h"; + private static final String OPTION_HIDENONPRINTABLE = "q"; 
private static final String OPTION_RECURSIVE = "R"; private static final String OPTION_REVERSE = "r"; private static final String OPTION_MTIME = "t"; @@ -55,10 +56,11 @@ class Ls extends FsCommand { private static final String OPTION_SIZE = "S"; public static final String NAME = "ls"; - public static final String USAGE = "[-" + OPTION_PATHONLY + "] [-" - + OPTION_DIRECTORY + "] [-" + OPTION_HUMAN + "] [-" + OPTION_RECURSIVE - + "] [-" + OPTION_MTIME + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE - + "] [-" + OPTION_ATIME + "] [<path> ...]"; + public static final String USAGE = "[-" + OPTION_PATHONLY + "] [-" + + OPTION_DIRECTORY + "] [-" + OPTION_HUMAN + "] [-" + + OPTION_HIDENONPRINTABLE + "] [-" + OPTION_RECURSIVE + "] [-" + + OPTION_MTIME + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE + "] [-" + + OPTION_ATIME + "] [<path> ...]"; public static final String DESCRIPTION = "List the contents that match the specified file pattern. If " + @@ -77,6 +79,8 @@ class Ls extends FsCommand { " -" + OPTION_HUMAN + " Formats the sizes of files in a human-readable fashion\n" + " rather than a number of bytes.\n" + + " -" + OPTION_HIDENONPRINTABLE + + " Print ? instead of non-printable characters.\n" + " -" + OPTION_RECURSIVE + " Recursively list the contents of directories.\n" + " -" + OPTION_MTIME + @@ -104,6 +108,9 @@ class Ls extends FsCommand { protected boolean humanReadable = false; + /** Whether to print ? instead of non-printable characters. 
*/ + private boolean hideNonPrintable = false; + protected Ls() {} protected Ls(Configuration conf) { @@ -119,14 +126,16 @@ class Ls extends FsCommand { @Override protected void processOptions(LinkedList<String> args) throws IOException { - CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE, OPTION_PATHONLY, - OPTION_DIRECTORY, OPTION_HUMAN, OPTION_RECURSIVE, OPTION_REVERSE, + CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE, + OPTION_PATHONLY, OPTION_DIRECTORY, OPTION_HUMAN, + OPTION_HIDENONPRINTABLE, OPTION_RECURSIVE, OPTION_REVERSE, OPTION_MTIME, OPTION_SIZE, OPTION_ATIME); cf.parse(args); pathOnly = cf.getOpt(OPTION_PATHONLY); dirRecurse = !cf.getOpt(OPTION_DIRECTORY); setRecursive(cf.getOpt(OPTION_RECURSIVE) && dirRecurse); humanReadable = cf.getOpt(OPTION_HUMAN); + hideNonPrintable = cf.getOpt(OPTION_HIDENONPRINTABLE); orderReverse = cf.getOpt(OPTION_REVERSE); orderTime = cf.getOpt(OPTION_MTIME); orderSize = !orderTime && cf.getOpt(OPTION_SIZE); @@ -163,6 +172,11 @@ class Ls extends FsCommand { return this.humanReadable; } + @InterfaceAudience.Private + private boolean isHideNonPrintable() { + return hideNonPrintable; + } + /** * Should directory contents be displayed in reverse order * @return true reverse order, false default order @@ -241,7 +255,7 @@ class Ls extends FsCommand { dateFormat.format(new Date(isUseAtime() ? stat.getAccessTime() : stat.getModificationTime())), - item); + isHideNonPrintable() ? 
new PrintableString(item.toString()) : item); out.println(line); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PrintableString.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PrintableString.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PrintableString.java new file mode 100644 index 0000000..df68f40 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PrintableString.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.shell; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * The {@code PrintableString} class converts any string to a printable string + * by replacing non-printable characters with ?. 
+ * + * Categories of Unicode non-printable characters: + * <ul> + * <li> Control characters (Cc) + * <li> Formatting Unicode (Cf) + * <li> Private use Unicode (Co) + * <li> Unassigned Unicode (Cn) + * <li> Standalone surrogate (Unfortunately no matching Unicode category) + * </ul> + * + * @see Character + * @see <a href="http://www.unicode.org/">The Unicode Consortium</a> + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +class PrintableString { + private static final char REPLACEMENT_CHAR = '?'; + + private final String printableString; + + PrintableString(String rawString) { + StringBuilder stringBuilder = new StringBuilder(rawString.length()); + for (int offset = 0; offset < rawString.length();) { + int codePoint = rawString.codePointAt(offset); + offset += Character.charCount(codePoint); + + switch (Character.getType(codePoint)) { + case Character.CONTROL: // Cc + case Character.FORMAT: // Cf + case Character.PRIVATE_USE: // Co + case Character.SURROGATE: // Cs + case Character.UNASSIGNED: // Cn + stringBuilder.append(REPLACEMENT_CHAR); + break; + default: + stringBuilder.append(Character.toChars(codePoint)); + break; + } + } + printableString = stringBuilder.toString(); + } + + public String toString() { + return printableString; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md index 5790bb7..1723426 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md @@ -384,13 +384,14 @@ Return usage output. 
ls ---- -Usage: `hadoop fs -ls [-C] [-d] [-h] [-R] [-t] [-S] [-r] [-u] <args> ` +Usage: `hadoop fs -ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] <args> ` Options: * -C: Display the paths of files and directories only. * -d: Directories are listed as plain files. * -h: Format file sizes in a human-readable fashion (eg 64.0m instead of 67108864). +* -q: Print ? instead of non-printable characters. * -R: Recursively list subdirectories encountered. * -t: Sort output by modification time (most recent first). * -S: Sort output by file size. http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellList.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellList.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellList.java new file mode 100644 index 0000000..03720d3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellList.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs; + +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertThat; + +import org.apache.hadoop.conf.Configuration; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Test FsShell -ls command. + */ +public class TestFsShellList { + private static Configuration conf; + private static FsShell shell; + private static LocalFileSystem lfs; + private static Path testRootDir; + + @BeforeClass + public static void setup() throws Exception { + conf = new Configuration(); + shell = new FsShell(conf); + lfs = FileSystem.getLocal(conf); + lfs.setVerifyChecksum(true); + lfs.setWriteChecksum(true); + + String root = System.getProperty("test.build.data", "test/build/data"); + testRootDir = lfs.makeQualified(new Path(root, "testFsShellList")); + assertThat(lfs.mkdirs(testRootDir), is(true)); + } + + @AfterClass + public static void teardown() throws Exception { + lfs.delete(testRootDir, true); + } + + private void createFile(Path filePath) throws Exception { + FSDataOutputStream out = lfs.create(filePath); + out.writeChars("I am " + filePath); + out.close(); + assertThat(lfs.exists(lfs.getChecksumFile(filePath)), is(true)); + } + + @Test + public void testList() throws Exception { + createFile(new Path(testRootDir, "abc")); + String[] lsArgv = new String[]{"-ls", testRootDir.toString()}; + assertThat(shell.run(lsArgv), is(0)); + + createFile(new Path(testRootDir, "abc\bd\tef")); + createFile(new Path(testRootDir, "ghi")); + createFile(new Path(testRootDir, "qq\r123")); + lsArgv = new String[]{"-ls", testRootDir.toString()}; + assertThat(shell.run(lsArgv), is(0)); + + lsArgv = new String[]{"-ls", "-q", testRootDir.toString()}; + assertThat(shell.run(lsArgv), is(0)); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java 
---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java new file mode 100644 index 0000000..8e09fc2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.shell; + +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +import org.junit.Test; + +/** + * Test {@code PrintableString} class. + */ +public class TestPrintableString { + + private void expect(String reason, String raw, String expected) { + assertThat(reason, new PrintableString(raw).toString(), is(expected)); + } + + /** + * Test printable characters. 
+ */ + @Test + public void testPrintableCharacters() throws Exception { + // ASCII + expect("Should keep ASCII letter", "abcdef237", "abcdef237"); + expect("Should keep ASCII symbol", " !\"|}~", " !\"|}~"); + + // Unicode BMP + expect("Should keep Georgian U+1050 and Box Drawing U+2533", + "\u1050\u2533--", "\u1050\u2533--"); + + // Unicode SMP + expect("Should keep Linear B U+10000 and Phoenician U+10900", + "\uD800\uDC00'''\uD802\uDD00", "\uD800\uDC00'''\uD802\uDD00"); + } + + /** + * Test non-printable characters. + */ + @Test + public void testNonPrintableCharacters() throws Exception { + // Control characters + expect("Should replace single control character", "abc\rdef", "abc?def"); + expect("Should replace multiple control characters", + "\babc\tdef", "?abc?def"); + expect("Should replace all control characters", "\f\f\b\n", "????"); + expect("Should replace mixed characters starting with a control", + "\027ab\0", "?ab?"); + + // Formatting Unicode + expect("Should replace Byte Order Mark", "-\uFEFF--", "-?--"); + expect("Should replace Invisible Separator", "\u2063\t", "??"); + + // Private use Unicode + expect("Should replace private use U+E000", "\uE000", "?"); + expect("Should replace private use U+E123 and U+F432", + "\uE123abc\uF432", "?abc?"); + expect("Should replace private use in Plane 15 and 16: U+F0000 and " + + "U+10FFFD, but keep U+1050", + "x\uDB80\uDC00y\uDBFF\uDFFDz\u1050", "x?y?z\u1050"); + + // Unassigned Unicode + expect("Should replace unassigned U+30000 and U+DFFFF", + "-\uD880\uDC00-\uDB3F\uDFFF-", "-?-?-"); + + // Standalone surrogate character (not in a pair) + expect("Should replace standalone surrogate U+DB80", "x\uDB80yz", "x?yz"); + expect("Should replace standalone surrogate mixed with valid pair", + "x\uDB80\uD802\uDD00yz", "x?\uD802\uDD00yz"); + } +} \ No newline at end of file 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/package-info.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/package-info.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/package-info.java new file mode 100644 index 0000000..47a4e7a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Package for {@code org.apache.hadoop.fs.shell} test classes. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.shell; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml index 3ad6d66..bbbc1ec 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml @@ -54,7 +54,7 @@ <comparators> <comparator> <type>RegexpComparator</type> - <expected-output>^-ls \[-C\] \[-d\] \[-h\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[<path> \.\.\.\] :( |\t)*</expected-output> + <expected-output>^-ls \[-C\] \[-d\] \[-h\] \[-q\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[<path> \.\.\.\] :( |\t)*</expected-output> </comparator> <comparator> <type>RegexpComparator</type> @@ -106,6 +106,10 @@ </comparator> <comparator> <type>RegexpComparator</type> + <expected-output>^\s*-q\s+Print \? instead of non-printable characters\.( )*</expected-output> + </comparator> + <comparator> + <type>RegexpComparator</type> <expected-output>^\s*rather than a number of bytes\.( )*</expected-output> </comparator> <comparator> --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org