HADOOP-13079. Add -q option to Ls to print ? instead of non-printable 
characters. Contributed by John Zhuge.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0accc330
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0accc330
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0accc330

Branch: refs/heads/HDFS-7240
Commit: 0accc3306d830c3f2b16c4b8abf68729c7aba6cb
Parents: 28b66ae
Author: Andrew Wang <w...@apache.org>
Authored: Mon Jun 13 11:43:46 2016 -0700
Committer: Andrew Wang <w...@apache.org>
Committed: Mon Jun 13 11:43:46 2016 -0700

----------------------------------------------------------------------
 .../java/org/apache/hadoop/fs/shell/Ls.java     | 28 +++++--
 .../apache/hadoop/fs/shell/PrintableString.java | 72 ++++++++++++++++
 .../src/site/markdown/FileSystemShell.md        |  3 +-
 .../org/apache/hadoop/fs/TestFsShellList.java   | 78 ++++++++++++++++++
 .../hadoop/fs/shell/TestPrintableString.java    | 87 ++++++++++++++++++++
 .../apache/hadoop/fs/shell/package-info.java    | 26 ++++++
 .../src/test/resources/testConf.xml             |  6 +-
 7 files changed, 291 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java
index 67348c6..47e87f5 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java
@@ -48,6 +48,7 @@ class Ls extends FsCommand {
   private static final String OPTION_PATHONLY = "C";
   private static final String OPTION_DIRECTORY = "d";
   private static final String OPTION_HUMAN = "h";
+  private static final String OPTION_HIDENONPRINTABLE = "q";
   private static final String OPTION_RECURSIVE = "R";
   private static final String OPTION_REVERSE = "r";
   private static final String OPTION_MTIME = "t";
@@ -55,10 +56,11 @@ class Ls extends FsCommand {
   private static final String OPTION_SIZE = "S";
 
   public static final String NAME = "ls";
-  public static final String USAGE = "[-" + OPTION_PATHONLY + "] [-"
-      + OPTION_DIRECTORY + "] [-" + OPTION_HUMAN + "] [-" + OPTION_RECURSIVE
-      + "] [-" + OPTION_MTIME + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE
-      + "] [-" + OPTION_ATIME + "] [<path> ...]";
+  public static final String USAGE = "[-" + OPTION_PATHONLY + "] [-" +
+      OPTION_DIRECTORY + "] [-" + OPTION_HUMAN + "] [-" +
+      OPTION_HIDENONPRINTABLE + "] [-" + OPTION_RECURSIVE + "] [-" +
+      OPTION_MTIME + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE + "] [-" +
+      OPTION_ATIME + "] [<path> ...]";
 
   public static final String DESCRIPTION =
       "List the contents that match the specified file pattern. If " +
@@ -77,6 +79,8 @@ class Ls extends FsCommand {
           "  -" + OPTION_HUMAN +
           "  Formats the sizes of files in a human-readable fashion\n" +
           "      rather than a number of bytes.\n" +
+          "  -" + OPTION_HIDENONPRINTABLE +
+          "  Print ? instead of non-printable characters.\n" +
           "  -" + OPTION_RECURSIVE +
           "  Recursively list the contents of directories.\n" +
           "  -" + OPTION_MTIME +
@@ -104,6 +108,9 @@ class Ls extends FsCommand {
 
   protected boolean humanReadable = false;
 
+  /** Whether to print ? instead of non-printable characters. */
+  private boolean hideNonPrintable = false;
+
   protected Ls() {}
 
   protected Ls(Configuration conf) {
@@ -119,14 +126,16 @@ class Ls extends FsCommand {
   @Override
   protected void processOptions(LinkedList<String> args)
   throws IOException {
-    CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE, OPTION_PATHONLY,
-        OPTION_DIRECTORY, OPTION_HUMAN, OPTION_RECURSIVE, OPTION_REVERSE,
+    CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE,
+        OPTION_PATHONLY, OPTION_DIRECTORY, OPTION_HUMAN,
+        OPTION_HIDENONPRINTABLE, OPTION_RECURSIVE, OPTION_REVERSE,
         OPTION_MTIME, OPTION_SIZE, OPTION_ATIME);
     cf.parse(args);
     pathOnly = cf.getOpt(OPTION_PATHONLY);
     dirRecurse = !cf.getOpt(OPTION_DIRECTORY);
     setRecursive(cf.getOpt(OPTION_RECURSIVE) && dirRecurse);
     humanReadable = cf.getOpt(OPTION_HUMAN);
+    hideNonPrintable = cf.getOpt(OPTION_HIDENONPRINTABLE);
     orderReverse = cf.getOpt(OPTION_REVERSE);
     orderTime = cf.getOpt(OPTION_MTIME);
     orderSize = !orderTime && cf.getOpt(OPTION_SIZE);
@@ -163,6 +172,11 @@ class Ls extends FsCommand {
     return this.humanReadable;
   }
 
+  @InterfaceAudience.Private
+  private boolean isHideNonPrintable() {
+    return hideNonPrintable;
+  }
+
   /**
    * Should directory contents be displayed in reverse order
    * @return true reverse order, false default order
@@ -241,7 +255,7 @@ class Ls extends FsCommand {
         dateFormat.format(new Date(isUseAtime()
             ? stat.getAccessTime()
             : stat.getModificationTime())),
-        item);
+        isHideNonPrintable() ? new PrintableString(item.toString()) : item);
     out.println(line);
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PrintableString.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PrintableString.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PrintableString.java
new file mode 100644
index 0000000..df68f40
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PrintableString.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.shell;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * The {@code PrintableString} class converts any string to a printable string
+ * by replacing non-printable characters with ?.
+ *
+ * Categories of Unicode non-printable characters:
+ * <ul>
+ * <li> Control characters   (Cc)
+ * <li> Formatting Unicode   (Cf)
+ * <li> Private use Unicode  (Co)
+ * <li> Unassigned Unicode   (Cn)
+ * <li> Standalone surrogate (Cs, reported only for an unpaired surrogate)
+ * </ul>
+ *
+ * @see Character
+ * @see <a href="http://www.unicode.org/">The Unicode Consortium</a>
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+class PrintableString {
+  private static final char REPLACEMENT_CHAR = '?';
+
+  private final String printableString;
+
+  PrintableString(String rawString) {
+    StringBuilder stringBuilder = new StringBuilder(rawString.length());
+    for (int offset = 0; offset < rawString.length();) {
+      int codePoint = rawString.codePointAt(offset);
+      offset += Character.charCount(codePoint);
+
+      switch (Character.getType(codePoint)) {
+      case Character.CONTROL:     // Cc
+      case Character.FORMAT:      // Cf
+      case Character.PRIVATE_USE: // Co
+      case Character.SURROGATE:   // Cs
+      case Character.UNASSIGNED:  // Cn
+        stringBuilder.append(REPLACEMENT_CHAR);
+        break;
+      default:
+        stringBuilder.append(Character.toChars(codePoint));
+        break;
+      }
+    }
+    printableString = stringBuilder.toString();
+  }
+
+  public String toString() {
+    return printableString;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
index 5790bb7..1723426 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
@@ -384,13 +384,14 @@ Return usage output.
 ls
 ----
 
-Usage: `hadoop fs -ls [-C] [-d] [-h] [-R] [-t] [-S] [-r] [-u] <args> `
+Usage: `hadoop fs -ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] <args> `
 
 Options:
 
 * -C: Display the paths of files and directories only.
 * -d: Directories are listed as plain files.
 * -h: Format file sizes in a human-readable fashion (eg 64.0m instead of 67108864).
+* -q: Print ? instead of non-printable characters.
 * -R: Recursively list subdirectories encountered.
 * -t: Sort output by modification time (most recent first).
 * -S: Sort output by file size.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellList.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellList.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellList.java
new file mode 100644
index 0000000..03720d3
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellList.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Test FsShell -ls command.
+ */
+public class TestFsShellList {
+  private static Configuration conf;
+  private static FsShell shell;
+  private static LocalFileSystem lfs;
+  private static Path testRootDir;
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    conf = new Configuration();
+    shell = new FsShell(conf);
+    lfs = FileSystem.getLocal(conf);
+    lfs.setVerifyChecksum(true);
+    lfs.setWriteChecksum(true);
+
+    String root = System.getProperty("test.build.data", "test/build/data");
+    testRootDir = lfs.makeQualified(new Path(root, "testFsShellList"));
+    assertThat(lfs.mkdirs(testRootDir), is(true));
+  }
+
+  @AfterClass
+  public static void teardown() throws Exception {
+    lfs.delete(testRootDir, true);
+  }
+
+  private void createFile(Path filePath) throws Exception {
+    FSDataOutputStream out = lfs.create(filePath);
+    out.writeChars("I am " + filePath);
+    out.close();
+    assertThat(lfs.exists(lfs.getChecksumFile(filePath)), is(true));
+  }
+
+  @Test
+  public void testList() throws Exception {
+    createFile(new Path(testRootDir, "abc"));
+    String[] lsArgv = new String[]{"-ls", testRootDir.toString()};
+    assertThat(shell.run(lsArgv), is(0));
+
+    createFile(new Path(testRootDir, "abc\bd\tef"));
+    createFile(new Path(testRootDir, "ghi"));
+    createFile(new Path(testRootDir, "qq\r123"));
+    lsArgv = new String[]{"-ls", testRootDir.toString()};
+    assertThat(shell.run(lsArgv), is(0));
+
+    lsArgv = new String[]{"-ls", "-q", testRootDir.toString()};
+    assertThat(shell.run(lsArgv), is(0));
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java
new file mode 100644
index 0000000..8e09fc2
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.shell;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import org.junit.Test;
+
+/**
+ * Test {@code PrintableString} class.
+ */
+public class TestPrintableString {
+
+  private void expect(String reason, String raw, String expected) {
+    assertThat(reason, new PrintableString(raw).toString(), is(expected));
+  }
+
+  /**
+   * Test printable characters.
+   */
+  @Test
+  public void testPrintableCharacters() throws Exception {
+    // ASCII
+    expect("Should keep ASCII letter", "abcdef237", "abcdef237");
+    expect("Should keep ASCII symbol", " !\"|}~", " !\"|}~");
+
+    // Unicode BMP
+    expect("Should keep Georgian U+1050 and Box Drawing U+2533",
+        "\u1050\u2533--", "\u1050\u2533--");
+
+    // Unicode SMP
+    expect("Should keep Linear B U+10000 and Phoenician U+10900",
+        "\uD800\uDC00'''\uD802\uDD00", "\uD800\uDC00'''\uD802\uDD00");
+  }
+
+  /**
+   * Test non-printable characters.
+   */
+  @Test
+  public void testNonPrintableCharacters() throws Exception {
+    // Control characters
+    expect("Should replace single control character", "abc\rdef", "abc?def");
+    expect("Should replace multiple control characters",
+        "\babc\tdef", "?abc?def");
+    expect("Should replace all control characters", "\f\f\b\n", "????");
+    expect("Should replace mixed characters starting with a control",
+        "\027ab\0", "?ab?");
+
+    // Formatting Unicode
+    expect("Should replace Byte Order Mark", "-\uFEFF--", "-?--");
+    expect("Should replace Invisible Separator", "\u2063\t", "??");
+
+    // Private use Unicode
+    expect("Should replace private use U+E000", "\uE000", "?");
+    expect("Should replace private use U+E123 and U+F432",
+        "\uE123abc\uF432", "?abc?");
+    expect("Should replace private use in Plane 15 and 16: U+F0000 and " +
+        "U+10FFFD, but keep U+1050",
+        "x\uDB80\uDC00y\uDBFF\uDFFDz\u1050", "x?y?z\u1050");
+
+    // Unassigned Unicode
+    expect("Should replace unassigned U+30000 and U+DFFFF",
+        "-\uD880\uDC00-\uDB3F\uDFFF-", "-?-?-");
+
+    // Standalone surrogate character (not in a pair)
+    expect("Should replace standalone surrogate U+DB80", "x\uDB80yz", "x?yz");
+    expect("Should replace standalone surrogate mixed with valid pair",
+        "x\uDB80\uD802\uDD00yz", "x?\uD802\uDD00yz");
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/package-info.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/package-info.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/package-info.java
new file mode 100644
index 0000000..47a4e7a
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/package-info.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Package for {@code org.apache.hadoop.fs.shell} test classes.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+package org.apache.hadoop.fs.shell;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0accc330/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
index 3ad6d66..bbbc1ec 100644
--- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
+++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
@@ -54,7 +54,7 @@
       <comparators>
         <comparator>
           <type>RegexpComparator</type>
-          <expected-output>^-ls \[-C\] \[-d\] \[-h\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[&lt;path&gt; \.\.\.\] :( |\t)*</expected-output>
+          <expected-output>^-ls \[-C\] \[-d\] \[-h\] \[-q\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[&lt;path&gt; \.\.\.\] :( |\t)*</expected-output>
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
@@ -106,6 +106,10 @@
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
+          <expected-output>^\s*-q\s+Print \? instead of non-printable characters\.( )*</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
           <expected-output>^\s*rather than a number of bytes\.( )*</expected-output>
         </comparator>
         <comparator>


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to