On 26/07/18 18:23, Paul Eggert wrote:
> Pádraig Brady wrote:
>> I've pushed the c_iscntrl patch since it's simplest
>> and probably most appropriate patch for an existing release.
> 
> Yes, that makes sense for a quick patch. However, for the next release I 
> think 
> it'd be better to catch encoding errors and multibyte control characters, 
> given 
> the problems noted. I installed the attached further patch to try to do this. 
> This fixes the problem that Bruno noted, along with two others; my earlier 
> patch 
> neglected the possibility that mbrtowc can return 0, and it incorrectly 
> assumed 
> wide control characters always have a single-byte representation.
> 
> Either way the original bug appears to be fix so I'm boldly closing the bug 
> report.

Reviewing this, I dislike the way that we're now enforcing that
the file system locale needs to match the current user's locale
or otherwise df will not output all original characters.
That has the potential to break scripts, as mismatched
encodings is a common issue.

In the attached I've taken the original less aggressive replacement
policy when not outputting to a tty, leaving more sanitizing to the tty case.

cheers,
Pádraig
>From 97bc0e17065950f96a6e1350d1ed8db65ebfee96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Sun, 3 Mar 2019 14:35:18 -0800
Subject: [PATCH] df: don't require file system and display encodings to match

* src/df.c (replace_problematic_chars): A new wrapper to be
more conservative in our replacement when not connected to a tty.
* tests/df/problematic-chars.sh: Add a test case.
---
 src/df.c                      | 34 +++++++++++++++++++++++++++++++---
 tests/df/problematic-chars.sh | 29 +++++++++++++++++++++++------
 2 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/src/df.c b/src/df.c
index 1eb7bcd..041f282 100644
--- a/src/df.c
+++ b/src/df.c
@@ -23,6 +23,7 @@
 #include <sys/types.h>
 #include <getopt.h>
 #include <assert.h>
+#include <c-ctype.h>
 #include <wchar.h>
 #include <wctype.h>
 
@@ -273,10 +274,26 @@ static struct option const long_options[] =
   {NULL, 0, NULL, 0}
 };
 
+/* Replace problematic chars with '?'.
+   Since only control characters are currently considered,
+   this should work in all encodings.  */
+
+static void
+replace_control_chars (char *cell)
+{
+  char *p = cell;
+  while (*p)
+    {
+      if (c_iscntrl (to_uchar (*p)))
+        *p = '?';
+      p++;
+    }
+}
+
 /* Replace problematic chars with '?'.  */
 
 static void
-hide_problematic_chars (char *cell)
+replace_invalid_chars (char *cell)
 {
   char *srcend = cell + strlen (cell);
   char *dst = cell;
@@ -310,6 +327,17 @@ hide_problematic_chars (char *cell)
   *dst = '\0';
 }
 
+static void
+replace_problematic_chars (char *cell)
+{
+  static int tty_out = -1;
+  if (tty_out < 0)
+    tty_out = isatty (STDOUT_FILENO);
+
+  (tty_out ? replace_invalid_chars : replace_control_chars) (cell) ;
+}
+
+
 /* Dynamically allocate a row of pointers in TABLE, which
    can then be accessed with standard 2D array notation.  */
 
@@ -591,7 +619,7 @@ get_header (void)
       if (!cell)
         xalloc_die ();
 
-      hide_problematic_chars (cell);
+      replace_problematic_chars (cell);
 
       table[nrows - 1][col] = cell;
 
@@ -1205,7 +1233,7 @@ get_dev (char const *disk, char const *mount_point, char const* file,
       if (!cell)
         assert (!"empty cell");
 
-      hide_problematic_chars (cell);
+      replace_problematic_chars (cell);
       size_t cell_width = mbswidth (cell, 0);
       columns[col]->width = MAX (columns[col]->width, cell_width);
       table[nrows - 1][col] = cell;
diff --git a/tests/df/problematic-chars.sh b/tests/df/problematic-chars.sh
index 34e743b..aa4c131 100755
--- a/tests/df/problematic-chars.sh
+++ b/tests/df/problematic-chars.sh
@@ -17,14 +17,17 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
-print_ver_ df
+print_ver_ df printf
 require_root_
 
+
+# Ensure a new line in a mount point only outputs a single line
+
 mnt='mount
 point'
 
 cwd=$(pwd)
-cleanup_() { cd /; umount "$cwd/$mnt"; }
+cleanup_() { umount "$cwd/$mnt"; }
 
 skip=0
 # Create a file system, then mount it.
@@ -33,14 +36,28 @@ dd if=/dev/zero of=blob bs=8192 count=200 > /dev/null 2>&1 \
 mkdir "$mnt"                                 || skip=1
 mkfs -t ext2 -F blob \
   || skip_ "failed to create ext2 file system"
-
 mount -oloop blob "$mnt"                     || skip=1
-
 test $skip = 1 \
   && skip_ "insufficient mount/ext2 support"
-
 test $(df "$mnt" | wc -l) = 2 || fail=1
-
 test "$fail" = 1 && dump_mount_list_
 
+
+# Ensure mount points not matching the current user encoding are output
+
+unset LC_ALL
+f=$LOCALE_FR_UTF8
+: ${LOCALE_FR_UTF8=none}
+if test "$LOCALE_FR_UTF8" != "none"; then
+
+  cleanup_ || framework_failure_
+
+  mnt="$(env printf 'm\xf3unt p\xf3int')"
+  mkdir "$mnt" || framework_failure_
+  mount -oloop blob "$mnt" || skip_ "unable to mount $mnt"
+
+  LC_ALL=$f df --output=target "$mnt" > df.out || fail=1
+  test "$(basename "$(tail -n1 df.out)")" = "$mnt" || fail=1
+fi
+
 Exit $fail
-- 
2.9.3

Reply via email to