There are some display glitches when the UTF-8 locale is used.
So far I know about hexdump -C (reported by naddy) and tcpdump -X.
Both print invalid UTF-8 to the screen.

The problem is that non-ASCII latin1 bytes end up being printed by
applications that use ctype(3) functions like isprint(3) to decide what
is printable. A lone byte outside the ASCII range is not a valid UTF-8
sequence, so it shouldn't be considered printable while the UTF-8 locale
is active. However, isprint(3) currently returns non-zero for such bytes
in the UTF-8 locale.

The same problem was fixed in FreeBSD some time ago,
albeit with a much more elaborate diff:
http://svn.freebsd.org/viewvc/base?view=revision&revision=172619

Once this is fixed, getting rid of the display glitches is as simple as
calling setlocale() from the affected applications so that the ctype table
is initialized correctly, as done below for hexdump and tcpdump.

Note that tcpdump needs to call setlocale() *before* dropping privs
because it won't find the /usr/share/locale definition files after chroot().

While here, kill some dead code in __make_ctype_tabs().

It's probably not correct to consider any non-ASCII characters printable
in the "C" locale either, but that's another story.

Index: lib/libc/locale/runeglue.c
===================================================================
RCS file: /cvs/src/lib/libc/locale/runeglue.c,v
retrieving revision 1.1
diff -u -p -r1.1 runeglue.c
--- lib/libc/locale/runeglue.c  7 Aug 2005 10:16:24 -0000       1.1
+++ lib/libc/locale/runeglue.c  15 Jan 2011 15:36:08 -0000
@@ -58,19 +58,29 @@
 int
 __make_ctype_tabs(_RuneLocale *rl)
 {
-       int i;
+       int i, max_sb_limit;
        struct old_tabs *p;
 
        p = malloc(sizeof *p);
        if (!p)
                return -1;
 
+       /* By default, fill the ctype tab completely. */
+       max_sb_limit = CTYPE_NUM_CHARS;
+
+       /* In UTF-8-encoded locales, the single-byte ctype functions
+        * must only return non-zero values for ASCII characters.
+        * Any non-ASCII single-byte character is not a valid UTF-8 sequence.
+        */
+       if (strcmp(rl->rl_encoding, "UTF8") == 0)
+               max_sb_limit = 128;
+
        rl->rl_tabs = p;
        p->ctype_tab[0] = 0;
        p->toupper_tab[0] = EOF;
        p->tolower_tab[0] = EOF;
-       for (i = 0; i < CTYPE_NUM_CHARS; i++) {
-               p->ctype_tab[i + 1]=0;
+       for (i = 0; i < max_sb_limit; i++) {
+               p->ctype_tab[i + 1] = 0;
                if (rl->rl_runetype[i] & _CTYPE_U)
                        p->ctype_tab[i + 1] |= _U;
                if (rl->rl_runetype[i] & _CTYPE_L)
@@ -86,23 +96,22 @@ __make_ctype_tabs(_RuneLocale *rl)
                if (rl->rl_runetype[i] & _CTYPE_X)
                        p->ctype_tab[i + 1] |= _X;
                /*
-                * TWEAK!  _B has been used incorrectly (or with older
-                * declaration) in ctype.h isprint() macro.
+                * _B has been used incorrectly (or with older declaration)
+                * in ctype.h isprint() macro.
                 * _B does not mean isblank, it means "isprint && !isgraph".
                 * the following is okay since isblank() was hardcoded in
                 * function (i.e. isblank() is inherently locale unfriendly).
                 */
-#if 1
                if ((rl->rl_runetype[i] & (_CTYPE_R | _CTYPE_G))
                    == _CTYPE_R)
                        p->ctype_tab[i + 1] |= _B;
-#else
-               if (rl->rl_runetype[i] & _CTYPE_B)
-                       p->ctype_tab[i + 1] |= _B;
-#endif
+
                p->toupper_tab[i + 1] = (short)rl->rl_mapupper[i];
                p->tolower_tab[i + 1] = (short)rl->rl_maplower[i];
        }
+       for (i = max_sb_limit; i < CTYPE_NUM_CHARS; i++)
+               p->ctype_tab[i + 1] = 0;
+
        return 0;
 }
 
Index: usr.bin/hexdump/hexdump.c
===================================================================
RCS file: /cvs/src/usr.bin/hexdump/hexdump.c,v
retrieving revision 1.14
diff -u -p -r1.14 hexdump.c
--- usr.bin/hexdump/hexdump.c   12 Oct 2010 17:23:21 -0000      1.14
+++ usr.bin/hexdump/hexdump.c   15 Jan 2011 15:38:19 -0000
@@ -32,6 +32,7 @@
 
 #include <sys/param.h>
 #include <err.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -73,6 +74,7 @@ main(int argc, char *argv[])
                rewrite(tfs);
 
        (void)next(argv);
+       (void)setlocale(LC_CTYPE, "");
        display();
        exit(exitval);
 }
Index: usr.sbin/tcpdump/privsep.c
===================================================================
RCS file: /cvs/src/usr.sbin/tcpdump/privsep.c,v
retrieving revision 1.28
diff -u -p -r1.28 privsep.c
--- usr.sbin/tcpdump/privsep.c  17 Apr 2009 22:31:24 -0000      1.28
+++ usr.sbin/tcpdump/privsep.c  5 Mar 2011 00:23:55 -0000
@@ -32,6 +32,7 @@
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <locale.h>
 #include <netdb.h>
 #include <paths.h>
 #include <pwd.h>
@@ -161,6 +162,9 @@ priv_init(int argc, char **argv)
                pw = getpwnam("_tcpdump");
                if (pw == NULL)
                        errx(1, "unknown user _tcpdump");
+
+               /* set the locale before chrooting */
+               (void)setlocale(LC_CTYPE, "");
 
                /* chroot, drop privs and return */
                if (chroot(pw->pw_dir) != 0)

Reply via email to