On Sat, Mar 05, 2011 at 01:45:40AM +0100, Stefan Sperling wrote:
> There are some display glitches when the UTF-8 locale is used.
> So far I know about hexdump -C (reported by naddy) and tcpdump -X.
> Both print invalid UTF-8 to the screen.
>
> The problem is that latin1 characters end up being printed by applications
> that use ctype(3) functions like isprint(3). Any latin1 characters that
> are not ASCII aren't valid UTF-8, so they shouldn't be considered
> printable if the UTF-8 locale is active. However, isprint(3) currently
> returns non-zero for them in the UTF-8 locale.
No feedback yet. Anyone?

> The same problem has been fixed in FreeBSD some time ago,
> albeit with a much more elaborate diff:
> http://svn.freebsd.org/viewvc/base?view=revision&revision=172619
>
> Once this is fixed, fixing display glitches is as simple as calling
> setlocale() from affected applications so that the ctype tab is
> initialized correctly, as done below for hexdump and tcpdump.
>
> Note that tcpdump needs to call setlocale() *before* dropping privs
> because it won't find the /usr/share/locale definition files after chroot().
>
> While here, kill some dead code in __make_ctype_tabs().
>
> It's probably not correct for the "C" locale either to consider any
> non-ASCII characters printable, but that's another story.
>
> Index: lib/libc/locale/runeglue.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/locale/runeglue.c,v
> retrieving revision 1.1
> diff -u -p -r1.1 runeglue.c
> --- lib/libc/locale/runeglue.c	7 Aug 2005 10:16:24 -0000	1.1
> +++ lib/libc/locale/runeglue.c	15 Jan 2011 15:36:08 -0000
> @@ -58,19 +58,29 @@
>  int
>  __make_ctype_tabs(_RuneLocale *rl)
>  {
> -	int i;
> +	int i, max_sb_limit;
>  	struct old_tabs *p;
>
>  	p = malloc(sizeof *p);
>  	if (!p)
>  		return -1;
>
> +	/* By default, fill the ctype tab completely. */
> +	max_sb_limit = CTYPE_NUM_CHARS;
> +
> +	/* In UTF-8-encoded locales, the single-byte ctype functions
> +	 * must only return non-zero values for ASCII characters.
> +	 * Any non-ASCII single-byte character is not a valid UTF-8 sequence.
> +	 */
> +	if (strcmp(rl->rl_encoding, "UTF8") == 0)
> +		max_sb_limit = 128;
> +
>  	rl->rl_tabs = p;
>  	p->ctype_tab[0] = 0;
>  	p->toupper_tab[0] = EOF;
>  	p->tolower_tab[0] = EOF;
> -	for (i = 0; i < CTYPE_NUM_CHARS; i++) {
> -		p->ctype_tab[i + 1]=0;
> +	for (i = 0; i < max_sb_limit; i++) {
> +		p->ctype_tab[i + 1] = 0;
>  		if (rl->rl_runetype[i] & _CTYPE_U)
>  			p->ctype_tab[i + 1] |= _U;
>  		if (rl->rl_runetype[i] & _CTYPE_L)
> @@ -86,23 +96,22 @@ __make_ctype_tabs(_RuneLocale *rl)
>  		if (rl->rl_runetype[i] & _CTYPE_X)
>  			p->ctype_tab[i + 1] |= _X;
>  		/*
> -		 * TWEAK! _B has been used incorrectly (or with older
> -		 * declaration) in ctype.h isprint() macro.
> +		 * _B has been used incorrectly (or with older declaration)
> +		 * in ctype.h isprint() macro.
>  		 * _B does not mean isblank, it means "isprint && !isgraph".
>  		 * the following is okay since isblank() was hardcoded in
>  		 * function (i.e. isblank() is inherently locale unfriendly).
>  		 */
> -#if 1
>  		if ((rl->rl_runetype[i] & (_CTYPE_R | _CTYPE_G))
>  		    == _CTYPE_R)
>  			p->ctype_tab[i + 1] |= _B;
> -#else
> -		if (rl->rl_runetype[i] & _CTYPE_B)
> -			p->ctype_tab[i + 1] |= _B;
> -#endif
> +
>  		p->toupper_tab[i + 1] = (short)rl->rl_mapupper[i];
>  		p->tolower_tab[i + 1] = (short)rl->rl_maplower[i];
>  	}
> +	for (i = max_sb_limit; i < CTYPE_NUM_CHARS; i++)
> +		p->ctype_tab[i + 1] = 0;
> +
>  	return 0;
>  }
>
> Index: usr.bin/hexdump/hexdump.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/hexdump/hexdump.c,v
> retrieving revision 1.14
> diff -u -p -r1.14 hexdump.c
> --- usr.bin/hexdump/hexdump.c	12 Oct 2010 17:23:21 -0000	1.14
> +++ usr.bin/hexdump/hexdump.c	15 Jan 2011 15:38:19 -0000
> @@ -32,6 +32,7 @@
>
>  #include <sys/param.h>
>  #include <err.h>
> +#include <locale.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -73,6 +74,7 @@ main(int argc, char *argv[])
>  	rewrite(tfs);
>
>  	(void)next(argv);
> +	(void)setlocale(LC_CTYPE, "");
>  	display();
>  	exit(exitval);
>  }
>
> Index: usr.sbin/tcpdump/privsep.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/tcpdump/privsep.c,v
> retrieving revision 1.28
> diff -u -p -r1.28 privsep.c
> --- usr.sbin/tcpdump/privsep.c	17 Apr 2009 22:31:24 -0000	1.28
> +++ usr.sbin/tcpdump/privsep.c	5 Mar 2011 00:23:55 -0000
> @@ -32,6 +32,7 @@
>  #include <err.h>
>  #include <errno.h>
>  #include <fcntl.h>
> +#include <locale.h>
>  #include <netdb.h>
>  #include <paths.h>
>  #include <pwd.h>
> @@ -161,6 +162,9 @@ priv_init(int argc, char **argv)
>  	pw = getpwnam("_tcpdump");
>  	if (pw == NULL)
>  		errx(1, "unknown user _tcpdump");
> +
> +	/* set the locale before chrooting */
> +	(void)setlocale(LC_CTYPE, "");
>
>  	/* chroot, drop privs and return */
>  	if (chroot(pw->pw_dir) != 0)