Hi,

These look fine to me.
On Sun, Apr 03, 2011 at 03:57:40PM +0200, Stefan Sperling wrote: > On Sat, Mar 05, 2011 at 01:45:40AM +0100, Stefan Sperling wrote: > > There are some display glitches when the UTF-8 locale is used. > > So far I know about hexdump -C (reported by naddy) and tcpdump -X. > > Both print invalid UTF-8 to the screen. > > > > The problem is that latin1 characters end up being printed by applications > > that use ctype(3) functions like isprint(3). Any latin1 characters that > > are not ASCII aren't valid UTF-8, so they shouldn't be considered > > printable if the UTF-8 locale is active. However, isprint(3) currently > > returns non-zero for them in the UTF-8 locale. > > No feedback yet. Anyone? > > > The same problem has been fixed in FreeBSD some time ago, > > albeit with a much more elaborate diff: > > http://svn.freebsd.org/viewvc/base?view=revision&revision=172619 > > > > Once this is fixed, fixing display glitches is as simple as calling > > setlocale() from affected applications so that the ctype tab is > > initialized correctly, as done below for hexdump and tcpdump. > > > > Note that tcpdump needs to call setlocale() *before* dropping privs > > because it won't find the /usr/share/locale definition files after chroot(). > > > > While here, kill some dead code in __make_ctype_tabs(). > > > > It's probably not correct for the "C" locale either to consider any > > non-ASCII characters printable, but that's another story. 
> > > > Index: lib/libc/locale/runeglue.c > > =================================================================== > > RCS file: /cvs/src/lib/libc/locale/runeglue.c,v > > retrieving revision 1.1 > > diff -u -p -r1.1 runeglue.c > > --- lib/libc/locale/runeglue.c 7 Aug 2005 10:16:24 -0000 1.1 > > +++ lib/libc/locale/runeglue.c 15 Jan 2011 15:36:08 -0000 > > @@ -58,19 +58,29 @@ > > int > > __make_ctype_tabs(_RuneLocale *rl) > > { > > - int i; > > + int i, max_sb_limit; > > struct old_tabs *p; > > > > p = malloc(sizeof *p); > > if (!p) > > return -1; > > > > + /* By default, fill the ctype tab completely. */ > > + max_sb_limit = CTYPE_NUM_CHARS; > > + > > + /* In UTF-8-encoded locales, the single-byte ctype functions > > + * must only return non-zero values for ASCII characters. > > + * Any non-ASCII single-byte character is not a valid UTF-8 sequence. > > + */ > > + if (strcmp(rl->rl_encoding, "UTF8") == 0) > > + max_sb_limit = 128; > > + > > rl->rl_tabs = p; > > p->ctype_tab[0] = 0; > > p->toupper_tab[0] = EOF; > > p->tolower_tab[0] = EOF; > > - for (i = 0; i < CTYPE_NUM_CHARS; i++) { > > - p->ctype_tab[i + 1]=0; > > + for (i = 0; i < max_sb_limit; i++) { > > + p->ctype_tab[i + 1] = 0; > > if (rl->rl_runetype[i] & _CTYPE_U) > > p->ctype_tab[i + 1] |= _U; > > if (rl->rl_runetype[i] & _CTYPE_L) > > @@ -86,23 +96,22 @@ __make_ctype_tabs(_RuneLocale *rl) > > if (rl->rl_runetype[i] & _CTYPE_X) > > p->ctype_tab[i + 1] |= _X; > > /* > > - * TWEAK! _B has been used incorrectly (or with older > > - * declaration) in ctype.h isprint() macro. > > + * _B has been used incorrectly (or with older declaration) > > + * in ctype.h isprint() macro. > > * _B does not mean isblank, it means "isprint && !isgraph". > > * the following is okay since isblank() was hardcoded in > > * function (i.e. isblank() is inherently locale unfriendly). 
> > */ > > -#if 1 > > if ((rl->rl_runetype[i] & (_CTYPE_R | _CTYPE_G)) > > == _CTYPE_R) > > p->ctype_tab[i + 1] |= _B; > > -#else > > - if (rl->rl_runetype[i] & _CTYPE_B) > > - p->ctype_tab[i + 1] |= _B; > > -#endif > > + > > p->toupper_tab[i + 1] = (short)rl->rl_mapupper[i]; > > p->tolower_tab[i + 1] = (short)rl->rl_maplower[i]; > > } > > + for (i = max_sb_limit; i < CTYPE_NUM_CHARS; i++) > > + p->ctype_tab[i + 1] = 0; > > + > > return 0; > > } > > > > Index: usr.bin/hexdump/hexdump.c > > =================================================================== > > RCS file: /cvs/src/usr.bin/hexdump/hexdump.c,v > > retrieving revision 1.14 > > diff -u -p -r1.14 hexdump.c > > --- usr.bin/hexdump/hexdump.c 12 Oct 2010 17:23:21 -0000 1.14 > > +++ usr.bin/hexdump/hexdump.c 15 Jan 2011 15:38:19 -0000 > > @@ -32,6 +32,7 @@ > > > > #include <sys/param.h> > > #include <err.h> > > +#include <locale.h> > > #include <stdio.h> > > #include <stdlib.h> > > #include <string.h> > > @@ -73,6 +74,7 @@ main(int argc, char *argv[]) > > rewrite(tfs); > > > > (void)next(argv); > > + (void)setlocale(LC_CTYPE, ""); > > display(); > > exit(exitval); > > } > > Index: usr.sbin/tcpdump/privsep.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/tcpdump/privsep.c,v > > retrieving revision 1.28 > > diff -u -p -r1.28 privsep.c > > --- usr.sbin/tcpdump/privsep.c 17 Apr 2009 22:31:24 -0000 1.28 > > +++ usr.sbin/tcpdump/privsep.c 5 Mar 2011 00:23:55 -0000 > > @@ -32,6 +32,7 @@ > > #include <err.h> > > #include <errno.h> > > #include <fcntl.h> > > +#include <locale.h> > > #include <netdb.h> > > #include <paths.h> > > #include <pwd.h> > > @@ -161,6 +162,9 @@ priv_init(int argc, char **argv) > > pw = getpwnam("_tcpdump"); > > if (pw == NULL) > > errx(1, "unknown user _tcpdump"); > > + > > + /* set the locale before chrooting */ > > + (void)setlocale(LC_CTYPE, ""); > > > > /* chroot, drop privs and return */ > > if (chroot(pw->pw_dir) != 0)
