Hi

These look fine to me.


On Sun, Apr 03, 2011 at 03:57:40PM +0200, Stefan Sperling wrote:
> On Sat, Mar 05, 2011 at 01:45:40AM +0100, Stefan Sperling wrote:
> > There are some display glitches when the UTF-8 locale is used.
> > So far I know about hexdump -C (reported by naddy) and tcpdump -X.
> > Both print invalid UTF-8 to the screen.
> > 
> > The problem is that latin1 characters end up being printed by applications
> > that use ctype(3) functions like isprint(3). Any latin1 characters that
> > are not ASCII aren't valid UTF-8 on their own, so they shouldn't be considered
> > printable if the UTF-8 locale is active. However, isprint(3) currently
> > returns non-zero for them in the UTF-8 locale.
> 
> No feedback yet. Anyone?
>  
> > The same problem was fixed in FreeBSD some time ago,
> > albeit with a much more elaborate diff:
> > http://svn.freebsd.org/viewvc/base?view=revision&revision=172619
> > 
> > Once this is fixed, fixing display glitches is as simple as calling
> > setlocale() from affected applications so that the ctype tab is
> > initialized correctly, as done below for hexdump and tcpdump.
> > 
> > Note that tcpdump needs to call setlocale() *before* dropping privs
> > because it won't find the /usr/share/locale definition files after chroot().
> > 
> > While here, kill some dead code in __make_ctype_tabs().
> > 
> > It's probably not correct for the "C" locale either to consider any
> > non-ASCII characters printable, but that's another story.
> > 
> > Index: lib/libc/locale/runeglue.c
> > ===================================================================
> > RCS file: /cvs/src/lib/libc/locale/runeglue.c,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 runeglue.c
> > --- lib/libc/locale/runeglue.c      7 Aug 2005 10:16:24 -0000       1.1
> > +++ lib/libc/locale/runeglue.c      15 Jan 2011 15:36:08 -0000
> > @@ -58,19 +58,29 @@
> >  int
> >  __make_ctype_tabs(_RuneLocale *rl)
> >  {
> > -   int i;
> > +   int i, max_sb_limit;
> >     struct old_tabs *p;
> >  
> >     p = malloc(sizeof *p);
> >     if (!p)
> >             return -1;
> >  
> > +   /* By default, fill the ctype tab completely. */
> > +   max_sb_limit = CTYPE_NUM_CHARS;
> > +
> > +   /* In UTF-8-encoded locales, the single-byte ctype functions
> > +    * must only return non-zero values for ASCII characters.
> > +    * Any non-ASCII single-byte character is not a valid UTF-8 sequence.
> > +    */
> > +   if (strcmp(rl->rl_encoding, "UTF8") == 0)
> > +           max_sb_limit = 128;
> > +
> >     rl->rl_tabs = p;
> >     p->ctype_tab[0] = 0;
> >     p->toupper_tab[0] = EOF;
> >     p->tolower_tab[0] = EOF;
> > -   for (i = 0; i < CTYPE_NUM_CHARS; i++) {
> > -           p->ctype_tab[i + 1]=0;
> > +   for (i = 0; i < max_sb_limit; i++) {
> > +           p->ctype_tab[i + 1] = 0;
> >             if (rl->rl_runetype[i] & _CTYPE_U)
> >                     p->ctype_tab[i + 1] |= _U;
> >             if (rl->rl_runetype[i] & _CTYPE_L)
> > @@ -86,23 +96,22 @@ __make_ctype_tabs(_RuneLocale *rl)
> >             if (rl->rl_runetype[i] & _CTYPE_X)
> >                     p->ctype_tab[i + 1] |= _X;
> >             /*
> > -            * TWEAK!  _B has been used incorrectly (or with older
> > -            * declaration) in ctype.h isprint() macro.
> > +            * _B has been used incorrectly (or with older declaration)
> > +            * in ctype.h isprint() macro.
> >              * _B does not mean isblank, it means "isprint && !isgraph".
> >              * the following is okay since isblank() was hardcoded in
> >              * function (i.e. isblank() is inherently locale unfriendly).
> >              */
> > -#if 1
> >             if ((rl->rl_runetype[i] & (_CTYPE_R | _CTYPE_G))
> >                 == _CTYPE_R)
> >                     p->ctype_tab[i + 1] |= _B;
> > -#else
> > -           if (rl->rl_runetype[i] & _CTYPE_B)
> > -                   p->ctype_tab[i + 1] |= _B;
> > -#endif
> > +
> >             p->toupper_tab[i + 1] = (short)rl->rl_mapupper[i];
> >             p->tolower_tab[i + 1] = (short)rl->rl_maplower[i];
> >     }
> > +   for (i = max_sb_limit; i < CTYPE_NUM_CHARS; i++)
> > +           p->ctype_tab[i + 1] = 0;
> > +
> >     return 0;
> >  }
> >  
> > Index: usr.bin/hexdump/hexdump.c
> > ===================================================================
> > RCS file: /cvs/src/usr.bin/hexdump/hexdump.c,v
> > retrieving revision 1.14
> > diff -u -p -r1.14 hexdump.c
> > --- usr.bin/hexdump/hexdump.c       12 Oct 2010 17:23:21 -0000      1.14
> > +++ usr.bin/hexdump/hexdump.c       15 Jan 2011 15:38:19 -0000
> > @@ -32,6 +32,7 @@
> >  
> >  #include <sys/param.h>
> >  #include <err.h>
> > +#include <locale.h>
> >  #include <stdio.h>
> >  #include <stdlib.h>
> >  #include <string.h>
> > @@ -73,6 +74,7 @@ main(int argc, char *argv[])
> >             rewrite(tfs);
> >  
> >     (void)next(argv);
> > +   (void)setlocale(LC_CTYPE, "");
> >     display();
> >     exit(exitval);
> >  }
> > Index: usr.sbin/tcpdump/privsep.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/tcpdump/privsep.c,v
> > retrieving revision 1.28
> > diff -u -p -r1.28 privsep.c
> > --- usr.sbin/tcpdump/privsep.c      17 Apr 2009 22:31:24 -0000      1.28
> > +++ usr.sbin/tcpdump/privsep.c      5 Mar 2011 00:23:55 -0000
> > @@ -32,6 +32,7 @@
> >  #include <err.h>
> >  #include <errno.h>
> >  #include <fcntl.h>
> > +#include <locale.h>
> >  #include <netdb.h>
> >  #include <paths.h>
> >  #include <pwd.h>
> > @@ -161,6 +162,9 @@ priv_init(int argc, char **argv)
> >             pw = getpwnam("_tcpdump");
> >             if (pw == NULL)
> >                     errx(1, "unknown user _tcpdump");
> > +
> > +           /* set the locale before chrooting */
> > +           (void)setlocale(LC_CTYPE, "");
> >  
> >             /* chroot, drop privs and return */
> >             if (chroot(pw->pw_dir) != 0)

Reply via email to