On Sat, Mar 05, 2011 at 01:45:40AM +0100, Stefan Sperling wrote:
> There are some display glitches when the UTF-8 locale is used.
> So far I know about hexdump -C (reported by naddy) and tcpdump -X.
> Both print invalid UTF-8 to the screen.
> 
> The problem is that latin1 characters end up being printed by applications
> that use ctype(3) functions like isprint(3). A latin1 character outside
> the ASCII range is not a valid UTF-8 sequence on its own, so it shouldn't
> be considered printable if the UTF-8 locale is active. However, isprint(3)
> currently returns non-zero for such characters in the UTF-8 locale.

No feedback yet. Anyone?
 
> The same problem was fixed in FreeBSD some time ago,
> albeit with a much more elaborate diff:
> http://svn.freebsd.org/viewvc/base?view=revision&revision=172619
> 
> Once this is fixed, fixing display glitches is as simple as calling
> setlocale() from affected applications so that the ctype tab is
> initialized correctly, as done below for hexdump and tcpdump.
> 
> Note that tcpdump needs to call setlocale() *before* dropping privs
> because it won't find the /usr/share/locale definition files after chroot().
> 
> While here, kill some dead code in __make_ctype_tabs().
> 
> It's probably also incorrect for the "C" locale to consider any
> non-ASCII characters printable, but that's another story.
> 
> Index: lib/libc/locale/runeglue.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/locale/runeglue.c,v
> retrieving revision 1.1
> diff -u -p -r1.1 runeglue.c
> --- lib/libc/locale/runeglue.c        7 Aug 2005 10:16:24 -0000       1.1
> +++ lib/libc/locale/runeglue.c        15 Jan 2011 15:36:08 -0000
> @@ -58,19 +58,29 @@
>  int
>  __make_ctype_tabs(_RuneLocale *rl)
>  {
> -     int i;
> +     int i, max_sb_limit;
>       struct old_tabs *p;
>  
>       p = malloc(sizeof *p);
>       if (!p)
>               return -1;
>  
> +     /* By default, fill the ctype tab completely. */
> +     max_sb_limit = CTYPE_NUM_CHARS;
> +
> +     /* In UTF-8-encoded locales, the single-byte ctype functions
> +      * must only return non-zero values for ASCII characters.
> +      * Any non-ASCII single-byte character is not a valid UTF-8 sequence.
> +      */
> +     if (strcmp(rl->rl_encoding, "UTF8") == 0)
> +             max_sb_limit = 128;
> +
>       rl->rl_tabs = p;
>       p->ctype_tab[0] = 0;
>       p->toupper_tab[0] = EOF;
>       p->tolower_tab[0] = EOF;
> -     for (i = 0; i < CTYPE_NUM_CHARS; i++) {
> -             p->ctype_tab[i + 1]=0;
> +     for (i = 0; i < max_sb_limit; i++) {
> +             p->ctype_tab[i + 1] = 0;
>               if (rl->rl_runetype[i] & _CTYPE_U)
>                       p->ctype_tab[i + 1] |= _U;
>               if (rl->rl_runetype[i] & _CTYPE_L)
> @@ -86,23 +96,22 @@ __make_ctype_tabs(_RuneLocale *rl)
>               if (rl->rl_runetype[i] & _CTYPE_X)
>                       p->ctype_tab[i + 1] |= _X;
>               /*
> -              * TWEAK!  _B has been used incorrectly (or with older
> -              * declaration) in ctype.h isprint() macro.
> +              * _B has been used incorrectly (or with older declaration)
> +              * in ctype.h isprint() macro.
>                * _B does not mean isblank, it means "isprint && !isgraph".
>                * the following is okay since isblank() was hardcoded in
>                * function (i.e. isblank() is inherently locale unfriendly).
>                */
> -#if 1
>               if ((rl->rl_runetype[i] & (_CTYPE_R | _CTYPE_G))
>                   == _CTYPE_R)
>                       p->ctype_tab[i + 1] |= _B;
> -#else
> -             if (rl->rl_runetype[i] & _CTYPE_B)
> -                     p->ctype_tab[i + 1] |= _B;
> -#endif
> +
>               p->toupper_tab[i + 1] = (short)rl->rl_mapupper[i];
>               p->tolower_tab[i + 1] = (short)rl->rl_maplower[i];
>       }
> +     for (i = max_sb_limit; i < CTYPE_NUM_CHARS; i++)
> +             p->ctype_tab[i + 1] = 0;
> +
>       return 0;
>  }
>  
> Index: usr.bin/hexdump/hexdump.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/hexdump/hexdump.c,v
> retrieving revision 1.14
> diff -u -p -r1.14 hexdump.c
> --- usr.bin/hexdump/hexdump.c 12 Oct 2010 17:23:21 -0000      1.14
> +++ usr.bin/hexdump/hexdump.c 15 Jan 2011 15:38:19 -0000
> @@ -32,6 +32,7 @@
>  
>  #include <sys/param.h>
>  #include <err.h>
> +#include <locale.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -73,6 +74,7 @@ main(int argc, char *argv[])
>               rewrite(tfs);
>  
>       (void)next(argv);
> +     (void)setlocale(LC_CTYPE, "");
>       display();
>       exit(exitval);
>  }
> Index: usr.sbin/tcpdump/privsep.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/tcpdump/privsep.c,v
> retrieving revision 1.28
> diff -u -p -r1.28 privsep.c
> --- usr.sbin/tcpdump/privsep.c        17 Apr 2009 22:31:24 -0000      1.28
> +++ usr.sbin/tcpdump/privsep.c        5 Mar 2011 00:23:55 -0000
> @@ -32,6 +32,7 @@
>  #include <err.h>
>  #include <errno.h>
>  #include <fcntl.h>
> +#include <locale.h>
>  #include <netdb.h>
>  #include <paths.h>
>  #include <pwd.h>
> @@ -161,6 +162,9 @@ priv_init(int argc, char **argv)
>               pw = getpwnam("_tcpdump");
>               if (pw == NULL)
>                       errx(1, "unknown user _tcpdump");
> +
> +             /* set the locale before chrooting */
> +             (void)setlocale(LC_CTYPE, "");
>  
>               /* chroot, drop privs and return */
>               if (chroot(pw->pw_dir) != 0)

Reply via email to