On 2015/10/27 06:51, Ted Unangst wrote:
> This adds a quite limited understanding of utf-8 to hexdump. I've found it
> helpful trying to see exactly what's coming out of some utilities instead of
> trying to decode utf-8 by hand.

Should it only do this when the output is going to a UTF-8 terminal?

> Index: display.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/hexdump/display.c,v
> retrieving revision 1.21
> diff -u -p -r1.21 display.c
> --- display.c 16 Jan 2015 06:40:08 -0000      1.21
> +++ display.c 27 Oct 2015 10:50:09 -0000
> @@ -106,6 +106,17 @@ display(void)
>       }
>  }
>  
> +static int
> +isu8cont(unsigned char c)
> +{
> +     return (c & 0xc0) == 0x80;
> +}
> +static int
> +isu8start(unsigned char c)
> +{
> +     return (c & 0xc0) == 0xc0;
> +}
> +
>  static __inline void
>  print(PR *pr, u_char *bp)
>  {
> @@ -163,7 +174,16 @@ print(PR *pr, u_char *bp)
>               }
>               break;
>       case F_P:
> -             (void)printf(pr->fmt, isprint(*bp) ? *bp : '.');
> +             if (isu8start(*bp)) {
> +                     unsigned char *pp = bp + 1;
> +                     (void)printf(pr->fmt, *bp);
> +                     while (isu8cont(*pp))
> +                             (void)printf(pr->fmt, *pp++);
> +             } else if (isu8cont(*bp)) {
> +                     (void)printf(pr->fmt, ' ');
> +             } else {
> +                     (void)printf(pr->fmt, isprint(*bp) ? *bp : '.');
> +             }
>               break;
>       case F_STR:
>               (void)printf(pr->fmt, (char *)bp);
> 

Reply via email to