This makes sense to me.

I think there is no sane way to handle UTF-8 characters of display width != 1...


On Thu, Apr 12, 2012 at 05:44:19AM -0700, Matthew Dempsky wrote:
> While reading the mosh research paper[1], I noticed we don't have
> IUTF8, which is necessary for backspace to work correctly in canonical
> mode (ICANON) with UTF-8 characters.
> 
> [1] http://mosh.mit.edu/mosh-paper-draft.pdf
> 
> I took a quick stab at implementing it, and it didn't seem too bad.
> See kernel diff below along with a corresponding fix to xterm(1) to
> actually make use of it on OpenBSD.
> 
> I've only played with it for a little while, but I haven't noticed any
> issues with various mixes of backspace, ^W, and tabs.  I'm going to
> try to write some regress tests for it now, so I'm very interested if
> anyone notices any issues or differences in behavior from Linux.
> 
> I suspect Unicode combining characters behave oddly, but Linux doesn't
> worry about those either as far as I can tell.
> 
> 
> Index: src/sys/sys/termios.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/termios.h,v
> retrieving revision 1.11
> diff -u -p -r1.11 termios.h
> --- src/sys/sys/termios.h     26 Dec 2009 09:46:17 -0000      1.11
> +++ src/sys/sys/termios.h     12 Apr 2012 12:33:04 -0000
> @@ -101,7 +101,8 @@
>  #if __BSD_VISIBLE
>  #define      IXANY           0x00000800      /* any char will restart after stop */
>  #define      IUCLC           0x00001000      /* translate upper to lower case */
> -#define IMAXBEL              0x00002000      /* ring bell on input queue full */
> +#define      IMAXBEL         0x00002000      /* ring bell on input queue full */
> +#define      IUTF8           0x00004000      /* input stream is UTF-8 encoded */
>  #endif /* __BSD_VISIBLE */
>  
>  /*
> Index: src/sys/kern/tty.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/tty.c,v
> retrieving revision 1.94
> diff -u -p -r1.94 tty.c
> --- src/sys/kern/tty.c        23 Mar 2012 15:51:26 -0000      1.94
> +++ src/sys/kern/tty.c        12 Apr 2012 12:33:04 -0000
> @@ -163,6 +163,8 @@ u_char const char_type[] = {
>  #define      tolower(c)      ((c) - 'A' + 'a')
>  #define      toupper(c)      ((c) - 'a' + 'A')
>  
> +#define iscont(tp, c)        (((tp)->t_iflag & IUTF8) && (c) >= 0x80 && (c) < 0xc0)
> +
>  struct ttylist_head ttylist; /* TAILQ_HEAD */
>  int tty_count;
>  
> @@ -441,8 +443,12 @@ parmrk:                          (void)putc(0377 | TTY_QUOTE,
>                * erase (^H / ^?)
>                */
>               if (CCEQ(cc[VERASE], c)) {
> -                     if (tp->t_rawq.c_cc)
> -                             ttyrub(unputc(&tp->t_rawq), tp);
> +                     do {
> +                             c = unputc(&tp->t_rawq);
> +                             if (c == -1)
> +                                     break;
> +                             ttyrub(c, tp);
> +                     } while (iscont(tp, c));
>                       goto endcase;
>               }
>               /*
> @@ -698,7 +704,8 @@ ttyoutput(int c, struct tty *tp)
>               col = 0;
>               break;
>       case ORDINARY:
> -             ++col;
> +             if (!iscont(tp, c))
> +                     ++col;
>               break;
>       case TAB:
>               col = (col + 8) & ~7;
> @@ -1874,6 +1881,8 @@ ttyrub(int c, struct tty *tp)
>       int tabc, s;
>  
>       if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC))
> +             return;
> +     if (iscont(tp, c))
>               return;
>       CLR(tp->t_lflag, FLUSHO);
>       if (ISSET(tp->t_lflag, ECHOE)) {
> Index: xenocara/app/xterm/main.c
> ===================================================================
> RCS file: /cvs/xenocara/app/xterm/main.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 main.c
> --- xenocara/app/xterm/main.c 16 Nov 2011 21:14:25 -0000      1.22
> +++ xenocara/app/xterm/main.c 12 Apr 2012 12:33:04 -0000
> @@ -3836,7 +3836,7 @@ spawnXTerm(XtermWidget xw)
>               /* input: nl->nl, don't ignore cr, cr->nl */
>               UIntClr(tio.c_iflag, (INLCR | IGNCR));
>               tio.c_iflag |= ICRNL;
> -#if OPT_WIDE_CHARS && defined(linux) && defined(IUTF8)
> +#if OPT_WIDE_CHARS && defined(IUTF8)
>  #if OPT_LUIT_PROG
>               if (command_to_exec_with_luit == 0)
>  #endif

Reply via email to