Hi,
I submitted this problem[1] earlier, but with an incomplete analysis and
fix. Here's a second attempt.

This only occurs when running ksh in emacs mode under tmux. How to
reproduce:

1. Run ksh under tmux.

2. Input the following characters, without spaces:

   a (any character) ^B (backward-char) ö (any UTF-8 character)

3. At this point, the prompt gets overwritten.

Since ksh reads a single byte of input at a time, it will display a
partial UTF-8 character before the whole character has been read. This is
especially troublesome when the cursor is not placed at the end of the
line. In the scenario above, after reading the first byte of 'ö' the
following sequence will be displayed:

  0xc3 0x61 0x08

That is the first byte of 'ö' (0xc3), 'a' (0x61), '\b' (0x08). tmux
does the right thing here: since 0xc3 is a valid UTF-8 start byte, it
expects it to be followed by a UTF-8 continuation byte, which is not the
case. The first two bytes (0xc3, 0x61) are discarded and the parser is
reset to its initial state, causing the backspace to be accepted and the
first character in the prompt to be overwritten.

After the second byte of 'ö' (0xb6) is read by ksh, the following
sequence will be displayed:

   0x08 0xc3 0xb6 0x61 0x08

That is '\b' (0x08), 'ö' (0xc3, 0xb6), 'a' (0x61), '\b' (0x08). Since
ksh assumes the cursor is correctly positioned, it displays a leading
backspace in order to move past the first character. This assumption is,
however, not true, causing another character in the prompt to be
overwritten.

Below is a diff that makes sure a whole UTF-8 character is read in
x_emacs() prior to doing another iteration of the main loop, which solves
the problem. It does not validate UTF-8 input but instead assumes every
such character is valid.

Comments and feedback are much appreciated.

[1] http://marc.info/?l=openbsd-misc&m=148509346310901&w=2

Index: emacs.c
===================================================================
RCS file: /cvs/src/bin/ksh/emacs.c,v
retrieving revision 1.67
diff -u -p -r1.67 emacs.c
--- emacs.c     12 May 2017 14:37:52 -0000      1.67
+++ emacs.c     14 May 2017 08:21:26 -0000
@@ -98,6 +98,7 @@ static int    x_col;
 static int     x_displen;
 static int     x_arg;          /* general purpose arg */
 static int     x_arg_defaulted;/* x_arg not explicitly set; defaulted to 1 */
+static int     x_getc_again;
 
 static int     xlp_valid;
 /* end from 4.9 edit.h } */
@@ -142,6 +143,7 @@ static int  x_fold_case(int);
 static char    *x_lastcp(void);
 static void    do_complete(int, Comp_type);
 static int     isu8cont(unsigned char);
+static int     u8len(unsigned char);
 
 /* proto's for keybindings */
 static int     x_abort(int);
@@ -272,6 +274,21 @@ isu8cont(unsigned char c)
        return (c & (0x80 | 0x40)) == 0x80;
 }
 
+static int
+u8len(unsigned char c)
+{
+       switch (c & 0xF0) {
+       case 0xF0:
+               return 4;
+       case 0xE0:
+               return 3;
+       case 0xC0:
+               return 2;
+       default:
+               return 1;
+       }
+}
+
 int
 x_emacs(char *buf, size_t len)
 {
@@ -318,10 +335,12 @@ x_emacs(char *buf, size_t len)
        x_last_command = NULL;
        while (1) {
                x_flush();
-               if ((c = x_e_getc()) < 0)
-                       return 0;
+               do {
+                       if ((c = x_e_getc()) < 0)
+                               return 0;
 
-               line[at++] = c;
+                       line[at++] = c;
+               } while (x_getc_again > 0);
                line[at] = '\0';
 
                if (x_arg == -1) {
@@ -364,7 +383,10 @@ x_emacs(char *buf, size_t len)
                } else {
                        if (submatch)
                                continue;
-                       if (at == 1)
+                       if (at > 1) {
+                               x_ins(line);
+                               ret = KSTD;
+                       } else if (at == 1)
                                ret = x_insert(c);
                        else
                        ret = x_error(c); /* not matched meta sequence */
@@ -1887,8 +1909,12 @@ x_e_getc(void)
                        macro_args = NULL;
                        c = x_getc();
                }
-       } else
+       } else {
                c = x_getc();
+               if (x_getc_again == 0)
+                       x_getc_again = u8len(c);
+               x_getc_again--;
+       }
 
        return c;
 }

Reply via email to