Hi, I submitted this problem[1] earlier, but with an incomplete analysis and fix. Here's a second attempt.
This only occurs when running ksh in emacs mode under tmux. How to reproduce: 1. Run ksh under tmux. 2. Input the following characters, without spaces: a (any character) ^B (backward-char) ö (any multi-byte UTF-8 character) 3. At this point, the prompt gets overwritten. Since ksh reads a single byte of input at a time, it will display a partial UTF-8 character before the whole character has been read. This is especially troublesome when the cursor is not placed at the end of the line. In the scenario above, after reading the first byte of 'ö', the following sequence will be displayed: 0xc3 0x61 0x08 That is, the first byte of 'ö' (0xc3), 'a' (0x61), '\b' (0x08). tmux does the right thing here: since 0xc3 is a valid UTF-8 start byte, it expects it to be followed by a UTF-8 continuation byte, which is not the case. The first two bytes (0xc3, 0x61) are discarded and the parser is reset to its initial state, causing the backspace to be accepted and the first character in the prompt to be overwritten. After the second byte of 'ö' (0xb6) is read by ksh, the following sequence will be displayed: 0x08 0xc3 0xb6 0x61 0x08 That is, '\b' (0x08), 'ö' (0xc3, 0xb6), 'a' (0x61), '\b' (0x08). Since ksh assumes the cursor is correctly positioned, it displays a leading backspace in order to move past the first character. This assumption is, however, not true, causing another character in the prompt to be overwritten. Below is a diff that makes sure to read a whole UTF-8 character in x_emacs() prior to doing another iteration of the main loop, which solves the problem. It does not validate UTF-8 input but instead assumes every such character is valid. Comments and feedback are much appreciated.
[1] http://marc.info/?l=openbsd-misc&m=148509346310901&w=2 Index: emacs.c =================================================================== RCS file: /cvs/src/bin/ksh/emacs.c,v retrieving revision 1.67 diff -u -p -r1.67 emacs.c --- emacs.c 12 May 2017 14:37:52 -0000 1.67 +++ emacs.c 14 May 2017 08:21:26 -0000 @@ -98,6 +98,7 @@ static int x_col; static int x_displen; static int x_arg; /* general purpose arg */ static int x_arg_defaulted;/* x_arg not explicitly set; defaulted to 1 */ +static int x_getc_again; static int xlp_valid; /* end from 4.9 edit.h } */ @@ -142,6 +143,7 @@ static int x_fold_case(int); static char *x_lastcp(void); static void do_complete(int, Comp_type); static int isu8cont(unsigned char); +static int u8len(unsigned char); /* proto's for keybindings */ static int x_abort(int); @@ -272,6 +274,21 @@ isu8cont(unsigned char c) return (c & (0x80 | 0x40)) == 0x80; } +static int +u8len(unsigned char c) +{ + switch (c & 0xF0) { + case 0xF0: + return 4; + case 0xE0: + return 3; + case 0xC0: + return 2; + default: + return 1; + } +} + int x_emacs(char *buf, size_t len) { @@ -318,10 +335,12 @@ x_emacs(char *buf, size_t len) x_last_command = NULL; while (1) { x_flush(); - if ((c = x_e_getc()) < 0) - return 0; + do { + if ((c = x_e_getc()) < 0) + return 0; - line[at++] = c; + line[at++] = c; + } while (x_getc_again > 0); line[at] = '\0'; if (x_arg == -1) { @@ -364,7 +383,10 @@ x_emacs(char *buf, size_t len) } else { if (submatch) continue; - if (at == 1) + if (at > 1) { + x_ins(line); + ret = KSTD; + } else if (at == 1) ret = x_insert(c); else ret = x_error(c); /* not matched meta sequence */ @@ -1887,8 +1909,12 @@ x_e_getc(void) macro_args = NULL; c = x_getc(); } - } else + } else { c = x_getc(); + if (x_getc_again == 0) + x_getc_again = u8len(c); + x_getc_again--; + } return c; }