Hi Martin, hi Theo,
thank you for your feedback!
Martijn van Duren wrote on Sun, Jan 17, 2016 at 12:06:26PM +0100:
> When applying this patch and running in an environment without an
> UTF-8 LC_ALL or LC_CTYPE the isu8cont gives the reverse problem of
> the current ksh having UTF-8 filenames with UTF-8 enabled.
> It becomes impossible to properly remove the utf-8 characters where
> the screen is badly redrawn, while in the old situation these could
> be removed byte-wise when in POSIX mode.
>
> Example (caret is cursor):
> $ cd ./Muziek/Mot??rhead/
> ^
>
> $ cd ./Muziek/Mot??rhead/
>^
>
> $ ccd ./Muziek/Mot??rhead
>
> Since (afaik) utf-8 isn't supported on the console this could become an
> annoying situation in some situations.
All those seem valid concerns. So we should only change behaviour
based on the locale(1), which means that the shell has to call
setlocale(3) - of course only in the non-ramdisk case.
Patch updated in two places:
1. Call setlocale(3) in main().
2. Check the locale in isu8cont().
> I reckon it would be cleaner to do it like ls where the character
> is first pulled through mbtowc and based on it's return printed or
> presented as an ? per byte, or at the very least treated as a single
> byte.
Right now, i'm trying to be extremely conservative and not use any
multibyte library functions. But yes, ultimately, that will be
needed, in particular to support zero-width and double-width
characters. But i don't deem the time ripe to attempt that just yet.
Theo (tb@) pointed out in private that entering a two-byte UTF-8
character produces the following echo to the terminal with the
first version of the patch: first byte, backspace, first byte,
second byte. While that works at least on some terminals, that's
certainly not ideal. The following updated version of the patch
fixes this by keeping minimal state in display() and avoiding
the backup and reprint of the start byte if nothing unrelated
happened in between. No changes outside display().
More testing and feedback is certainly welcome.
Yours,
Ingo
Index: main.c
===
RCS file: /cvs/src/bin/ksh/main.c,v
retrieving revision 1.78
diff -u -p -r1.78 main.c
--- main.c 30 Dec 2015 09:07:00 - 1.78
+++ main.c 17 Jan 2016 16:40:03 -
@@ -8,6 +8,7 @@
#include
#include
+#include
#include
#include
#include
@@ -153,6 +154,8 @@ main(int argc, char *argv[])
kshname = argv[0];
#ifndef MKNOD
+ setlocale(LC_CTYPE, "");
+
if (pledge("stdio rpath wpath cpath fattr flock getpw proc exec tty",
NULL) == -1) {
perror("pledge");
Index: vi.c
===
RCS file: /cvs/src/bin/ksh/vi.c,v
retrieving revision 1.39
diff -u -p -r1.39 vi.c
--- vi.c22 Dec 2015 08:39:26 - 1.39
+++ vi.c17 Jan 2016 16:40:03 -
@@ -12,6 +12,7 @@
#include /* completion */
#include
+#include
#include
#include "sh.h"
@@ -21,11 +22,11 @@
#define CTRL(c)(c & 0x1f)
struct edstate {
- int winleft;
- char*cbuf;
- int cbufsize;
- int linelen;
- int cursor;
+ char*cbuf; /* main buffer to build the command line */
+ int cbufsize; /* number of bytes allocated for cbuf */
+ int linelen;/* current number of bytes in cbuf */
+ int winleft;/* first byte# in cbuf to be displayed */
+ int cursor; /* byte# in cbuf having the cursor */
};
@@ -68,6 +69,7 @@ static void vi_pprompt(int);
static voidvi_error(void);
static voidvi_macro_reset(void);
static int x_vi_putbuf(const char *, size_t);
+static int isu8cont(unsigned char);
#define C_ 0x1 /* a valid command that isn't a M_, E_, U_ */
#define M_ 0x2 /* movement command (h, l, etc.) */
@@ -148,7 +150,7 @@ static void restore_edstate(struct edst
static voidfree_edstate(struct edstate *old);
static struct edstate ebuf;
-static struct edstate undobuf = { 0, undocbuf, CMDLEN, 0, 0 };
+static struct edstate undobuf = { undocbuf, CMDLEN, 0, 0, 0 };
static struct edstate *es;/* current editor state */
static struct edstate *undo;
@@ -157,7 +159,7 @@ static char ibuf[CMDLEN]; /* input buff
static int first_insert; /* set when starting in insert mode */
static int saved_inslen; /* saved inslen for first insert */
static int inslen; /* length of input buffer */
-static int srchlen;/* length of current search pattern */
+static int srchlen;/* number of bytes in search pattern */
static charybuf[CMDLEN]; /* yank buffer */
static int yanklen;/* length of yank buffer