Hi there,

I got annoyed with ksh(1) for messing up my command line after
accidentally entering an umlaut and decided to take a stab at teaching
it some UTF-8. The diff is inspired by Ted Unangst's recent patches for
other programs, e.g. rs(1) [0].

It works for my use cases and seems to handle 2-byte (ß, ü, ö, ä) and
3-byte (€) sequences quite well; I hope it does so for longer ones, too.
Maybe it's of use to someone else as well.

Cheers,
Frederic

[0] https://marc.info/?l=openbsd-tech&m=144560099607564


Index: emacs.c
===================================================================
RCS file: /cvs/src/bin/ksh/emacs.c,v
retrieving revision 1.60
diff -u -p -r1.60 emacs.c
--- emacs.c     19 Oct 2015 14:42:16 -0000      1.60
+++ emacs.c     10 Nov 2015 12:31:27 -0000
@@ -49,7 +49,7 @@ struct        x_ftab {
 #define        is_cfs(c)       (c == ' ' || c == '\t' || c == '"' || c == '\'')

 /* Separator for motion */
-#define        is_mfs(c)       (!(isalnum((unsigned char)c) || c == '_' || c == '$'))
+#define        is_mfs(c)       (!(isu8lead((unsigned char) c) || isu8cont((unsigned char) c) || isalnum((unsigned char)c) || c == '_' || c == '$'))

 /* Arguments for do_complete()
  * 0 = enumerate  M-= complete as much as possible and then list
@@ -198,6 +198,10 @@ static int x_comment(int);
 static int     x_debug_info(int);
 #endif

+/* utf8 support */
+static int isu8cont(unsigned char);
+static int isu8lead(unsigned char);
+
 static const struct x_ftab x_ftab[] = {
        { x_abort,              "abort",                        0 },
        { x_beg_hist,           "beginning-of-history",         0 },
@@ -263,6 +267,25 @@ static const struct x_ftab x_ftab[] = {
 };

 int
+isu8cont(unsigned char c)
+{
+       return ((c & (0x80 | 0x40)) == 0x80);
+}
+
+int
+isu8lead(unsigned char c)
+{
+       if ((c & 0xE0) == 0xC0)
+               return 1;
+       if ((c & 0xF0) == 0xE0)
+               return 2;
+       if ((c & 0xF8) == 0xF0)
+               return 3;
+
+       return 0;
+}
+
+int
 x_emacs(char *buf, size_t len)
 {
        struct kb_entry         *k, *kmatch = NULL;
@@ -468,6 +491,8 @@ x_del_back(int c)
        }
        if (x_arg > col)
                x_arg = col;
+       while(x_arg <= col && isu8cont(*(xcp - x_arg)))
+               x_arg++;
        x_goto(xcp - x_arg);
        x_delete(x_arg, false);
        return KSTD;
@@ -621,7 +646,7 @@ x_fword(void)
 static void
 x_goto(char *cp)
 {
-       if (cp < xbp || cp >= (xbp + x_displen)) {
+       if (cp < xbp || cp >= xlp) {
                /* we are heading off screen */
                xcp = cp;
                x_adjust();
@@ -660,6 +685,8 @@ x_size(int c)
                return 4;       /* Kludge, tabs are always four spaces. */
        if (iscntrl(c))         /* control char */
                return 2;
+       if (isu8cont(c))        /* utf8 continuation byte */
+               return 0;
        return 1;
 }

@@ -669,7 +696,8 @@ x_zots(char *str)
        int     adj = x_adj_done;

        x_lastcp();
-       while (*str && str < xlp && adj == x_adj_done)
+       while (*str && (isu8cont(*str) || str < xlp)
+              && adj == x_adj_done)
                x_zotc(*str++);
 }

@@ -697,6 +725,8 @@ x_mv_back(int c)
        }
        if (x_arg > col)
                x_arg = col;
+       while(x_arg <= col && isu8cont(*(xcp - x_arg)))
+               x_arg++;
        x_goto(xcp - x_arg);
        return KSTD;
 }
@@ -710,6 +740,7 @@ x_mv_forw(int c)
                x_e_putc(BEL);
                return KSTD;
        }
+       x_arg += isu8lead(*xcp);
        if (x_arg > nleft)
                x_arg = nleft;
        x_goto(xcp + x_arg);
@@ -1025,7 +1056,7 @@ x_redraw(int limit)
                if (xep > xlp)
                        i = 0;                  /* we fill the line */
                else
-                       i = limit - (xlp - xbp);
+                       i = limit - x_col;

                for (j = 0; j < i && x_col < (xx_cols - 2); j++)
                        x_e_putc(' ');
@@ -1821,11 +1852,18 @@ do_complete(int flags,  /* XCF_{COMMAND,F
 static void
 x_adjust(void)
 {
+       int i;
        x_adj_done++;                   /* flag the fact that we were called. */
        /*
         * we had a problem if the prompt length > xx_cols / 2
         */
-       if ((xbp = xcp - (x_displen / 2)) < xbuf)
+       xbp = xcp;
+       for(i = 0; i < (x_displen/2);) {
+               xbp--;
+               if(!isu8cont(*xbp))
+                       i++;
+       }
+       if (xbp < xbuf)
                xbp = xbuf;
        xlp_valid = false;
        x_redraw(xx_cols);
@@ -1863,6 +1901,12 @@ x_e_getc(void)
 static void
 x_e_putc(int c)
 {
+       static int u8wait = 0;
+       if(isu8lead(c)) {
+               u8wait = isu8lead(c);
+       } else if(isu8cont(c)) {
+               u8wait--;
+       }
        if (c == '\r' || c == '\n')
                x_col = 0;
        if (x_col < xx_cols) {
@@ -1874,10 +1918,12 @@ x_e_putc(int c)
                case '\n':
                        break;
                case '\b':
-                       x_col--;
+                       if(!isu8cont(c))
+                               x_col--;
                        break;
                default:
-                       x_col++;
+                       if(!u8wait)
+                               x_col++;
                        break;
                }
        }

Reply via email to