On 2017-05-07T17:31:26+0200, Hiltjo Posthuma wrote:
> Hey,
> 
> mg crashes with certain (unicode) characters and moving the cursor to the
> end of the line.
> 
> The characters are printed to the screen as \nnn in vtpute() and vtcol is
> updated, however vteeol() will write beyond the buffer.

About a year or so ago I ran into this myself (along with some other
issues) when adding unicode support. The thread sort of fizzled out
then[0], but I've been rebasing the diff since[1], and at least one
other person has reported that it works. If bugs related to unicode
are again interesting, I should probably post my patch again.

I don't mean to hijack the thread or try and re-propose my patch,
just to point out a previous effort; if you're working on this, you
can at least see what you don't like about my patch and plan to
avoid it. ;)

[0] https://marc.info/?l=openbsd-tech&m=145338987910327&w=2
[1] https://github.com/hboetes/mg/tree/display-wide-characters

-- 
S. Gilles
diff --git a/basic.c b/basic.c
index 85d9f70..123e115 100644
--- a/basic.c
+++ b/basic.c
@@ -18,6 +18,7 @@
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <wchar.h>
 
 #include "def.h"
 
@@ -269,12 +270,25 @@ setgoal(void)
 int
 getgoal(struct line *dlp)
 {
-       int c, i, col = 0;
-       char tmp[5];
+       return getbyteofcol(dlp, 0, curgoal);
+}
 
+/*
+ * Return the byte offset within lp that is targetcol columns beyond
+ * startbyte
+ */
+size_t
+getbyteofcol(const struct line *lp, const size_t startbyte,
+             const size_t targetcol)
+{
+       int c;
+       size_t i, col = 0;
+       char tmp[5];
+       size_t advance_by = 1;
 
-       for (i = 0; i < llength(dlp); i++) {
-               c = lgetc(dlp, i);
+       for (i = startbyte; i < llength(lp); i += advance_by) {
+               advance_by = 1;
+               c = lgetc(lp, i);
                if (c == '\t'
 #ifdef NOTAB
                    && !(curbp->b_flag & BFNOTAB)
@@ -284,18 +298,86 @@ getgoal(struct line *dlp)
                        col++;
                } else if (ISCTRL(c) != FALSE) {
                        col += 2;
-               } else if (isprint(c))
+               } else if (isprint(c)) {
                        col++;
-               else {
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &lp->l_text[i],
+                                                 llength(lp) - i, &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {
+                               col += width;
+                               advance_by = consumed;
+                       } else {
+                               col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       }
+               } else {
                        col += snprintf(tmp, sizeof(tmp), "\\%o", c);
                }
-               if (col > curgoal)
+               if (col > targetcol)
                        break;
        }
        return (i);
 }
 
 /*
+ * Return the column at which specified offset byte would appear, if
+ * this were part of a longer string printed by vtputs, starting at
+ * initial_col
+ */
+size_t
+getcolofbyte(const struct line *lp, const size_t startbyte,
+             const size_t initial_col, const size_t targetoffset)
+{
+       int c;
+       size_t i, col = initial_col;
+       char tmp[5];
+       size_t advance_by = 1;
+
+       for (i = startbyte; i < llength(lp); i += advance_by) {
+               if (i >= targetoffset)
+                       break;
+               advance_by = 1;
+               c = lgetc(lp, i);
+               if (c == '\t'
+#ifdef NOTAB
+                   && !(curbp->b_flag & BFNOTAB)
+#endif
+                       ) {
+                       col |= 0x07;
+                       col++;
+               } else if (ISCTRL(c) != FALSE) {
+                       col += 2;
+               } else if (isprint(c)) {
+                       col++;
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &lp->l_text[i],
+                                                 llength(lp) - i, &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {
+                               col += width;
+                               advance_by = consumed;
+                       } else {
+                               col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       }
+               } else {
+                       col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+               }
+       }
+       return (col);
+}
+
+
+/*
  * Scroll forward by a specified number
  * of lines, or by a full page if no argument.
  * The "2" is the window overlap (this is the default
diff --git a/cmode.c b/cmode.c
index a238285..9d4cac3 100644
--- a/cmode.c
+++ b/cmode.c
@@ -14,6 +14,7 @@
 #include <ctype.h>
 #include <signal.h>
 #include <stdio.h>
+#include <wchar.h>
 
 #include "def.h"
 #include "funmap.h"
@@ -419,10 +420,25 @@ findcolpos(const struct buffer *bp, const struct line *lp, int lo)
                        ) {
                        col |= 0x07;
                        col++;
-               } else if (ISCTRL(c) != FALSE)
+               } else if (ISCTRL(c) != FALSE) {
                        col += 2;
-               else if (isprint(c)) {
+               } else if (isprint(c)) {
                        col++;
+               } else if (!(bp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &lp->l_text[i],
+                                                 llength(lp) - i, &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {
+                               col += width;
+                               i += (consumed - 1);
+                       } else {
+                               col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       }
                } else {
                        col += snprintf(tmp, sizeof(tmp), "\\%o", c);
                }
diff --git a/def.h b/def.h
index 54b74ce..2fb289d 100644
--- a/def.h
+++ b/def.h
@@ -270,7 +270,7 @@ struct buffer {
        int              b_marko;       /* ditto for the "mark"          */
        short            b_nmodes;      /* number of non-fundamental modes */
        char             b_nwnd;        /* Count of windows on buffer    */
-       char             b_flag;        /* Flags                         */
+       short            b_flag;        /* Flags                         */
        char             b_fname[NFILEN]; /* File name                   */
        char             b_cwd[NFILEN]; /* working directory             */
        struct fileinfo  b_fi;          /* File attributes               */
@@ -297,6 +297,7 @@ struct buffer {
 #define BFDIRTY     0x20               /* Buffer was modified elsewhere */
 #define BFIGNDIRTY  0x40               /* Ignore modifications          */
 #define BFDIREDDEL  0x80               /* Dired has a deleted 'D' file  */
+#define BFSHOWRAW   0x100              /* Show unprintable as octal     */
 /*
  * This structure holds information about recent actions for the Undo command.
  */
@@ -497,6 +498,7 @@ int          digit_argument(int, int);
 int             negative_argument(int, int);
 int             selfinsert(int, int);
 int             quote(int, int);
+int             insert_char(int, int);
 
 /* main.c */
 int             ctrlg(int, int);
@@ -519,6 +521,8 @@ int          forwline(int, int);
 int             backline(int, int);
 void            setgoal(void);
 int             getgoal(struct line *);
+size_t          getbyteofcol(const struct line *, size_t, size_t);
+size_t          getcolofbyte(const struct line *, size_t, size_t, size_t);
 int             forwpage(int, int);
 int             backpage(int, int);
 int             forw1page(int, int);
@@ -665,6 +669,7 @@ int          notabmode(int, int);
 #endif /* NOTAB */
 int             overwrite_mode(int, int);
 int             set_default_mode(int,int);
+int             show_raw_mode(int, int);
 
 #ifdef REGEX
 /* re_search.c X */
diff --git a/display.c b/display.c
index 7af723c..104f873 100644
--- a/display.c
+++ b/display.c
@@ -7,7 +7,7 @@
  * redisplay system knows almost nothing about the editing
  * process; the editing functions do, however, set some
  * hints to eliminate a lot of the grinding. There is more
- * that can be done; the "vtputc" interface is a real
+ * that can be done; the "vtputs" interface is a real
  * pig.
  */
 
@@ -18,6 +18,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <term.h>
+#include <wchar.h>
 
 #include "def.h"
 #include "kbd.h"
@@ -52,11 +53,10 @@ struct score {
 };
 
 void   vtmove(int, int);
-void   vtputc(int);
 void   vtpute(int);
-int    vtputs(const char *);
+int    vtputs(const char *, size_t, size_t);
 void   vteeol(void);
-void   updext(int, int);
+int    updext(int, int);
 void   modeline(struct mgwin *, int);
 void   setscores(int, int);
 void   traceback(int, int, int, int);
@@ -216,8 +216,8 @@ vtresize(int force, int newrow, int newcol)
        }
        if (rowchanged || colchanged || first_run) {
                for (i = 0; i < 2 * (newrow - 1); i++)
-                       TRYREALLOC(video[i].v_text, newcol);
-               TRYREALLOC(blanks.v_text, newcol);
+                       TRYREALLOC(video[i].v_text, newcol * MB_CUR_MAX);
+               TRYREALLOC(blanks.v_text, newcol * MB_CUR_MAX);
        }
 
        nrow = newrow;
@@ -260,7 +260,7 @@ vtinit(void)
         */
 
        blanks.v_color = CTEXT;
-       for (i = 0; i < ncol; ++i)
+       for (i = 0; i < ncol * MB_CUR_MAX; ++i)
                blanks.v_text[i] = ' ';
 }
 
@@ -287,7 +287,7 @@ vttidy(void)
  * Move the virtual cursor to an origin
  * 0 spot on the virtual display screen. I could
  * store the column as a character pointer to the spot
- * on the line, which would make "vtputc" a little bit
+ * on the line, which would make "vtputs" a little bit
  * more efficient. No checking for errors.
  */
 void
@@ -298,82 +298,6 @@ vtmove(int row, int col)
 }
 
 /*
- * Write a character to the virtual display,
- * dealing with long lines and the display of unprintable
- * things like control characters. Also expand tabs every 8
- * columns. This code only puts printing characters into
- * the virtual display image. Special care must be taken when
- * expanding tabs. On a screen whose width is not a multiple
- * of 8, it is possible for the virtual cursor to hit the
- * right margin before the next tab stop is reached. This
- * makes the tab code loop if you are not careful.
- * Three guesses how we found this.
- */
-void
-vtputc(int c)
-{
-       struct video    *vp;
-
-       c &= 0xff;
-
-       vp = vscreen[vtrow];
-       if (vtcol >= ncol)
-               vp->v_text[ncol - 1] = '$';
-       else if (c == '\t'
-#ifdef NOTAB
-           && !(curbp->b_flag & BFNOTAB)
-#endif
-           ) {
-               do {
-                       vtputc(' ');
-               } while (vtcol < ncol && (vtcol & 0x07) != 0);
-       } else if (ISCTRL(c)) {
-               vtputc('^');
-               vtputc(CCHR(c));
-       } else if (isprint(c))
-               vp->v_text[vtcol++] = c;
-       else {
-               char bf[5];
-
-               snprintf(bf, sizeof(bf), "\\%o", c);
-               vtputs(bf);
-       }
-}
-
-/*
- * Put a character to the virtual screen in an extended line.  If we are not
- * yet on left edge, don't print it yet.  Check for overflow on the right
- * margin.
- */
-void
-vtpute(int c)
-{
-       struct video *vp;
-
-       c &= 0xff;
-
-       vp = vscreen[vtrow];
-       if (vtcol >= ncol)
-               vp->v_text[ncol - 1] = '$';
-       else if (c == '\t'
-#ifdef NOTAB
-           && !(curbp->b_flag & BFNOTAB)
-#endif
-           ) {
-               do {
-                       vtpute(' ');
-               } while (((vtcol + lbound) & 0x07) != 0 && vtcol < ncol);
-       } else if (ISCTRL(c) != FALSE) {
-               vtpute('^');
-               vtpute(CCHR(c));
-       } else {
-               if (vtcol >= 0)
-                       vp->v_text[vtcol] = c;
-               ++vtcol;
-       }
-}
-
-/*
  * Erase from the end of the software cursor to the end of the line on which
  * the software cursor is located. The display routines will decide if a
  * hardware erase to end of line command should be used to display this.
@@ -384,7 +308,7 @@ vteeol(void)
        struct video *vp;
 
        vp = vscreen[vtrow];
-       while (vtcol < ncol)
+       while (vtcol < ncol * MB_CUR_MAX)
                vp->v_text[vtcol++] = ' ';
 }
 
@@ -404,7 +328,7 @@ update(int modelinecolor)
        struct mgwin    *wp;
        struct video    *vp1;
        struct video    *vp2;
-       int      c, i, j;
+       int      c, i;
        int      hflag;
        int      currow, curcol;
        int      offs, size;
@@ -479,8 +403,9 @@ update(int modelinecolor)
                        vscreen[i]->v_color = CTEXT;
                        vscreen[i]->v_flag |= (VFCHG | VFHBAD);
                        vtmove(i, 0);
-                       for (j = 0; j < llength(lp); ++j)
-                               vtputc(lgetc(lp, j));
+                       if (llength(lp)) {
+                               vtputs(lp->l_text, llength(lp), 0);
+                       }
                        vteeol();
                } else if ((wp->w_rflag & (WFEDIT | WFFULL)) != 0) {
                        hflag = TRUE;
@@ -489,8 +414,10 @@ update(int modelinecolor)
                                vscreen[i]->v_flag |= (VFCHG | VFHBAD);
                                vtmove(i, 0);
                                if (lp != wp->w_bufp->b_headp) {
-                                       for (j = 0; j < llength(lp); ++j)
-                                               vtputc(lgetc(lp, j));
+                                       if (llength(lp)) {
+                                               vtputs(lp->l_text, llength(lp),
+                                                   0);
+                                       }
                                        lp = lforw(lp);
                                }
                                vteeol();
@@ -508,32 +435,53 @@ update(int modelinecolor)
                ++currow;
                lp = lforw(lp);
        }
+
        curcol = 0;
        i = 0;
        while (i < curwp->w_doto) {
-               c = lgetc(lp, i++);
+               char tmp[5];
+               c = lgetc(lp, i);
                if (c == '\t'
 #ifdef NOTAB
                    && !(curbp->b_flag & BFNOTAB)
 #endif
                        ) {
-                       curcol |= 0x07;
                        curcol++;
+                       while ((curcol - lbound) & 0x07) {
+                               curcol++;
+                       }
                } else if (ISCTRL(c) != FALSE)
                        curcol += 2;
-               else if (isprint(c))
+               else if (isprint(c)) {
                        curcol++;
-               else {
-                       char bf[5];
-
-                       snprintf(bf, sizeof(bf), "\\%o", c);
-                       curcol += strlen(bf);
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &lp->l_text[i],
+                                                 llength(lp) - i, &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       } else {
+                               memset(&mbs, 0, sizeof mbs);
+                       }
+                       if (width >= 0) {
+                               curcol += width;
+                               i += (consumed - 1);
+                       } else {
+                               snprintf(tmp, sizeof(tmp), "\\%o", c);
+                               curcol += strlen(tmp);
+                       }
+               } else {
+                       snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       curcol += strlen(tmp);
                }
+               i++;
        }
        if (curcol >= ncol - 1) {       /* extended line. */
                /* flag we are extended and changed */
                vscreen[currow]->v_flag |= VFEXT | VFCHG;
-               updext(currow, curcol); /* and output extended line */
+               curcol = updext(currow, curcol);
        } else
                lbound = 0;     /* not extended line */
 
@@ -552,8 +500,10 @@ update(int modelinecolor)
                                if ((wp != curwp) || (lp != wp->w_dotp) ||
                                    (curcol < ncol - 1)) {
                                        vtmove(i, 0);
-                                       for (j = 0; j < llength(lp); ++j)
-                                               vtputc(lgetc(lp, j));
+                                       if (llength(lp)) {
+                                               vtputs(lp->l_text, llength(lp),
+                                                   0);
+                                       }
                                        vteeol();
                                        /* this line no longer is extended */
                                        vscreen[i]->v_flag &= ~VFEXT;
@@ -655,39 +605,44 @@ ucopy(struct video *vvp, struct video *pvp)
        pvp->v_hash = vvp->v_hash;
        pvp->v_cost = vvp->v_cost;
        pvp->v_color = vvp->v_color;
-       bcopy(vvp->v_text, pvp->v_text, ncol);
+       bcopy(vvp->v_text, pvp->v_text, ncol * MB_CUR_MAX);
 }
 
 /*
- * updext: update the extended line which the cursor is currently on at a
- * column greater than the terminal width. The line will be scrolled right or
- * left to let the user see where the cursor is.
+ * updext: update the extended line which the cursor is currently on
+ * at a column greater than the terminal width. The line will be
+ * scrolled right or left to let the user see where the cursor
+ * is. curcol may need to be adjusted, depending on how wide
+ * characters and lbound interact; that adjusted position is returned.
  */
-void
+int
 updext(int currow, int curcol)
 {
-       struct line     *lp;                    /* pointer to current line */
-       int      j;                     /* index into line */
+       struct line *lp = curwp->w_dotp;        /* pointer to current line */
+       size_t startbyte;
+       int bettercol = curcol;
+       size_t fullextent;
 
        if (ncol < 2)
-               return;
+               return curcol;
 
        /*
-        * calculate what column the left bound should be
-        * (force cursor into middle half of screen)
+        * calculate what column the left bound should be (force
+        * cursor into middle half of screen). Ensuring that it is at
+        * a tabstop allows update() to calculate curcol without
+        * wondering how tabstops are calculated before the first '$'.
         */
        lbound = curcol - (curcol % (ncol >> 1)) - (ncol >> 2);
-
-       /*
-        * scan through the line outputing characters to the virtual screen
-        * once we reach the left edge
-        */
-       vtmove(currow, -lbound);                /* start scanning offscreen */
-       lp = curwp->w_dotp;                     /* line to output */
-       for (j = 0; j < llength(lp); ++j)       /* until the end-of-line */
-               vtpute(lgetc(lp, j));
-       vteeol();                               /* truncate the virtual line */
-       vscreen[currow]->v_text[0] = '$';       /* and put a '$' in column 1 */
+       lbound = (lbound | 0x07) + 1;
+       vscreen[currow]->v_text[0] = '$';
+       vtmove(currow, 1);
+       startbyte = getbyteofcol(lp, 0, lbound + 1);
+       fullextent = getbyteofcol(lp, startbyte, ncol + 1);
+       vtputs(lp->l_text + startbyte, fullextent - startbyte, 1);
+       vteeol();
+
+       bettercol = lbound + getcolofbyte(lp, startbyte, 1, curwp->w_doto);
+       return (bettercol);
 }
 
 /*
@@ -702,12 +657,35 @@ updext(int currow, int curcol)
 void
 uline(int row, struct video *vvp, struct video *pvp)
 {
-       char  *cp1;
-       char  *cp2;
-       char  *cp3;
-       char  *cp4;
-       char  *cp5;
+       char  *cp1; /* Pointer to the start of dirty region */
+       char  *cp2; /* pvp's counterpart for cp1 */
+       char  *cp3; /* Pointer to end of dirty region */
+       char  *cp4; /* pvp's counterpart for cp3 */
+       char  *cp5; /* After this, within dirty region, all is ' ' */
+       char  *mbcounter;
        int    nbflag;
+       int    startcol; /* onscreen column matching cp1 */
+       char  *lastbyte; /* byte which handles last onscreen column  */
+       int    seencols = 0;
+       mbstate_t mbs = { 0 };
+       wchar_t wc = 0;
+
+       lastbyte = vvp->v_text;
+       while (seencols < ncol && *lastbyte) {
+               size_t consumed = mbrtowc(&wc, lastbyte,
+                   (vvp->v_text + ncol * MB_CUR_MAX - lastbyte), &mbs);
+               if (consumed < (size_t) -2) {
+                       lastbyte += consumed;
+                       seencols += wcwidth(wc);
+               } else {
+                       lastbyte++;
+                       seencols++;
+                       memset(&mbs, 0, sizeof mbs);
+               }
+       }
+       if (lastbyte - vvp->v_text < ncol) {
+               lastbyte = &vvp->v_text[ncol];
+       }
 
        if (vvp->v_color != pvp->v_color) {     /* Wrong color, do a     */
                ttmove(row, 0);                 /* full redraw.          */
@@ -723,11 +701,12 @@ uline(int row, struct video *vvp, struct video *pvp)
                 * putting the invisible glitch character on the next line.
                 * (Hazeltine executive 80 model 30)
                 */
-               cp2 = &vvp->v_text[ncol - (magic_cookie_glitch >= 0 ?
-                   (magic_cookie_glitch != 0 ? magic_cookie_glitch : 1) : 0)];
+               cp2 = lastbyte -
+                   (magic_cookie_glitch >= 0 ? (magic_cookie_glitch != 0 ?
+                   magic_cookie_glitch : 1) : 0);
 #else
                cp1 = &vvp->v_text[0];
-               cp2 = &vvp->v_text[ncol];
+               cp2 = lastbyte;
 #endif
                while (cp1 != cp2) {
                        ttputc(*cp1++);
@@ -738,21 +717,31 @@ uline(int row, struct video *vvp, struct video *pvp)
        }
        cp1 = &vvp->v_text[0];          /* Compute left match.   */
        cp2 = &pvp->v_text[0];
-       while (cp1 != &vvp->v_text[ncol] && cp1[0] == cp2[0]) {
+       while (cp1 != lastbyte && cp1[0] == cp2[0]) {
                ++cp1;
                ++cp2;
        }
-       if (cp1 == &vvp->v_text[ncol])  /* All equal.            */
+       if (cp1 == lastbyte)    /* All equal.            */
                return;
+       while (cp1 != vvp->v_text && !isprint(*cp1) &&
+              mbrtowc(&wc, cp1, (lastbyte - cp1), &mbs) >= (size_t) -2) {
+               --cp1;
+               --cp2;
+       }
        nbflag = FALSE;
-       cp3 = &vvp->v_text[ncol];       /* Compute right match.  */
-       cp4 = &pvp->v_text[ncol];
+       cp3 = lastbyte; /* Compute right match.  */
+       cp4 = &pvp->v_text[lastbyte - vvp->v_text];
        while (cp3[-1] == cp4[-1]) {
                --cp3;
                --cp4;
                if (cp3[0] != ' ')      /* Note non-blanks in    */
                        nbflag = TRUE;  /* the right match.      */
        }
+       while (cp3 != lastbyte && !isprint(*cp3) &&
+               mbrtowc(&wc, cp3, (lastbyte - cp3), &mbs) >= (size_t) -2) {
+               ++cp3;
+               ++cp4;
+       }
        cp5 = cp3;                      /* Is erase good?        */
        if (nbflag == FALSE && vvp->v_color == CTEXT) {
                while (cp5 != cp1 && cp5[-1] == ' ')
@@ -762,13 +751,27 @@ uline(int row, struct video *vvp, struct video *pvp)
                        cp5 = cp3;
        }
        /* Alcyon hack */
-       ttmove(row, (int) (cp1 - &vvp->v_text[0]));
+       startcol = 0;
+       mbcounter = vvp->v_text;
+       while ((cp1 - mbcounter) > 0) {
+               size_t consumed = mbrtowc(&wc, mbcounter, (cp1 - mbcounter),
+                                         &mbs);
+               if (consumed < (size_t) -2) {
+                       mbcounter += consumed;
+                       startcol += wcwidth(wc);
+               } else {
+                       mbcounter++;
+                       startcol++;
+                       memset(&mbs, 0, sizeof mbs);
+               }
+       }
+       ttmove(row, startcol);
 #ifdef STANDOUT_GLITCH
        if (vvp->v_color != CTEXT && magic_cookie_glitch > 0) {
                if (cp1 < &vvp->v_text[magic_cookie_glitch])
                        cp1 = &vvp->v_text[magic_cookie_glitch];
-               if (cp5 > &vvp->v_text[ncol - magic_cookie_glitch])
-                       cp5 = &vvp->v_text[ncol - magic_cookie_glitch];
+               if (cp5 > lastbyte - magic_cookie_glitch)
+                       cp5 = lastbyte - magic_cookie_glitch;
        } else if (magic_cookie_glitch < 0)
 #endif
                ttcolor(vvp->v_color);
@@ -801,46 +804,39 @@ modeline(struct mgwin *wp, int modelinecolor)
        vscreen[n]->v_flag |= (VFCHG | VFHBAD); /* Recompute, display.   */
        vtmove(n, 0);                           /* Seek to right line.   */
        bp = wp->w_bufp;
-       vtputc('-');
-       vtputc('-');
+       n = vtputs("--", 0, 0);
        if ((bp->b_flag & BFREADONLY) != 0) {
-               vtputc('%');
+               n += vtputs("%", 0, n);
                if ((bp->b_flag & BFCHG) != 0)
-                       vtputc('*');
+                       n += vtputs("*", 0, n);
                else
-                       vtputc('%');
+                       n += vtputs("%", 0, n);
        } else if ((bp->b_flag & BFCHG) != 0) { /* "*" if changed.       */
-               vtputc('*');
-               vtputc('*');
+               n += vtputs("**", 0, n);
        } else {
-               vtputc('-');
-               vtputc('-');
+               n += vtputs("--", 0, n);
        }
-       vtputc('-');
+       n += vtputs("-", 0, n);
        n = 5;
-       n += vtputs("Mg: ");
+       n += vtputs("Mg: ", 0, n);
        if (bp->b_bname[0] != '\0')
-               n += vtputs(&(bp->b_bname[0]));
+               n += vtputs(&(bp->b_bname[0]), 0, n);
        while (n < 42) {                        /* Pad out with blanks.  */
-               vtputc(' ');
-               ++n;
+               n += vtputs(" ", 0, n);
        }
-       vtputc('(');
-       ++n;
+       n += vtputs("(", 0, n);
        for (md = 0; ; ) {
-               n += vtputs(bp->b_modes[md]->p_name);
+               n += vtputs(bp->b_modes[md]->p_name, 0, n);
                if (++md > bp->b_nmodes)
                        break;
-               vtputc('-');
-               ++n;
+               n += vtputs("-", 0, n);
        }
        /* XXX These should eventually move to a real mode */
        if (macrodef == TRUE)
-               n += vtputs("-def");
+               n += vtputs("-def", 0, n);
        if (globalwd == TRUE)
-               n += vtputs("-gwd");
-       vtputc(')');
-       ++n;
+               n += vtputs("-gwd", 0, n);
+       n += vtputs(")", 0, n);
 
        if (linenos && colnos)
                len = snprintf(sl, sizeof(sl), "--L%d--C%d", wp->w_dotline,
@@ -850,27 +846,132 @@ modeline(struct mgwin *wp, int modelinecolor)
        else if (colnos)
                len = snprintf(sl, sizeof(sl), "--C%d", getcolpos(wp));
        if ((linenos || colnos) && len < sizeof(sl) && len != -1)
-               n += vtputs(sl);
+               n += vtputs(sl, 0, n);
 
        while (n < ncol) {                      /* Pad out.              */
-               vtputc('-');
-               ++n;
+               n += vtputs("-", 0, n);
        }
 }
 
 /*
- * Output a string to the mode line, report how long it was.
+ * Output a string to the virtual display, report how many columns it used,
+ * dealing with long lines and the display of unprintable
+ * things like control characters. Also expand tabs every 8
+ * columns. This code only puts printing characters into
+ * the virtual display image. Special care must be taken when
+ * expanding tabs. On a screen whose width is not a multiple
+ * of 8, it is possible for the virtual cursor to hit the
+ * right margin before the next tab stop is reached. This
+ * makes the tab code loop if you are not careful.
+ * Three guesses how we found this.
  */
 int
-vtputs(const char *s)
+vtputs(const char *s, const size_t max_bytes, const size_t initial_col)
 {
-       int n = 0;
+       const unsigned char *us = (const unsigned char *) s;
+       struct video *vp = vscreen[vtrow];
+       size_t bytes_handled = 0;
+       size_t last_full_byte_start = vtcol;
+       size_t space_printed = 0;
+
+       if (!s) {
+               return (0);
+       }
+
+       while (*us && (!max_bytes || bytes_handled < max_bytes)) {
+               if (space_printed + initial_col >= ncol) {
+                       break;
+               } else if (*us == '\t'
+#ifdef NOTAB
+                          && !(curbp->b_flag & BFNOTAB)
+#endif
+                          ) {
+                       last_full_byte_start = vtcol;
+                       do {
+                               if (vtcol >= 0) {
+                                       last_full_byte_start = vtcol;
+                                       vp->v_text[vtcol] = ' ';
+                               }
+                               vtcol++;
+                               space_printed++;
+                       } while (space_printed + initial_col < ncol &&
+                                ((space_printed + initial_col) & 0x07));
+                       us++;
+                       bytes_handled++;
+               } else if (ISCTRL(*us)) {
+                       last_full_byte_start = vtcol;
+                       if (vtcol >= 0) {
+                               vp->v_text[vtcol] = '^';
+                       }
+                       vtcol++;
+                       if (vtcol >= 0) {
+                               vp->v_text[vtcol] = CCHR(*us);
+                       }
+                       vtcol++;
+                       bytes_handled++;
+                       space_printed += 2;
+                       us++;
+               } else if (isprint(*us)) {
+                       last_full_byte_start = vtcol;
+                       if (vtcol >= 0) {
+                               vp->v_text[vtcol] = *us++;
+                       }
+                       vtcol++;
+                       bytes_handled++;
+                       space_printed++;
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumable = max_bytes ?
+                               (max_bytes - bytes_handled) : -1;
+                       size_t consumed = mbrtowc(&wc, (const char *)us,
+                                                 consumable, &mbs);
+                       int width = -1;
+                       last_full_byte_start = vtcol;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {
+                               bytes_handled += consumed;
+                               space_printed += width;
+                               do {
+                                       if (vtcol >= 0) {
+                                               vp->v_text[vtcol] = *us++;
+                                       }
+                                       vtcol++;
+                               } while (--consumed);
+                       } else {
+                               char bf[5];
+                               snprintf(bf, sizeof(bf), "\\%o", *us);
+                               bytes_handled++;
+                               space_printed += vtputs(bf, 0,
+                                   space_printed + initial_col);
+                               us++;
+                       }
+               } else {
+                       char bf[5];
+                       last_full_byte_start = vtcol;
+                       snprintf(bf, sizeof(bf), "\\%o", *us);
+                       bytes_handled++;
+                       space_printed += vtputs(bf, 0,
+                           space_printed + initial_col);
+                       us++;
+               }
+       }
 
-       while (*s != '\0') {
-               vtputc(*s++);
-               ++n;
+       if ((space_printed + initial_col > ncol) ||
+           (space_printed + initial_col == ncol &&
+           (*us && (!max_bytes || bytes_handled < max_bytes)))) {
+               vp->v_text[last_full_byte_start] = '$';
+               while (++last_full_byte_start <= vtcol) {
+                       vp->v_text[last_full_byte_start] = ' ';
+               }
+               bytes_handled++;
+               space_printed++;
+               us++;
        }
-       return (n);
+
+       return (space_printed);
 }
 
 /*
@@ -888,11 +989,11 @@ hash(struct video *vp)
        char   *s;
 
        if ((vp->v_flag & VFHBAD) != 0) {       /* Hash bad.             */
-               s = &vp->v_text[ncol - 1];
-               for (i = ncol; i != 0; --i, --s)
+               s = &vp->v_text[ncol * MB_CUR_MAX - 1];
+               for (i = ncol * MB_CUR_MAX; i != 0; --i, --s)
                        if (*s != ' ')
                                break;
-               n = ncol - i;                   /* Erase cheaper?        */
+               n = ncol * MB_CUR_MAX - i;                      /* Erase cheaper?        */
                if (n > tceeol)
                        n = tceeol;
                vp->v_cost = i + n;             /* Bytes + blanks.       */
diff --git a/echo.c b/echo.c
index 6966c00..22b8333 100644
--- a/echo.c
+++ b/echo.c
@@ -844,9 +844,11 @@ ewprintf(const char *fmt, ...)
  * %k prints the name of the current key (and takes no arguments).
  * %d prints a decimal integer
  * %o prints an octal integer
+ * %x prints a hexadecimal integer
  * %p prints a pointer
  * %s prints a string
  * %ld prints a long word
+ * %lx prints a hexadecimal long word
  * Anything else is echoed verbatim
  */
 static void
@@ -885,6 +887,10 @@ eformat(const char *fp, va_list ap)
                                eputi(va_arg(ap, int), 8);
                                break;
 
+                       case 'x':       /* eputl() maps 10-15 to a-f */
+                               eputl((long)va_arg(ap, int), 16);
+                               break;
+
                        case 'p':
                                snprintf(tmp, sizeof(tmp), "%p",
                                    va_arg(ap, void *));
@@ -902,6 +908,9 @@ eformat(const char *fp, va_list ap)
                                case 'd':
                                        eputl(va_arg(ap, long), 10);
                                        break;
+                               case 'x':
+                                       eputl(va_arg(ap, long), 16);
+                                       break;
                                default:
                                        eputc(c);
                                        break;
@@ -939,6 +948,7 @@ static void
 eputl(long l, int r)
 {
        long     q;
+       int      c;
 
        if (l < 0) {
                eputc('-');
@@ -946,7 +956,10 @@ eputl(long l, int r)
        }
        if ((q = l / r) != 0)
                eputl(q, r);
-       eputc((int)(l % r) + '0');
+       c = (int)(l % r) + '0';
+       if (c > '9')
+               c += 'a' - '9' - 1;
+       eputc(c);
 }
 
 /*
diff --git a/funmap.c b/funmap.c
index bd555d6..7d88b75 100644
--- a/funmap.c
+++ b/funmap.c
@@ -114,6 +114,7 @@ static struct funmap functnames[] = {
        {bufferinsert, "insert-buffer",},
        {fileinsert, "insert-file",},
        {fillword, "insert-with-wrap",},
+       {insert_char, "insert-char",},
        {backisearch, "isearch-backward",},
        {forwisearch, "isearch-forward",},
        {joinline, "join-line",},
@@ -191,6 +192,7 @@ static struct funmap functnames[] = {
        {shellcommand, "shell-command",},
        {piperegion, "shell-command-on-region",},
        {shrinkwind, "shrink-window",},
+       {show_raw_mode, "show-raw-mode",},
 #ifdef NOTAB
        {space_to_tabstop, "space-to-tabstop",},
 #endif /* NOTAB */
diff --git a/kbd.c b/kbd.c
index 1d7a1a2..f613b10 100644
--- a/kbd.c
+++ b/kbd.c
@@ -9,6 +9,8 @@
 #include <sys/queue.h>
 #include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
 
 #include "def.h"
 #include "kbd.h"
@@ -406,6 +408,43 @@ quote(int f, int n)
 }
 
 /*
+ * Prompt for a codepoint in whatever the native system's encoding is,
+ * insert it into the file
+ */
+int
+insert_char(int f, int n)
+{
+       char *bufp;
+       char inpbuf[32];
+       wchar_t wc;
+       char mb[MB_CUR_MAX + 1];
+       mbstate_t mbs = { 0 };
+       size_t mbslen;
+       size_t i;
+
+       if ((bufp = eread("Insert character (hex): ", inpbuf, sizeof inpbuf,
+            EFNEW)) == NULL) {
+               return (ABORT);
+       } else if (bufp[0] == '\0') {
+               return (FALSE);
+       }
+
+       wc = (wchar_t) strtoll(bufp, NULL, 16);
+       mbslen = wcrtomb(mb, wc, &mbs);
+       if (mbslen == (size_t) -1) {
+               return (FALSE);
+       }
+
+       for (i = 0; i < mbslen; ++i) {
+               if (linsert(1, mb[i]) == FALSE) {
+                       return (FALSE);
+               }
+       }
+
+       return (TRUE);
+}
+
+/*
  * Wraper function to count invocation repeats.
  * We ignore any function whose sole purpose is to get us
  * to the intended function.
diff --git a/keymap.c b/keymap.c
index ef71f84..ab23182 100644
--- a/keymap.c
+++ b/keymap.c
@@ -120,6 +120,21 @@ static struct KEYMAPE (2) cX4map = {
        }
 };
 
+static PF cX8J[] = {
+       insert_char             /* ^M */
+};
+
+static struct KEYMAPE (1) cX8map = {
+       1,
+       1,
+       rescan,
+       {
+               {
+                       CCHR('M'), CCHR('M'), cX8J, NULL
+               }
+       }
+};
+
 static PF cXcB[] = {
        listbuffers,            /* ^B */
        quit,                   /* ^C */
@@ -158,6 +173,10 @@ static PF cX0[] = {
        NULL                    /* 4 */
 };
 
+static PF cX8[] = {
+       NULL                    /* 8 */
+};
+
 static PF cXeq[] = {
        showcpos                /* = */
 };
@@ -189,9 +208,9 @@ static PF cXcar[] = {
        undo                    /* u */
 };
 
-struct KEYMAPE (6) cXmap = {
-       6,
-       6,
+struct KEYMAPE (7) cXmap = {
+       7,
+       7,
        rescan,
        {
                {
@@ -207,6 +226,9 @@ struct KEYMAPE (6) cXmap = {
                        '0', '4', cX0, (KEYMAP *) & cX4map
                },
                {
+                       '8', '8', cX8, (KEYMAP *) & cX8map
+               },
+               {
                        '=', '=', cXeq, NULL
                },
                {
@@ -491,6 +513,18 @@ static struct KEYMAPE (1) overwmap = {
        }
 };
 
+static struct KEYMAPE (1) rawmap = {
+       0,
+       1,              /* 1 to avoid 0 sized array */
+       rescan,
+       {
+               /* unused dummy entry for VMS C */
+               {
+                       (KCHAR)0, (KCHAR)0, NULL, NULL
+               }
+       }
+};
+
 
 /*
  * The basic (root) keyboard map
@@ -513,6 +547,7 @@ static struct maps_s map_table[] = {
        {(KEYMAP *) &notabmap, "notab",},
 #endif /* NOTAB */
        {(KEYMAP *) &overwmap, "overwrite",},
+       {(KEYMAP *) &rawmap, "raw",},
        {(KEYMAP *) &metamap, "esc prefix",},
        {(KEYMAP *) &cXmap, "c-x prefix",},
        {(KEYMAP *) &cX4map, "c-x 4 prefix",},
diff --git a/modes.c b/modes.c
index 027a5cd..2013bcb 100644
--- a/modes.c
+++ b/modes.c
@@ -111,6 +111,23 @@ overwrite_mode(int f, int n)
 }
 
 int
+show_raw_mode(int f, int n)
+{
+       /* Mode list first; the flag is only touched if that succeeds. */
+       if (changemode(f, n, "raw") == FALSE)
+               return (FALSE);
+       if (!(f & FFARG))
+               curbp->b_flag ^= BFSHOWRAW;     /* no argument: toggle */
+       else if (n > 0)
+               curbp->b_flag |= BFSHOWRAW;
+       else
+               curbp->b_flag &= ~BFSHOWRAW;
+
+       sgarbf = TRUE;
+       return (TRUE);
+}
+
+int
 set_default_mode(int f, int n)
 {
        int      i;
@@ -170,5 +187,11 @@ set_default_mode(int f, int n)
                        defb_flag |= BFNOTAB;
        }
 #endif /* NOTAB */
+       if (strcmp(modebuf, "raw") == 0) {
+               if (n <= 0)
+                       defb_flag &= ~BFSHOWRAW;
+               else
+                       defb_flag |= BFSHOWRAW;
+       }
        return (TRUE);
 }
diff --git a/util.c b/util.c
index 9357d4d..dbfc058 100644
--- a/util.c
+++ b/util.c
@@ -13,6 +13,8 @@
 #include <ctype.h>
 #include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
 
 #include "def.h"
 
@@ -33,6 +35,9 @@ showcpos(int f, int n)
        int      nline, row;
        int      cline, cbyte;          /* Current line/char/byte */
        int      ratio;
+       char     ismb = 0;
+       wchar_t  wc = 0;
+       char     mbc[MB_CUR_MAX + 1];
 
        /* collect the data */
        clp = bfirstlp(curbp);
@@ -69,8 +74,43 @@ showcpos(int f, int n)
                clp = lforw(clp);
        }
        ratio = nchar ? (100L * cchar) / nchar : 100;
-       ewprintf("Char: %c (0%o)  point=%ld(%d%%)  line=%d  row=%d  col=%d",
-           cbyte, cbyte, cchar, ratio, cline, row, getcolpos(curwp));
+
+       if (!(curbp->b_flag & BFSHOWRAW)) {
+               mbstate_t mbs = { 0 };
+               size_t consumed = 0;
+               size_t offset = 0;
+               while (cbyte != '\n' && offset <= curwp->w_doto) {
+                       int c = lgetc(clp, curwp->w_doto - offset);
+                       if (isprint(c) || (ISCTRL(c) != FALSE))
+                               break;
+                       consumed = mbrtowc(&wc,
+                                          &clp->l_text[curwp->w_doto - offset],
+                                          llength(clp) - curwp->w_doto + offset,
+                                          &mbs);
+                       if (consumed < (size_t) -2) {
+                               ismb = (offset < consumed);
+                               /* Bounded copy; the NUL stays inside mbc[]. */
+                               memcpy(mbc,
+                                      &clp->l_text[curwp->w_doto - offset],
+                                      consumed);
+                               mbc[consumed] = '\0';
+                               break;
+                       } else {
+                               memset(&mbs, 0, sizeof mbs);
+                       }
+                       offset++;
+               }
+       }
+
+       if (ismb) {
+               ewprintf("Char: %s (codepoint 0x%lx) Byte: %c (0%o)  "
+                        "point=%ld(%d%%)  line=%d  row=%d col=%d", mbc,
+                        (long) wc, cbyte, cbyte, cchar, ratio, cline, row,
+                        getcolpos(curwp));
+       } else {
+               ewprintf("Char: %c (0%o)  point=%ld(%d%%)  line=%d  row=%d"
+                        "  col=%d", cbyte, cbyte, cchar, ratio, cline, row,
+                        getcolpos(curwp));
+       }
        return (TRUE);
 }
 
@@ -96,6 +136,22 @@ getcolpos(struct mgwin *wp)
                        col += 2;
                else if (isprint(c)) {
                        col++;
+               } else if (!(wp->w_bufp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &wp->w_dotp->l_text[i],
+                                                 llength(wp->w_dotp) - i,
+                                                 &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {
+                               col += width;
+                               i += (consumed - 1);
+                       } else {
+                               col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       }
                } else {
                        col += snprintf(tmp, sizeof(tmp), "\\%o", c);
                }

Reply via email to