Hi,

the file less/line.c currently contains three copies of code to
step one multibyte character to the left in a char * buffer, and
cleaning up the file less/cmdbuf.c will require similar functionality
in the future.  So let's introduce a new function for that purpose.
Keep it as similar as possible to the ANSI C function mbtowc(3).
Then again, there is no need to handle the case of the input char *
being NULL; for resetting, people should use mbtowc(3) itself, not
the new function.

The logic in less/cmdbuf.c is so contorted that cleaning that up
will still be challenging, but this at least provides one piece to
help with the puzzle.

Behaviour may change in LC_CTYPE=C mode if linebuf[curr-1] contains
an explicit '\0' byte: backc() now returns immediately instead of
trying to back up across the NUL byte, just as it already does in
UTF-8 mode.  I think that is the more robust behaviour.
Either way, linebuf[] is not supposed to contain NUL bytes except
in -r mode, and -r mode is almost unusable in the first place.
No other functional change is intended.

Minus 16 lines of code, by the way...

OK?
  Ingo


Index: charset.c
===================================================================
RCS file: /cvs/src/usr.bin/less/charset.c,v
retrieving revision 1.24
diff -u -p -r1.24 charset.c
--- charset.c   15 May 2019 19:36:20 -0000      1.24
+++ charset.c   30 Aug 2019 13:02:01 -0000
@@ -146,6 +146,27 @@ init_charset(void)
 }
 
 /*
+ * Like mbtowc(3), except that it converts the multibyte character
+ * preceding ps rather than the one starting at ps.
+ */
+int
+mbtowc_left(wchar_t *pwc, const char *ps, size_t psz)
+{
+       size_t sz = 0;          /* bytes stepped back from ps so far */
+       int len;                /* result of mbtowc(3) on those bytes */
+
+       do {    /* step left; in UTF-8 mode, skip over trail bytes */
+               if (++sz > psz)
+                       return -1;      /* fewer than sz bytes precede ps */
+       } while (utf_mode && IS_UTF8_TRAIL(*(ps - sz))); /* no -sz on size_t */
+       if ((len = mbtowc(pwc, ps - sz, sz)) == -1) {
+               (void)mbtowc(NULL, NULL, 0);    /* reset conversion state */
+               return -1;
+       }       /* accept only if all sz bytes were used, or a lone NUL */
+       return (size_t)len == sz || (len == 0 && sz == 1) ? len : -1;
+}
+
+/*
  * Is a given character a "control" character?
  */
 static int
Index: funcs.h
===================================================================
RCS file: /cvs/src/usr.bin/less/funcs.h,v
retrieving revision 1.23
diff -u -p -r1.23 funcs.h
--- funcs.h     15 May 2019 19:36:20 -0000      1.23
+++ funcs.h     30 Aug 2019 13:02:02 -0000
@@ -55,6 +55,7 @@ void ch_set_eof(void);
 void ch_init(int, int);
 void ch_close(void);
 int ch_getflags(void);
+int mbtowc_left(wchar_t *, const char *, size_t);
 void init_charset(void);
 char *prchar(LWCHAR);
 char *prutfchar(LWCHAR);
Index: line.c
===================================================================
RCS file: /cvs/src/usr.bin/less/line.c,v
retrieving revision 1.31
diff -u -p -r1.31 line.c
--- line.c      15 May 2019 19:06:01 -0000      1.31
+++ line.c      30 Aug 2019 13:02:02 -0000
@@ -437,44 +437,20 @@ backc(void)
        wchar_t  ch, prev_ch;
        int      i, len, width;
 
-       i = curr - 1;
-       if (utf_mode) {
-               while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
-                       i--;
-       }
-       if (i < lmargin)
+       if ((len = mbtowc_left(&ch, linebuf + curr, curr)) <= 0)
                return (0);
-       if (utf_mode) {
-               len = mbtowc(&ch, linebuf + i, curr - i);
-               if (len == -1 || i + len < curr) {
-                       (void)mbtowc(NULL, NULL, MB_CUR_MAX);
-                       return (0);
-               }
-       } else
-               ch = linebuf[i];
+       curr -= len;
 
        /* This assumes that there is no '\b' in linebuf.  */
-       while (curr > lmargin && column > lmargin &&
-           (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) {
-               curr = i--;
-               if (utf_mode) {
-                       while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
-                               i--;
-               }
-               if (i < lmargin)
+       while (curr >= lmargin && column > lmargin &&
+           !(attr[curr] & (AT_ANSI|AT_BINARY))) {
+               if ((len = mbtowc_left(&prev_ch, linebuf + curr, curr)) <= 0)
                        prev_ch = L'\0';
-               else if (utf_mode) {
-                       len = mbtowc(&prev_ch, linebuf + i, curr - i);
-                       if (len == -1 || i + len < curr) {
-                               (void)mbtowc(NULL, NULL, MB_CUR_MAX);
-                               prev_ch = L'\0';
-                       }
-               } else
-                       prev_ch = linebuf[i];
                width = pwidth(ch, attr[curr], prev_ch);
                column -= width;
                if (width > 0)
                        return (1);
+               curr -= len;
                if (prev_ch == L'\0')
                        return (0);
                ch = prev_ch;
@@ -554,21 +530,8 @@ store_char(LWCHAR ch, char a, char *rep,
        }
        if (w == -1) {
                wchar_t prev_ch;
-
-               if (utf_mode) {
-                       for (i = curr - 1; i >= 0; i--)
-                               if (!IS_UTF8_TRAIL(linebuf[i]))
-                                       break;
-                       if (i >= 0) {
-                               w = mbtowc(&prev_ch, linebuf + i, curr - i);
-                               if (w == -1 || i + w < curr) {
-                                       (void)mbtowc(NULL, NULL, MB_CUR_MAX);
-                                       prev_ch = L' ';
-                               }
-                       } else
-                               prev_ch = L' ';
-               } else
-                       prev_ch = curr > 0 ? linebuf[curr - 1] : L' ';
+               if (mbtowc_left(&prev_ch, linebuf + curr, curr) <= 0)
+                       prev_ch = L' ';
                w = pwidth(ch, a, prev_ch);
        }
 

Reply via email to