Hi, the file less/line.c currently contains three copies of the code that steps one multibyte character to the left in a char * buffer, and cleaning up the file less/cmdbuf.c will require similar functionality in the future. So let's introduce a new function for that purpose, and let's keep it as similar as possible to the ANSI C function mbtowc(3). However, there is no need to handle the case of the input char * being NULL; for resetting the conversion state, people should use mbtowc(3) itself, not the new function.
The logic in less/cmdbuf.c is so contorted that cleaning that up will still be challenging, but this at least provides one piece to help with the puzzle. Behaviour may change in LC_CTYPE=C mode if linebuf[curr-1] contains an explicit '\0' byte. I think in that case, jumping out of the backc() function, just like it is already done for UTF-8 mode, i.e. not trying to back up across the NUL byte, is more robust behaviour. Either way, linebuf[] is not supposed to contain NUL bytes except in -r mode, and -r mode is almost unusable in the first place. No other functional change is intended. Minus 16 lines of code, by the way... OK? Ingo Index: charset.c =================================================================== RCS file: /cvs/src/usr.bin/less/charset.c,v retrieving revision 1.24 diff -u -p -r1.24 charset.c --- charset.c 15 May 2019 19:36:20 -0000 1.24 +++ charset.c 30 Aug 2019 13:02:01 -0000 @@ -146,6 +146,27 @@ init_charset(void) } /* + * Like mbtowc(3), except that it converts the multibyte character + * preceding ps rather than the one starting at ps. + */ +int +mbtowc_left(wchar_t *pwc, const char *ps, size_t psz) +{ + size_t sz = 0; + int len; + + do { + if (++sz > psz) + return -1; + } while (utf_mode && IS_UTF8_TRAIL(ps[-sz])); + if ((len = mbtowc(pwc, ps - sz, sz)) == -1) { + (void)mbtowc(NULL, NULL, 0); + return -1; + } + return len == sz || (len == 0 && sz == 1) ? len : -1; +} + +/* * Is a given character a "control" character? 
*/ static int Index: funcs.h =================================================================== RCS file: /cvs/src/usr.bin/less/funcs.h,v retrieving revision 1.23 diff -u -p -r1.23 funcs.h --- funcs.h 15 May 2019 19:36:20 -0000 1.23 +++ funcs.h 30 Aug 2019 13:02:02 -0000 @@ -55,6 +55,7 @@ void ch_set_eof(void); void ch_init(int, int); void ch_close(void); int ch_getflags(void); +int mbtowc_left(wchar_t *, const char *, size_t); void init_charset(void); char *prchar(LWCHAR); char *prutfchar(LWCHAR); Index: line.c =================================================================== RCS file: /cvs/src/usr.bin/less/line.c,v retrieving revision 1.31 diff -u -p -r1.31 line.c --- line.c 15 May 2019 19:06:01 -0000 1.31 +++ line.c 30 Aug 2019 13:02:02 -0000 @@ -437,44 +437,20 @@ backc(void) wchar_t ch, prev_ch; int i, len, width; - i = curr - 1; - if (utf_mode) { - while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i])) - i--; - } - if (i < lmargin) + if ((len = mbtowc_left(&ch, linebuf + curr, curr)) <= 0) return (0); - if (utf_mode) { - len = mbtowc(&ch, linebuf + i, curr - i); - if (len == -1 || i + len < curr) { - (void)mbtowc(NULL, NULL, MB_CUR_MAX); - return (0); - } - } else - ch = linebuf[i]; + curr -= len; /* This assumes that there is no '\b' in linebuf. 
*/ - while (curr > lmargin && column > lmargin && - (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) { - curr = i--; - if (utf_mode) { - while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i])) - i--; - } - if (i < lmargin) + while (curr >= lmargin && column > lmargin && + !(attr[curr] & (AT_ANSI|AT_BINARY))) { + if ((len = mbtowc_left(&prev_ch, linebuf + curr, curr)) <= 0) prev_ch = L'\0'; - else if (utf_mode) { - len = mbtowc(&prev_ch, linebuf + i, curr - i); - if (len == -1 || i + len < curr) { - (void)mbtowc(NULL, NULL, MB_CUR_MAX); - prev_ch = L'\0'; - } - } else - prev_ch = linebuf[i]; width = pwidth(ch, attr[curr], prev_ch); column -= width; if (width > 0) return (1); + curr -= len; if (prev_ch == L'\0') return (0); ch = prev_ch; @@ -554,21 +530,8 @@ store_char(LWCHAR ch, char a, char *rep, } if (w == -1) { wchar_t prev_ch; - - if (utf_mode) { - for (i = curr - 1; i >= 0; i--) - if (!IS_UTF8_TRAIL(linebuf[i])) - break; - if (i >= 0) { - w = mbtowc(&prev_ch, linebuf + i, curr - i); - if (w == -1 || i + w < curr) { - (void)mbtowc(NULL, NULL, MB_CUR_MAX); - prev_ch = L' '; - } - } else - prev_ch = L' '; - } else - prev_ch = curr > 0 ? linebuf[curr - 1] : L' '; + if (mbtowc_left(&prev_ch, linebuf + curr, curr) <= 0) + prev_ch = L' '; w = pwidth(ch, a, prev_ch); }