Hi,

thanks for checking the pappend() and filename.c patches,
those are now committed.

Here is basic cleanup of the last major function in line.c, pshift().
Several minor issues still remain in the file, but those are for later.

This gets rid of two LWCHAR variables, one call to utf_len(),
get_wchar(), is_composing_char(), is_combining_char(), and
control_char() each and two calls to is_wide_char().

Note the change from "continue" to "break" in the "width == 2"
block.  It is an improvement because the double-width character did
not move off-screen completely, its second half is still there,
represented as a single-width space.  So whatever follows it does
absolutely not move off-screen, not even if what follows is zero
width, so it must not be inspected by the loop.

Since the variable names are far from intuitive, improve and add
comments.  Also, the comment above pshift() is simply wrong.
The function does not discard a specific number of characters.


Tested by running

  LC_CTYPE=en_US.UTF-8 less -# 1
  LC_CTYPE=en_US.UTF-8 less -# 1 -R
  LC_CTYPE=C less -# 1

on my set of test files containing a wide variety of combinations
of valid and invalid UTF-8 and control sequences, then scrolling
horizontally with the arrow keys.

OK?
  Ingo


Index: line.c
===================================================================
RCS file: /cvs/src/usr.bin/less/line.c,v
retrieving revision 1.29
diff -u -p -r1.29 line.c
--- line.c      7 May 2019 14:16:16 -0000       1.29
+++ line.c      7 May 2019 20:52:26 -0000
@@ -32,11 +32,11 @@ int ntabstops = 1;          /* Number of tabstop
 int tabdefault = 8;            /* Default repeated tabstops */
 off_t highest_hilite;          /* Pos of last hilite in file found so far */
 
-static int curr;               /* Index into linebuf */
-static int column;     /* Printable length, accounting for backspaces, etc. */
+static int curr;               /* Total number of bytes in linebuf */
+static int column;             /* Display columns needed to show linebuf */
 static int overstrike;         /* Next char should overstrike previous char */
 static int is_null_line;       /* There is no current line */
-static int lmargin;            /* Left margin */
+static int lmargin;            /* Index in linebuf of start of content */
 static char pendc;
 static off_t pendpos;
 static char *end_ansi_chars;
@@ -202,20 +202,21 @@ plinenum(off_t pos)
 
 /*
  * Shift the input line left.
- * This means discarding N printable chars at the start of the buffer.
+ * Starting at lmargin, some bytes are discarded from the linebuf,
+ * until the number of display columns needed to show these bytes
+ * would exceed the argument.
  */
 static void
 pshift(int shift)
 {
-       LWCHAR prev_ch = 0;
-       unsigned char c;
-       int shifted = 0;
-       int to;
-       int from;
-       int len;
-       int width;
-       int prev_attr;
-       int next_attr;
+       int shifted = 0;  /* Number of display columns already discarded. */
+       int from;         /* Index in linebuf of the current character. */
+       int to;           /* Index in linebuf to move this character to. */
+       int len;          /* Number of bytes in this character. */
+       int width = 0;    /* Display columns needed for this character. */
+       int prev_attr;    /* Attributes of the preceding character. */
+       int next_attr;    /* Attributes of the following character. */
+       unsigned char c;  /* First byte of current character. */
 
        if (shift > column - lmargin)
                shift = column - lmargin;
@@ -241,44 +242,41 @@ pshift(int shift)
                        }
                        continue;
                }
-
-               width = 0;
-
                if (!IS_ASCII_OCTET(c) && utf_mode) {
-                       /* Assumes well-formedness validation already done.  */
-                       LWCHAR ch;
-
-                       len = utf_len(c);
-                       if (from + len > curr)
-                               break;
-                       ch = get_wchar(linebuf + from);
-                       if (!is_composing_char(ch) &&
-                           !is_combining_char(prev_ch, ch))
-                               width = is_wide_char(ch) ? 2 : 1;
-                       prev_ch = ch;
+                       wchar_t ch;
+                       /*
+                        * Before this point, UTF-8 validity was already
+                        * checked, but for additional safety, treat
+                        * invalid bytes as single-width characters
+                        * if they ever make it here.  Similarly, treat
+                        * non-printable characters as width 1.
+                        */
+                       len = mbtowc(&ch, linebuf + from, curr - from);
+                       if (len == -1)
+                               len = width = 1;
+                       else if ((width = wcwidth(ch)) == -1)
+                               width = 1;
                } else {
                        len = 1;
                        if (c == '\b')
                                /* XXX - Incorrect if several '\b' in a row.  */
-                               width = (utf_mode && is_wide_char(prev_ch)) ?
-                                   -2 : -1;
-                       else if (!control_char(c))
-                               width = 1;
-                       prev_ch = 0;
+                               width = width > 0 ? -width : -1;
+                       else
+                               width = iscntrl(c) ? 0 : 1;
                }
 
                if (width == 2 && shift - shifted == 1) {
-                       /* Should never happen when called by pshift_all().  */
-                       attr[to] = attr[from];
                        /*
-                        * Assume a wide_char will never be the first half of a
-                        * combining_char pair, so reset prev_ch in case we're
-                        * followed by a '\b'.
+                        * Move the first half of a double-width character
+                        * off screen.  Print a space instead of the second
+                        * half.  This should never happen when called
+                        * by pshift_all().
                         */
-                       prev_ch = linebuf[to++] = ' ';
+                       attr[to] = attr[from];
+                       linebuf[to++] = ' ';
                        from += len;
                        shifted++;
-                       continue;
+                       break;
                }
 
                /* Adjust width for magic cookies. */

Reply via email to