Hi John, mako,

El lun, 05-12-2005 a las 11:52 -0500, John E. Davis escribió:
> On Mon, 5 Dec 2005 10:39:46 -0500, "Benj. Mako Hill" <[EMAIL PROTECTED]> said:
> >> UTF-8 enabled Most, with working (and enabled by default during build)
> >> UTF-8 compliant RegExp searches.
> >
> >Wonderful! I've tested out the patch and it works great on my
> >system. I've applied it in whole.
> 
> While it is a good start for UTF-8, it will require more work to
> integrate.  For example, the patch to buffer.c:forward_columns does
> not appear to properly handle tab characters, embedded backspaces,
> etc.  Such backspaces are used by manpages to simulate an overstrike,
> underline, etc, e.g.,

The attached patch fixes this case, plus other minor glitches with
multi-byte characters.

There is only one important glitch that I haven't been able to fix, and
it seems to be a bug in Slang or Gnome-Terminal. The attached file
most-test-long is displayed as expected, but some of the characters in
most-test-long-fmt wrap to the second line of the buffer. This seems to
be an artifact of formatting. As you'll see if you scroll the buffer to
the right, the few bold and underlined characters are sometimes
displayed with different spacing around them than in the plain version
of the document.

I've verified with the code below that most_analyse_line extracts at
most 80 columns (for my terminal) worth of text. However, the fact that
the terminal adds extra spacing when formatting is applied causes the
output to wrap.

   len = most_analyse_line(beg, end, line, attr);
   p = line;
   for (i = 0; i < strlen(attr); i++) {
           SLwchar_Type wc;
           p = SLutf8_decode(p, line + line_len, &wc, NULL);
           if (p)
                   wc_len += SLwchar_wcwidth(wc);
   }
   assert(wc_len <= SLtt_Screen_Cols);

Cheers,
-- 
Javier Kohen <[EMAIL PROTECTED]>
ICQ: blashyrkh #2361802
Jabber: [EMAIL PROTECTED]
aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ 
aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ
aáホホビ_ビ_Хथሰਗ਼בּホビ aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ 
aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ aáŴÓХथሰਗ਼בּホビХथሰਗ਼בּホビ
Sólo en most-4.10.2: config.log
Sólo en most-4.10.2: config.status
diff -ur most-4.10.2-2.debian-orig/debian/changelog most-4.10.2/debian/changelog
--- most-4.10.2-2.debian-orig/debian/changelog	2005-12-05 21:51:17.000000000 -0300
+++ most-4.10.2/debian/changelog	2005-12-06 04:57:47.000000000 -0300
@@ -1,3 +1,10 @@
+most (4.10.2-2.0.1) unstable; urgency=low
+
+  * Fixed minor glitches when displaying multi-byte characters.
+  * Fixed wrapped mode when using formatted output.
+
+ -- Javier Kohen <[EMAIL PROTECTED]>  Tue,  6 Dec 2005 04:57:01 -0300
+
 most (4.10.2-2) unstable; urgency=low
 
   * Patch from Javier Kohen to rework (and fix) RegExp searches, so they now
Sólo en most-4.10.2: Makefile
diff -ur most-4.10.2-2.debian-orig/src/buffer.c most-4.10.2/src/buffer.c
--- most-4.10.2-2.debian-orig/src/buffer.c	2005-12-05 21:51:17.000000000 -0300
+++ most-4.10.2/src/buffer.c	2005-12-06 04:48:06.000000000 -0300
@@ -62,11 +62,13 @@
 	if (*pos == '\n')
 	  {
 	     pos--; /* Skip back the new-line. */
-	     while ((pos > Most_Beg)
-		    && (*pos != '\n'))
-	       pos = SLutf8_bskip_char(Most_Beg, pos);
+	     /* This block is UTF-8 safe, because it only scans the
+		buffer for a new-line, and doesn't count
+		characters. */
+	     while ((pos > Most_Beg) && (*pos != '\n'))
+	       pos--;
 
-	     if (*pos != '\n') return pos;
+	     if (*pos != '\n') return Most_Beg;
 	     /* from here on *pos == '\n' */
 	     if (pos + 1 != cpos)
 	       return pos + 1;
@@ -77,9 +79,9 @@
 
    if (*pos != '\n')
      {
-	while ((pos > Most_Beg)
-	       && (*pos != '\n'))
-	  pos = SLutf8_bskip_char(Most_Beg, pos);
+	/* This block is UTF-8 safe. See comment above. */
+	while ((pos > Most_Beg) && (*pos != '\n'))
+	  pos--;
 	if (*pos != '\n') return Most_Beg;
 	/* from here on *pos == '\n' */
 	return pos + 1;
@@ -96,58 +98,6 @@
    return pos;
 }
 
-
-static unsigned char *forward_columns (unsigned char *b, unsigned char *e, unsigned int num_cols)
-{
-   unsigned int col = 0;
-
-   if (Most_UTF8_Mode)
-     return SLutf8_skip_chars(b, e, num_cols, &col, 0);
-
-   while ((b < e)
-	  && (col < num_cols))
-     {
-	unsigned char ch = *b++;
-
-	if (most_isprint(ch))
-	  {
-	     col++;
-	     continue;
-	  }
-	
-	if ((ch == '\b') || (ch == '\t') || (ch == '\r'))
-	  switch (ch)
-	    {
-	     case '\b':
-	       if (Most_V_Opt == 0)
-		 {
-		    if (col > 0) col--;
-		 }
-	       else col += 2;
-	       break;
-	       
-	     case '\r':
-	       if (Most_V_Opt == 0)
-		 col = 0;
-	       else 
-		 col += 2;
-	       break;
-	       
-	     case '\t':
-	       if (Most_T_Opt == 0)
-		 col = Most_Tab_Width * (col/Most_Tab_Width + 1);
-	       else
-		 col += 2;
-	       break;
-	    }
-	else if (ch & 0x80)
-	  col += 3;
-	else
-	  col += 2;
-     }
-   return b;
-}
-
 /* does not move point */
 static unsigned char *end_of_line1(void)
 {
@@ -170,6 +120,9 @@
 
    if (*pos != '\n')
      {
+	/* This block is UTF-8 safe, because it only scans the buffer
+	   for a new-line, and doesn't count characters. */
+
 	n = pmax - pos;
 	n2 = n % 8;
 	pmax = pos + (n - 8);
@@ -221,7 +174,7 @@
    ncols = SLtt_Screen_Cols-1;
    while (1)
      {
-	unsigned char *next_b = forward_columns (b, e, ncols);
+	unsigned char *next_b = most_forward_columns (b, e, ncols, 1);
 	if ((next_b == e) || (next_b == b))
 	  break;
 	
@@ -243,11 +196,12 @@
      return e;
 
    if (b == NULL) b = most_beg_of_line ();
-   b = forward_columns (b, e, SLtt_Screen_Cols-1);
+   b = most_forward_columns (b, e, SLtt_Screen_Cols-1, 1);
    
    /* Do not wrap the line if the last character falls on the last column 
     * of the display.
     */
+   /* FIXME potential bug if dealing with multi-byte char. */
    if ((b + 1 <= e) 
        && (b + 1 < Most_Eob) 
        && (b[1] == '\n'))
Sólo en most-4.10.2/src: buffer.c~
Sólo en most-4.10.2/src: config.h
diff -ur most-4.10.2-2.debian-orig/src/line.c most-4.10.2/src/line.c
--- most-4.10.2-2.debian-orig/src/line.c	2005-12-05 21:51:17.000000000 -0300
+++ most-4.10.2/src/line.c	2005-12-06 04:56:38.000000000 -0300
@@ -119,19 +119,17 @@
    return (ch >= ' ' && ch < 0x7F) || ch >= SLsmg_Display_Eight_Bit;
 }
 
-static int most_analyse_line(unsigned char *begg, unsigned char *endd, 
+static int most_analyse_line(unsigned char *beg, unsigned char *end,
 			     unsigned char *out, char *attributes)
 {
-   unsigned char *beg, *end, *pout;
-   unsigned int min_col, max_col;
+   unsigned char *pout;
+   char* pattributes;
    unsigned int i, i_max;
 
-   beg = begg;
-   end = endd;
+   beg = most_forward_columns(beg, end, Most_Column - 1, 0);
    pout = out;
+   pattributes = attributes;
    i = i_max = 0;
-   min_col = Most_Column - 1;
-   max_col = min_col + SLtt_Screen_Cols;
 
    while (beg < end)
      {
@@ -154,9 +152,19 @@
 	     if (i > 0)
 	       {
 		  if (Most_UTF8_Mode)
-		    pout = SLutf8_bskip_char(out, pout);
+		    {
+		       SLwchar_Type wc;
+		       pout = SLutf8_bskip_char(out, pout);
+		       if (SLutf8_decode(pout, pout + SLUTF8_MAX_MBLEN, &wc, NULL))
+			 {
+			    unsigned int char_len = SLwchar_wcwidth(wc);
+			    if (char_len > 1)
+			      i -= char_len - 1;
+			 }
+		    }
 		  else
 		    pout--;
+		  pattributes--;
 		  i--;
 	       }
 	     continue;
@@ -165,7 +173,7 @@
 	if (i < i_max)		       /* overstrike */
 	  {
 	     attr = 'b';
-	     if ((i >= min_col) && (i < max_col))
+	     if (i < SLtt_Screen_Cols)
 	       {
 		  if (*pout == '_')
 		    attr = 'u';
@@ -185,16 +193,26 @@
 
 	if (Most_UTF8_Mode) {
 	   unsigned char *prev = --beg;
-	   int len;
+	   SLwchar_Type wc;
+	   unsigned int len;
+
+	   if (SLutf8_decode(beg, end, &wc, NULL))
+	     {
+	        unsigned int char_len = SLwchar_wcwidth(wc);
+		if (char_len > 1)
+	          i += char_len - 1;
+	     }
+
 	   beg = SLutf8_skip_char(beg, end);
 	   len = beg - prev;
+
 	   if (len > 1) {
 	     /* Non-ASCII char, display it. */
-	     if ((i >= min_col) && (i < max_col))
+	     if (i < SLtt_Screen_Cols)
 	       {
 		  memcpy(pout, prev, len);
 		  pout += len;
-		  attributes[i-min_col] = attr;
+		  *pattributes++ = attr;
 	       }
 	     i++;
 	     continue;
@@ -203,10 +221,10 @@
 
 	if (most_isprint(ch))
 	  {
-	     if ((i >= min_col) && (i < max_col))
+	     if (i < SLtt_Screen_Cols)
 	       {
 		  *pout++ = ch;
-		  attributes[i-min_col] = attr;
+		  *pattributes++ = attr;
 	       }
 	     i++;
 	     continue;
@@ -218,10 +236,10 @@
 	     int nspaces = Most_Tab_Width * (i/Most_Tab_Width + 1) - i;
 	     while (nspaces > 0)
 	       {
-		  if ((i >= min_col) && (i < max_col))
+		  if (i < SLtt_Screen_Cols)
 		    {
 		       *pout++ = ' ';
-		       attributes[i-min_col] = attr;
+		       *pattributes++ = attr;
 		    }
 		  i++;
 		  nspaces--;
@@ -231,30 +249,30 @@
 
 	if (ch & 0x80)
 	  {
-	     if ((i >= min_col) && (i < max_col))
+	     if (i < SLtt_Screen_Cols)
 	       {
 		  *pout++ = '~';
-		  attributes[i-min_col] = attr;
+		  *pattributes++ = attr;
 	       }
 	     i++;
 	     ch &= 0x7F;
 	     /* drop */
 	  }
 	
-	if ((i >= min_col) && (i < max_col))
+	if (i < SLtt_Screen_Cols)
 	  {
 	     *pout++ = '^';
-	     attributes[i-min_col] = attr;
+	     *pattributes++ = attr;
 	  }
 	i++;
 	
 	if (ch == 0x7F) ch = '?';
 	else ch += '@';
 	
-	if ((i >= min_col) && (i < max_col))
+	if (i < SLtt_Screen_Cols)
 	  {
 	     *pout++ = ch;
-	     attributes[i-min_col] = attr;
+	     *pattributes++ = attr;
 	  }
 	i++;
      }
@@ -268,7 +286,7 @@
    if (Most_Selective_Display 
        && (Most_W_Opt == 0)
        && (beg < Most_Eob)
-       && ((i >= min_col) && (i < max_col)))
+       && (i < SLtt_Screen_Cols))
      {
 	if (*beg == '\n') beg++;
 
@@ -282,31 +300,22 @@
 	     i_max = i + 3;
 	     while (i < i_max)
 	       {
-		  if (i < max_col)
+		  if (i < SLtt_Screen_Cols)
 		    {
 		       *pout++ = '.';
-		       attributes[i] = ' ';
+		       *pattributes++ = ' ';
 		    }
 		  i++;
 	       }
 	  }
      }
-   
-   i_max = i;
-
-   if (i < min_col)
-     i = min_col;
-   else if (i >= max_col)
-     i = max_col;
-
-   i -= min_col;
 
    *pout = 0;
-   attributes[i] = 0;
-   return i_max;
+   *pattributes = 0;
+   return i;
 }
 
-static void output_with_attr (unsigned char *out, unsigned char *attr)
+static void output_with_attr (unsigned char *out, char *attr)
 {
    unsigned char at, lat;
    unsigned char *p = out;
@@ -361,8 +370,11 @@
 {
    unsigned char *beg, *end;
    unsigned int len;
+#if 0
+   unsigned char dollar;
+#endif
    static unsigned char *line;
-   static unsigned char *attr;
+   static char *attr;
    static unsigned int line_len;
 
    if (Most_B_Opt)
@@ -376,18 +388,42 @@
    if (line_len < (unsigned int)(SLtt_Screen_Cols + 1) * SLUTF8_MAX_MBLEN)
      {
 	SLfree ((char *) line);
-	SLfree ((char *) attr);
+	SLfree (attr);
 	
 	line_len = (SLtt_Screen_Cols + 1) * SLUTF8_MAX_MBLEN;
 	
 	if ((NULL == (line = (unsigned char *) SLmalloc (line_len)))
-	    || (NULL == (attr = (unsigned char *) SLmalloc (line_len))))
+	    || (NULL == (attr = SLmalloc (line_len))))
 	  most_exit_error ("Out of memory");
      }
 
    (void) most_extract_line (&beg, &end);
 
-   len = most_analyse_line(beg, end, line, (char *) attr);
+   len = most_analyse_line(beg, end, line, attr);
+
+#if 0
+   /* Currently the dollar sign is not always being written at the
+      rightmost column when displaying multi-byte characters. */
+   dollar = 0;
+   if (Most_W_Opt)
+     {
+       if ((end < Most_Eob)
+           && (*end != '\n'))
+         dollar = '\\';
+     }
+   else if (len > (unsigned int) SLtt_Screen_Cols + (Most_Column - 1))
+     dollar = '$';
+
+   if (dollar)
+     {
+       unsigned char *pline =
+	 most_forward_columns(line, line + line_len, SLtt_Screen_Cols-1, 1);
+       *pline = dollar;
+       *(pline+1) = 0;
+       attr[SLtt_Screen_Cols-1] = ' ';
+       attr[SLtt_Screen_Cols] = 0;
+     }
+#endif
 
    output_with_attr (line, attr);
    SLsmg_erase_eol ();
@@ -459,3 +495,90 @@
      }
    return i;
 }
+
+/*
+ * Returns a pointer to the num_cols'th character after the one
+ * pointed to by b. Invisible character runs are not counted toward this
+ * limit, i.e. strings that represent attributes, such as "_\b" for
+ * underlines.
+ *
+ * If multi_column is non-zero, characters spanning more than one
+ * column will add their extra width to the column count.
+ *
+ * If the end of the buffer is reached, as delimited by argument
+ * e, then e is returned.
+ */
+unsigned char *most_forward_columns (unsigned char *b, unsigned char *e, unsigned int num_cols, int multi_column)
+{
+   unsigned int col = 0;
+   unsigned int prev_width = 1;
+
+   while ((b < e)
+	  && ((col < num_cols)
+	      || (*b == '\b')
+	      || (*b == '\t')
+	      || (*b == '\r')))
+     {
+	unsigned char ch = *b;
+
+	if (Most_UTF8_Mode)
+	  {
+	     unsigned char *prev = b;
+	     int len;
+	     b = SLutf8_skip_char(b, e);
+	     len = b - prev;
+	     if (len > 1)
+	       {
+		  if (multi_column)
+		    {
+		        SLwchar_Type wc;
+			if (SLutf8_decode(prev, e, &wc, NULL))
+			  col += prev_width = SLwchar_wcwidth(wc);
+		    }
+		  else
+		    col++;
+		  continue;
+	       }
+	  }
+	else
+	  b++;
+
+	if (most_isprint(ch))
+	  {
+	     col++;
+	     prev_width = 1;
+	     continue;
+	  }
+
+	if ((ch == '\b') || (ch == '\t') || (ch == '\r'))
+	  switch (ch)
+	    {
+	     case '\b':
+	       if (Most_V_Opt == 0)
+		 {
+		    if (col > 0) col -= prev_width;
+		 }
+	       else col += 2;
+	       break;
+
+	     case '\r':
+	       if (Most_V_Opt == 0)
+		 col = 0;
+	       else
+		 col += 2;
+	       break;
+
+	     case '\t':
+	       if (Most_T_Opt == 0)
+		 col = Most_Tab_Width * (col/Most_Tab_Width + 1);
+	       else
+		 col += 2;
+	       break;
+	    }
+	else if (ch & 0x80)
+	  col += 3;
+	else
+	  col += 2;
+     }
+   return b;
+}
Sólo en most-4.10.2/src: line.c~
diff -ur most-4.10.2-2.debian-orig/src/line.h most-4.10.2/src/line.h
--- most-4.10.2-2.debian-orig/src/line.h	2005-12-05 21:51:17.000000000 -0300
+++ most-4.10.2/src/line.h	2005-12-06 04:20:05.000000000 -0300
@@ -7,6 +7,7 @@
 
 extern void most_display_line(void);
 extern int most_apparant_distance(unsigned char *);
-extern int most_isprint(unsigned char ch);
+extern int most_isprint(unsigned char);
+extern unsigned char *most_forward_columns (unsigned char *, unsigned char *, unsigned int, int);
 #endif
 
Sólo en most-4.10.2/src: line.h~
Sólo en most-4.10.2/src: Makefile
Sólo en most-4.10.2/src: objs
Sólo en most-4.10.2/src: TAGS
diff -ur most-4.10.2-2.debian-orig/src/window.c most-4.10.2/src/window.c
--- most-4.10.2-2.debian-orig/src/window.c	2005-12-05 21:51:17.000000000 -0300
+++ most-4.10.2/src/window.c	2005-12-06 03:01:05.000000000 -0300
@@ -51,6 +51,7 @@
 long long Most_Top_Line;		       /* row number of top window */
 long long Most_Curs_Row;
 long long Most_Curs_Col;
+/* The leftmost visible column. */
 long long Most_Column = 1;
 int Most_Restore_Width_To = 0;
 char Most_Mini_Buf[256];
Sólo en most-4.10.2/src: window.c~

Attachment: signature.asc
Description: This is a digitally signed message part

Reply via email to