Author: engelsman
Date: 2010-04-18 07:33:33 -0700 (Sun, 18 Apr 2010)
New Revision: 7527
Log:
Fl_Text_Buffer/Display fixes for UTF-8 / STR-2158 (part 1)

Fixes the incorrect counting of UTF-8 characters: complete UTF-8 sequences
are now examined through a char* instead of one char at a time.



Modified:
   branches/branch-1.3/FL/Fl_Text_Buffer.H
   branches/branch-1.3/FL/Fl_Text_Display.H
   branches/branch-1.3/src/Fl_Text_Buffer.cxx
   branches/branch-1.3/src/Fl_Text_Display.cxx

Modified: branches/branch-1.3/FL/Fl_Text_Buffer.H
===================================================================
--- branches/branch-1.3/FL/Fl_Text_Buffer.H     2010-04-18 09:52:49 UTC (rev 7526)
+++ branches/branch-1.3/FL/Fl_Text_Buffer.H     2010-04-18 14:33:33 UTC (rev 7527)
@@ -248,6 +248,18 @@
    */
   unsigned int character(int pos) const;
   
+  /**
+   Convert a byte offset in buffer into a memory address.
+   */
+  const char *address(int pos) const
+  { return (pos < mGapStart) ? mBuf+pos : mBuf+pos+mGapEnd-mGapStart; }
+
+  /**
+   Convert a byte offset in buffer into a memory address.
+   */
+  char *address(int pos)
+  { return (pos < mGapStart) ? mBuf+pos : mBuf+pos+mGapEnd-mGapStart; }
+  
   /** 
    Returns the text from the given rectangle. When you are done
    with the text, free it using the free() function.
@@ -681,6 +693,7 @@
    \return number of byte in substitution
    */
   static int character_width(const char *src, int indent, int tabDist);
+  static int character_width(const char    c, int indent, int tabDist);
 
   /**
    Count the number of displayed characters between buffer position
@@ -934,18 +947,6 @@
    */
   void update_selections(int pos, int nDeleted, int nInserted);
   
-  /**
-   Convert a byte offset in buffer into a memory address.
-   */
-  const char *address(int pos) const
-  { return (pos < mGapStart) ? mBuf+pos : mBuf+pos+mGapEnd-mGapStart; }
-
-  /**
-   Convert a byte offset in buffer into a memory address.
-   */
-  char *address(int pos)
-  { return (pos < mGapStart) ? mBuf+pos : mBuf+pos+mGapEnd-mGapStart; }
-  
   Fl_Text_Selection mPrimary;     /**< highlighted areas */
   Fl_Text_Selection mSecondary;   /**< highlighted areas */
   Fl_Text_Selection mHighlight;   /**< highlighted areas */

Modified: branches/branch-1.3/FL/Fl_Text_Display.H
===================================================================
--- branches/branch-1.3/FL/Fl_Text_Display.H    2010-04-18 09:52:49 UTC (rev 7526)
+++ branches/branch-1.3/FL/Fl_Text_Display.H    2010-04-18 14:33:33 UTC (rev 7527)
@@ -251,7 +251,7 @@
                                bool countLastLineMissingNewLine = true) const;
     void find_line_end(int pos, bool start_pos_is_line_start, int *lineEnd,
                          int *nextLineStart) const;
-    int measure_proportional_character(char c, int colNum, int pos) const;
+    int measure_proportional_character(const char *s, int colNum, int pos) 
const;
     int wrap_uses_character(int lineEndPos) const;
     int range_touches_selection(const Fl_Text_Selection *sel, int rangeStart,
                                  int rangeEnd) const;

Modified: branches/branch-1.3/src/Fl_Text_Buffer.cxx
===================================================================
--- branches/branch-1.3/src/Fl_Text_Buffer.cxx  2010-04-18 09:52:49 UTC (rev 7526)
+++ branches/branch-1.3/src/Fl_Text_Buffer.cxx  2010-04-18 14:33:33 UTC (rev 7527)
@@ -975,6 +975,8 @@
 
 // static function and counterpart to "character_width"
 // - unicode ok
+// FIXME: harmonise with new character_width(char*...) version
+//
 int Fl_Text_Buffer::expand_character(const char *src, int indent, char 
*outStr, int tabDist)
 {
   char c = *src;
@@ -1018,8 +1020,27 @@
 // - unicode ok
 int Fl_Text_Buffer::character_width(const char *src, int indent, int tabDist)
 {
+  char c = *src;
+  if ((c & 0x80) && (c & 0x40)) {       // first byte of UTF-8 sequence
+    int len = fl_utf8len(c);
+    int ret = 0;
+    unsigned int ucs = fl_utf8decode(src, src+len, &ret);
+    int width = 1; //   mk_wcwidth((wchar_t)ucs); // FIXME
+    // fprintf(stderr, "mk_wcwidth(%x) -> %d (%d, %d, %s)\n", ucs, width, len, 
ret, s);
+    return width;
+  }
+  if ((c & 0x80) && !(c & 0x40)) {      // other byte of UTF-8 sequence
+    return 0;
+  }
+  return character_width(c, indent, tabDist);
+}
+
+// FIXME: merge the following with the char* version above.
+// but the question then is: how to reorganise expand_character()?
+//
+int Fl_Text_Buffer::character_width(const char    c, int indent, int tabDist)
+{
   /* Note, this code must parallel that in Fl_Text_Buffer::ExpandCharacter */
-  char c = *src;
   if (c == '\t') {
     return tabDist - (indent % tabDist);
   } else if (((unsigned char) c) <= 31) {
@@ -1032,7 +1053,8 @@
 #endif
     return 1;
   } else if (c & 0x80) {
-    return fl_utf8len(c);
+    // return fl_utf8len(c);
+    return 1;
   }
   return 1;
 }
@@ -1529,8 +1551,7 @@
   const char *linePtr;
   
   for (linePtr = line; *linePtr != '\0'; linePtr++) {
-    len =
-    Fl_Text_Buffer::character_width(linePtr, indent, tabDist);
+    len = Fl_Text_Buffer::character_width(linePtr, indent, tabDist);
     if (indent + len > column)
       break;
     indent += len;
@@ -1573,8 +1594,7 @@
                                   &len);
     for (const char *c = retabbedStr; *c != '\0'; c++) {
       *outPtr++ = *c;
-      len =
-      Fl_Text_Buffer::character_width(c, indent, tabDist);
+      len = Fl_Text_Buffer::character_width(c, indent, tabDist);
       indent += len;
     }
     free((void *) retabbedStr);
@@ -1624,8 +1644,7 @@
   for (c = line; *c != '\0'; c++) {
     if (indent > rectStart)
       break;
-    len =
-    Fl_Text_Buffer::character_width(c, indent, tabDist);
+    len = Fl_Text_Buffer::character_width(c, indent, tabDist);
     if (indent + len > rectStart && (indent == rectStart || *c == '\t'))
       break;
     indent += len;
@@ -1635,8 +1654,7 @@
   
   /* skip the characters between rectStart and rectEnd */
   for (; *c != '\0' && indent < rectEnd; c++)
-    indent +=
-    Fl_Text_Buffer::character_width(c, indent, tabDist);
+    indent += Fl_Text_Buffer::character_width(c, indent, tabDist);
   int postRectIndent = indent;
   
   /* If the line ended before rectEnd, there's nothing more to do */
@@ -1682,8 +1700,7 @@
   const char *linePtr = line;
   
   for (; *linePtr != '\0'; linePtr++) {
-    len =
-    Fl_Text_Buffer::character_width(linePtr, inIndent, tabDist);
+    len = Fl_Text_Buffer::character_width(linePtr, inIndent, tabDist);
     if (inIndent + len > rectStart)
       break;
     inIndent += len;
@@ -1709,8 +1726,7 @@
   /* skip the characters between rectStart and rectEnd */
   int postRectIndent = rectEnd;
   for (; *linePtr != '\0'; linePtr++) {
-    inIndent +=
-    Fl_Text_Buffer::character_width(linePtr, inIndent, tabDist);
+    inIndent += Fl_Text_Buffer::character_width(linePtr, inIndent, tabDist);
     if (inIndent >= rectEnd) {
       linePtr++;
       postRectIndent = inIndent;
@@ -1738,8 +1754,7 @@
     realignTabs(insLine, 0, rectStart, tabDist, useTabs, &len);
     for (const char *c = retabbedStr; *c != '\0'; c++) {
       *outPtr++ = *c;
-      len =
-      Fl_Text_Buffer::character_width(c, outIndent, tabDist);
+      len = Fl_Text_Buffer::character_width(c, outIndent, tabDist);
       outIndent += len;
     }
     free((void *) retabbedStr);
@@ -1895,7 +1910,7 @@
   if (useTabs) {
     while (indent < toIndent) {
       static char t = '\t';
-      len = Fl_Text_Buffer::character_width(&t, indent, tabDist);
+      len = Fl_Text_Buffer::character_width("\t", indent, tabDist);
       if (len > 1 && indent + len <= toIndent) {
        *outPtr++ = '\t';
        indent += len;
@@ -2272,6 +2287,7 @@
       indent = startIndent;
       outLen++;
     } else {
+      // FIXME: character_width does not return number of bytes for UTF-8!
       indent +=
       Fl_Text_Buffer::character_width(c, indent, tabDist);
       outLen++;
@@ -2292,6 +2308,7 @@
       indent = startIndent;
       *outPtr++ = *c;
     } else {
+      // FIXME: character_width does not return number of bytes for UTF-8!
       indent +=
       Fl_Text_Buffer::character_width(c, indent, tabDist);
       *outPtr++ = *c;

Modified: branches/branch-1.3/src/Fl_Text_Display.cxx
===================================================================
--- branches/branch-1.3/src/Fl_Text_Display.cxx 2010-04-18 09:52:49 UTC (rev 7526)
+++ branches/branch-1.3/src/Fl_Text_Display.cxx 2010-04-18 14:33:33 UTC (rev 7527)
@@ -644,7 +644,8 @@
     ch = buf->character( p );
     if ( ch == '\n' )
       break;
-    indent += Fl_Text_Buffer::character_width( &ch, indent, 
buf->tab_distance() ); // FIXME: not unicode
+    const char *s = buf->address(p);
+    indent += Fl_Text_Buffer::character_width(s, indent, buf->tab_distance() 
); // FIXME: not unicode
     if ( indent == endIndent ) {
       p++;
       break;
@@ -1635,7 +1636,6 @@
     clear_rect( style, X, Y, toX - X, mMaxsize );
     return;
   }
-
   /* Set font, color, and gc depending on style.  For normal text, GCs
      for normal drawing, or drawing within a Fl_Text_Selection or highlight are
      pre-allocated and pre-configured.  For syntax highlighting, GCs are
@@ -2717,9 +2717,10 @@
            colNum = 0;
            width = 0;
        } else {
-          colNum += Fl_Text_Buffer::character_width((char*)&c, colNum, 
tabDist); // FIXME: unicode
+          const char *s = buf->address(p);
+          colNum += Fl_Text_Buffer::character_width(s, colNum, tabDist); // 
FIXME: unicode
            if (countPixels)
-               width += measure_proportional_character(c, colNum, 
p+styleBufOffset);
+               width += measure_proportional_character(s, colNum, 
p+styleBufOffset);
        }
 
        /* If character exceeded wrap margin, find the break point
@@ -2737,7 +2738,7 @@
                        for (i=b+1; i<p+1; i++) {
                            width += measure_proportional_character(
                                                                     // FIXME: 
character is ucs-4
-                                   buf->character(i), colNum, 
+                                   buf->address(i), colNum, 
                                    i+styleBufOffset);
                            colNum++;
                        }
@@ -2749,9 +2750,10 @@
            }
            if (!foundBreak) { /* no whitespace, just break at margin */
                newLineStart = max(p, lineStart+1);
-              colNum = Fl_Text_Buffer::character_width((char*)&c, colNum, 
tabDist); // FIXME: unicode
+              const char *s = buf->address(b);
+              colNum = Fl_Text_Buffer::character_width(s, colNum, tabDist); // 
FIXME: unicode
                if (countPixels)
-                   width = measure_proportional_character(c, colNum, 
p+styleBufOffset);
+                   width = measure_proportional_character(s, colNum, 
p+styleBufOffset);
            }
            if (p >= maxPos) {
                *retPos = maxPos;
@@ -2783,9 +2785,9 @@
 }
 
 /**
-   Measure the width in pixels of a character "c" at a particular column
-   "colNum" and buffer position "pos".  This is for measuring characters in
-   proportional or mixed-width highlighting fonts.
+   Measure the width in pixels of the first character of string "s" at a
+   particular column "colNum" and buffer position "pos".  This is for measuring
+   characters in proportional or mixed-width highlighting fonts.
 **
    A note about proportional and mixed-width fonts: the mixed width and
    proportional font code in nedit does not get much use in general editing,
@@ -2796,12 +2798,13 @@
    insertion/deletion, though static display and wrapping and resizing
    should now be solid because they are now used for online help display.
 */
-int Fl_Text_Display::measure_proportional_character(char c, int colNum, int 
pos) const {
+
+int Fl_Text_Display::measure_proportional_character(const char *s, int colNum, 
int pos) const {
     int charLen, style;
     char expChar[ FL_TEXT_MAX_EXP_CHAR_LEN ];
     Fl_Text_Buffer *styleBuf = mStyleBuffer;
     
-  charLen = Fl_Text_Buffer::expand_character(&c, colNum, expChar, 
buffer()->tab_distance()); // FIXME: unicode
+  charLen = Fl_Text_Buffer::expand_character(s, colNum, expChar, 
buffer()->tab_distance()); // FIXME: unicode
     if (styleBuf == 0) {
        style = 0;
     } else {

_______________________________________________
fltk-commit mailing list
[email protected]
http://lists.easysw.com/mailman/listinfo/fltk-commit

Reply via email to