[fltk.bugs] [HIGH] STR #2348: test/editor fails to display misc/cp1252.txt and can hang

Duncan Gibson Fri, 19 Nov 2010 14:14:48 -0800

DO NOT REPLY TO THIS MESSAGE.  INSTEAD, POST ANY RESPONSES TO THE LINK BELOW.


[STR New]

Link: http://www.fltk.org/str.php?L2348
Version: 1.3-current





Link: http://www.fltk.org/str.php?L2348
Version: 1.3-current

Index: src/Fl_Text_Buffer.cxx
===================================================================
--- src/Fl_Text_Buffer.cxx      (revision 7860)
+++ src/Fl_Text_Buffer.cxx      (working copy)
@@ -1024,7 +1024,7 @@
           *foundPos = startPos;
           return 1;
         }
-        int l = fl_utf8len(c);
+        int l = fl_utf8len(c); // TODO: replace?
         if (memcmp(sp, address(bp), l))
           break;
         sp += l; bp += l;
@@ -1076,7 +1076,7 @@
           *foundPos = startPos;
           return 1;
         }
-        int l = fl_utf8len(c);
+        int l = fl_utf8len(c); // TODO: replace?
         if (memcmp(sp, address(bp), l))
           break;
         sp += l; bp += l;
@@ -1560,6 +1560,29 @@
 
 
 /*
+ * temporary functions for debug purposes
+ */
+void printByte(const char byte) {
+  if (' ' <= byte && byte <= '~')
+    printf("   %c", byte);
+  else
+    printf("0x%02hhx", byte);
+}
+void printBytes(int pos, const char byte) {
+  printf("pos = %d  byte  = [", pos);
+  printByte(byte);
+  printf("]\n");
+}
+void printBytes(int pos, const char* bytes, int n) {
+  printf("pos = %d  bytes = [", pos);
+  for (int i = 0; i < n && bytes[i] != '\0'; i++) {
+    printByte(bytes[i]);
+    printf(",");
+  }
+  printf("]\n");
+}
+
+/*
  Return the previous character position.
  Uncode safe.
  */
@@ -1570,6 +1593,15 @@
 
   IS_UTF8_ALIGNED2(this, (pos))  
 
+  char bytes[10]; memset(bytes, '\0', 10);  // longest UTF-8 sequence is 6 bytes
+  int b = pos;
+  for (int i = 0; i < 10 && b > 0; i++, b--)
+    bytes[10-1-i] = byte_at(pos-i);
+  const char* p = &bytes[10-1];
+  const char* q = fl_utf8back(p-1, &bytes[b], p);
+  int d = p - q;
+  pos = pos - d;
+/*
   char c;
   do {
     pos--;
@@ -1577,7 +1609,7 @@
       return 0;
     c = byte_at(pos);
   } while ( (c&0xc0) == 0x80);
-  
+*/
   IS_UTF8_ALIGNED2(this, (pos))  
   return pos;
 }
@@ -1593,13 +1625,23 @@
   return prev_char_clipped(pos);
 }
 
-
 /*
  Return the next character position.
  Returns length() if the end of the buffer is reached.
  */
 int Fl_Text_Buffer::next_char(int pos) const
 {
+  char bytes[10]; memset(bytes, '\0', 10);  // longest UTF-8 sequence is 6 bytes
+  for (int i = 0; i < 10 && pos+i < mLength; i++)
+    bytes[i+0] = byte_at(pos+i);
+  const char *p = &bytes[0];
+  const char *q = fl_utf8fwd(p+1, p, p+6);
+  int d = q - p;
+  pos += d;
+  if (pos >= mLength)
+    return mLength;
+  return pos;
+  /*
   IS_UTF8_ALIGNED2(this, (pos))  
   int n = fl_utf8len(byte_at(pos));
   pos += n;
@@ -1607,6 +1649,7 @@
     return mLength;
   IS_UTF8_ALIGNED2(this, (pos))  
   return pos;
+  */
 }
 
 
@@ -1624,14 +1667,53 @@
  */
 int Fl_Text_Buffer::utf8_align(int pos) const 
 {
+  char bytes[10]; memset(bytes, '\0', 10);  // longest UTF-8 sequence is 6 bytes
+  int b = pos;
+  for (int i = 0; i < 10 && b > 0; i++, b--)
+    bytes[10-1-i] = byte_at(pos-i);
+  const char* p = &bytes[10-1];
+  const char* q = fl_utf8back(p, &bytes[b], p);
+  int d = p - q;
+  return (d == 0) ? pos : pos-d;
+/*  
   char c = byte_at(pos);
   while ( (c&0xc0) == 0x80) {
     pos--;
     c = byte_at(pos);
   }
   return pos;
+*/
 }
 
+/*
+ * temporary routines for use in IS_UTF8_ALIGN macros
+ */
+int isUtf8Aligned(const char* s) {
+  if (s && *s && fl_utf8len(*s)<=0) {
+    const char* p = fl_utf8fwd(s, s, s+6);
+    if (p == s)
+      return 1;
+    return 0;
+  }
+  return 1;
+}
+int isUtf8Aligned(const Fl_Text_Buffer* tb, int pos) {
+  if (pos < 0 || pos >= tb->length())
+    return 1;
+    
+  char bytes[10]; memset(bytes, '\0', 10);  // longest UTF-8 sequence is 6 bytes
+  int len = min(10-1, tb->length() - pos);
+  for (int i = 0; i < len; i++)
+    bytes[i] = tb->byte_at(pos+i);
+  const char* p = &bytes[0];
+  const char* q = fl_utf8fwd(p, p, p+6);
+  if (p == q)
+    return 1;
+  if (pos>=0 && pos<tb->length() && fl_utf8len(tb->byte_at(pos))<=0)
+    return 0;
+  return 1;
+}
+
 //
 // End of "$Id$".
 //
Index: src/Fl_Text_Display.cxx
===================================================================
--- src/Fl_Text_Display.cxx     (revision 7860)
+++ src/Fl_Text_Display.cxx     (working copy)
@@ -84,7 +84,35 @@
 // CET - FIXME
 #define TMPFONTWIDTH 6
 
+/* fl_utf8len(char b) replacement that handles CP1252 C1 control chars, etc.
+ * temporary name to aid finding and replacing the originals
+ * temporary location to minimize file changes
+ */
+int fl_drg8len(const char* s)
+{
+  int n = 0;
+  unsigned ucs = fl_utf8decode(s, 0, &n);
+  ucs = ucs + 1;       // keep g++ quiet
+  return n;
+}
 
+/* even more temporary version with added debug
+ */
+int fl_drg8len(const char* s, const char* f)
+{
+  int m = fl_utf8len(*s);
+  int n = fl_drg8len(s);
+  if (m != n) {
+    // printf("%s: utf8len=%d  drg8len=%d  ", f, m, n);
+    // for (int i = 0; i < max(m,n); i++) {
+    //   printByte(s[i]);
+    //   printf(",");
+    // }
+    // printf("\n");
+  }
+  return n;
+}
+  
 
 /**  
  \brief Creates a new text display widget.
@@ -757,10 +785,20 @@
   /* determine how many displayed character positions are covered */
   startIndent = mBuffer->count_displayed_characters( lineStart, startPos );
   indent = startIndent;
+  
   for ( c = text; *c != '\0'; c += fl_utf8len(*c) )
     indent++;
   endIndent = indent;
   
+  // the following trumps the block above
+  int drgIndent = startIndent;
+  for ( c = text; *c != '\0'; c += fl_drg8len(c, "overstrike") )
+    drgIndent++;
+  // if (drgIndent != indent)
+  //   printf("drgIndent = %d  indent = %d\n", drgIndent, indent);
+  indent = drgIndent;
+  endIndent = indent;
+  
   /* find which characters to remove, and if necessary generate additional
    padding to make up for removed control characters at the end */
   indent = startIndent;
@@ -1742,6 +1780,9 @@
   style = position_style(lineStartPos, lineLen, 0);
   for (i=0; i<lineLen; ) {
     int len = fl_utf8len(lineStr[i]);
+    int drgLen = fl_drg8len(&lineStr[i], "handle_vline");
+    len = drgLen;
+    
     charStyle = position_style(lineStartPos, lineLen, i);
     // FIXME: if the character is a tab, we need to do the correct indenting
     // FIXME: if the character is an optional hyphen, we need to ignore it unless we wrap the text
@@ -1809,6 +1850,8 @@
   int i = 0;
   while (i<len) {
     int cl = fl_utf8len(s[i]);
+    int drgLen = fl_drg8len(&s[i], "find_x");
+    cl = drgLen;
     int w = int( string_width(s, i+cl, style) );
     if (w>x) 
       return i;
@@ -1867,6 +1910,23 @@
 }
 
 
+/*
+ * expand CP1252(?) src string to full UTF-8 representation in dst
+ * temporary name and location
+ */
+int drgExpand(const char* src, int srcBytes, char* dst) {
+  int len = 0, dstBytes = 0;
+  unsigned ucs;
+  char buffer[10];     // max utf-8 sequence is 6 bytes
+  char* p = (char*)src;
+  for (int i = 0; i < srcBytes; i += len, p += len) {  // TODO: assert(len != 1) ?
+    ucs = fl_utf8decode(p, &src[srcBytes], &len);
+    int utf8len = fl_utf8encode(ucs, buffer);
+    for (int j = 0; j < utf8len; j++)
+      dst[dstBytes++] = buffer[j];
+  }
+  return dstBytes;
+}
 
 /**
  \brief Draw a text segment in a single style.
@@ -1944,7 +2004,13 @@
   if (!(style & BG_ONLY_MASK)) {
     fl_color( foreground );
     fl_font( font, fsize );
-    fl_draw( string, nChars, X, Y + mMaxsize - fl_descent());
+    // fl_draw( string, nChars, X, Y + mMaxsize - fl_descent());
+    
+    // need to expand to full UTF-8 or fl_draw() won't work as expected
+    char* expanded = (char*)malloc(nChars*6*sizeof(char));
+    int numBytes = drgExpand(string, nChars, expanded);
+    fl_draw(expanded, numBytes, X, Y + mMaxsize - fl_descent());
+    free((void*)expanded);
   }
   
   // CET - FIXME
@@ -2158,7 +2224,13 @@
   }
   fl_font( font, fsize );
   
-  return fl_width( string, length );
+  // need to expand to full UTF-8 or fl_width() won't work as expected
+  char* expanded = (char*)malloc(length*6*sizeof(char));
+  int numBytes = drgExpand(string, length, expanded);
+  double width = fl_width(expanded, numBytes);
+  free((void*)expanded);
+  
+  return width;
 }
 
 
@@ -3175,6 +3247,8 @@
   IS_UTF8_ALIGNED(s)
   
   int charLen = fl_utf8len(*s), style = 0;
+  int drgLen = fl_drg8len(s, "measure_prop_char");
+  charLen = drgLen;
   if (mStyleBuffer) {
     style = mStyleBuffer->byte_at(pos);
   }
@@ -3253,6 +3327,7 @@
     return 1;
   
   c = buffer()->char_at(lineEndPos);
+  // TODO: if c is ucs, then fl_utf8len(c) is just plain wrong! no?
   return c == '\n' || ((c == '\t' || c == ' ') &&
                        lineEndPos + fl_utf8len(c) < buffer()->length());
 }
Index: FL/Fl_Text_Buffer.H
===================================================================
--- FL/Fl_Text_Buffer.H (revision 7860)
+++ FL/Fl_Text_Buffer.H (working copy)
@@ -38,8 +38,18 @@
 
 #ifdef ASSERT_UTF8
 # include <assert.h>
-# define IS_UTF8_ALIGNED(a) if (a && *a) assert(fl_utf8len(*(a))>0);
-# define IS_UTF8_ALIGNED2(a, b) if (b>=0 && b<a->length()) assert(fl_utf8len(a->byte_at(b))>0);
+// define IS_UTF8_ALIGNED(a) if (a && *a) assert(fl_utf8len(*(a))>0);
+// define IS_UTF8_ALIGNED2(a, b) if (b>=0 && b<a->length()) assert(fl_utf8len(a->byte_at(b))>0);
+  // replace body of macros with functions to allow extra debugging.
+  // temporary location for forward declaration and externs.
+  class Fl_Text_Buffer;
+  extern void printByte(const char byte);
+  extern void printBytes(int pos, const char byte);
+  extern void printBytes(int pos, const char* bytes, int n);
+  extern int isUtf8Aligned(const char* s);
+  extern int isUtf8Aligned(const Fl_Text_Buffer* tb, int pos);
+# define IS_UTF8_ALIGNED(a) assert(isUtf8Aligned(a));
+# define IS_UTF8_ALIGNED2(a, b) assert(isUtf8Aligned(a,b));
 #else
 # define IS_UTF8_ALIGNED(a)
 # define IS_UTF8_ALIGNED2(a, b)

_______________________________________________
fltk-bugs mailing list
[email protected]
http://lists.easysw.com/mailman/listinfo/fltk-bugs

Re: [fltk.bugs] [HIGH] STR #2348: test/editor fails to display misc/cp1252.txt and can hang

Reply via email to