Author: fabien
Date: 2008-11-04 17:55:17 -0800 (Tue, 04 Nov 2008)
New Revision: 6497
Log:
Added a fast UTF-8 string detection routine, fl_is_valid_utf8(). It solves the
drawing problem in STR #2080 related to the use of fl_expand_text(). The
particularity of this routine is that the complete scan is cached, so it
executes at most once in the fl_expand_text() loop. The Chinese example in
STR #2080 now seems to work perfectly here. Still, the incorrect window title
in Win32 needs to be addressed.

Modified:
   branches/branch-1.3/src/fl_draw.cxx

Modified: branches/branch-1.3/src/fl_draw.cxx
===================================================================
--- branches/branch-1.3/src/fl_draw.cxx 2008-11-04 08:16:50 UTC (rev 6496)
+++ branches/branch-1.3/src/fl_draw.cxx 2008-11-05 01:55:17 UTC (rev 6497)
@@ -55,6 +55,56 @@
 // Sets n to the number of characters put into the buffer.
 // Sets width to the width of the string in the current font.
 
+#define C_IN(c,a,b) ((c)>=(a) && (c)<=(b))
+#define C_UTF8(c)   C_IN(c,0x80,0xBF)
+
+/**
+  Fast UTF-8 string detection routine with a caller-owned result cache.
+
+  The first call (\p init_scan == 0) scans \p s, stores the verdict in
+  \p scan_ret and sets \p init_scan to 1. Any later call made with the
+  same two variables returns the stored verdict without looking at \p s
+  again, so a loop can query the same string repeatedly at the cost of a
+  single scan.
+
+  \param[in,out] init_scan must be 0 before the first call
+  \param[in,out] scan_ret  cached verdict; need not be initialized
+  \param[in]     s         NUL-terminated string to check, may be NULL
+  \retval 0 if \p s is NULL or empty, contains a control character other
+            than tab, CR or LF, or is not well-formed UTF-8
+  \retval 1 otherwise
+*/
+int fl_is_valid_utf8(int& init_scan, int& scan_ret, const char* s) {
+    if (init_scan) return scan_ret; // scan the string only once
+    init_scan=1;
+    // Store the verdict before the early return too: init_scan is already
+    // set, so the next call hands back scan_ret whatever it contains.
+    scan_ret=0;
+    if ( !s || !(*s) ) return scan_ret;
+
+    // Every test below reads p[k+1] only after p[k] matched a non-zero
+    // range, so the NUL terminator stops a truncated sequence before any
+    // byte past the end of the string is read.
+    const unsigned char* p=(const unsigned char*)s;
+    while (*p) {
+       if ( p[0]==0x09 || p[0]==0x0d || p[0]==0x0a ||
+            (p[0]>0x1f && p[0]<0x80) ) {
+           p++;
+           continue; // tab, CR, LF or ASCII 0x20-0x7f
+       }
+       if ( C_IN(p[0], 0xc2, 0xdf) && C_UTF8(p[1]) ) {
+           p+=2;
+           continue; // non-overlong 2-byte
+       }
+       if ( p[0]==0xe0 && C_IN(p[1], 0xa0, 0xbf) && C_UTF8(p[2]) ) {
+           p+=3;
+           continue; // 3-byte, excluding overlongs
+       }
+       if ( p[0]==0xed && C_IN(p[1], 0x80, 0x9f) && C_UTF8(p[2]) ) {
+           p+=3;
+           continue; // 3-byte, excluding surrogates
+       }
+       if ( p[0]!=0xed && C_IN(p[0], 0xe1, 0xef) &&
+            C_UTF8(p[1]) && C_UTF8(p[2]) ) {
+           p+=3;
+           continue; // straight 3-byte
+       }
+       if ( p[0]==0xf0 && C_IN(p[1], 0x90, 0xbf) &&
+            C_UTF8(p[2]) && C_UTF8(p[3]) ) {
+           p+=4;
+           continue; // planes 1-3
+       }
+       if ( C_IN(p[0], 0xf1, 0xf3) && C_UTF8(p[1]) &&
+            C_UTF8(p[2]) && C_UTF8(p[3]) ) {
+           p+=4;
+           continue; // planes 4-15
+       }
+       if ( p[0]==0xf4 && C_IN(p[1], 0x80, 0x8f) &&
+            C_UTF8(p[2]) && C_UTF8(p[3]) ) {
+           p+=4;
+           continue; // plane 16
+       }
+       return scan_ret; // not UTF-8; scan_ret still holds 0
+    }
+    scan_ret=1;
+    return scan_ret;
+}
+
 const char*
 fl_expand_text(const char* from, char* buf, int maxbuf, double maxw, int& n, 
        double &width, int wrap, int draw_symbols) {
@@ -65,6 +115,8 @@
   const char* word_start = from;
   double w = 0;
 
+  int init_scan=0, scan_ret;
+  
   const char* p = from;
   for (;; p++) {
 
@@ -99,9 +151,9 @@
       *o++ = '^';
       *o++ = c ^ 0x40;
 #ifdef __APPLE__
-    } else if (c == 0xCA) { // non-breaking space in MacRoman
+    } else if (c == 0xCA && !fl_is_valid_utf8(init_scan, scan_ret,from) ) { // 
non-breaking space in MacRoman
 #else
-    } else if (c == 0xA0) { // non-breaking space in ISO 8859
+    } else if (c == 0xA0 && !fl_is_valid_utf8(init_scan, scan_ret,from) ) { // 
non-breaking space in ISO 8859
 #endif
       *o++ = ' ';
     } else if (c == '@' && draw_symbols) { // Symbol???

_______________________________________________
fltk-commit mailing list
[email protected]
http://lists.easysw.com/mailman/listinfo/fltk-commit

Reply via email to