[fossil-users] bug : comment_print_legacy function destroys utf8 characters when wrapping text.

2017-06-16 Thread er38hcma
$ fossil diff --from trunk src/comformat.c


@@ -188,8 +188,11 @@
   maxChars = lineChars;
   for(;;){
 int useChars = 1;
 char c = zLine[index];
+    if( maxChars==1 && (c&0xC0)==0xC0 && lineChars!=1) {
+  break;
+    }
 if( c==0 ){
   break;
 }else{
   if( origBreak && index>0 ){
@@ -227,9 +230,20 @@
   charCnt++;
 }
 assert( c!='\n' || charCnt==0 );
 fossil_print("%c", c);
-    if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;
+    maxChars -= useChars;
+    // utf8 character bytes
+    if( ((unsigned char) c >> 7) != 0 ) { // if( (c&0x80)!=0 ) { // 0x80 
0b1000
+  while( ((unsigned char) zLine[index] >> 6) == 0b10 ){ // while( 
(zLine[index]&0xC0)==0x80 ){
+    c = zLine[index];
+    fossil_print("%c", c);
+    index++;
+  }
+  maxChars--; // 2 width , Todo: 2 or 1 width
+  if ((zLine[index]&0x80)!=0 && maxChars<=1)
+    break;
+    }
 if( maxChars<=0 ) break;
 if( c=='\n' ) break;
   }
   if( charCnt>0 ){
@@ -260,8 +274,9 @@
   int indent,    /* Number of spaces to indent each non-initial line. */
   int width  /* Maximum number of characters per line. */
 ){
   int maxChars = width - indent;
+  int nText;
   int si, sk, i, k;
   int doIndent = 0;
   char *zBuf;
   char zBuffer[400];
@@ -270,13 +285,14 @@
   if( width<0 ){
 comment_set_maxchars(indent, );
   }
   if( zText==0 ) zText = "(NULL)";
+  nText = strlen(zText);
   if( maxChars<=0 ){
-    maxChars = strlen(zText);
+    maxChars = nText;
   }
-  if( maxChars >= (sizeof(zBuffer)) ){
-    zBuf = fossil_malloc(maxChars+1);
+  if( nText >= (sizeof(zBuffer)) ){
+    zBuf = fossil_malloc(nText+1);
   }else{
 zBuf = zBuffer;
   }
   for(;;){
@@ -285,13 +301,16 @@
   if( doIndent==0 ){
 fossil_print("\n");
 lineCnt = 1;
   }
-  if( zBuf!=zBuffer) fossil_free(zBuf);
+  if( zBuf!=zBuffer ) fossil_free(zBuf);
   return lineCnt;
 }
-    for(sk=si=i=k=0; zText[i] && k 6) == 0b10){
+    i++;
+  }
+  if( i!=0 ){
+    i--;
+    continue;
+  }
+    }
+
+    // utf8 character bytes , bit shift >> 7 : 1(utf8) 0(ascii)
+    if ( ((unsigned char) c >> 7)!=0 ){
+  // todo: check charactor display width : 1 or 2
+  // treat as width 2
+  if (charCnt>=maxChars && ((unsigned char) c >> 6)==0b11 && nText!=1){
+    zBuf[k] =0;
+    i--;
+    break;
+  }
+  while(((unsigned char) zText[i+1] >> 6) == 0b10){
+    // utf8 first byte 0b11, utf8 data byte  0b10
+    i++;
+    k++;
+    zBuf[k] = zText[i];
+  }
+  c = zText[i];
+  charCnt++;
+    }
+
 if( c=='-' && k>0 && fossil_isalpha(zBuf[k-1]) ){
   si = i+1;
   sk = k+1;
 }
@@ -319,8 +369,9 @@
 }
 fossil_print("%s\n", zBuf);
 lineCnt++;
   }
+  if( zBuf!=zBuffer ) fossil_free(zBuf);
 }
 
 /*
 ** This is the comment printing function.  The comment printing algorithm
___
fossil-users mailing list
fossil-users@lists.fossil-scm.org
http://lists.fossil-scm.org:8080/cgi-bin/mailman/listinfo/fossil-users


Re: [fossil-users] bug : comment_print_legacy function destroys utf8 characters when wrapping text.

2017-06-15 Thread Stephan Beal
On Thu, Jun 15, 2017 at 3:15 AM,  wrote:

> Bug
>
>   comment_print_legacy function destroys utf8 characters when wrapping
> text.
>
> https://www.fossil-scm.org/index.html/artifact?ln=302-307=f570981a7bb58eb3
> src/comformat.c : Line 302-307
>

For those whose mailers consider src/comformat.c to be part of that URL and
mis-link it (gmail):

https://www.fossil-scm.org/index.html/artifact?ln=302-307=f570981a7bb58eb3


-- 
- stephan beal
http://wanderinghorse.net/home/stephan/
"Freedom is sloppy. But since tyranny's the only guaranteed byproduct of
those who insist on a perfect world, freedom will have to do." -- Bigby Wolf
___
fossil-users mailing list
fossil-users@lists.fossil-scm.org
http://lists.fossil-scm.org:8080/cgi-bin/mailman/listinfo/fossil-users


[fossil-users] bug : comment_print_legacy function destroys utf8 characters when wrapping text.

2017-06-14 Thread er38hcma
Bug

  The comment_print_legacy function destroys UTF-8 characters when wrapping text.

https://www.fossil-scm.org/index.html/artifact?ln=302-307=f570981a7bb58eb3
src/comformat.c : Line 302-307

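With c cast to unsigned char, c >> 7 is 0 for an ASCII byte and 1 for a byte
that belongs to a multi-byte UTF-8 sequence.
Likewise, c >> 6 is 0b11 for the first (lead) byte of a multi-byte UTF-8
sequence and 0b10 for each following (continuation) byte.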

https://en.wikipedia.org/wiki/UTF-8#Description

___
fossil-users mailing list
fossil-users@lists.fossil-scm.org
http://lists.fossil-scm.org:8080/cgi-bin/mailman/listinfo/fossil-users