[fossil-users] bug: comment_print_legacy function destroys UTF-8 characters when wrapping text.

2017-06-16 Thread er38hcma
$ fossil diff --from trunk src/comformat.c


@@ -188,8 +188,11 @@
   maxChars = lineChars;
   for(;;){
 int useChars = 1;
 char c = zLine[index];
+    if( maxChars==1 && (c&0xC0)==0xC0 && lineChars!=1) {
+  break;
+    }
 if( c==0 ){
   break;
 }else{
   if( origBreak && index>0 ){
@@ -227,9 +230,20 @@
   charCnt++;
 }
 assert( c!='\n' || charCnt==0 );
 fossil_print("%c", c);
-    if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;
+    maxChars -= useChars;
+    // utf8 character bytes
+    if( ((unsigned char) c >> 7) != 0 ) { // if( (c&0x80)!=0 ) { // 0x80 0b1000
+  while( ((unsigned char) zLine[index] >> 6) == 0b10 ){ // while( (zLine[index]&0xC0)==0x80 ){
+    c = zLine[index];
+    fossil_print("%c", c);
+    index++;
+  }
+  maxChars--; // 2 width , Todo: 2 or 1 width
+  if ((zLine[index]&0x80)!=0 && maxChars<=1)
+    break;
+    }
 if( maxChars<=0 ) break;
 if( c=='\n' ) break;
   }
   if( charCnt>0 ){
@@ -260,8 +274,9 @@
   int indent,    /* Number of spaces to indent each non-initial line. */
   int width  /* Maximum number of characters per line. */
 ){
   int maxChars = width - indent;
+  int nText;
   int si, sk, i, k;
   int doIndent = 0;
   char *zBuf;
   char zBuffer[400];
@@ -270,13 +285,14 @@
   if( width<0 ){
 comment_set_maxchars(indent, &maxChars);
   }
   if( zText==0 ) zText = "(NULL)";
+  nText = strlen(zText);
   if( maxChars<=0 ){
-    maxChars = strlen(zText);
+    maxChars = nText;
   }
-  if( maxChars >= (sizeof(zBuffer)) ){
-    zBuf = fossil_malloc(maxChars+1);
+  if( nText >= (sizeof(zBuffer)) ){
+    zBuf = fossil_malloc(nText+1);
   }else{
 zBuf = zBuffer;
   }
   for(;;){
@@ -285,13 +301,16 @@
   if( doIndent==0 ){
 fossil_print("\n");
 lineCnt = 1;
   }
-  if( zBuf!=zBuffer) fossil_free(zBuf);
+  if( zBuf!=zBuffer ) fossil_free(zBuf);
   return lineCnt;
 }
-    for(sk=si=i=k=0; zText[i] && k> 6) == 0b10){
+    i++;
+  }
+  if( i!=0 ){
+    i--;
+    continue;
+  }
+    }
+
+    // utf8 character bytes , bit shift >> 7 : 1(utf8) 0(ascii)
+    if ( ((unsigned char) c >> 7)!=0 ){
+  // todo: check character display width : 1 or 2
+  // treat as width 2
+  if (charCnt>=maxChars && ((unsigned char) c >> 6)==0b11 && nText!=1){
+    zBuf[k] =0;
+    i--;
+    break;
+  }
+  while(((unsigned char) zText[i+1] >> 6) == 0b10){
+    // utf8 first byte 0b11, utf8 data byte  0b10
+    i++;
+    k++;
+    zBuf[k] = zText[i];
+  }
+  c = zText[i];
+  charCnt++;
+    }
+
 if( c=='-' && k>0 && fossil_isalpha(zBuf[k-1]) ){
   si = i+1;
   sk = k+1;
 }
@@ -319,8 +369,9 @@
 }
 fossil_print("%s\n", zBuf);
 lineCnt++;
   }
+  if( zBuf!=zBuffer ) fossil_free(zBuf);
 }
 
 /*
 ** This is the comment printing function.  The comment printing algorithm
___
fossil-users mailing list
fossil-users@lists.fossil-scm.org
http://lists.fossil-scm.org:8080/cgi-bin/mailman/listinfo/fossil-users


Re: [fossil-users] test-comment-format utf8

2017-06-16 Thread Andy Bradford
Thus said Stephan Beal on Thu, 15 Jun 2017 09:55:11 +0200:

>  Sidebar: i had no idea bash can do ranges like that! 

I usually use jot:

$ echo $(jot -w '%c' 26 a)
a b c d e f g h i j k l m n o p q r s t u v w x y z

$ echo $(jot -w '%c' 26 a | sort -r)
z y x w v u t s r q p o n m l k j i h g f e d c b a

$ echo $(jot -w '%c' 26 A)  
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

Andy
-- 
TAI64 timestamp: 400059449a86


___
fossil-users mailing list
fossil-users@lists.fossil-scm.org
http://lists.fossil-scm.org:8080/cgi-bin/mailman/listinfo/fossil-users


Re: [fossil-users] test-comment-format utf8

2017-06-16 Thread Warren Young
On Jun 15, 2017, at 1:55 AM, Stephan Beal wrote:
> 
> i had no idea bash can do ranges like that!

[snip]

> That would have saved me lots of typing in the past :/.

When on a system without Bash or when writing a script that must be portable to 
such systems, there’s seq(1) if you only want numbers.  Unlike Bash sequences, 
seq(1) can do floating-point sequences, but Bash can do letter sequences which 
seq(1) cannot.
___
fossil-users mailing list
fossil-users@lists.fossil-scm.org
http://lists.fossil-scm.org:8080/cgi-bin/mailman/listinfo/fossil-users