one problem with this fix is that it assumes valid utf-8 input.
you're better off using fullrune.

-rob

On 8/31/05, [EMAIL PROTECTED] <[EMAIL PROTECTED]> wrote:
> Below is a first-aid bug fix
> 
> we define a read function for utf-8
> 
> /* read until utf boundary */
> int
> readu(int fd, char *buf, int n)
> {
>      static char b[3];
>      static int nb;
>      int m;
>      char *s, *e;
>      if(nb)
>          memcpy(buf, b, nb);
>      m = read(fd, buf + nb, n - nb);
> 
>      /*
>      01.   x in [00000000.0bbbbbbb] → 0bbbbbbb
>      10.   x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb
>      11.   x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb,10bbbbbb
>      */
> 
>      e = buf + m + nb;
>      for(s = buf; s < e; s++){
>          if((*s & 0x80) == 0)
>              continue;
>          if((*s & 0xe0) == 0xd0){
>              s++;
>              continue;
>          }
>          /* then *s is 111bbbbb */
>          if(s+2 >= e)
>              break;
>          s += 2;
>          continue;
>      }
>      /* we have e - s bytes in s    */
>      nb = e - s;
>      memcpy(b, s, nb);
>      return s - buf;
> }
> 
> and replace 'read' by 'readu' in utf.c
> 
> utf_in(int fd, long *notused, struct convert *out)
> {
> 
>      ...
>      while((n = readu(fd, buf+tot, N-tot)) >= 0){
>          ...
> }
> 
> Kenji Arisawa
> 
>

Reply via email to