one problem with this fix is that it assumes valid utf-8 input.
you're better off using fullrune.
-rob
On 8/31/05, [EMAIL PROTECTED] <[EMAIL PROTECTED]> wrote:
> The code below is a first-aid bug fix.
>
> We define a read function for UTF-8:
>
> /* read until utf boundary */
> int
> readu(int fd, char *buf, int n)
> {
> static char b[3];
> static int nb;
> int m;
> char *s, *e;
> if(nb)
> memcpy(buf, b, nb);
> m = read(fd, buf + nb, n - nb);
>
> /*
> 01. x in [00000000.0bbbbbbb] → 0bbbbbbb
> 10. x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb
> 11. x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb,10bbbbbb
> */
>
> e = buf + m + nb;
> for(s = buf; s < e; s++){
> if((*s & 0x80) == 0)
> continue;
> if((*s & 0xe0) == 0xd0){
> s++;
> continue;
> }
> /* then *s is 111bbbbb */
> if(s+2 >= e)
> break;
> s += 2;
> continue;
> }
> /* we have e - s bytes in s */
> nb = e - s;
> memcpy(b, s, nb);
> return s - buf;
> }
>
> and replace 'read' with 'readu' in utf.c:
>
> utf_in(int fd, long *notused, struct convert *out)
> {
>
> ...
> while((n = readu(fd, buf+tot, N-tot)) >= 0){
> ...
> }
>
> Kenji Arisawa
>
>