One problem with this fix is that it assumes valid UTF-8 input.
You're better off using fullrune.


Here is a simpler and more robust solution
that follows Forsyth's suggestion:


/*
 * read until utf boundary
 *
 * UTF-8 encodings recognised (b = payload bit):
 *      0bbbbbbb
 *      110bbbbb 10bbbbbb
 *      1110bbbb 10bbbbbb 10bbbbbb
 *      11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
 */
enum { Maxseq = 4 };    /* longest UTF-8 sequence, in bytes */

/*
 * Return how many bytes at the end of buf[0..len) start a UTF-8
 * sequence whose remaining bytes are not in the buffer yet.
 * The result is 0 when the buffer ends on a character boundary,
 * and also when the tail is malformed (stray continuation bytes,
 * invalid lead byte): malformed bytes are passed through instead
 * of being withheld.  The result is at most Maxseq - 1.
 */
static int
utftail(char *buf, int len)
{
        int i, need;
        unsigned char c;

        /* a lead byte can be at most Maxseq-1 bytes from the end and still be incomplete */
        for(i = 1; i < Maxseq && i <= len; i++){
                c = (unsigned char)buf[len - i];
                if((c & 0xc0) == 0x80)
                        continue;       /* continuation byte: keep looking for its lead */
                if((c & 0xe0) == 0xc0)
                        need = 2;
                else if((c & 0xf0) == 0xe0)
                        need = 3;
                else if((c & 0xf8) == 0xf0)
                        need = 4;
                else
                        need = 1;       /* ASCII, or a byte that cannot start a sequence */
                /* i bytes of the sequence are present; incomplete only if it needs more */
                return i < need ? i : 0;
        }
        return 0;
}

/*
 * Read from fd into buf (capacity n bytes) and return only whole
 * UTF-8 characters.  Bytes of a sequence cut off at the end of the
 * data are kept in static storage and placed at the front of buf
 * on the next call.  Because that storage is static, the function
 * serves one input stream at a time.
 *
 * Returns the number of bytes stored in buf, 0 at end of file,
 * or -1 on a read error (or n < 0).  A sequence truncated by end
 * of file is returned as is rather than being dropped.  0 is never
 * returned before end of file: if only part of a character has
 * arrived, reading continues until it is complete.
 * n should be at least Maxseq; with a smaller buffer a character
 * that does not fit is handed back in pieces.
 */
int
readu(int fd, char *buf, int n)
{
        static char b[Maxseq - 1];      /* incomplete tail from the previous call */
        static int nb;                  /* number of valid bytes in b */
        int m, have, keep, i;

        if(n <= 0)
                return n < 0 ? -1 : 0;

        if(nb >= n){
                /*
                 * buf cannot hold the pending bytes plus even one more,
                 * so the character can never be completed here:
                 * hand the raw bytes through.
                 */
                memcpy(buf, b, n);
                nb -= n;
                for(i = 0; i < nb; i++)
                        b[i] = b[i + n];
                return n;
        }

        have = nb;
        memcpy(buf, b, have);

        for(;;){
                /* invariant: have < n, and buf[0..have) is an incomplete tail (have < Maxseq) */
                m = read(fd, buf + have, n - have);
                if(m < 0){
                        /* keep what we hold so a later call can still deliver it */
                        memcpy(b, buf, have);
                        nb = have;
                        return -1;
                }
                if(m == 0){
                        /* end of file: flush the truncated sequence, nothing more will come */
                        nb = 0;
                        return have;
                }
                have += m;
                keep = utftail(buf, have);
                if(keep < have)
                        break;          /* at least one byte can be returned */
                if(have == n){
                        keep = 0;       /* buffer full of a partial character: pass it through */
                        break;
                }
                /* only a partial character so far: returning 0 would look like EOF, read on */
        }

        nb = keep;
        memcpy(b, buf + have - keep, keep);
        return have - keep;
}

Kenji Arisawa

Reply via email to