Below is a first-aid bug fix.

We define a read function for UTF-8 that only returns whole characters:

/*
 * Number of bytes in the UTF-8 sequence introduced by lead byte c.
 *
 *   0bbbbbbb                               1 byte
 *   110bbbbb 10bbbbbb                      2 bytes
 *   1110bbbb 10bbbbbb 10bbbbbb             3 bytes
 *   11110bbb 10bbbbbb 10bbbbbb 10bbbbbb    4 bytes
 *
 * A stray continuation byte (10bbbbbb) or any other invalid lead is
 * reported as length 1 so that it is passed through unchanged and the
 * decoder, not this reader, decides how to complain about it.
 */
static int
utfseqlen(unsigned char c)
{
    if(c < 0x80)
        return 1;
    if((c & 0xe0) == 0xc0)
        return 2;
    if((c & 0xf0) == 0xe0)
        return 3;
    if((c & 0xf8) == 0xf0)
        return 4;
    return 1;
}

/*
 * read until utf boundary
 *
 * Like read(fd, buf, n), but the data returned never ends in the middle
 * of a UTF-8 sequence: the bytes of a trailing incomplete sequence are
 * held back and placed at the start of buf on the next call.
 *
 * Returns the number of bytes stored in buf, 0 at end of file, or the
 * negative value returned by read on error.  If end of file is reached
 * while bytes are being held back, those bytes are returned as they are
 * (a truncated sequence) and the following call returns 0.
 *
 * The held-back bytes live in static storage shared by every caller, so
 * the function is not reentrant and must be used on one descriptor at
 * a time.
 */
int
readu(int fd, char *buf, int n)
{
    static char b[3];   /* tail of an incomplete sequence: at most 3 bytes */
    static int nb;      /* number of valid bytes in b */
    int m, have, len;
    char *s, *e;

    if(n <= 0)
        return 0;

    if(n <= nb){
        /* no room to complete the held sequence: hand the bytes over raw */
        memcpy(buf, b, n);
        nb -= n;
        memmove(b, b + n, nb);
        return n;
    }

    have = nb;
    memcpy(buf, b, have);
    nb = 0;

    for(;;){
        m = read(fd, buf + have, n - have);
        if(m < 0){
            /* keep the held bytes so the caller may retry (have <= 3 here) */
            memcpy(b, buf, have);
            nb = have;
            return m;
        }
        if(m == 0)
            return have;    /* EOF: flush whatever was held back */
        have += m;

        /* advance s sequence by sequence; stop at the first incomplete one */
        e = buf + have;
        for(s = buf; s < e; s += len){
            len = utfseqlen((unsigned char)*s);
            if(len > e - s)
                break;
        }

        /*
         * Returning 0 would look like EOF to the caller, so keep reading
         * until at least one whole character is available or buf is full.
         */
        if(s > buf || have == n)
            break;
    }

    if(s == buf)
        return have;    /* buf too small for even one sequence */

    /* e - s bytes (0..3) belong to an incomplete sequence: hold them back */
    nb = (int)(e - s);
    memcpy(b, s, nb);
    return (int)(s - buf);
}

Then replace 'read' with 'readu' in utf.c:

utf_in(int fd, long *notused, struct convert *out)
{

    ...
    while((n = readu(fd, buf+tot, N-tot)) >= 0){
        ...
}

Kenji Arisawa

Reply via email to