One problem with this fix is that it assumes valid UTF-8 input.
You're better off using fullrune.
Here is a simpler and more robust solution
that follows Forsyth's suggestion:
/*
 * readu: read from fd into buf, stopping at a UTF-8 character boundary.
 *
 * UTF-8 encodings recognised (b = payload bit):
 *	0bbbbbbb
 *	110bbbbb 10bbbbbb
 *	1110bbbb 10bbbbbb 10bbbbbb
 *	11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
 *
 * Up to n bytes are placed in buf.  If the data ends in the middle of
 * a multi-byte sequence, the partial sequence is held back in a static
 * buffer and handed out at the start of the next call, so the bytes
 * returned always end on a character boundary.  Invalid input (stray
 * continuation bytes, impossible lead bytes) is never held back; it is
 * passed through for the caller's decoder to deal with.
 *
 * Returns the number of bytes stored in buf, 0 at end of file, or -1
 * if read fails or n is smaller than Maxutf.  At end of file any held
 * partial sequence is returned as-is rather than being dropped.
 *
 * The carry-over buffer is static and shared by every caller, so this
 * function must only be used on one descriptor at a time.
 */
enum { Maxutf = 4 };	/* longest UTF-8 sequence handled, in bytes */

/* utfseqlen: length promised by lead byte c; 1 for ASCII or an invalid lead */
static int
utfseqlen(unsigned char c)
{
	if((c & 0xe0) == 0xc0)
		return 2;
	if((c & 0xf0) == 0xe0)
		return 3;
	if((c & 0xf8) == 0xf0)
		return 4;
	return 1;
}

/* utftail: number of bytes at the end of buf[0..len) forming an incomplete sequence */
static int
utftail(const char *buf, int len)
{
	int i;
	unsigned char c;

	/* an incomplete sequence is at most Maxutf-1 bytes long */
	for(i = 1; i <= len && i < Maxutf; i++){
		c = (unsigned char)buf[len - i];
		if((c & 0xc0) == 0x80)
			continue;	/* continuation byte: lead is further back */
		return utfseqlen(c) > i ? i : 0;
	}
	/* only continuation bytes seen: complete 4-byte sequence or garbage */
	return 0;
}

int
readu(int fd, char *buf, int n)
{
	static char b[Maxutf - 1];
	static int nb;
	int len, m, tail;

	/* need room for the carried bytes plus at least one fresh byte */
	if(n < Maxutf)
		return -1;

	memcpy(buf, b, nb);
	len = nb;
	for(;;){
		/* len <= Maxutf-1 here whenever we loop, so n - len >= 1 */
		m = read(fd, buf + len, n - len);
		if(m < 0){
			/* keep whatever partial sequence we hold for a later retry */
			memcpy(b, buf, len);
			nb = len;
			return -1;
		}
		if(m == 0){
			/* end of file: flush the held bytes instead of losing them */
			nb = 0;
			return len;
		}
		len += m;
		tail = utftail(buf, len);
		/*
		 * Returning 0 would look like end of file, so keep reading
		 * until at least one complete character is available.
		 */
		if(len > tail)
			break;
	}
	nb = tail;
	memcpy(b, buf + len - tail, tail);
	return len - tail;
}
Kenji Arisawa