commit cd9f55f5e1c8035840ecf906665b418db47a56c1
Author: Richard Ipsum <[email protected]>
AuthorDate: Fri Oct 9 15:52:10 2020 +0100
Commit: Michael Forney <[email protected]>
CommitDate: Wed Oct 14 15:03:20 2020 -0700
fold: fix handling of multibyte characters
diff --git a/fold.c b/fold.c
index 0d68b37..c7f6ae8 100644
--- a/fold.c
+++ b/fold.c
@@ -7,19 +7,22 @@
#include "text.h"
#include "util.h"
+#include "utf.h"
static int bflag = 0;
static int sflag = 0;
static size_t width = 80;
static void
-foldline(struct line *l) {
+foldline(struct line *l, const char *fname) {
size_t i, col, last, spacesect, len;
+ Rune r;
+ int runelen;
- for (i = 0, last = 0, col = 0, spacesect = 0; i < l->len; i++) {
- if (!UTF8_POINT(l->data[i]) && !bflag)
- continue;
+ for (i = 0, last = 0, col = 0, spacesect = 0; i < l->len; i += runelen)
{
if (col >= width && ((l->data[i] != '\r' && l->data[i] != '\b')
|| bflag)) {
+ if (bflag && col > width)
+ i -= runelen; /* never split a character */
len = ((sflag && spacesect) ? spacesect : i) - last;
if (fwrite(l->data + last, 1, len, stdout) != len)
eprintf("fwrite <stdout>:");
@@ -29,8 +32,11 @@ foldline(struct line *l) {
col = 0;
spacesect = 0;
}
- if (sflag && isspace(l->data[i]))
- spacesect = i + 1;
+ runelen = charntorune(&r, l->data + i, l->len - i);
+ if (!runelen || r == Runeerror)
+ eprintf("charntorune: %s: invalid utf\n", fname);
+ if (sflag && isspacerune(r))
+ spacesect = i + runelen;
if (!bflag && iscntrl(l->data[i])) {
switch(l->data[i]) {
case '\b':
@@ -46,7 +52,7 @@ foldline(struct line *l) {
break;
}
} else {
- col++;
+ col += bflag ? runelen : 1;
}
}
if (l->len - last)
@@ -62,7 +68,7 @@ fold(FILE *fp, const char *fname)
while ((len = getline(&line.data, &size, fp)) > 0) {
line.len = len;
- foldline(&line);
+ foldline(&line, fname);
}
if (ferror(fp))
eprintf("getline %s:", fname);