Here's the diff for UTF-8 support in column(1).
OK?
Index: column.c
===================================================================
RCS file: /cvs/src/usr.bin/column/column.c,v
retrieving revision 1.24
diff -u -p -r1.24 column.c
--- column.c 31 Aug 2016 20:43:57 -0000 1.24
+++ column.c 3 Sep 2016 16:41:55 -0000
@@ -36,10 +36,12 @@
#include <ctype.h>
#include <err.h>
#include <limits.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <wchar.h>
void c_columnate(void);
void *ereallocarray(void *, size_t, size_t);
@@ -60,7 +62,7 @@ int entries; /* number of records */
int eval; /* exit value */
int *maxwidths; /* longest record per column */
struct field **table; /* one array of pointers per line */
-char *separator = "\t "; /* field separator for table option */
+wchar_t *separator = L"\t "; /* field separator for table option */
int
main(int argc, char *argv[])
@@ -71,6 +73,8 @@ main(int argc, char *argv[])
char *p;
const char *errstr;
+ setlocale(LC_CTYPE, "");
+
termwidth = 0;
if ((p = getenv("COLUMNS")) != NULL)
termwidth = strtonum(p, 1, INT_MAX, NULL);
@@ -92,7 +96,12 @@ main(int argc, char *argv[])
errx(1, "%s: %s", errstr, optarg);
break;
case 's':
- separator = optarg;
+ if ((separator = reallocarray(NULL, strlen(optarg) + 1,
+ sizeof(*separator))) == NULL)
+ err(1, NULL);
+ if (mbstowcs(separator, optarg, strlen(optarg) + 1) ==
+ (size_t) -1)
+ err(1, "sep");
break;
case 't':
tflag = 1;
@@ -106,7 +115,7 @@ main(int argc, char *argv[])
}
if (!tflag)
- separator = "";
+ separator = L"";
argv += optind;
if (*argv == NULL) {
@@ -226,6 +235,8 @@ input(FILE *fp)
size_t blen;
ssize_t llen;
char *p, *s, *buf = NULL;
+ wchar_t wc;
+ int wlen;
while ((llen = getline(&buf, &blen, fp)) > -1) {
if (buf[llen - 1] == '\n')
@@ -244,8 +255,9 @@ input(FILE *fp)
/* Skip leading, multiple, and trailing separators. */
- while (*p != '\0' && strchr(separator, *p) != NULL)
- p++;
+ while ((wlen = mbtowc(&wc, p, MB_CUR_MAX)) != -1 &&
+ wcschr(separator, wc) != NULL)
+ p += wlen;
if (*p == '\0')
break;
@@ -256,11 +268,22 @@ input(FILE *fp)
s = p;
width = 0;
- while (*p != '\0' && strchr(separator, *p) == NULL) {
- if (*p++ == '\t')
- INCR_NEXTTAB(width);
- else
+ for (;;) {
+ if (*p == '\0')
+ break;
+
+ if ((wlen = mbtowc(&wc, p, MB_CUR_MAX)) == -1) {
width++;
+ p++;
+ continue;
+ }
+ if (wcschr(separator, wc) != NULL)
+ break;
+ if (*p == '\t')
+ INCR_NEXTTAB(width);
+ else
+ width += wcwidth(wc);
+ p += wlen;
}
if (col + 1 >= maxcols) {