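Here's the diff for UTF-8 support in column(1). It calls
setlocale(LC_CTYPE, ""), keeps the field separator set as a wide
string (converting the -s argument with mbstowcs(3)), and makes
input() decode each line with mbtowc(3) so that field widths are
counted with wcwidth(3) rather than in bytes. Bytes that fail to
decode are counted as one column each.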

OK?

Index: column.c
===================================================================
RCS file: /cvs/src/usr.bin/column/column.c,v
retrieving revision 1.24
diff -u -p -r1.24 column.c
--- column.c    31 Aug 2016 20:43:57 -0000      1.24
+++ column.c    3 Sep 2016 16:41:55 -0000
@@ -36,10 +36,12 @@
 #include <ctype.h>
 #include <err.h>
 #include <limits.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>
 
 void  c_columnate(void);
 void *ereallocarray(void *, size_t, size_t);
@@ -60,7 +62,7 @@ int entries;                  /* number of records */
 int eval;                      /* exit value */
 int *maxwidths;                        /* longest record per column */
 struct field **table;          /* one array of pointers per line */
-char *separator = "\t ";       /* field separator for table option */
+wchar_t *separator = L"\t ";   /* field separator for table option */
 
 int
 main(int argc, char *argv[])
@@ -71,6 +73,8 @@ main(int argc, char *argv[])
        char *p;
        const char *errstr;
 
+       setlocale(LC_CTYPE, "");
+
        termwidth = 0;
        if ((p = getenv("COLUMNS")) != NULL)
                termwidth = strtonum(p, 1, INT_MAX, NULL);
@@ -92,7 +96,12 @@ main(int argc, char *argv[])
                                errx(1, "%s: %s", errstr, optarg);
                        break;
                case 's':
-                       separator = optarg;
+                       if ((separator = reallocarray(NULL, strlen(optarg) + 1,
+                           sizeof(*separator))) == NULL)
+                               err(1, NULL);
+                       if (mbstowcs(separator, optarg, strlen(optarg) + 1) ==
+                           (size_t) -1)
+                               err(1, "sep");
                        break;
                case 't':
                        tflag = 1;
@@ -106,7 +115,7 @@ main(int argc, char *argv[])
        }
 
        if (!tflag)
-               separator = "";
+               separator = L"";
        argv += optind;
 
        if (*argv == NULL) {
@@ -226,6 +235,8 @@ input(FILE *fp)
        size_t blen;
        ssize_t llen;
        char *p, *s, *buf = NULL;
+       wchar_t wc;
+       int wlen;
 
        while ((llen = getline(&buf, &blen, fp)) > -1) {
                if (buf[llen - 1] == '\n')
@@ -244,8 +255,9 @@ input(FILE *fp)
 
                        /* Skip leading, multiple, and trailing separators. */
 
-                       while (*p != '\0' && strchr(separator, *p) != NULL)
-                               p++;
+                       while ((wlen = mbtowc(&wc, p, MB_CUR_MAX)) != -1 &&
+                           wcschr(separator, wc) != NULL)
+                               p += wlen;
                        if (*p == '\0')
                                break;
 
@@ -256,11 +268,22 @@ input(FILE *fp)
 
                        s = p;
                        width = 0;
-                       while (*p != '\0' && strchr(separator, *p) == NULL) {
-                               if (*p++ == '\t')
-                                       INCR_NEXTTAB(width);
-                               else
+                       for (;;) {
+                               if (*p == '\0')
+                                       break;
+
+                               if ((wlen = mbtowc(&wc, p, MB_CUR_MAX)) == -1) {
                                        width++;
+                                       p++;
+                                       continue;
+                               }
+                               if (wcschr(separator, wc) != NULL)
+                                       break;
+                               if (*p == '\t')
+                                       INCR_NEXTTAB(width);
+                               else 
+                                       width += wcwidth(wc);
+                               p += wlen;
                        }
 
                        if (col + 1 >= maxcols) {

Reply via email to