commit cdbc0d50356a0f7e0dd5755e3c46593a947cf029
Author: FRIGN <[email protected]>
Date:   Thu Jan 29 20:15:50 2015 +0100

    Add UTF8-support to paste(1) and refactor parallel()
    
    Hopefully it now is clearer what the heck is going on.

diff --git a/README b/README
index 2530204..82b524b 100644
--- a/README
+++ b/README
@@ -49,7 +49,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 =* nice            yes                             none
 =  nl              no                              -d, -f, -h, -l, -n, -p, -v, -w
 =* nohup           yes                             none
-   paste           yes                             none
+#  paste           yes                             none
 =  printenv        non-posix                       none
    printf          stolen                          stolen
 =* pwd             yes                             none
diff --git a/paste.c b/paste.c
index 382ad79..5529520 100644
--- a/paste.c
+++ b/paste.c
@@ -4,38 +4,131 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
-#include <wchar.h>
 
+#include "utf.h"
 #include "util.h"
 
-typedef struct {
+struct fdescr {
        FILE *fp;
        const char *name;
-} Fdescr;
+};
 
-static size_t unescape(wchar_t *);
-static wint_t in(Fdescr *);
-static void out(wchar_t);
-static void sequential(Fdescr *, int, const wchar_t *, size_t);
-static void parallel(Fdescr *, int, const wchar_t *, size_t);
+/*
+ * Resolve the backslash escape sequences \\, \a, \b, \f, \n, \r, \t and \v
+ * in s in place and return the length of the resulting string. A backslash
+ * followed by anything else, including the end of the string, is reported
+ * through eprintf().
+ */
+static size_t
+resolveescapes(char *s)
+{
+       size_t n, pos;
+       char repl;
+
+       n = strlen(s);
+
+       for (pos = 0; pos < n; pos++) {
+               if (s[pos] != '\\')
+                       continue;
+
+               switch (s[pos + 1]) {
+               case '\\': repl = '\\'; break;
+               case 'a':  repl = '\a'; break;
+               case 'b':  repl = '\b'; break;
+               case 'f':  repl = '\f'; break;
+               case 'n':  repl = '\n'; break;
+               case 'r':  repl = '\r'; break;
+               case 't':  repl = '\t'; break;
+               case 'v':  repl = '\v'; break;
+               case '\0':
+                       eprintf("paste: null escape sequence in delimiter\n");
+                       /*
+                        * NOTE(review): there is no break here, so control
+                        * reaches the default label if eprintf() returns;
+                        * presumably it terminates the program -- confirm.
+                        */
+               default:
+                       eprintf("paste: invalid escape sequence '\\%c' in "
+                               "delimiter\n", s[pos + 1]);
+                       /* nothing was resolved, both bytes stay as they are */
+                       continue;
+               }
+
+               /*
+                * Store the resolved character over the backslash and close
+                * the one-byte gap; the terminating NUL moves with the tail.
+                */
+               s[pos] = repl;
+               memmove(&s[pos + 1], &s[pos + 2], n - pos - 1);
+               n--;
+       }
+
+       return n;
+}
+
+/*
+ * Serial mode: write each file in dsc[0..fdescrlen-1] as one output line.
+ * Every newline of a file that is followed by more input is replaced by the
+ * next entry of delim, cycling through its delimlen entries; the cycle
+ * restarts with every file. A delimiter entry of '\0' produces no output,
+ * and an empty list (delimlen == 0) joins the lines without any delimiter.
+ */
+static void
+sequential(struct fdescr *dsc, int fdescrlen, Rune *delim, size_t delimlen)
+{
+       Rune c, last;
+       size_t i, d;
+
+       for (i = 0; i < fdescrlen; i++) {
+               d = 0;
+               last = 0;
+
+               while (readrune(dsc[i].name, dsc[i].fp, &c)) {
+                       /*
+                        * A newline is held back until the next rune shows
+                        * up, so the newline that ends the file is never
+                        * turned into a delimiter. The delimlen test keeps
+                        * an empty list from causing a modulo by zero.
+                        */
+                       if (last == '\n' && delimlen > 0) {
+                               if (delim[d] != '\0')
+                                       writerune("<stdout>", stdout, &delim[d]);
+                               d = (d + 1) % delimlen;
+                       }
+
+                       if (c != '\n')
+                               writerune("<stdout>", stdout, &c);
+                       last = c;
+               }
+
+               /* emit the held-back newline that ended the file */
+               if (last == '\n')
+                       writerune("<stdout>", stdout, &last);
+       }
+}
+
+/*
+ * Parallel mode: build every output line from the next line of each file in
+ * dsc[0..fdescrlen-1]. Column i is followed by delim[i % delimlen], the
+ * last column by a newline. A file that is already exhausted contributes an
+ * empty column, and a final line that lacks its newline is terminated like
+ * any other line. Rows are produced until a whole pass reads nothing from
+ * any file. With an empty list (delimlen == 0) no delimiters are written.
+ */
+static void
+parallel(struct fdescr *dsc, int fdescrlen, Rune *delim, size_t delimlen)
+{
+       Rune c, nl = '\n';
+       size_t i, m;
+       ssize_t last;
+
+       do {
+               /*
+                * Highest column index handled in this row; stays -1 until
+                * some file yields a rune, so that a row is only started
+                * when there is still input left.
+                */
+               last = -1;
+
+               for (i = 0; i < fdescrlen; i++) {
+                       while (readrune(dsc[i].name, dsc[i].fp, &c)) {
+                               /*
+                                * First output of the row comes from a later
+                                * column: emit the delimiters of the skipped
+                                * columns first. Once last == i this loop
+                                * no longer runs.
+                                */
+                               for (m = last + 1; m < i; m++) {
+                                       if (delimlen > 0)
+                                               writerune("<stdout>", stdout, &delim[m % delimlen]);
+                               }
+                               last = i;
+
+                               if (c == '\n')
+                                       break;
+                               writerune("<stdout>", stdout, &c);
+                       }
+
+                       /* the row is still empty, nothing to terminate yet */
+                       if (last == -1)
+                               continue;
+
+                       /*
+                        * Close the column, no matter whether it ended with
+                        * a newline, with the end of the file, or was empty.
+                        */
+                       if (i == fdescrlen - 1) {
+                               writerune("<stdout>", stdout, &nl);
+                       } else if (delimlen > 0) {
+                               writerune("<stdout>", stdout, &delim[i % delimlen]);
+                       }
+                       last = i;
+               }
+       } while (last != -1);
+}
 
 static void
 usage(void)
 {
-       eprintf("usage: %s [-s] [-d list] file...\n", argv0);
+       eprintf("usage: %s [-s] [-d list] file ...\n", argv0);
 }
 
 int
 main(int argc, char *argv[])
 {
-       const char *adelim = NULL;
-       int seq = 0;
-       wchar_t *delim = NULL;
-       size_t len;
-       Fdescr *dsc = NULL;
-       int i;
-
-       setlocale(LC_CTYPE, "");
+       struct fdescr *dsc;
+       Rune  *delim;
+       size_t i, len;
+       int    seq = 0;
+       char  *adelim = "\t";
 
        ARGBEGIN {
        case 's':
@@ -51,20 +144,9 @@ main(int argc, char *argv[])
        if (argc == 0)
                usage();
 
-       /* populate delimeters */
-       if (!adelim)
-               adelim = "\t";
-
-       len = mbstowcs(NULL, adelim, 0);
-       if (len == (size_t) - 1)
-               eprintf("invalid delimiter\n");
-
-       delim = emalloc((len + 1) * sizeof(*delim));
-
-       mbstowcs(delim, adelim, len);
-       len = unescape(delim);
-       if (len == 0)
-               eprintf("no delimiters specified\n");
+       /* populate delimiters */
+       resolveescapes(adelim);
+       len = chartorunearr(adelim, &delim);
 
        /* populate file list */
        dsc = emalloc(argc * sizeof(*dsc));
@@ -76,7 +158,7 @@ main(int argc, char *argv[])
                        dsc[i].fp = fopen(argv[i], "r");
 
                if (!dsc[i].fp)
-                       eprintf("can't open '%s':", argv[i]);
+                       eprintf("fopen %s:", argv[i]);
 
                dsc[i].name = argv[i];
        }
@@ -91,135 +173,5 @@ main(int argc, char *argv[])
                        (void)fclose(dsc[i].fp);
        }
 
-       free(delim);
-       free(dsc);
-
        return 0;
 }
-
-static size_t
-unescape(wchar_t *delim)
-{
-       wchar_t c;
-       size_t i;
-       size_t len;
-
-       for (i = 0, len = 0; (c = delim[i++]) != '\0'; len++) {
-               if (c == '\\') {
-                       switch (delim[i++]) {
-                       case 'n':
-                               delim[len] = '\n';
-                               break;
-                       case 't':
-                               delim[len] = '\t';
-                               break;
-                       case '0':
-                               delim[len] = '\0';
-                               break;
-                       case '\\':
-                               delim[len] = '\\';
-                               break;
-                       case '\0':
-                       default:
-                               /* POSIX: unspecified results */
-                               return len;
-                       }
-               } else
-                       delim[len] = c;
-       }
-
-       return len;
-}
-
-static wint_t
-in(Fdescr *f)
-{
-       wint_t c = fgetwc(f->fp);
-
-       if (c == WEOF && ferror(f->fp))
-               eprintf("'%s' read error:", f->name);
-
-       return c;
-}
-
-static void
-out(wchar_t c)
-{
-       putwchar(c);
-       if (ferror(stdout))
-               eprintf("write error:");
-}
-
-static void
-sequential(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt)
-{
-       int i;
-       size_t d;
-       wint_t c, last;
-
-       for (i = 0; i < len; i++) {
-               d = 0;
-               last = WEOF;
-
-               while ((c = in(&dsc[i])) != WEOF) {
-                       if (last == '\n') {
-                               if (delim[d] != '\0')
-                                       out(delim[d]);
-
-                               d++;
-                               d %= cnt;
-                       }
-
-                       if (c != '\n')
-                               out((wchar_t)c);
-
-                       last = c;
-               }
-
-               if (last == '\n')
-                       out((wchar_t)last);
-       }
-}
-
-static void
-parallel(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt)
-{
-       int last, i;
-       wint_t c, o;
-       wchar_t d;
-
-       do {
-               last = 0;
-               for (i = 0; i < len; i++) {
-                       d = delim[i % cnt];
-
-                       do {
-                               o = in(&dsc[i]);
-                               c = o;
-                               switch (c) {
-                               case WEOF:
-                                       if (last == 0)
-                                               break;
-
-                                       o = '\n';
-                                       /* fallthrough */
-                               case '\n':
-                                       if (i != len - 1)
-                                               o = d;
-                                       break;
-                               default:
-                                       break;
-                               }
-
-                               if (o != WEOF) {
-                                       /* pad with delimiters up to this point */
-                                       while (++last < i) {
-                                               if (d != '\0')
-                                                       out(d);
-                                       }
-                                       out((wchar_t)o);
-                               }
-                       } while (c != '\n' && c != WEOF);
-               }
-       } while (last > 0);
-}

Reply via email to