[hackers] [sbase] Add even stricter UTF-8-support to wc(1) || FRIGN

git Sun, 01 Feb 2015 02:21:08 -0800

commit 017ec7655d5fe4df729633dbe1199c98de41e3cf
Author: FRIGN <[email protected]>
Date:   Sun Feb 1 04:06:06 2015 +0100


    Add even stricter UTF-8-support to wc(1)
    
    This is done using readrune() and iswspace().
    musl, for instance, doesn't differentiate between iswspace() and
    isspace(), but when it does, the code will be ready.
    It goes without saying that GNU coreutils don't use iswspace()[0].
    
    [0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c

diff --git a/wc.c b/wc.c
index f283e1b..6af23d4 100644
--- a/wc.c
+++ b/wc.c
@@ -3,7 +3,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <wctype.h>
 
+#include "utf.h"
 #include "util.h"
 
 static int    lflag = 0;
@@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
 void
 wc(FILE *fp, const char *str)
 {
-       int word = 0;
-       int c;
+       int word = 0, read;
+       Rune c;
        size_t nc = 0, nl = 0, nw = 0;
 
-       while ((c = getc(fp)) != EOF) {
-               if (cmode != 'm' || UTF8_POINT(c))
-                       nc++;
+       while ((read = readrune(str, fp, &c))) {
+               nc += (cmode == 'c') ? read :
+                     (c != Runeerror) ? 1 : 0;
                if (c == '\n')
                        nl++;
-               if (!isspace(c))
+               if (!iswspace(c))
                        word = 1;
                else if (word) {
                        word = 0;

[hackers] [sbase] Add even stricter UTF-8-support to wc(1) || FRIGN

Reply via email to