[hackers] [sbase] Add even stricter UTF-8-support to wc(1) || FRIGN

git Tue, 24 Mar 2015 15:56:21 -0700

commit 986a9de51a77e7f6803e1b2259ec0675762077db
Author: FRIGN <[email protected]>
Date:   Sun Feb 1 04:06:06 2015 +0100


    Add even stricter UTF-8-support to wc(1)
    
    This uses readrune() and iswspace().
    musl, for instance, doesn't differentiate between iswspace() and
    isspace(), but if it ever does, the code will be ready.
    It goes without saying that GNU coreutils don't use iswspace()[0].
    
    [0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c

diff --git a/wc.c b/wc.c
index f283e1b..6af23d4 100644
--- a/wc.c
+++ b/wc.c
@@ -3,7 +3,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <wctype.h>
 
+#include "utf.h"
 #include "util.h"
 
 static int    lflag = 0;
@@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
 void
 wc(FILE *fp, const char *str)
 {
-       int word = 0;
-       int c;
+       int word = 0, read;
+       Rune c;
        size_t nc = 0, nl = 0, nw = 0;
 
-       while ((c = getc(fp)) != EOF) {
-               if (cmode != 'm' || UTF8_POINT(c))
-                       nc++;
+       while ((read = readrune(str, fp, &c))) {
+               nc += (cmode == 'c') ? read :
+                     (c != Runeerror) ? 1 : 0;
                if (c == '\n')
                        nl++;
-               if (!isspace(c))
+               if (!iswspace(c))
                        word = 1;
                else if (word) {
                        word = 0;

[hackers] [sbase] Add even stricter UTF-8-support to wc(1) || FRIGN

Reply via email to