In rev(1), we evaluate MB_CUR_MAX for every byte in the input stream,
because isu8cont() checks it each time it is called.
This is extremely expensive.

It is much cheaper to call it once per line and use a simpler loop
(see the inlined patch below) if the current locale doesn't handle
multibyte characters:

# -current rev(1)
$ for i in $(jot 10); do nanotime rev /usr/share/dict/words > /dev/null; done
        0.136695757 real         0.130 user         0.010 sys
        0.075677725 real         0.060 user         0.000 sys
        0.075275764 real         0.070 user         0.000 sys
        0.075140009 real         0.070 user         0.000 sys
        0.075186630 real         0.070 user         0.010 sys
        0.075256959 real         0.080 user         0.000 sys
        0.076920069 real         0.080 user         0.000 sys
        0.075097523 real         0.060 user         0.010 sys
        0.075369093 real         0.070 user         0.000 sys
        0.075532266 real         0.070 user         0.000 sys

# patched rev(1)
$ for i in $(jot 10); do nanotime obj/rev /usr/share/dict/words > /dev/null; done
        0.068547813 real         0.060 user         0.010 sys
        0.022880303 real         0.020 user         0.000 sys
        0.022530839 real         0.020 user         0.000 sys
        0.022801439 real         0.020 user         0.000 sys
        0.022595941 real         0.020 user         0.000 sys
        0.022768434 real         0.020 user         0.000 sys
        0.022536526 real         0.020 user         0.000 sys
        0.022611791 real         0.020 user         0.000 sys
        0.022943240 real         0.020 user         0.000 sys
        0.022329260 real         0.020 user         0.000 sys

Over 3 times as fast as -current rev(1).

Multibyte locales also benefit:

# patched rev(1), LC_CTYPE=en_US.UTF-8
$ for i in $(jot 10); do LC_CTYPE=en_US.UTF-8 nanotime obj/rev /usr/share/dict/words >/dev/null; done
        0.088514093 real         0.070 user         0.010 sys
        0.026025780 real         0.020 user         0.000 sys
        0.025384554 real         0.020 user         0.000 sys
        0.025385471 real         0.020 user         0.000 sys
        0.025474371 real         0.020 user         0.010 sys
        0.025685188 real         0.030 user         0.000 sys
        0.025666618 real         0.030 user         0.000 sys
        0.025783925 real         0.020 user         0.000 sys
        0.025348339 real         0.020 user         0.000 sys
        0.025672734 real         0.020 user         0.010 sys

About 3 times as fast as -current rev(1).

CC schwarze@ to double-check I'm not misunderstanding MB_CUR_MAX.  I'm
under the impression the return value cannot change unless we call
setlocale(3).

ok?

Index: rev.c
===================================================================
RCS file: /cvs/src/usr.bin/rev/rev.c,v
retrieving revision 1.13
diff -u -p -r1.13 rev.c
--- rev.c       10 Apr 2016 17:06:52 -0000      1.13
+++ rev.c       7 Jan 2022 15:38:39 -0000
@@ -82,13 +82,18 @@ main(int argc, char *argv[])
                while ((len = getline(&p, &ps, fp)) != -1) {
                        if (p[len - 1] == '\n')
                                --len;
-                       for (t = p + len - 1; t >= p; --t) {
-                               if (isu8cont(*t))
-                                       continue;
-                               u = t;
-                               do {
-                                       putchar(*u);
-                               } while (isu8cont(*(++u)));
+                       if (MB_CUR_MAX == 1) {
+                               for (t = p + len - 1; t >= p; t--)
+                                       putchar(*t);
+                       } else {
+                               for (t = p + len - 1; t >= p; --t) {
+                                       if (isu8cont(*t))
+                                               continue;
+                                       u = t;
+                                       do {
+                                               putchar(*u);
+                                       } while (isu8cont(*(++u)));
+                               }
                        }
                        putchar('\n');
                }
@@ -104,7 +109,7 @@ main(int argc, char *argv[])
 int
 isu8cont(unsigned char c)
 {
-       return MB_CUR_MAX > 1 && (c & (0x80 | 0x40)) == 0x80;
+       return (c & (0x80 | 0x40)) == 0x80;
 }
 
 void

Reply via email to