---
 .gitignore |   1 +
 Makefile   |   1 +
 README     |   1 +
 shuf.1     |  43 +++++++++++++++++++++
 shuf.c     | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 156 insertions(+)
 create mode 100644 shuf.1
 create mode 100644 shuf.c

diff --git a/.gitignore b/.gitignore
index 001b055..e789e24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
 /sha512sum
 /sha512-224sum
 /sha512-256sum
+/shuf
 /sleep
 /sort
 /split
diff --git a/Makefile b/Makefile
index 7bd9626..2807e6b 100644
--- a/Makefile
+++ b/Makefile
@@ -160,6 +160,7 @@ BIN =\
        sha512sum\
        sha512-224sum\
        sha512-256sum\
+       shuf\
        sleep\
        sort\
        split\
diff --git a/README b/README
index 5555f8b..9b287d9 100644
--- a/README
+++ b/README
@@ -112,6 +112,7 @@ The following tools are implemented:
 0=*|x sha512sum       .
 0=* x sha512-224sum   .
 0=* x sha512-256sum   .
+0=* x shuf            (-o, -z, -e, -i)
 0=*|o sleep           .
 0#*|o sort            .
 0=*|o split           .
diff --git a/shuf.1 b/shuf.1
new file mode 100644
index 0000000..223da4e
--- /dev/null
+++ b/shuf.1
@@ -0,0 +1,43 @@
+.Dd 2024-02-25
+.Dt SHUF 1
+.Os
+.Sh NAME
+.Nm shuf
+.Nd randomly permute input lines
+.Sh SYNOPSIS
+.Nm
+.Op Fl n Ar count
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+is a utility that outputs a random permutation of its input lines.
+By default,
+.Nm
+reads from stdin and outputs to stdout.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl r
+Do not permute.
+Instead, choose lines at random, with replacement.
+By default, it repeats indefinitely.
+.El
+.Sh NOTES
+This
+.Nm
+misses a few options.
+.Pp
+.Fl n
+can be reproduced using
+.Xr head 1 .
+.Pp
+.Fl i
+can be reproduced using
+.Xr seq 1 .
+.Pp
+.Fl e
+can be reproduced using
+.Xr printf 1 .
+.Sh SEE ALSO
+.Xr sort 1 ,
+.Xr seq 1 ,
+.Xr head 1
diff --git a/shuf.c b/shuf.c
new file mode 100644
index 0000000..309e469
--- /dev/null
+++ b/shuf.c
@@ -0,0 +1,110 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "text.h"
+#include "util.h"
+
+/*
+ * Uniformity is achieved by generating new random numbers until the one
+ * returned is outside the range [0, 2**32 % upper_bound).  This
+ * guarantees the selected random number will be inside
+ * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
+ * after reduction modulo upper_bound.
+ *
+ * Copied off OpenBSD (original is arc4random_uniform)
+ */
+static uint32_t
+random_uniform(uint32_t upper_bound)
+{
+       uint32_t r, min;
+
+       if (upper_bound < 2)
+               return 0;
+
+       /* 2**32 % x == (2**32 - x) % x */
+       min = -upper_bound % upper_bound;
+
+       /*
+        * This could theoretically loop forever but each retry has
+        * p > 0.5 (worst case, usually far better) of selecting a
+        * number inside the range we need, so it should rarely need
+        * to re-roll.
+        */
+       for (;;) {
+               r = random(); /* arc4random() is better, but we don't have it */
+               if (r >= min)
+                       break;
+       }
+
+       return r % upper_bound;
+}
+
+static void
+usage(void)
+{
+       eprintf("usage: %s [-r] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[])
+{
+       FILE *fp;
+       int i, j, rflag = 0, ret = 0;
+       struct linebuf buf = EMPTY_LINEBUF;
+       struct line line;
+
+       ARGBEGIN {
+       case 'r':
+               rflag = 1;
+               break;
+       default:
+               usage();
+       } ARGEND
+
+       if (!argc) {
+               getlines(stdin, &buf);
+       } else {
+               for (; *argv; argc--, argv++) {
+                       if (!strcmp(*argv, "-")) {
+                               *argv = "<stdin>";
+                               fp = stdin;
+                       } else if (!(fp = fopen(*argv, "r"))) {
+                               weprintf("fopen %s:", *argv);
+                               ret = 1;
+                               continue;
+                       }
+                       getlines(fp, &buf);
+                       if (fp != stdin && fshut(fp, *argv))
+                               ret = 1;
+               }
+       }
+
+       srandom((intptr_t)buf.lines | time(NULL));
+
+       if (!rflag) {
+               for (i = buf.nlines - 1; i > 0; i--) {
+                       j = random_uniform(i+1);
+                       line = buf.lines[j];
+                       buf.lines[j] = buf.lines[i];
+                       buf.lines[i] = line;
+               }
+
+               for (i = 0; i < buf.nlines; i++) {
+                       line = buf.lines[i];
+                       fwrite(line.data, 1, line.len, stdout);
+               }
+       } else {
+               for (;;) {
+                       j = random_uniform(buf.nlines);
+                       line = buf.lines[j];
+                       fwrite(line.data, 1, line.len, stdout);
+               }
+       }
+
+       ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
+
+       return ret;
+}
-- 
2.43.0


Reply via email to