A user noted that the following command was slower than they expected: busybox shuf -i "1500000000-$(date +%s)" -n 5
At the time of writing, the range contains 115 million values. On my system this takes 6.9s, whereas 'shuf' from coreutils takes a handful of milliseconds. Optimise BusyBox 'shuf' for cases where -n is specified by stopping the shuffle once the required number of lines has been processed. On my system the time for the example is reduced to 0.4s. function old new delta shuf_main 521 560 +39 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/0 up/down: 39/0) Total: 39 bytes Signed-off-by: Ron Yorston <[email protected]> --- coreutils/shuf.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/coreutils/shuf.c b/coreutils/shuf.c index fdbd3e9b2..839d1b80f 100644 --- a/coreutils/shuf.c +++ b/coreutils/shuf.c @@ -39,8 +39,10 @@ /* * Use the Fisher-Yates shuffle algorithm on an array of lines. + * If the required number of output lines is less than the total + * we can stop shuffling early. */ -static void shuffle_lines(char **lines, unsigned numlines) +static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines) { unsigned i; unsigned r; @@ -48,7 +50,7 @@ static void shuffle_lines(char **lines, unsigned numlines) srand(monotonic_us()); - for (i = numlines-1; i > 0; i--) { + for (i = numlines-1; i > 0 && outlines > 0; i--, outlines--) { r = rand(); /* RAND_MAX can be as small as 32767 */ if (i > RAND_MAX) @@ -67,7 +69,7 @@ int shuf_main(int argc, char **argv) char *opt_i_str, *opt_n_str, *opt_o_str; unsigned i; char **lines; - unsigned numlines; + unsigned numlines, outlines; char eol; opts = getopt32(argv, "^" @@ -128,24 +130,24 @@ int shuf_main(int argc, char **argv) fclose_if_not_stdin(fp); } - if (numlines != 0) - shuffle_lines(lines, numlines); + outlines = numlines; + if (opts & OPT_n) { + outlines = xatou(opt_n_str); + if (outlines > numlines) + outlines = numlines; + } + + if (numlines != 0 && outlines != 0) + shuffle_lines(lines, numlines, outlines); 
if (opts & OPT_o) xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO); - if (opts & OPT_n) { - unsigned maxlines; - maxlines = xatou(opt_n_str); - if (numlines > maxlines) - numlines = maxlines; - } - eol = '\n'; if (opts & OPT_z) eol = '\0'; - for (i = 0; i < numlines; i++) { + for (i = numlines-outlines; i < numlines; i++) { if (opts & OPT_i) printf("%u%c", (unsigned)(uintptr_t)lines[i], eol); else -- 2.29.2 _______________________________________________ busybox mailing list [email protected] http://lists.busybox.net/mailman/listinfo/busybox
