Hello, inspired by the attempt to make `sort' multi-threaded, I added thread support to md5sum and the sha* family of programs. It has an effect only when multiple files are specified.
Any comment? Cheers, Giuseppe >From 1e4ed081f41ac0955542d3a0f1ad143047b8ac25 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano <gscriv...@gnu.org> Date: Sun, 18 Oct 2009 00:19:25 +0200 Subject: [PATCH] md5: accepts a new --threads option * NEWS: Mention it. * bootstrap.conf: Use the `nproc' and `pthread' modules from gnulib. * doc/coreutils.texi: Document the new feature. * src/Makefile.am (md5sum, sha1sum, sha224, sha256, sha384, sha512): Link to the pthread library. * src/md5sum.c (main): Add --threads and move some code into new functions. (long_options, usage): Add --threads. (do_file): New function. (thread_start): New function. (check_files): New function. * tests/misc/md5sum: Test the new --threads option. * tests/misc/sha1sum: Ditto. * tests/misc/sha224sum: Ditto. * tests/misc/sha256sum: Ditto. * tests/misc/sha384sum: Ditto. * tests/misc/sha512sum: Ditto. --- NEWS | 3 + bootstrap.conf | 2 + doc/coreutils.texi | 8 ++ src/Makefile.am | 12 ++-- src/md5sum.c | 234 +++++++++++++++++++++++++++++++++++++------------- tests/misc/md5sum | 6 ++ tests/misc/sha1sum | 6 ++ tests/misc/sha224sum | 6 ++ tests/misc/sha256sum | 6 ++ tests/misc/sha384sum | 6 ++ tests/misc/sha512sum | 6 ++ 11 files changed, 230 insertions(+), 65 deletions(-) diff --git a/NEWS b/NEWS index f8269fc..70af0b3 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,9 @@ GNU coreutils NEWS -*- outline -*- md5sum --check now also accepts openssl-style checksums. So do sha1sum, sha224sum, sha384sum and sha512sum. + md5sum, sha1sum, sha224sum, sha384sum and sha512sum accept a new option + --threads to improve parallelism when multiple files are specified. 
+ * Noteworthy changes in release 8.0 (2009-10-06) [beta] diff --git a/bootstrap.conf b/bootstrap.conf index e9b198c..fb3304d 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -155,6 +155,7 @@ gnulib_modules=" mktime modechange mountlist + nproc mpsort obstack pathmax @@ -166,6 +167,7 @@ gnulib_modules=" priv-set progname propername + pthread putenv quote quotearg diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 5026e76..b81cb81 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -3496,6 +3496,14 @@ distinguish between binary and text files. On other systems, it is the default for reading standard input when standard input is a terminal. +@itemx --threads=@var{n} +@opindex --threads +@cindex verifying MD5 checksums +Use up to @var{n} threads when multiple files are specified. If a +value is not specified then the number of processors is used. The +number of threads used is limited by the number of specified files, +so no more threads than files are ever created. 
+ @item -w @itemx --warn @opindex -w diff --git a/src/Makefile.am b/src/Makefile.am index 915ea81..33d2563 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -220,7 +220,7 @@ link_LDADD = $(LDADD) ln_LDADD = $(LDADD) logname_LDADD = $(LDADD) ls_LDADD = $(LDADD) -md5sum_LDADD = $(LDADD) +md5sum_LDADD = $(LDADD) $(LIB_PTHREAD) mkdir_LDADD = $(LDADD) mkfifo_LDADD = $(LDADD) mknod_LDADD = $(LDADD) @@ -244,11 +244,11 @@ rmdir_LDADD = $(LDADD) runcon_LDADD = $(LDADD) seq_LDADD = $(LDADD) setuidgid_LDADD = $(LDADD) -sha1sum_LDADD = $(LDADD) -sha224sum_LDADD = $(LDADD) -sha256sum_LDADD = $(LDADD) -sha384sum_LDADD = $(LDADD) -sha512sum_LDADD = $(LDADD) +sha1sum_LDADD = $(LDADD) $(LIB_PTHREAD) +sha224sum_LDADD = $(LDADD) $(LIB_PTHREAD) +sha256sum_LDADD = $(LDADD) $(LIB_PTHREAD) +sha384sum_LDADD = $(LDADD) $(LIB_PTHREAD) +sha512sum_LDADD = $(LDADD) $(LIB_PTHREAD) shred_LDADD = $(LDADD) shuf_LDADD = $(LDADD) sleep_LDADD = $(LDADD) diff --git a/src/md5sum.c b/src/md5sum.c index aa2a144..161f1a6 100644 --- a/src/md5sum.c +++ b/src/md5sum.c @@ -20,8 +20,11 @@ #include <getopt.h> #include <sys/types.h> +#include <pthread.h> #include "system.h" +#include "nproc.h" +#include "xstrtol.h" #if HASH_ALGO_MD5 # include "md5.h" @@ -126,7 +129,8 @@ static bool quiet = false; enum { STATUS_OPTION = CHAR_MAX + 1, - QUIET_OPTION + QUIET_OPTION, + THREADS_OPTION }; static struct option const long_options[] = @@ -136,12 +140,28 @@ static struct option const long_options[] = { "quiet", no_argument, NULL, QUIET_OPTION }, { "status", no_argument, NULL, STATUS_OPTION }, { "text", no_argument, NULL, 't' }, + { "threads", optional_argument, NULL, THREADS_OPTION}, { "warn", no_argument, NULL, 'w' }, { GETOPT_HELP_OPTION_DECL }, { GETOPT_VERSION_OPTION_DECL }, { NULL, 0, NULL, 0 } }; + +struct thread_arg +{ + char **files; + int n_files; + unsigned char **bin_buffer; + bool *res; + int *file_is_binary; + bool do_check; + bool *busy; + + /* Protect BUSY. 
*/ + pthread_mutex_t mutex; +}; + void usage (int status) { @@ -179,6 +199,8 @@ With no FILE, or when FILE is -, read standard input.\n\ -t, --text read in text mode (default)\n\ "), stdout); fputs (_("\ + --threads=N use up to N threads\n"), stdout); + fputs (_("\ \n\ The following three options are useful only when verifying checksums:\n\ --quiet don't print OK for each successfully verified file\n\ @@ -599,16 +621,154 @@ digest_check (const char *checkfile_name) && n_open_or_read_failures == 0); } +static void +do_file (struct thread_arg *ts, int j) +{ + char *file = ts->files[j]; + if (ts->do_check) + ts->res[j] = digest_check (file); + else + ts->res[j] = digest_file (file, &ts->file_is_binary[j], + ts->bin_buffer[j]); +} + +static void* +thread_start (void *arg) +{ + struct thread_arg *ts = arg; + int current = 0; + while (1) + { + pthread_mutex_lock (&ts->mutex); + + while (current < ts->n_files && ts->busy[current]) + current++; + + if (current < ts->n_files) + ts->busy[current] = true; + + pthread_mutex_unlock (&ts->mutex); + + /* No other files, exit from the thread. */ + if (ts->n_files <= current) + return NULL; + + do_file (ts, current++); + } + + return NULL; +} + +static bool +check_files (char **files, unsigned long n_threads, int n_files, bool do_check, + int binary) +{ + int j; + int ok = 1; + unsigned char *bin_buffer_unaligned = xnmalloc (DIGEST_BIN_BYTES + + DIGEST_ALIGN, n_files); + unsigned char *bin_buffer[n_files]; + bool res[n_files]; + bool busy[n_files]; + int file_is_binary[n_files]; + pthread_t tids[n_threads - 1]; + + struct thread_arg ts = + { + .bin_buffer = bin_buffer, + .busy = busy, + .do_check = do_check, + .files = files, + .file_is_binary = file_is_binary, + .mutex = PTHREAD_MUTEX_INITIALIZER, + .n_files = n_files, + .res = res, + }; + + for (int j = 0; j < n_files; j++) + { + /* Make sure bin_buffer is properly aligned. 
*/ + unsigned char *tmp = &bin_buffer_unaligned[j * (DIGEST_BIN_BYTES + + DIGEST_ALIGN)]; + bin_buffer[j] = ptr_align (tmp, DIGEST_ALIGN); + file_is_binary[j] = binary; + ts.busy[j] = false; + } + + for (j = 0; j < n_threads - 1; j++) + if (pthread_create (&tids[j], NULL, thread_start, &ts)) + error (EXIT_FAILURE, errno, "cannot spawn a new thread"); + + /* Use the main thread as a regular thread. */ + thread_start (&ts); + + for (j = 0; j < n_threads - 1; j++) + pthread_join (tids[j], NULL); + + for (j = 0; j < n_files; j++) + { + char *file = files[j]; + + if (do_check) + ok &= res[j]; + else + { + if (! res[j]) + ok = false; + else + { + size_t i; + + /* Output a leading backslash if the file name contains + a newline or backslash. */ + if (strchr (file, '\n') || strchr (file, '\\')) + putchar ('\\'); + + for (i = 0; i < (digest_hex_bytes / 2); ++i) + printf ("%02x", bin_buffer[j][i]); + + putchar (' '); + if (file_is_binary[j]) + putchar ('*'); + else + putchar (' '); + + /* Translate each NEWLINE byte to the string, "\\n", + and each backslash to "\\\\". */ + for (i = 0; i < strlen (file); ++i) + { + switch (file[i]) + { + case '\n': + fputs ("\\n", stdout); + break; + + case '\\': + fputs ("\\\\", stdout); + break; + + default: + putchar (file[i]); + break; + } + } + putchar ('\n'); + } + } + } + free (bin_buffer_unaligned); + + return ok > 0; +} + int main (int argc, char **argv) { - unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN]; - /* Make sure bin_buffer is properly aligned. */ - unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN); - bool do_check = false; int opt; - bool ok = true; + bool ok; int binary = -1; + bool do_check = false; + unsigned long n_threads = 1; /* Setting values of global variables. 
*/ initialize_main (&argc, &argv); @@ -646,6 +806,12 @@ main (int argc, char **argv) warn = false; quiet = true; break; + case THREADS_OPTION: + if (optarg) + xstrtoul (optarg, NULL, 10, &n_threads, ""); + else + n_threads = num_processors (); + break; case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: @@ -689,59 +855,9 @@ main (int argc, char **argv) if (optind == argc) argv[argc++] = bad_cast ("-"); - for (; optind < argc; ++optind) - { - char *file = argv[optind]; - - if (do_check) - ok &= digest_check (file); - else - { - int file_is_binary = binary; - - if (! digest_file (file, &file_is_binary, bin_buffer)) - ok = false; - else - { - size_t i; - - /* Output a leading backslash if the file name contains - a newline or backslash. */ - if (strchr (file, '\n') || strchr (file, '\\')) - putchar ('\\'); - - for (i = 0; i < (digest_hex_bytes / 2); ++i) - printf ("%02x", bin_buffer[i]); - - putchar (' '); - if (file_is_binary) - putchar ('*'); - else - putchar (' '); - - /* Translate each NEWLINE byte to the string, "\\n", - and each backslash to "\\\\". */ - for (i = 0; i < strlen (file); ++i) - { - switch (file[i]) - { - case '\n': - fputs ("\\n", stdout); - break; - - case '\\': - fputs ("\\\\", stdout); - break; - - default: - putchar (file[i]); - break; - } - } - putchar ('\n'); - } - } - } + size_t n_files = argc - optind; + ok = check_files (&argv[optind], MIN (n_threads, n_files), n_files, do_check, + binary); if (have_read_stdin && fclose (stdin) == EOF) error (EXIT_FAILURE, errno, _("standard input")); diff --git a/tests/misc/md5sum b/tests/misc/md5sum index 30edd9e..ae49954 100755 --- a/tests/misc/md5sum +++ b/tests/misc/md5sum @@ -96,6 +96,12 @@ foreach $t (@Tests) splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/; } +# Insert the `--threads=2' argument for each test. 
+foreach $t (@Tests) + { + splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/; + } + my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; diff --git a/tests/misc/sha1sum b/tests/misc/sha1sum index d084204..2a3ca6a 100755 --- a/tests/misc/sha1sum +++ b/tests/misc/sha1sum @@ -82,6 +82,12 @@ foreach $t (@Tests) splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/; } +# Insert the `--threads=2' argument for each test. +foreach $t (@Tests) + { + splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/; + } + my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; diff --git a/tests/misc/sha224sum b/tests/misc/sha224sum index aace96c..0405510 100755 --- a/tests/misc/sha224sum +++ b/tests/misc/sha224sum @@ -41,6 +41,12 @@ foreach $t (@Tests) splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/; } +# Insert the `--threads=2' argument for each test. +foreach $t (@Tests) + { + splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/; + } + my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; diff --git a/tests/misc/sha256sum b/tests/misc/sha256sum index d85f248..e376155 100755 --- a/tests/misc/sha256sum +++ b/tests/misc/sha256sum @@ -47,6 +47,12 @@ foreach $t (@Tests) splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/; } +# Insert the `--threads=2' argument for each test. +foreach $t (@Tests) + { + splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/; + } + my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; diff --git a/tests/misc/sha384sum b/tests/misc/sha384sum index c5818e2..0c60824 100755 --- a/tests/misc/sha384sum +++ b/tests/misc/sha384sum @@ -47,6 +47,12 @@ foreach $t (@Tests) splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/; } +# Insert the `--threads=2' argument for each test. 
+foreach $t (@Tests) + { + splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/; + } + my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; diff --git a/tests/misc/sha512sum b/tests/misc/sha512sum index 9a45602..7e19cb9 100755 --- a/tests/misc/sha512sum +++ b/tests/misc/sha512sum @@ -47,6 +47,12 @@ foreach $t (@Tests) splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/; } +# Insert the `--threads=2' argument for each test. +foreach $t (@Tests) + { + splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/; + } + my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; -- 1.6.3.3