I really like the patch.
Users get the benefit without needing to specify new options.
Also the overhead in the single threaded case is minimal.
Testing on my single core 1.7GHz pentium-m

seq -f%030.0f 1000000 | shuf > sort.in

$ time LANG=C src/sort.orig < sort.in > /dev/null
3.930s
$ time LANG=C src/sort.orig -S1M -T/dev/shm < sort.in > /dev/null
3.404s

$ time LANG=C src/sort < sort.in > /dev/null
3.838s (2.3% faster)
$ time LANG=C src/sort -S1M -T/dev/shm < sort.in > /dev/null
3.437s (1% slower)

The only thing I was questioning was the --threads option.
I've renamed that to --parallel in the attached patch
(along with other tweaks and cleanups). What do you think?

I still have a couple questions on the option though.
1. Is limiting the number to that returned by nproc()
always the right thing todo?
2. How does this option fit with the threaded external sort.
I.E. the integrated patches will need to share out the
threads to use, and perhaps multiple per CPU is appropriate,
especially in the presence of files on slow devices.

thanks a lot!
Pádraig.
commit 107ca5e80e84380cc40ef7fed46ff0bae9514b95
Author: Pádraig Brady <p...@draigbrady.com>
Date:   Fri Mar 12 23:16:39 2010 +0000

    small mods

diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 71e0abe..a5d8327 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -4061,13 +4061,13 @@ have a large sort or merge that is I/O-bound, you can often improve
 performance by using this option to specify directories on different
 disks and controllers.
 
-...@item --threa...@var{n}
-...@opindex --threads
+...@item --parall...@var{n}
+...@opindex --parallel
 @cindex multithreaded sort
-Use at most @var{n} threads to sort input. By default, @var{n} is set
-to the number of logical cores detected on the machine. If @var{n} is
-larger than the number of detected logical cores, @var{n} will be
-reduced to match the nubmer of detected logical cores.
+Limit the number of sorts run in parallel to @var{n}. By default,
+...@var{n} is set to the number of available processors, and values
+greater than that are reduced to that limit. Also see
+...@ref{nproc invocation}.
 
 @item -u
 @itemx --unique
@@ -4159,10 +4159,10 @@ sort -n -r
 @end example
 
 @item
-Sort using no more than 4 threads, using a buffer size of 10M.
+Run no more that 4 sorts concurrently, using a buffer size of 10M.
 
 @example
-sort --threads=4 -S 10M
+sort --parallel=4 -S 10M
 @end example
 
 @item
diff --git a/src/sort.c b/src/sort.c
index 1045a74..325d59f 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -436,7 +436,7 @@ Other options:\n\
   -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
                               multiple options specify multiple directories\n\
-      --threads=N           use no more than N threads to improve parallelism\n\
+      --parallel=N          Limit the number of sorts run concurrently to N\n\
   -u, --unique              with -c, check for strict ordering;\n\
                               without -c, output only the first of an equal run\n\
 "), DEFAULT_TMPDIR);
@@ -481,7 +481,7 @@ enum
   NMERGE_OPTION,
   RANDOM_SOURCE_OPTION,
   SORT_OPTION,
-  THREADS_OPTION
+  PARALLEL_OPTION
 };
 
 static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
@@ -514,7 +514,7 @@ static struct option const long_options[] =
   {"temporary-directory", required_argument, NULL, 'T'},
   {"unique", no_argument, NULL, 'u'},
   {"zero-terminated", no_argument, NULL, 'z'},
-  {"threads", required_argument, NULL, THREADS_OPTION},
+  {"parallel", required_argument, NULL, PARALLEL_OPTION},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0},
@@ -1391,7 +1391,7 @@ specify_nthreads (int oi, char c, char const *s)
   if (e != LONGINT_OK)
     xstrtol_fatal (e, oi, c, long_options, s);
   if (nthreads == 0)
-    error (SORT_FAILURE, 0, _("number of threads must be nonzero"));
+    error (SORT_FAILURE, 0, _("number in parallel must be nonzero"));
   return nthreads;
 }
 
@@ -2868,9 +2868,10 @@ queue_pop (struct merge_node_queue *const restrict queue)
       if (queue->priority_queue->count)
         node = (struct merge_node *) heap_remove_top (queue->priority_queue);
       else
-        /*  Go into conditional wait if no NODE is immediately
-            available. */
-        pthread_cond_wait (&queue->cond, &queue->mutex);
+        {
+          /* Go into conditional wait if no NODE is immediately available.  */
+          pthread_cond_wait (&queue->cond, &queue->mutex);
+        }
       pthread_mutex_unlock (&queue->mutex);
     }
   lock_node (node);
@@ -2935,20 +2936,26 @@ merge_node (struct merge_node *const restrict node, const size_t total_lines,
     {
       /* Merge directly to output. */
       while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--)
-        if (compare (node->lo - 1, node->hi - 1) <= 0)
-          write_unique (--node->lo, tfp, temp_output);
-        else
-          write_unique (--node->hi, tfp, temp_output);
+        {
+          if (compare (node->lo - 1, node->hi - 1) <= 0)
+            write_unique (--node->lo, tfp, temp_output);
+          else
+            write_unique (--node->hi, tfp, temp_output);
+        }
 
       merged_lo = lo_orig - node->lo;
       merged_hi = hi_orig - node->hi;
 
       if (node->nhi == merged_hi)
-        while (node->lo != node->end_lo && to_merge--)
-          write_unique (--node->lo, tfp, temp_output);
+        {
+          while (node->lo != node->end_lo && to_merge--)
+            write_unique (--node->lo, tfp, temp_output);
+        }
       else if (node->nlo == merged_lo)
-        while (node->hi != node->end_hi && to_merge--)
-          write_unique (--node->hi, tfp, temp_output);
+        {
+          while (node->hi != node->end_hi && to_merge--)
+            write_unique (--node->hi, tfp, temp_output);
+        }
       node->dest -= lo_orig - node->lo + hi_orig - node->hi;
     }
 
@@ -2995,9 +3002,11 @@ update_parent (struct merge_node *const restrict node, size_t merged,
       unlock_node (node->parent);
     }
   else if (node->nlo + node->nhi == 0)
-    /* If the MERGE_ROOT NODE has finished merging, insert the
-       MERGE_END node. */
-    queue_insert (queue, node->parent);
+    {
+      /* If the MERGE_ROOT NODE has finished merging, insert the
+         MERGE_END node.  */
+      queue_insert (queue, node->parent);
+    }
 }
 
 /* Repeatedly pop QUEUE for a NODE with lines to merge, and merge at least
@@ -3125,7 +3134,7 @@ sortlines (struct line *restrict lines, struct line *restrict dest,
         sequential_sort (lines, nlo, temp, false);
 
       /* Update merge NODE. No need to lock yet. */
-      nodelo = lines;
+      node.lo = lines;
       node.hi = lines - nlo;
       node.end_lo = lines - nlo;
       node.end_hi = lines - nlo - nhi;
@@ -3379,7 +3388,6 @@ sort (char * const *files, size_t nfiles, char const *output_file,
       while (fillbuf (&buf, fp, file))
         {
           struct line *line;
-          struct line *linebase;
 
           if (buf.eof && nfiles
               && (bytes_per_line + 1
@@ -3984,7 +3992,7 @@ main (int argc, char **argv)
           add_temp_dir (optarg);
           break;
 
-        case THREADS_OPTION:
+        case PARALLEL_OPTION:
           nthreads = specify_nthreads (oi, c, optarg);
           break;
 
@@ -4195,7 +4203,7 @@ main (int argc, char **argv)
   else
     {
       /* If NTHREADS > number of cores on the machine, spinlocking
-         could be wasteful. */
+         could be wasteful.  */
       unsigned long int np2 = num_processors (NPROC_CURRENT_OVERRIDABLE);
       if (!nthreads || nthreads > np2)
         nthreads = np2;
diff --git a/tests/misc/sort-benchmark-random b/tests/misc/sort-benchmark-random
index d4c9b2a..0c18f9f 100644
--- a/tests/misc/sort-benchmark-random
+++ b/tests/misc/sort-benchmark-random
@@ -27,11 +27,6 @@ fi
 
 very_expensive_
 
-# Use the 'C' locale to avoid problems in case Perl's sort isn't stable.
-LC_ALL=C
-export LC_ALL
-
-
 perl -e '
 my $num_lines = 500000;
 my $length = 100;
@@ -55,11 +50,7 @@ print sort(@list);
 close (FILE);
 ' > exp || framework_failure
 
-#start=$(date +%s)
 time sort in > out || fail=1
-#duration=$(expr $(date +%s) - $start)
-
-#echo sorting the random data took $duration seconds
 
 compare out exp || fail=1
 

Reply via email to