With random data and a 64-way merge this sped up 'sort' by 5% on our tests. Sorry, I don't know how to put multiple authors in the patch. However, the authors are:
Alexander Nguyen <v...@seas.ucla.edu> Paul Eggert <egg...@cs.ucla.edu> We've both signed papers. ----- src/sort.c (mergefps): When merging a line with a K-way merge, update a heap rather than doing a linear insertion. This improves the overall performance of 'sort' from O(N*K) to O(N log K). (less_than, swap): New functions. (sortlines_temp): Rename local variable so that it doesn't shadow the new 'swap' function. --- src/sort.c | 140 +++++++++++++++++++++++++++++------------------------------- 1 files changed, 67 insertions(+), 73 deletions(-) diff --git a/src/sort.c b/src/sort.c index 7b0b064..0e22aff 100644 --- a/src/sort.c +++ b/src/sort.c @@ -2060,6 +2060,25 @@ compare (const struct line *a, const struct line *b) return reverse ? -diff : diff; } +/* If CUR is a vector of lines, and ORD a vector of indexes into CUR, + then return true if the line ORD[A] less less than the line ORD[B] + in CUR, using a stable comparison. */ +static bool +less_than (struct line const *const *cur, size_t const *ord, + size_t a, size_t b) +{ + return compare (cur[ord[a]], cur[ord[b]]) < (ord[a] < ord[b]); +} + +/* Swap *A and *B. */ +static inline void +swap (size_t *a, size_t *b) +{ + size_t t = *a; + *a = *b; + *b = t; +} + /* Check that the lines read from FILE_NAME come in order. Return true if they are in order. If CHECKONLY == 'c', also print a diagnostic (FILE_NAME, line number, contents of line) to stderr if @@ -2173,60 +2192,56 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles, struct line const **base = xnmalloc (nmerge, sizeof *base); /* Base of each line table. */ size_t *ord = xnmalloc (nmerge, sizeof *ord); - /* Table representing a permutation of fps, - such that cur[ord[0]] is the smallest line - and will be next output. */ - size_t i; - size_t j; - size_t t; + /* Zero-based binary heap of run numbers for + selecting the minimum line to output next, + referenced by cur[ord[0]]. */ + size_t nopen = 0; struct keyfield const *key = keylist; saved.text = NULL; /* Read initial lines from each input file. */ - for (i = 0; i < nfiles; ) + for (size_t i = 0; i < nfiles; i++) { fps[i] = (files[i].pid ? open_temp (files[i].name, files[i].pid) : xfopen (files[i].name, "r")); initbuf (&buffer[i], sizeof (struct line), - MAX (merge_buffer_size, sort_size / nfiles)); + MAX (merge_buffer_size, sort_size / (nfiles - (i - nopen)))); if (fillbuf (&buffer[i], fps[i], files[i].name)) { struct line const *linelim = buffer_linelim (&buffer[i]); cur[i] = linelim - 1; base[i] = linelim - buffer[i].nlines; - i++; + ord[nopen++] = i; } else { /* fps[i] is empty; eliminate it from future consideration. */ xfclose (fps[i], files[i].name); if (i < ntemps) - { - ntemps--; - zaptemp (files[i].name); - } + zaptemp (files[i].name); free (buffer[i].buf); - --nfiles; - for (j = i; j < nfiles; ++j) - files[j] = files[j + 1]; } } if (! ofp) ofp = xfopen (output_file, "w"); - /* Set up the ord table according to comparisons among input lines. - Since this only reorders two items if one is strictly greater than - the other, it is stable. */ - for (i = 0; i < nfiles; ++i) - ord[i] = i; - for (i = 1; i < nfiles; ++i) - if (0 < compare (cur[ord[i - 1]], cur[ord[i]])) - t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; + /* Set up the heap. Since this never reorders two lines when the + parent equals the current line, the resulting root must be the + smallest line, even if stability is taken into account. */ + for (size_t j = 1; j < nopen; j++) + for (size_t current = j; 0 < current; ) + { + size_t parent = (current - 1) / 2; + if (compare (cur[ord[parent]], cur[ord[current]]) <= 0) + break; + swap (&ord[current], &ord[parent]); + current = parent; + } /* Repeatedly output the smallest line until no input remains. */ - while (nfiles) + while (nopen != 0) { struct line const *smallest = cur[ord[0]]; @@ -2282,57 +2297,36 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles, else { /* We reached EOF on fps[ord[0]]. */ - for (i = 1; i < nfiles; ++i) - if (ord[i] > ord[0]) - --ord[i]; - --nfiles; xfclose (fps[ord[0]], files[ord[0]].name); if (ord[0] < ntemps) - { - ntemps--; - zaptemp (files[ord[0]].name); - } + zaptemp (files[ord[0]].name); free (buffer[ord[0]].buf); - for (i = ord[0]; i < nfiles; ++i) - { - fps[i] = fps[i + 1]; - files[i] = files[i + 1]; - buffer[i] = buffer[i + 1]; - cur[i] = cur[i + 1]; - base[i] = base[i + 1]; - } - for (i = 0; i < nfiles; ++i) - ord[i] = ord[i + 1]; - continue; + + /* Shrink the ord array by replacing the root with the + last line on the last level. This line will be + sifted down in the loop below. */ + ord[0] = ord[--nopen]; } } /* The new line just read in may be larger than other lines - already in main memory; push it back in the queue until we - encounter a line larger than it. Optimize for the common - case where the new line is smallest. */ - { - size_t lo = 1; - size_t hi = nfiles; - size_t probe = lo; - size_t ord0 = ord[0]; - size_t count_of_smaller_lines; - - while (lo < hi) - { - int cmp = compare (cur[ord0], cur[ord[probe]]); - if (cmp < 0 || (cmp == 0 && ord0 < ord[probe])) - hi = probe; - else - lo = probe + 1; - probe = (lo + hi) / 2; - } - - count_of_smaller_lines = lo - 1; - for (j = 0; j < count_of_smaller_lines; j++) - ord[j] = ord[j + 1]; - ord[count_of_smaller_lines] = ord0; - } + already in main memory. Sift down the heap. */ + for (size_t current = 0; ; ) + { + size_t left = 2 * current + 1; + if (nopen <= left) + break; + size_t right = left + 1; + size_t winner = current; + if (less_than (cur, ord, left, winner)) + winner = left; + if (right < nopen && less_than (cur, ord, right, winner)) + winner = right; + if (winner == current) + break; + swap (&ord[current], &ord[winner]); + current = winner; + } /* Free up some resources every once in a while. */ if (MAX_PROCS_BEFORE_REAP < nprocs) @@ -2439,12 +2433,12 @@ sortlines_temp (struct line *lines, size_t nlines, struct line *temp) { if (nlines == 2) { - /* Declare `swap' as int, not bool, to work around a bug + /* Declare `swap_offset' as int, not bool, to work around a bug <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html> in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */ - int swap = (0 < compare (&lines[-1], &lines[-2])); - temp[-1] = lines[-1 - swap]; - temp[-2] = lines[-2 + swap]; + int swap_offset = (0 < compare (&lines[-1], &lines[-2])); + temp[-1] = lines[-1 - swap_offset]; + temp[-2] = lines[-2 + swap_offset]; } else { -- 1.5.3.2 _______________________________________________ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils