On Sat, 20 May 2017 15:27:06 -0600, "Todd C. Miller" wrote:

> One optimization implemented in the sample code from "Engineering
> a Sort Function" that our qsort lacks is storing the partition value
> out of line when convenient.  Currently, we swap the partition value
> into a[0], but this can significantly degrade performance when the
> array is sorted in reverse or near-reverse order.
> 
> Since we don't want to allocate memory to store the value, only do
> this when the elements of the array are int or long sized (which
> is often the case).  This speeds up the qsort  regress test a bit,
> which is probably due to the tests on reverse sorted input.
> 
> This diff requires my "support swapping int-sized elements" diff
> be applied first.

Fixed diff.

 - todd

Index: lib/libc/stdlib/qsort.c
===================================================================
--- /usr/src/lib/libc/stdlib/qsort.c    Sat May 20 08:08:08 2017
+++ /usr/src/lib/libc/stdlib/qsort.c    Mon May 22 13:17:35 2017
@@ -40,15 +40,12 @@
  * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
  *
  * This version differs from Bentley & McIlroy in the following ways:
- *   1. The partition value is swapped into a[0] instead of being
- *     stored out of line.
- *
- *   2. It uses David Musser's introsort algorithm to fall back to
+ *   1. It uses David Musser's introsort algorithm to fall back to
  *     heapsort(3) when the recursion depth reaches 2*lg(n + 1).
  *     This avoids quicksort's quadratic behavior for pathological
  *     input without appreciably changing the average run time.
  *
- *   3. Tail recursion is eliminated when sorting the larger of two
+ *   2. Tail recursion is eliminated when sorting the larger of two
  *     subpartitions to save stack space.
  */
 #define SWAPTYPE_BYTEV 1
@@ -57,6 +54,23 @@
 #define SWAPTYPE_INT   4
 #define SWAPTYPE_LONG  5
 
+#define PVINIT(pv, pm) do {                            \
+       switch (swaptype) {                             \
+       case SWAPTYPE_INT:                              \
+               pv = (char *)&v;                        \
+               v.i = *(int *)pm;                       \
+               break;                                  \
+       case SWAPTYPE_LONG:                             \
+               pv = (char *)&v;                        \
+               v.l = *(long *)pm;                      \
+               break;                                  \
+       default:                                        \
+               pv = a;                                 \
+               swap(pv, pm);                           \
+               break;                                  \
+       }                                               \
+} while(0)
+
 #define TYPE_ALIGNED(TYPE, a, es)                      \
        (((char *)a - (char *)0) % sizeof(TYPE) == 0 && es % sizeof(TYPE) == 0)
 
@@ -122,9 +136,13 @@
 introsort(char *a, size_t n, size_t es, size_t maxdepth, int swaptype,
     int (*cmp)(const void *, const void *))
 {
-       char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+       char *pa, *pb, *pc, *pd, *pl, *pm, *pn, *pv;
        int cmp_result;
        size_t r, s;
+       union {
+               int i;
+               long l;
+       } v;
 
 loop:  if (maxdepth == 0) {
                if (heapsort(a, n, es, cmp) == 0)
@@ -150,18 +168,18 @@
                }
                pm = med3(pl, pm, pn, cmp);
        }
-       swap(a, pm);
-       pa = pb = a + es;
+       PVINIT(pv, pm);         /* pv points to partition value */
+       pa = pb = a;
        pc = pd = a + (n - 1) * es;
        for (;;) {
-               while (pb <= pc && (cmp_result = cmp(pb, a)) <= 0) {
+               while (pb <= pc && (cmp_result = cmp(pb, pv)) <= 0) {
                        if (cmp_result == 0) {
                                swap(pa, pb);
                                pa += es;
                        }
                        pb += es;
                }
-               while (pb <= pc && (cmp_result = cmp(pc, a)) >= 0) {
+               while (pb <= pc && (cmp_result = cmp(pc, pv)) >= 0) {
                        if (cmp_result == 0) {
                                swap(pc, pd);
                                pd -= es;

Reply via email to