On 64-bit systems, when sorting int-sized elements our qsort will
swap elements byte by byte.  On 32-bit systems where sizeof(int)
== sizeof(long) this is not an issue.

Adding support for int-sized swaps results in a nice speedup when
sorting ints (like the qsort regress).  Another way to look at it
is that this fixes a run-time regression compared to 32-bit systems.

We only need to set swaptype once since the element size never
changes and we always increment the array by the element size so
the array alignment is also consistent.

Basic idea from FreeBSD.

 - todd

Index: lib/libc/stdlib/qsort.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/qsort.c,v
retrieving revision 1.16
diff -u -p -u -r1.16 qsort.c
--- lib/libc/stdlib/qsort.c     20 May 2017 12:48:56 -0000      1.16
+++ lib/libc/stdlib/qsort.c     20 May 2017 14:08:40 -0000
@@ -51,6 +51,15 @@ static __inline void  swapfunc(char *, c
  *   3. Tail recursion is eliminated when sorting the larger of two
  *     subpartitions to save stack space.
  */
+#define SWAPTYPE_BYTEV 1
+#define SWAPTYPE_INTV  2
+#define SWAPTYPE_LONGV 3
+#define SWAPTYPE_INT   4
+#define SWAPTYPE_LONG  5
+
+#define TYPE_ALIGNED(TYPE, a, es)                      \
+       (((char *)a - (char *)0) % sizeof(TYPE) == 0 && es % sizeof(TYPE) == 0)
+
 #define swapcode(TYPE, parmi, parmj, n) {              \
        size_t i = (n) / sizeof (TYPE);                 \
        TYPE *pi = (TYPE *) (parmi);                    \
@@ -62,25 +71,42 @@ static __inline void         swapfunc(char *, c
         } while (--i > 0);                             \
 }
 
-#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
-       es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
-
 static __inline void
 swapfunc(char *a, char *b, size_t n, int swaptype)
 {
-       if (swaptype <= 1) 
-               swapcode(long, a, b, n)
-       else
-               swapcode(char, a, b, n)
+       switch (swaptype) {
+       case SWAPTYPE_INT:
+       case SWAPTYPE_INTV:
+               swapcode(int, a, b, n);
+               break;
+       case SWAPTYPE_LONG:
+       case SWAPTYPE_LONGV:
+               swapcode(long, a, b, n);
+               break;
+       default:
+               swapcode(char, a, b, n);
+               break;
+       }
 }
 
-#define swap(a, b)                                     \
-       if (swaptype == 0) {                            \
+#define swap(a, b)     do {                            \
+       switch (swaptype) {                             \
+       case SWAPTYPE_INT: {                            \
+               int t = *(int *)(a);                    \
+               *(int *)(a) = *(int *)(b);              \
+               *(int *)(b) = t;                        \
+               break;                                  \
+           }                                           \
+       case SWAPTYPE_LONG: {                           \
                long t = *(long *)(a);                  \
                *(long *)(a) = *(long *)(b);            \
                *(long *)(b) = t;                       \
-       } else                                          \
-               swapfunc(a, b, es, swaptype)
+               break;                                  \
+           }                                           \
+       default:                                        \
+               swapfunc(a, b, es, swaptype);           \
+       }                                               \
+} while (0)
 
 #define vecswap(a, b, n)       if ((n) > 0) swapfunc(a, b, n, swaptype)
 
@@ -93,11 +119,11 @@ med3(char *a, char *b, char *c, int (*cm
 }
 
 static void
-introsort(char *a, size_t n, size_t es, size_t maxdepth,
+introsort(char *a, size_t n, size_t es, size_t maxdepth, int swaptype,
     int (*cmp)(const void *, const void *))
 {
        char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
-       int cmp_result, swaptype;
+       int cmp_result;
        size_t r, s;
 
 loop:  if (maxdepth == 0) {
@@ -105,7 +131,6 @@ loop:       if (maxdepth == 0) {
                        return;
        }
        maxdepth--;
-       SWAPINIT(a, es);
        if (n < 7) {
                for (pm = a + es; pm < a + n * es; pm += es)
                        for (pl = pm; pl > a && cmp(pl - es, pl) > 0;
@@ -164,8 +189,10 @@ loop:      if (maxdepth == 0) {
        if (r < s) {
                /* Recurse for 1st side, iterate for 2nd side. */
                if (s > es) {
-                       if (r > es)
-                               introsort(a, r / es, es, maxdepth, cmp);
+                       if (r > es) {
+                               introsort(a, r / es, es, maxdepth,
+                                   swaptype, cmp);
+                       }
                        a = pn - s;
                        n = s / es;
                        goto loop;
@@ -173,8 +200,10 @@ loop:      if (maxdepth == 0) {
        } else {
                /* Recurse for 2nd side, iterate for 1st side. */
                if (r > es) {
-                       if (s > es)
-                               introsort(pn - s, s / es, es, maxdepth, cmp);
+                       if (s > es) {
+                               introsort(pn - s, s / es, es, maxdepth,
+                                   swaptype, cmp);
+                       }
                        n = r / es;
                        goto loop;
                }
@@ -185,13 +214,22 @@ void
 qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *))
 {
        size_t i, maxdepth = 0;
+       int swaptype;
 
        /* Approximate 2*ceil(lg(n + 1)) */
        for (i = n; i > 0; i >>= 1)
                maxdepth++;
        maxdepth *= 2;
 
-       introsort(a, n, es, maxdepth, cmp);
+       if (TYPE_ALIGNED(long, a, es))
+               swaptype = es == sizeof(long) ? SWAPTYPE_LONG : SWAPTYPE_LONGV;
+       else if (sizeof(int) != sizeof(long) && TYPE_ALIGNED(int, a, es))
+               swaptype = es == sizeof(int) ? SWAPTYPE_INT : SWAPTYPE_INTV;
+       else
+               swaptype = SWAPTYPE_BYTEV;
+
+       introsort(a, n, es, maxdepth, swaptype, cmp);
+
 }
 
 DEF_STRONG(qsort);

Reply via email to