Author: sewardj
Date: 2008-02-18 01:58:23 +0000 (Mon, 18 Feb 2008)
New Revision: 7422

Log:

Replace VG_(ssort) -- the shellsort routine -- with a Bentley-McIlroy
style 3-way partitioning quicksort algorithm.  The latter really ought
to be the gold standard in quicksorts, but doesn't seem widely used.
It uses slightly fewer instructions than the shellsort, and trashes
the caches a lot less, particularly D1, when sorting large arrays.
Consequently is considerably faster.



Modified:
   branches/DATASYMS/coregrind/m_libcbase.c


Modified: branches/DATASYMS/coregrind/m_libcbase.c
===================================================================
--- branches/DATASYMS/coregrind/m_libcbase.c    2008-02-17 20:54:12 UTC (rev 
7421)
+++ branches/DATASYMS/coregrind/m_libcbase.c    2008-02-18 01:58:23 UTC (rev 
7422)
@@ -579,6 +579,144 @@
    Misc useful functions
    ------------------------------------------------------------------ */
 
+/////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////
+/// begin Bentley-McIlroy style quicksort
+/// See "Engineering a Sort Function".  Jon L Bentley, M. Douglas
+/// McIlroy.  Software Practice and Experience Vol 23(11), Nov 1993.
+
+#define BM_MIN(a, b)                                     \
+   (a) < (b) ? a : b
+
+#define BM_SWAPINIT(a, es)                               \
+   swaptype =   ((a-(Char*)0) | es) % sizeof(Word)  ? 2  \
+              : es > (SizeT)sizeof(Word) ? 1             \
+              : 0
+
+#define BM_EXCH(a, b, t)                                 \
+   (t = a, a = b, b = t)
+
+#define BM_SWAP(a, b)                                    \
+   swaptype != 0                                         \
+      ? bm_swapfunc(a, b, es, swaptype)                  \
+      : (void)BM_EXCH(*(Word*)(a), *(Word*)(b), t)
+
+#define BM_VECSWAP(a, b, n)                              \
+   if (n > 0) bm_swapfunc(a, b, n, swaptype)
+
+#define BM_PVINIT(pv, pm)                                \
+   if (swaptype != 0)                                    \
+      pv = a, BM_SWAP(pv, pm);                           \
+   else                                                  \
+      pv = (Char*)&v, v = *(Word*)pm
+
+static Char* bm_med3 ( Char* a, Char* b, Char* c, 
+                       Int (*cmp)(void*,void*) ) {
+   return cmp(a, b) < 0
+          ? (cmp(b, c) < 0 ? b : cmp(a, c) < 0 ? c : a)
+          : (cmp(b, c) > 0 ? b : cmp(a, c) > 0 ? c : a);
+}
+
+static void bm_swapfunc ( Char* a, Char* b, SizeT n, Int swaptype )
+{
+   if (swaptype <= 1) {
+      Word t;
+      for ( ; n > 0; a += sizeof(Word), b += sizeof(Word),
+                                        n -= sizeof(Word))
+         BM_EXCH(*(Word*)a, *(Word*)b, t);
+   } else {
+      Char t;
+      for ( ; n > 0; a += 1, b += 1, n -= 1)
+         BM_EXCH(*a, *b, t);
+   }
+}
+
+static void bm_qsort ( Char* a, SizeT n, SizeT es,
+                       Int (*cmp)(void*,void*) )
+{
+   Char  *pa, *pb, *pc, *pd, *pl, *pm, *pn, *pv;
+   Int   r, swaptype;
+   Word  t, v;
+   SizeT s, s1, s2;
+  tailcall:
+   BM_SWAPINIT(a, es);
+   if (n < 7) {
+      for (pm = a + es; pm < a + n*es; pm += es)
+         for (pl = pm; pl > a && cmp(pl-es, pl) > 0; pl -= es)
+            BM_SWAP(pl, pl-es);
+      return;
+   }
+   pm = a + (n/2)*es;
+   if (n > 7) {
+      pl = a;
+      pn = a + (n-1)*es;
+      if (n > 40) {
+         s = (n/8)*es;
+         pl = bm_med3(pl, pl+s, pl+2*s, cmp);
+         pm = bm_med3(pm-s, pm, pm+s, cmp);
+         pn = bm_med3(pn-2*s, pn-s, pn, cmp);
+      }
+      pm = bm_med3(pl, pm, pn, cmp);
+   }
+   BM_PVINIT(pv, pm);
+   pa = pb = a;
+   pc = pd = a + (n-1)*es;
+   for (;;) {
+      while (pb <= pc && (r = cmp(pb, pv)) <= 0) {
+         if (r == 0) { BM_SWAP(pa, pb); pa += es; }
+         pb += es;
+      }
+      while (pc >= pb && (r = cmp(pc, pv)) >= 0) {
+         if (r == 0) { BM_SWAP(pc, pd); pd -= es; }
+         pc -= es;
+      }
+      if (pb > pc) break;
+      BM_SWAP(pb, pc);
+      pb += es;
+      pc -= es;
+   }
+   pn = a + n*es;
+   s = BM_MIN(pa-a,  pb-pa   ); BM_VECSWAP(a,  pb-s, s);
+   s = BM_MIN(pd-pc, pn-pd-es); BM_VECSWAP(pb, pn-s, s);
+   /* Now recurse.  Do the smaller partition first with an explicit
+      recursion, then do the larger partition using a tail call.
+      Except we can't rely on gcc to implement a tail call in any sane
+      way, so simply jump back to the start.  This guarantees stack
+      growth can never exceed O(log N) even in the worst case. */
+   s1 = pb-pa;
+   s2 = pd-pc;
+   if (s1 < s2) {
+      if (s1 > es) {
+         bm_qsort(a, s1/es, es, cmp);
+      }
+      if (s2 > es) {
+         /* bm_qsort(pn-s2, s2/es, es, cmp); */
+         a = pn-s2; n = s2/es; es = es; cmp = cmp;
+         goto tailcall;
+      }
+   } else {
+      if (s2 > es) {
+         bm_qsort(pn-s2, s2/es, es, cmp);
+      }
+      if (s1 > es) {
+         /* bm_qsort(a, s1/es, es, cmp); */
+         a = a; n = s1/es; es = es; cmp = cmp;
+         goto tailcall;
+      } 
+   }
+}
+
+#undef BM_MIN
+#undef BM_SWAPINIT
+#undef BM_EXCH
+#undef BM_SWAP
+#undef BM_VECSWAP
+#undef BM_PVINIT
+
+/// end Bentley-McIlroy style quicksort
+/////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////
+
 /* Returns the base-2 logarithm of x.  Returns -1 if x is not a power
    of two. */
 Int VG_(log2) ( UInt x ) 
@@ -596,110 +734,10 @@
 void VG_(ssort)( void* base, SizeT nmemb, SizeT size,
                  Int (*compar)(void*, void*) )
 {
-   Int   incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
-                      9841, 29524, 88573, 265720,
-                      797161, 2391484 };
-   Int   lo = 0;
-   Int   hi = nmemb-1;
-   Int   i, j, h, bigN, hp;
+   bm_qsort(base,nmemb,size,compar);
+}
 
-   bigN = hi - lo + 1; if (bigN < 2) return;
-   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
 
-   #define SORT \
-   for ( ; hp >= 0; hp--) { \
-      h = incs[hp]; \
-      for (i = lo + h; i <= hi; i++) { \
-         ASSIGN(v,0, a,i); \
-         j = i; \
-         while (COMPAR(a,(j-h), v,0) > 0) { \
-            ASSIGN(a,j, a,(j-h)); \
-            j = j - h; \
-            if (j <= (lo + h - 1)) break; \
-         } \
-         ASSIGN(a,j, v,0); \
-      } \
-   }
-
-   // Specialised cases
-   if (sizeof(ULong) == size) {
-
-      #define ASSIGN(dst, dsti, src, srci) \
-      (dst)[(dsti)] = (src)[(srci)];      
-      
-      #define COMPAR(dst, dsti, src, srci) \
-      compar( (void*)(& (dst)[(dsti)]), (void*)(& (src)[(srci)]) )
-
-      ULong* a = (ULong*)base;
-      ULong  v[1];
-
-      SORT;
-
-   } else if (sizeof(UInt) == size) {
-
-      UInt* a = (UInt*)base;
-      UInt  v[1];
-
-      SORT;
-
-   } else if (sizeof(UShort) == size) {
-      UShort* a = (UShort*)base;
-      UShort  v[1];
-
-      SORT;
-
-   } else if (sizeof(UChar) == size) {
-      UChar* a = (UChar*)base;
-      UChar  v[1];
-
-      SORT;
-
-      #undef ASSIGN
-      #undef COMPAR
-
-   } else if ( (4*sizeof(UWord)) == size ) {
-      /* special-case 4 word-elements.  This captures a lot of cases
-         from symbol table reading/canonicalisaton, because both DiLoc
-         and DiSym are 4 word structures. */
-      HChar* a = base;
-      HChar  v[size];
-
-      #define ASSIGN(dst, dsti, src, srci) \
-       do { UWord* dP = (UWord*)&dst[size*(dsti)]; \
-            UWord* sP = (UWord*)&src[size*(srci)]; \
-            dP[0] = sP[0]; \
-            dP[1] = sP[1]; \
-            dP[2] = sP[2]; \
-            dP[3] = sP[3]; \
-          } while (0)
-
-      #define COMPAR(dst, dsti, src, srci) \
-      compar( &dst[size*(dsti)], &src[size*(srci)] )
-
-      SORT;
-
-      #undef ASSIGN
-      #undef COMPAR
-
-   // General case
-   } else {
-      HChar* a = base;
-      HChar  v[size];      // will be at least 'size' bytes
-
-      #define ASSIGN(dst, dsti, src, srci) \
-      VG_(memcpy)( &dst[size*(dsti)], &src[size*(srci)], size );
-
-      #define COMPAR(dst, dsti, src, srci) \
-      compar( &dst[size*(dsti)], &src[size*(srci)] )
-
-      SORT;
-
-      #undef ASSIGN
-      #undef COMPAR
-   }
-   #undef SORT
-}
-
 // This random number generator is based on the one suggested in Kernighan
 // and Ritchie's "The C Programming Language".
 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Valgrind-developers mailing list
Valgrind-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/valgrind-developers

Reply via email to