Christopher Seiwald writes:
> The FreeBSD qsort implementation has a rather nasty degenerate case.
> If you have data that partitions exactly about the median pivot, yet
> which is unsorted in either partition, you get get treated to an insertion
> sort. Example:
>
> 1 2 3 4 5 6 7 8 9 10 19 18 17 16 14 14 13 12 11
>
> qsort picks 10 as the median, does no swaps on its first pass, and so
> decides to switch to an insertion sort. The idea is that if no swaps
> occur, the data is likely to be nearly already sorted and a good candidate
> for insertion sort. This turns out to be a (very) bad idea if you have
> some unsorted data buried in the middle of a (very) large dataset.
>
> The implementation claims to come from Bentley and McIlroy's paper,
> "Engineering a Sort Function," and this is largely true, but the insertion
> sort optimization(?) isn't in that paper. The GNU qsort.c only does an
> insertion sort if it is below a certain threshhold in size, and so never
> attempts such a sort on the whole dataset. (The GNU qsort does a number
> of other things pooh-poohed in the Bentley paper.)
>
> It's a pretty straightforward change to bypass the insertion sort for
> large subsets of the data. If no one has a strong love for qsort, I'll
> educate myself on how to make and contribute this change.
How about the code following sig. ?
And the other codes and information on:
http://www.mars.dti.ne.jp/~a-wada/qsortlib.html
--
Akira Wada <[EMAIL PROTECTED]>
---------------------------------------------------------------------------------
/* a-wada/qsortlib/ccn/qsorts.c Mar. 30, 1999 */
/* Copyright (c) 1999 Akira Wada <[EMAIL PROTECTED]> */
/* Quick sort compatible with "qsort (); in <stdlib.h>" */
/* efficiency by reducing the times of key comparisons and */
/* word_mode_swapping for word_aligned object */
/* prevention from the degeneration caused by peculiar input */
/* This qsort is not stable, but could avoid the bad behavior by many */
/* equal sort-keys. For stability, add a unique-key to compare-function. */
/* If the object to be sorted is an array of pointers to structs with */
/* an unsigned integer key in ascending order, set the 3'rd argument */
/* to the key position in bytes and the 4'th to NULL, then */
/* radixsort would be applied. for example : */
/* kps = 16; qsort (pv, n, kps, (ftp) 0); */
#include <time.h>
#include <stdlib.h>
void radixspi (void *, size_t, size_t);
void srandom (unsigned long); /* if not in <stdlib.h> */
unsigned long random (); /* if not in <stdlib.h> */
#define MDT 16 /* threshold for median of the 5 or the more, MDT >= 8 */
#define MDA 2 /* adjusting threshold slightly, MDT + MDA >= 4 */
#define MDM 2 /* multiplier for threshold, MDM >= 2 */
#define DGT 256 /* threshold for checking degeneration */
#define DGV 16 /* degeneration assumed if the smaller < DGL */
#define DGM 0 /* threshold for selecting no median */
#define DGS 2 /* threshold for samples distributed uniformly */
#define DGU 4 /* threshold for sampling at random */
#define DGL ((unsigned) psz / DGV) * rl
#define defdgn int dgn = 0, nsp, itv
#define ckdgnr(s, e) if (psz >= DGT && s + DGL > e) dgn++
#define no_med (psz > MDT && DGM < dgn && dgn <= DGS)
#define symptm (psz > MDT && dgn > DGS)
#define xcsmpl() do { \
if (dgn <= DGU) { /* samples distributed uniformly */\
nsp = 3; while (psz > thr) thr *= MDM, nsp += 2; \
itv = ((unsigned) psz / (nsp - 1)) * rl; k = l, n = r; \
for (thr = MDT; psz > thr; thr *= MDM) { \
i += rl, k += itv; swap (i, k); \
j -= rl, n -= itv; swap (n, j);}} \
else { /* samples at random */\
if (dgn == DGU + 1) dgn++, srandom (time (0)); \
for (thr = (unsigned) thr / MDM; psz > thr; thr *= MDM) { \
rdmsmpl (i, i, j); i += rl; \
rdmsmpl (j, i, j); j -= rl;} \
rdmsmpl (m, i, j);} \
i = l, j = r, thr = MDT;} while (0)
#define rdmsmpl(x, s, e) if (x != (n = s + rl * ( \
random () % ((unsigned) (e - (s)) / rl + 1)))) swap (x, n)
#define swpwrk int t, w, z = sizeof (int), alg = ((int) av | rl) & (z - 1)
#define swap(i, j) do { \
w = rl; if (alg == 0) do \
w -= z, t = *((int *) (i + w)), \
*((int *) (i + w)) = *((int *) (j + w)), \
*((int *) (j + w)) = t; while (w > 0); \
else do \
--w, t = *(i + w), *(i + w) = *(j + w), *(j + w) = t; \
while (w > 0);} while (0)
#define rotate(i, j, k) do { \
w = rl; if (alg == 0) do \
w -= z, t = *((int *) (i + w)), \
*((int *) (i + w)) = *((int *) (j + w)), \
*((int *) (j + w)) = *((int *) (k + w)), \
*((int *) (k + w)) = t; while (w > 0); \
else do \
--w, t = *(i + w), *(i + w) = *(j + w), \
*(j + w) = *(k + w), *(k + w) = t; while (w > 0);} while (0)
#define findxc(minmax, x, s, e, m) do { \
n = x, k = s; do \
if ((*qcmp) minmax > 0) n = k; while ((k += rl) <= e); \
if (n == x) swap (m, x); else rotate (m, n, x);} while (0)
#define MIN (n, k)
#define MAX (k, n)
#define cksrtd(l, r) if (psz > MDT) do { \
k = l; while (k < r && (*qcmp)(k, k + rl) <= 0) k += rl; \
if (k >= r) i = j; else if (k >= m) i = m;} while (0)
#define stack ptrtyp s[stksz], *p = s
#define stksz sizeof (size_t) * 8 * 2
#define push(x, y) *(p++) = x, *(p++) = y
#define empty (s >= p)
#define pop(y, x) y = *(--p), x = *(--p)
#define defwrk ptrtyp k, n; defdgn; swpwrk; stack
typedef int (*ftp) (const void *, const void *);
typedef char * ptrtyp;
void qsort (void *av, size_t rn, size_t rl, ftp qcmp) {
ptrtyp l, r, m, i, j; int psz, thr, cst, c; defwrk;
if (qcmp == (ftp) 0) radixspi (av, rn, rl); else /* call RADIX SORT */
if (rn > 1 && rl > 0) {
l = (char *) av, r = l + (rn - 1) * rl;
for ( ; ; ) { /* initialize for current partition */
m = l + ((unsigned) (psz = (r - l) / rl) / 2) * rl,
i = l, j = r, thr = MDT, psz -= (MDA), cst = 1;
if (!no_med) {
if (symptm) xcsmpl ();
for ( ; ; ) { /* bring median of samples to middle */
if ((*qcmp)(m, j) <= 0) {
if (i < m && (*qcmp)(i, m) > 0) {
findxc (MIN, i, j, r, m); cst = 0;}}
else {
if (i >= m || (*qcmp)(i, m) > 0) swap (i, j); else
findxc (MAX, j, l, i, m); cst = 0;}
i += rl, j -= rl;
if (psz > thr) thr *= MDM; else break;}}
if (cst != 0) cksrtd (l, r);
if (i < j) { /* do partitioning by middle as pivot */
for (n = m; ; ) { /* gathering equal keys close to pivot */
while (i < m) {
if ((c = (*qcmp)(i, m)) < 0) i += rl; else
if (c > 0) break; else {
while (i < (m -= rl) && (c = (*qcmp)(m, i)) == 0);
if (i >= m) break; swap (i, m);
if (c > 0) break; else i += rl;}}
while (n < j) {
if ((c = (*qcmp)(n, j)) < 0) j -= rl; else
if (c > 0) break; else {
while ((n += rl) < j && (c = (*qcmp)(j, n)) == 0);
if (n >= j) break; swap (n, j);
if (c > 0) break; else j -= rl;}}
if (i >= m && n >= j) break;
if (n == j) {
if (m == n) {
swap (i, j); j -= rl, n = m = i;} else
if (i < (m -= rl)) {
rotate (i, m, j); n = j -= rl;} else {
swap (i, j); j -= rl; break;}} else
if (i == m) {
if (n == m) {
swap (i, j); i += rl; m = n = j;} else
if ((n += rl) < j) {
rotate (j, n, i); m = i += rl;} else {
swap (i, j); i += rl; break;}} else {
swap (i, j); j -= rl, i += rl;}}
/* select subpartition for next iteration */
if ((i -= rl) - l < r - (j += rl)) {
ckdgnr (l, j); if (l >= i) l = j; else {
push (j, r), r = i; continue;}}
else {
ckdgnr (i, r); if (j >= r) r = i; else {
push (l, i), l = j; continue;}}
if (l < r) continue;}
/* pop up next partition from stack */
if (!empty) pop (r, l); else break;}}}
#include "radixspi-ar.c"
/* a-wada/qsortlib/ccn/qsorts.c end */
/*----------------------------------------------------------------------------**
*source codes on :
radix_ar.c (original for array of unsigned longs by Andre Reinald)
http://www.cubic.org/~submissive/sourcerer/download/radix_ar.c 25-May-97
radixspi-ar.c (modified for array of pointers to structs with u_int key)
http://www.mars.dti.ne.jp/~a-wada/qsortlib/ccn/radixspi-ar.c
**----------------------------------------------------------------------------*/
To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-hackers" in the body of the message