A Monday 11 February 2008, Charles R Harris escrigué:
> I'll check it out when I get home. As I say, it was running about 10%
> slower on my machine, but if it does better on most platforms it is
> probably the way to go. We can always change it in the future when
> everyone is running on quantum computers.

We've done some testing on newqsort in several computers in our company.  
Here are the results for ordering a list with 1 million of strings of 
length 15 filled with random information (using C rand()):

1) Ubuntu 7.1 (gcc 4.1.3, -O3, Intel Pentium 4 @ 2 GHz)
C qsort with C style compare: 2.450000
C qsort with Python style compare: 2.440000
NumPy newqsort: 0.650000

2) Windows XP (SP2) (MSVC 7.1, /Ox, Intel Pentium 4 @ 2 GHz)
C qsort with C style compare: 0.971000
C qsort with Python style compare: 0.962000
NumPy newqsort: 0.921000

3) SuSe LE 10.3 (gcc 4.2.1, -O3, AMD Opteron @ 2 GHz)
C qsort with C style compare: 0.640000
C qsort with Python style compare: 0.600000
NumPy newqsort: 0.590000

4) Debian 4.2.2 (lenny) (gcc 4.2.3, -O3, Intel Pentium 4 @ 3.2 GHz)
C qsort with C style compare: 1.770000
C qsort with Python style compare: 1.750000
NumPy newqsort: 0.440000

5) Mandriva 2008.0 (gcc 4.2.2, -O3, Intel Core2 Duo @ 1.5 GHz)
C qsort with C style compare: 1.590000
C qsort with Python style compare: 1.550000
NumPy newqsort: 0.510000

6) Ubuntu 7.1 (gcc 4.1.3, -O3, Intel Pentium 4 @ 2.5 GHz)
C qsort with C style compare: 1.890000
C qsort with Python style compare: 1.900000
NumPy newqsort: 0.500000

7) Ubuntu 7.1 (gcc 4.1.2, -O3, PowerPC 3 @ 1.3 GHz)
C qsort with C style compare: 3.030000
C qsort with Python style compare: 2.970000
NumPy newqsort: 1.040000

8) MacOSX 10.4 (Tiger) (gcc 4.0.1, -O3, PowerPC 3 @ 1.3 GHz)
C qsort with C style compare: 1.560000
C qsort with Python style compare: 1.510000
NumPy newqsort: 1.220000

All benchmarks have been run using the attached benchmark (if anybody 
else wants to join the fiesta, please report back your feedback).

Summarizing, one can say a couple of things:

* Recent Debian distros and derivatives (Ubuntu) as well as Mandriva are 
suffering from a innefficient system qsort (at least the implementation 
for strings).  SuSe Linux Enterprise 10.3 seems to have solved this.  
And Windows XP (SP2) and MacOSX (Tiger) looks like they have a 
relatively efficient implementation of qsort.

* The newqsort performs the best on all the platforms we have checked 
(ranging from a 5% of improvement on Opteron/SuSe, up to 3.8x with some 
Pentium4/Ubuntu systems).

All in all, I'd also say that newqsort would be a good candidate to be 
put into NumPy.

Cheers,

-- 
>0,0<   Francesc Altet     http://www.carabos.com/
V   V   Cárabos Coop. V.   Enjoy Data
 "-"
#include <stdlib.h>
#include <stdio.h>
#include <time.h>


#define NUM 1000000
#define LEN 15

/* This cannot be inlined by MSVC compiler */
/* static int */
/* compare_stringC(const void *s1, const void *s2) */
/* { */
/*   return strncmp(s1, s2, LEN); */
/* } */

static int
compare_stringC(const void *s1, const void *s2)
{
    const unsigned char *c1 = (unsigned char *)s1;
    const unsigned char *c2 = (unsigned char *)s2;
    size_t i;

    for(i = 0; i < LEN; ++i) {
        if (c1[i] != c2[i]) {
            return (c1[i] > c2[i]) ? 1 : -1;
        }
	if (c1[i] == 0) return -1;
    }
    return 0;
}


static int
compare_stringP(const void *s1, const void *s2)
{
    const unsigned char *c1 = (unsigned char *)s1;
    const unsigned char *c2 = (unsigned char *)s2;
    size_t i;

    for(i = 0; i < LEN; ++i) {
        if (c1[i] != c2[i]) {
            return (c1[i] > c2[i]) ? 1 : -1;
        }
    }
    return 0;
}



#define PYA_QS_STACK 100
#define SMALL_QUICKSORT 15

static int
compare_string(char *s1, char *s2, size_t len)
{
    const unsigned char *c1 = (unsigned char *)s1;
    const unsigned char *c2 = (unsigned char *)s2;
    size_t i;

    for(i = 0; i < len; ++i) {
        if (c1[i] != c2[i]) {
            return (c1[i] > c2[i]) ? 1 : -1;
        }
    }
    return 0;
}

#define STRING_LT(pa, pb, len) (compare_string(pa, pb, len) < 0)

#define swap_string3(s1, s2, len) { \
    memcpy((vp), (s2), (len));	    \
    memcpy((s2), (s1), (len));	    \
    memcpy((s1), (vp), (len));	    \
  }

static void
swap_string2(char *s1, char *s2, size_t len)
{
  size_t i;
  char t;
  for (i=0; i < len; i++) {
    t = s2[i];
    s2[i] = s1[i];
    s1[i] = t;
  }
}

static void
swap_string(char *s1, char *s2, size_t len)
{
    while(len--) {
        const char t = *s1;
        *s1++ = *s2;
        *s2++ = t;
    }
}


#define opt_memcpy(a,b,n) memcpy((a),(b),(n))

static void
copy_string(char *s1, char *s2, size_t len)
{
    while(len--) {
        *s1++ = *s2++;
    }
}

/* This seems to work faster on my laptop, but YMMV */
static void
opt_memcpy2(void *a, const void *b, size_t n) {
  size_t i;
  if (n<16)
	for (i=0; i<n; i++)
	  ((char *)a)[i] = ((char *)b)[i];
  else
	memcpy(a, b, n);
}


static int
newqsort(char *start, size_t num, size_t len)
{
    char *vp = malloc(len);
    char *pl = start;
    char *pr = start + (num - 1)*len;
    char *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk;

    for(;;) {
        while ((pr - pl) > 5*len) {
            /* quicksort partition */
            pm = pl + (((pr - pl)/len) >> 1)*len;
            if (STRING_LT(pm, pl, len)) swap_string(pm, pl, len);
            if (STRING_LT(pr, pm, len)) swap_string(pr, pm, len);
            if (STRING_LT(pm, pl, len)) swap_string(pm, pl, len);
            opt_memcpy(vp, pm, len);
            pi = pl;
            pj = pr - len;
            swap_string(pm, pj, len);
            for(;;) {
                do pi += len; while (STRING_LT(pi, vp, len));
                do pj -= len; while (STRING_LT(vp, pj, len));
                if (pi >= pj)  break;
                swap_string(pi, pj, len);
            }
            pk = pr - len;
            swap_string(pi, pk, len);
            /* push largest partition on stack */
            if (pi - pl < pr - pi) {
                *sptr++ = pi + len;
                *sptr++ = pr;
                pr = pi - len;
            }
            else {
                *sptr++ = pl;
                *sptr++ = pi - len;
                pl = pi + len;
            }
        }

        /* insertion sort */
        for(pi = pl + len; pi <= pr; pi += len) {
            opt_memcpy(vp, pi, len);
            pj = pi;
            pk = pi - len;
            while(pj > pl && STRING_LT(vp, pk, len)) {
                opt_memcpy(pj, pk, len);
                pj -= len;
                pk -= len;
            }
            opt_memcpy(pj, vp, len);
        }
        if (sptr == stack) break;
        pr = *(--sptr);
        pl = *(--sptr);
    }

    free(vp);
    return 0;
}


int main(){
  size_t index;
  char *l = malloc(NUM*LEN);
  char *lcpy = malloc(NUM*LEN);
  clock_t last, current;

  printf("Benchmark with %d strings of size %d\n", NUM, LEN);

/*   srand(time(NULL)); */
  srand(1);

  for(index = 0; index < NUM; ++index){
	sprintf(lcpy+index*LEN, "%d%c", rand(), '\0');
  }

  memcpy(l, lcpy, NUM*LEN);

  last = clock();
  qsort(l, NUM, LEN, compare_stringC);
  current = clock();

/*   for(index = 0; index < NUM; ++index){ */
/* 	printf("0-->%s", l+index*LEN); */
/*   } */
/*   printf("\n"); */

  printf("C qsort with C style compare: %f\n", (current-last)/(float)CLOCKS_PER_SEC);

  memcpy(l, lcpy, NUM*LEN);

  last = clock();
  qsort(l, NUM, LEN, compare_stringP);
  current = clock();

/*   for(index = 0; index < NUM; ++index){ */
/* 	printf("1-->%s", l+index*LEN); */
/*   } */
/*   printf("\n"); */

  printf("C qsort with Python style compare: %f\n", (current-last)/(float)CLOCKS_PER_SEC);

  memcpy(l, lcpy, NUM*LEN);

  last = clock();
  newqsort(l, NUM, LEN);
  current = clock();

/*   for(index = 0; index < NUM; ++index){ */
/* 	printf("2-->%s", l+index*LEN); */
/*   } */
/*   printf("\n"); */

  printf("NumPy newqsort: %f\n", (current-last)/(float)CLOCKS_PER_SEC);

  free(l);
  free(lcpy);
  return 0;
}

_______________________________________________
Numpy-discussion mailing list
Numpy-discussion@scipy.org
http://projects.scipy.org/mailman/listinfo/numpy-discussion

Reply via email to