A Monday 11 February 2008, Charles R Harris escrigué: > I'll check it out when I get home. As I say, it was running about 10% > slower on my machine, but if it does better on most platforms it is > probably the way to go. We can always change it in the future when > everyone is running on quantum computers.
We've done some testing on newqsort in several computers in our company. Here are the results for ordering a list with 1 million of strings of length 15 filled with random information (using C rand()): 1) Ubuntu 7.1 (gcc 4.1.3, -O3, Intel Pentium 4 @ 2 GHz) C qsort with C style compare: 2.450000 C qsort with Python style compare: 2.440000 NumPy newqsort: 0.650000 2) Windows XP (SP2) (MSVC 7.1, /Ox, Intel Pentium 4 @ 2 GHz) C qsort with C style compare: 0.971000 C qsort with Python style compare: 0.962000 NumPy newqsort: 0.921000 3) SuSe LE 10.3 (gcc 4.2.1, -O3, AMD Opteron @ 2 GHz) C qsort with C style compare: 0.640000 C qsort with Python style compare: 0.600000 NumPy newqsort: 0.590000 4) Debian 4.2.2 (lenny) (gcc 4.2.3, -O3, Intel Pentium 4 @ 3.2 GHz) C qsort with C style compare: 1.770000 C qsort with Python style compare: 1.750000 NumPy newqsort: 0.440000 5) Mandriva 2008.0 (gcc 4.2.2, -O3, Intel Core2 Duo @ 1.5 GHz) C qsort with C style compare: 1.590000 C qsort with Python style compare: 1.550000 NumPy newqsort: 0.510000 6) Ubuntu 7.1 (gcc 4.1.3, -O3, Intel Pentium 4 @ 2.5 GHz) C qsort with C style compare: 1.890000 C qsort with Python style compare: 1.900000 NumPy newqsort: 0.500000 7) Ubuntu 7.1 (gcc 4.1.2, -O3, PowerPC 3 @ 1.3 GHz) C qsort with C style compare: 3.030000 C qsort with Python style compare: 2.970000 NumPy newqsort: 1.040000 8) MacOSX 10.4 (Tiger) (gcc 4.0.1, -O3, PowerPC 3 @ 1.3 GHz) C qsort with C style compare: 1.560000 C qsort with Python style compare: 1.510000 NumPy newqsort: 1.220000 All benchmarks have been run using the attached benchmark (if anybody else wants to join the fiesta, please report back your feedback). Summarizing, one can say a couple of things: * Recent Debian distros and derivatives (Ubuntu) as well as Mandriva are suffering from a innefficient system qsort (at least the implementation for strings). SuSe Linux Enterprise 10.3 seems to have solved this. And Windows XP (SP2) and MacOSX (Tiger) looks like they have a relatively efficient implementation of qsort. * The newqsort performs the best on all the platforms we have checked (ranging from a 5% of improvement on Opteron/SuSe, up to 3.8x with some Pentium4/Ubuntu systems). All in all, I'd also say that newqsort would be a good candidate to be put into NumPy. Cheers, -- >0,0< Francesc Altet http://www.carabos.com/ V V Cárabos Coop. V. Enjoy Data "-"
#include <stdlib.h> #include <stdio.h> #include <time.h> #define NUM 1000000 #define LEN 15 /* This cannot be inlined by MSVC compiler */ /* static int */ /* compare_stringC(const void *s1, const void *s2) */ /* { */ /* return strncmp(s1, s2, LEN); */ /* } */ static int compare_stringC(const void *s1, const void *s2) { const unsigned char *c1 = (unsigned char *)s1; const unsigned char *c2 = (unsigned char *)s2; size_t i; for(i = 0; i < LEN; ++i) { if (c1[i] != c2[i]) { return (c1[i] > c2[i]) ? 1 : -1; } if (c1[i] == 0) return -1; } return 0; } static int compare_stringP(const void *s1, const void *s2) { const unsigned char *c1 = (unsigned char *)s1; const unsigned char *c2 = (unsigned char *)s2; size_t i; for(i = 0; i < LEN; ++i) { if (c1[i] != c2[i]) { return (c1[i] > c2[i]) ? 1 : -1; } } return 0; } #define PYA_QS_STACK 100 #define SMALL_QUICKSORT 15 static int compare_string(char *s1, char *s2, size_t len) { const unsigned char *c1 = (unsigned char *)s1; const unsigned char *c2 = (unsigned char *)s2; size_t i; for(i = 0; i < len; ++i) { if (c1[i] != c2[i]) { return (c1[i] > c2[i]) ? 1 : -1; } } return 0; } #define STRING_LT(pa, pb, len) (compare_string(pa, pb, len) < 0) #define swap_string3(s1, s2, len) { \ memcpy((vp), (s2), (len)); \ memcpy((s2), (s1), (len)); \ memcpy((s1), (vp), (len)); \ } static void swap_string2(char *s1, char *s2, size_t len) { size_t i; char t; for (i=0; i < len; i++) { t = s2[i]; s2[i] = s1[i]; s1[i] = t; } } static void swap_string(char *s1, char *s2, size_t len) { while(len--) { const char t = *s1; *s1++ = *s2; *s2++ = t; } } #define opt_memcpy(a,b,n) memcpy((a),(b),(n)) static void copy_string(char *s1, char *s2, size_t len) { while(len--) { *s1++ = *s2++; } } /* This seems to work faster on my laptop, but YMMV */ static void opt_memcpy2(void *a, const void *b, size_t n) { size_t i; if (n<16) for (i=0; i<n; i++) ((char *)a)[i] = ((char *)b)[i]; else memcpy(a, b, n); } static int newqsort(char *start, size_t num, size_t len) { char *vp = malloc(len); char *pl = start; char *pr = start + (num - 1)*len; char *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; for(;;) { while ((pr - pl) > 5*len) { /* quicksort partition */ pm = pl + (((pr - pl)/len) >> 1)*len; if (STRING_LT(pm, pl, len)) swap_string(pm, pl, len); if (STRING_LT(pr, pm, len)) swap_string(pr, pm, len); if (STRING_LT(pm, pl, len)) swap_string(pm, pl, len); opt_memcpy(vp, pm, len); pi = pl; pj = pr - len; swap_string(pm, pj, len); for(;;) { do pi += len; while (STRING_LT(pi, vp, len)); do pj -= len; while (STRING_LT(vp, pj, len)); if (pi >= pj) break; swap_string(pi, pj, len); } pk = pr - len; swap_string(pi, pk, len); /* push largest partition on stack */ if (pi - pl < pr - pi) { *sptr++ = pi + len; *sptr++ = pr; pr = pi - len; } else { *sptr++ = pl; *sptr++ = pi - len; pl = pi + len; } } /* insertion sort */ for(pi = pl + len; pi <= pr; pi += len) { opt_memcpy(vp, pi, len); pj = pi; pk = pi - len; while(pj > pl && STRING_LT(vp, pk, len)) { opt_memcpy(pj, pk, len); pj -= len; pk -= len; } opt_memcpy(pj, vp, len); } if (sptr == stack) break; pr = *(--sptr); pl = *(--sptr); } free(vp); return 0; } int main(){ size_t index; char *l = malloc(NUM*LEN); char *lcpy = malloc(NUM*LEN); clock_t last, current; printf("Benchmark with %d strings of size %d\n", NUM, LEN); /* srand(time(NULL)); */ srand(1); for(index = 0; index < NUM; ++index){ sprintf(lcpy+index*LEN, "%d%c", rand(), '\0'); } memcpy(l, lcpy, NUM*LEN); last = clock(); qsort(l, NUM, LEN, compare_stringC); current = clock(); /* for(index = 0; index < NUM; ++index){ */ /* printf("0-->%s", l+index*LEN); */ /* } */ /* printf("\n"); */ printf("C qsort with C style compare: %f\n", (current-last)/(float)CLOCKS_PER_SEC); memcpy(l, lcpy, NUM*LEN); last = clock(); qsort(l, NUM, LEN, compare_stringP); current = clock(); /* for(index = 0; index < NUM; ++index){ */ /* printf("1-->%s", l+index*LEN); */ /* } */ /* printf("\n"); */ printf("C qsort with Python style compare: %f\n", (current-last)/(float)CLOCKS_PER_SEC); memcpy(l, lcpy, NUM*LEN); last = clock(); newqsort(l, NUM, LEN); current = clock(); /* for(index = 0; index < NUM; ++index){ */ /* printf("2-->%s", l+index*LEN); */ /* } */ /* printf("\n"); */ printf("NumPy newqsort: %f\n", (current-last)/(float)CLOCKS_PER_SEC); free(l); free(lcpy); return 0; }
_______________________________________________ Numpy-discussion mailing list Numpy-discussion@scipy.org http://projects.scipy.org/mailman/listinfo/numpy-discussion