On Sun, Nov 07, 2021 at 08:13:36PM -0600, Luke Small wrote:

> https://bugs.llvm.org/show_bug.cgi?id=50026
> 
> I reported it to the llvm people. it is two slightly different quicksort
> algorithms which perform radically differently. The one which you could
> assume would take more time, performs MUCH better.
> 
> I made a custom quicksort algorithm which outperforms qsort by A LOT for
> sorting an array of around 300 randomly created unsigned characters, which
> is what I use it for.
> 
> if you read the report a guy said there's a 10% difference for sorting 3
> million characters on freebsd, but there's about 40% performance difference
> on OpenBSD. maybe it's also how the OpenBSD team modified clang to prevent
> rop chain stuff or something? I'm using a westmere based intell server.
> 
> it's the same for clang 11.
> 
> What's the deal?

1. Your test is too small. I increased the test size to get less error
in measurement. I also changed the code to either run quicksort or
quicksort depending on an argument.

2. I confirmed your observation using time on amd64, arm64 however
shows a difference more in line with expectations:

[otto@mc:8]$ time ./a.out A
quicksort
time = 0.335777021
    0m00.35s real     0m00.35s user     0m00.01s system
[otto@mc:9]$ time ./a.out B 
quicksort1
time = 0.345703033
    0m00.36s real     0m00.36s user     0m00.01s system


I suspect some non-optimal decision of the optimizer on amd64 for the
first quicksort.

A next step could be somebody looking at the code produced by the
compiler.  That is not going to be me.

        -Otto
> 
> -Luke

> 
> /* 
> clang sort_test2.c -O2 -o sort_test && ./sort_test
>  */
> 
> /*
>  * quicksort should be slightly faster than quicksort1 because it is
>  * identical except quicksort1 does more work using global counters.
>  * quicsort1 runs 41% faster. Using gcc, quicksort is instead slightly faster.
>  */
> 
> 
> /*
>  * I'm using clang 10.0.1 on OpenBSD. these are my results:
> 
> 
> quicksort
> time = 0.004501771
> 
> 
> 
> quicksort1
> time = 0.002655233
> */
> 
> 
> #include <err.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> #include <sys/param.h>
> 
> size_t m;
> size_t g;
> size_t i;
> 
> 
> /* 
>  * quicksort sorts characters on a u_char array.
>  * 'a' is the first character on the array.
>  * 'b' is last character on the array
>  */
> static
> void quicksort(u_char a[], u_char b[])
> {
>       u_char temp;
>       
>       if (b - a <= 1)
>       {
>               if (a == b)
>                       return;
> 
>               if (*a > *b)
>               {
>                       temp = *a; *a = *b; *b = temp;
>               }
>               return;
>       }
>       u_char * j = a + 1;
>       u_char * k = b;
>       u_char u = *a;
> 
>       /* 
>        * This if() avoids repeated awkward special case branching 
>        * in the similar if() in the "while (++j < k)" loop
>        */
>       if (*j > u)
>       {
>               do
>               {
>                       if (*k < u)
>                       {
>                               temp = *j; *j = *k; *k = temp;
>                               goto skip0;
>                       }
>               } while (j < --k);
> 
>               quicksort(j, b);
>               return;
> 
>               skip0:
>               if (j == --k)
>               {
>                       *a = *j; *j = u;
> 
>                       quicksort(j+1, b);
>                       return;
>               }
>       }
> 
> 
>       while (++j < k)
>       {
>               if (*j > u)
>               {
>                       do
>                       {
>                               if (*k < u)
>                               {
>                                       temp = *j; *j = *k; *k = temp;
>                                       goto skip;
>                               }
>                       } while (j < --k);
> 
>                       quicksort(j, b);
> 
>                       *a = *--j; *j = u;
> 
>                       quicksort(a, j-1);
>                       return;
> 
>                       skip:
>                       if (j == --k)
>                       {
>                               *a = *j; *j = u;
> 
>                               quicksort(a, j-1);
>                               quicksort(j+1, b);
>                               return;
>                       }
>               }
>       }
> 
>       if (*j > u)
>       {
>               quicksort(j, b);
> 
>               *a = *--j; *j = u;
> 
>               quicksort(a, j-1);
>       }
>       else
>       {
>               *a = *j; *j = u;
> 
>               quicksort(a, j-1);
> 
>               if (j == b)
>                       return;
> 
>               quicksort(j+1, b);
>       }
> }
> 
> /* 
>  * quicksort1 is identical to quicksort() but it alters some global counters
>  * it should take slightly longer to run, but it runs in half the time
>  */
> static
> void quicksort1(u_char a[], u_char b[], size_t level)
> {
>       ++m;
>       if (level > i)
>               i = level;
>               
>       u_char temp;
>       
>       
>       if (b - a <= 1)
>       {
>               ++g;
>               if (a == b)
>                       return;
>               if (*a > *b)
>               {
>                       temp = *a; *a = *b; *b = temp;
>               }
>               return;
>       }
>       u_char * j = a + 1;
>       u_char * k = b;
>       u_char u = *a;
> 
>       /* 
>        * This if() avoids repeated awkward special case branching 
>        * in the similar if() in the "while (++j < k)" loop
>        */
>       if (*j > u)
>       {
>               do
>               {
>                       if (*k < u)
>                       {
>                               temp = *j; *j = *k; *k = temp;
>                               goto skip0;
>                       }
>               } while (j < --k);
> 
>               quicksort1(j, b, level + 1);
>               return;
> 
>               skip0:
>               if (j == --k)
>               {
>                       *a = *j; *j = u;
> 
>                       quicksort1(j+1, b, level + 1);
>                       return;
>               }
>       }
> 
> 
>       while (++j < k)
>       {
>               if (*j > u)
>               {
>                       do
>                       {
>                               if (*k < u)
>                               {
>                                       temp = *j; *j = *k; *k = temp;
>                                       goto skip;
>                               }
>                       } while (j < --k);
> 
>                       quicksort1(j, b, level + 1);
> 
>                       *a = *--j; *j = u;
> 
>                       quicksort1(a, j-1, level + 1);
>                       return;
> 
>                       skip:
>                       if (j == --k)
>                       {
>                               *a = *j; *j = u;
> 
>                               quicksort1(a, j-1, level + 1);
>                               quicksort1(j+1, b, level + 1);
>                               return;
>                       }
>               }
>       }
> 
>       if (*j > u)
>       {
>               quicksort1(j, b, level + 1);
> 
>               *a = *--j; *j = u;
> 
>               quicksort1(a, j-1, level + 1);
>       }
>       else
>       {
>               *a = *j; *j = u;
> 
>               quicksort1(a, j-1, level + 1);
> 
>               if (j == b)
>                       return;
> 
>               quicksort1(j+1, b, level + 1);
>       }
> }
> 
> 
> int main()
> {
> 
>       long double cpu_time_used;
>       struct timespec start, end;
>       u_char *buffer, *buffer2;
>       
>       size_t y;
>       size_t n = 30000;
>       
>       buffer = (u_char*)malloc(n);
>       if (buffer == NULL) err(1, "malloc");
>       buffer2 = (u_char*)malloc(n);
>       if (buffer2 == NULL) err(1, "malloc");
>       
>       
>       for (y = 0; y < n; ++y)
>               buffer[y] = rand() % 256;
> 
>       // run it once to clear any wrappers
>       clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
> 
> 
>       memcpy(buffer2, buffer, n);
> 
>       clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
>       quicksort(buffer2, buffer2 + n - 1);
>       clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
> 
>       cpu_time_used =
>           (long double) (end.tv_sec - start.tv_sec) +
>           (long double) (end.tv_nsec - start.tv_nsec) /
>           (long double) 1000000000;
> 
>       printf("\n\nquicksort\n");
>       printf("time = %.9Lf\n\n", cpu_time_used);
> 
> 
> 
>       memcpy(buffer2, buffer, n);
> 
>       i = m = g = 0;
>       clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
>       quicksort1(buffer2, buffer2 + n - 1, 1);
>       clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
> 
>       cpu_time_used =
>           (long double) (end.tv_sec - start.tv_sec) +
>           (long double) (end.tv_nsec - start.tv_nsec) /
>           (long double) 1000000000;
> 
>       printf("\n\nquicksort1\n");
>       //~ printf("time = %.9Lf, max depth: %lu, calls: %lu, g: %lu\n\n\n", 
> cpu_time_used, i, m, g);
>       printf("time = %.9Lf\n", cpu_time_used);
> 
> 
>       return 0;
> }

Reply via email to