Whoopsie:

- size_t v = rand() & ~UMMAX;
+ size_t v = rand() & (UMMAX - 1);


dlibenzi@dlibenzi:/tmp$ time ./test_div
c = 100396000

real 0m3.372s
user 0m3.368s
sys 0m0.000s

dlibenzi@dlibenzi:/tmp$ time ./test_obsd
c = 100396000

real 0m3.798s
user 0m3.794s
sys 0m0.000s

dlibenzi@dlibenzi:/tmp$ time ./test_int128
c = 100396000

real 0m0.551s
user 0m0.550s
sys 0m0.000s



On Tue, Dec 1, 2015 at 3:39 PM, Davide Libenzi <[email protected]> wrote:

> Let's make it really random now:
>
> [main_test.c]
> #include <stddef.h>
> #include <stdio.h>
> #include <stdlib.h>
>
> #define UMMAX (1UL << (sizeof(size_t) * 4))
>
> extern int testovf(size_t a, size_t b);
> extern size_t base(void);
>
> size_t num_values = 100000;
>
> size_t qrand(void)
> {
>     size_t v = rand() & ~UMMAX;
>
>     if (rand() & 16)
>         v += UMMAX;
>
>     return v;
> }
>
> int main(void)
> {
>     size_t i, j, c;
>     size_t *vals = calloc(num_values, sizeof(size_t));
>
>     for (c = 0; c < num_values; c++) {
>         do {
>             vals[c] = qrand();
>         } while (!vals[c]);
>     }
>     c = 0;
>     for (i = 0; i < 4000; i++) {
>         for (j = base(); j < num_values; j++) {
>             c += testovf(vals[j - 1], vals[j]);
>         }
>     }
>     printf("c = %zd\n", c);
>
>     return 0;
> }
>
>
> [test_null.c]
> #include <stddef.h>
> #include <stdio.h>
> #include <stdlib.h>
>
> int testovf(size_t a, size_t b)
> {
>     return 0;
> }
>
> size_t base(void)
> {
>     return 0;
> }
>
>
>
> dlibenzi@dlibenzi:/tmp$ time ./test_div
> c = 100396000
>
> real 0m3.325s
> user 0m3.321s
> sys 0m0.000s
>
> dlibenzi@dlibenzi:/tmp$ time ./test_obsd
> c = 100396000
>
> real 0m3.822s
> user 0m3.818s
> sys 0m0.005s
>
> dlibenzi@dlibenzi:/tmp$ time ./test_int128
> c = 100396000
>
> real 0m0.560s
> user 0m0.559s
> sys 0m0.000s
>
> dlibenzi@dlibenzi:/tmp$ time ./test_null
> c = 0
>
> real 0m0.474s
> user 0m0.469s
> sys 0m0.004s
>
>
>
> On Tue, Dec 1, 2015 at 3:28 PM, Dan Cross <[email protected]> wrote:
>
>> On Tue, Dec 1, 2015 at 5:40 PM, 'Davide Libenzi' via Akaros <
>> [email protected]> wrote:
>>
>>> Now try with something that emulates a bit more reality (cold branch),
>>> in making that front branch be random, instead of scorching hot towards a
>>> single hint.
>>>
>>
>> Except the numbers you generated are not random; they're greater than
>> OpenBSD's MUL_NO_OVERFLOW almost 90% of the time on my machine. Thus, you
>> are overflowing most of the time: this is certainly not the expected case
>> for which one is trying to optimize. If I replace your 'qrand()' with a
>> call to 'arc4random()' I get results far closer to what I originally posted
>> and again within a factor of two of the __int128 solution (I'm glad you
>> made that unsigned; mine of course had the bug that SIZE_MAX * SIZE_MAX
>> could cause signed overflow!!).
>>
>> But of course, using arc4random() will never overflow since it returns
>> uint32_t.... If I change your 'qrand()' to: 'return (size_t)arc4random() +
>> (arc4random() >> 2);' then I'm forcing overflows about 3% of the time
>> (which would still be an absurdly high rate) and the optimization still
>> looks pretty good:
>>
>> : tempest; time ./test_div
>> c = 13876000
>>
>> real 0m3.306s
>> user 0m3.301s
>> sys 0m0.003s
>> : tempest; time ./test_obsd
>> c = 13448000
>>
>> real 0m1.966s
>> user 0m1.961s
>> sys 0m0.003s
>> : tempest;
>>
>> This is interesting; I hope you're enjoying this discussion as much as I
>> am. :-)  I don't mean to come across as off-putting or dismissive, and I
>> hope I'm not doing that.
>>
>>         - Dan C.
>>
>> [main_test.c]
>>> #include <stddef.h>
>>> #include <stdio.h>
>>> #include <stdlib.h>
>>>
>>> extern int testovf(size_t a, size_t b);
>>> extern size_t base(void);
>>>
>>> size_t num_values = 100000;
>>>
>>> size_t qrand(void)
>>> {
>>>     return ((size_t) rand() << 4) ^ rand();
>>> }
>>>
>>> int main(void)
>>> {
>>>     size_t i, j, c;
>>>     size_t *vals = calloc(num_values, sizeof(size_t));
>>>
>>>     for (c = 0; c < num_values; c++) {
>>>         do {
>>>             vals[c] = qrand();
>>>         } while (!vals[c]);
>>>     }
>>>     c = 0;
>>>     for (i = 0; i < 4000; i++) {
>>>         for (j = base(); j < num_values; j++) {
>>>             c += testovf(vals[j - 1], vals[j]);
>>>         }
>>>     }
>>>     printf("c = %zd\n", c);
>>>
>>>     return 0;
>>> }
>>>
>>> [test_div.c]
>>> #include <stddef.h>
>>> #include <stdio.h>
>>> #include <stdlib.h>
>>>
>>> #define SIZE_MAX (~(size_t) 0)
>>>
>>> int testovf(size_t a, size_t b)
>>> {
>>>     if (a > 0 && SIZE_MAX / a < b)
>>>         return 0;
>>>
>>>     return 1;
>>> }
>>>
>>> size_t base(void)
>>> {
>>>     return 0;
>>> }
>>>
>>> real 0m3.438s
>>> user 0m3.430s
>>> sys 0m0.004s
>>>
>>>
>>> [test_obsd.c]
>>> #include <stddef.h>
>>> #include <stdio.h>
>>> #include <stdlib.h>
>>>
>>> #define SIZE_MAX (~(size_t) 0)
>>> #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4))
>>>
>>> int testovf(size_t a, size_t b)
>>> {
>>>     if ((a >= MUL_NO_OVERFLOW || b >= MUL_NO_OVERFLOW) &&
>>>         a > 0 && SIZE_MAX / a < b) {
>>>         return 1;
>>>     }
>>>
>>>     return 0;
>>> }
>>>
>>> size_t base(void)
>>> {
>>>     return 0;
>>> }
>>>
>>> real 0m3.875s
>>> user 0m3.871s
>>> sys 0m0.000s
>>>
>>>
>>> [test_int128.c]
>>> #include <stddef.h>
>>> #include <stdio.h>
>>> #include <stdlib.h>
>>>
>>> #define SIZE_MAX (~(size_t) 0)
>>>
>>> int testovf(size_t a, size_t b)
>>> {
>>>     unsigned __int128 p = (unsigned __int128) a * (unsigned __int128) b;
>>>
>>>     if (__builtin_expect(p > SIZE_MAX, 0))
>>>         return 1;
>>>
>>>     return 0;
>>> }
>>>
>>> size_t base(void)
>>> {
>>>     return 0;
>>> }
>>>
>>> real 0m0.555s
>>> user 0m0.550s
>>> sys 0m0.004s
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>> On Tue, Dec 1, 2015 at 11:18 AM, Dan Cross <[email protected]> wrote:
>>>
>>>> On Tue, Dec 1, 2015 at 11:54 AM, 'Davide Libenzi' via Akaros <
>>>> [email protected]> wrote:
>>>>
>>>>> Eh? I didn't *defend* it, I just explained it. That code was a direct
>>>>>> import from OpenBSD.... I didn't write it, and given what it's doing, I 
>>>>>> saw
>>>>>> no reason to change it. :-)
>>>>>>
>>>>>
>>>>> You are arguing for simplicity, and yet you want to leave complex and
>>>>> useless optimizations in place? ☺
>>>>>
>>>>
>>>> Oh, and also: https://www.youtube.com/watch?v=nXaxk27zwlk <-- Well
>>>> worth the watch.
>>>>
>>>> --
>>>> You received this message because you are subscribed to the Google
>>>> Groups "Akaros" group.
>>>> To unsubscribe from this group and stop receiving emails from it, send
>>>> an email to [email protected].
>>>> To post to this group, send email to [email protected].
>>>> For more options, visit https://groups.google.com/d/optout.
>>>>
>>>
>>> --
>>> You received this message because you are subscribed to the Google
>>> Groups "Akaros" group.
>>> To unsubscribe from this group and stop receiving emails from it, send
>>> an email to [email protected].
>>> To post to this group, send email to [email protected].
>>> For more options, visit https://groups.google.com/d/optout.
>>>
>>
>> --
>> You received this message because you are subscribed to the Google Groups
>> "Akaros" group.
>> To unsubscribe from this group and stop receiving emails from it, send an
>> email to [email protected].
>> To post to this group, send email to [email protected].
>> For more options, visit https://groups.google.com/d/optout.
>>
>
>

-- 
You received this message because you are subscribed to the Google Groups 
"Akaros" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to