On Sep 4, 2014, at 2:38 PM, Andrew Morton <a...@linux-foundation.org> wrote:

>> From: Mark Rustad <mark.d.rus...@intel.com>
>> 
>> Resolve a shadow warning in W=2 builds arising from min/max macro
>> references in a parameter to a min3/max3 macro. There is no
>> functional issue - the warning is benign - but simply changing
>> some local variable names will eliminate it.
>> 
>> ...
>> 
>> --- a/include/linux/kernel.h
>> +++ b/include/linux/kernel.h
>> @@ -716,22 +716,22 @@ static inline void ftrace_dump(enum ftrace_dump_mode 
>> oops_dump_mode) { }
>>      _max1 > _max2 ? _max1 : _max2; })
>> 
>> #define min3(x, y, z) ({                     \
>> -    typeof(x) _min1 = (x);                  \
>> -    typeof(y) _min2 = (y);                  \
>> -    typeof(z) _min3 = (z);                  \
>> -    (void) (&_min1 == &_min2);              \
>> -    (void) (&_min1 == &_min3);              \
>> -    _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
>> -            (_min2 < _min3 ? _min2 : _min3); })
>> +    typeof(x) _min31 = (x);                 \
>> +    typeof(y) _min32 = (y);                 \
>> +    typeof(z) _min33 = (z);                 \
>> +    (void) (&_min31 == &_min32);            \
>> +    (void) (&_min31 == &_min33);            \
>> +    _min31 < _min32 ? (_min31 < _min33 ? _min31 : _min33) : \
>> +            (_min32 < _min33 ? _min32 : _min33); })
>> 
>> #define max3(x, y, z) ({                     \
>> -    typeof(x) _max1 = (x);                  \
>> -    typeof(y) _max2 = (y);                  \
>> -    typeof(z) _max3 = (z);                  \
>> -    (void) (&_max1 == &_max2);              \
>> -    (void) (&_max1 == &_max3);              \
>> -    _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
>> -            (_max2 > _max3 ? _max2 : _max3); })
>> +    typeof(x) _max31 = (x);                 \
>> +    typeof(y) _max32 = (y);                 \
>> +    typeof(z) _max33 = (z);                 \
>> +    (void) (&_max31 == &_max32);            \
>> +    (void) (&_max31 == &_max33);            \
>> +    _max31 > _max32 ? (_max31 > _max33 ? _max31 : _max33) : \
>> +            (_max32 > _max33 ? _max32 : _max33); })
>> 
>> /**
>>  * min_not_zero - return the minimum that is _not_ zero, unless both are zero
> 
> I'm still sitting on the below patch.  It's stalled because I have a
> note that David potentially found issues with it.  But on rechecking,
> that appears to be stale, or not serious enough to prevent inclusion.
> 
> I can't (be bothered to) check whether this patch fixes the warnings
> because your changelog didn't tell me how to trigger the warnings (bad
> changelog).  But it might fix them!  Can you please test it?

Actually it does. W=2 builds get warnings when a min/max macro is used as a 
parameter to min3/max3. If you move to the min3/max3 that makes nested calls to 
min/max, every reference to min3/max3 will generate a warning in W=2 builds no 
matter what. It is interesting that the compiler optimizes that better - I 
hadn't looked at that.

Not wanting to argue for poorer code, lets forget about the warnings for now. 
These particular shadow warnings are pretty trivial. I'll consider revisiting 
it later. And next time I'll check the code generation.

> From: Michal Nazarewicz <min...@mina86.com>
> Subject: include/linux/kernel.h: rewrite min3, max3 and clamp using min and 
> max
> 
> It appears that gcc is better at optimising a double call to min and max
> rather than open coded min3 and max3.  This can be observed here:
> 
>    $ cat min-max.c
>    #define min(x, y) ({                               \
>       typeof(x) _min1 = (x);                  \
>       typeof(y) _min2 = (y);                  \
>       (void) (&_min1 == &_min2);              \
>       _min1 < _min2 ? _min1 : _min2; })
>    #define min3(x, y, z) ({                   \
>       typeof(x) _min1 = (x);                  \
>       typeof(y) _min2 = (y);                  \
>       typeof(z) _min3 = (z);                  \
>       (void) (&_min1 == &_min2);              \
>       (void) (&_min1 == &_min3);              \
>       _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
>               (_min2 < _min3 ? _min2 : _min3); })
> 
>    int fmin3(int x, int y, int z) { return min3(x, y, z); }
>    int fmin2(int x, int y, int z) { return min(min(x, y), z); }
> 
>    $ gcc -O2 -o min-max.s -S min-max.c; cat min-max.s
>       .file   "min-max.c"
>       .text
>       .p2align 4,,15
>       .globl  fmin3
>       .type   fmin3, @function
>    fmin3:
>    .LFB0:
>       .cfi_startproc
>       cmpl    %esi, %edi
>       jl      .L5
>       cmpl    %esi, %edx
>       movl    %esi, %eax
>       cmovle  %edx, %eax
>       ret
>       .p2align 4,,10
>       .p2align 3
>    .L5:
>       cmpl    %edi, %edx
>       movl    %edi, %eax
>       cmovle  %edx, %eax
>       ret
>       .cfi_endproc
>    .LFE0:
>       .size   fmin3, .-fmin3
>       .p2align 4,,15
>       .globl  fmin2
>       .type   fmin2, @function
>    fmin2:
>    .LFB1:
>       .cfi_startproc
>       cmpl    %edi, %esi
>       movl    %edx, %eax
>       cmovle  %esi, %edi
>       cmpl    %edx, %edi
>       cmovle  %edi, %eax
>       ret
>       .cfi_endproc
>    .LFE1:
>       .size   fmin2, .-fmin2
>       .ident  "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
>       .section        .note.GNU-stack,"",@progbits
> 
> fmin3 function, which uses open-coded min3 macro, is compiled into total
> of ten instructions including a conditional branch, whereas fmin2
> function, which uses two calls to min2 macro, is compiled into six
> instructions with no branches.
> 
> Similarly, open-coded clamp produces the same code as clamp using min and
> max macros, but the latter is much shorter:
> 
>    $ cat clamp.c
>    #define clamp(val, min, max) ({                    \
>       typeof(val) __val = (val);              \
>       typeof(min) __min = (min);              \
>       typeof(max) __max = (max);              \
>       (void) (&__val == &__min);              \
>       (void) (&__val == &__max);              \
>       __val = __val < __min ? __min: __val;   \
>       __val > __max ? __max: __val; })
>    #define min(x, y) ({                               \
>       typeof(x) _min1 = (x);                  \
>       typeof(y) _min2 = (y);                  \
>       (void) (&_min1 == &_min2);              \
>       _min1 < _min2 ? _min1 : _min2; })
>    #define max(x, y) ({                               \
>       typeof(x) _max1 = (x);                  \
>       typeof(y) _max2 = (y);                  \
>       (void) (&_max1 == &_max2);              \
>       _max1 > _max2 ? _max1 : _max2; })
> 
>    int fclamp(int v, int min, int max) { return clamp(v, min, max); }
>    int fclampmm(int v, int min, int max) { return min(max(v, min), max); }
> 
>    $ gcc -O2 -o clamp.s -S clamp.c; cat clamp.s
>       .file   "clamp.c"
>       .text
>       .p2align 4,,15
>       .globl  fclamp
>       .type   fclamp, @function
>    fclamp:
>    .LFB0:
>       .cfi_startproc
>       cmpl    %edi, %esi
>       movl    %edx, %eax
>       cmovge  %esi, %edi
>       cmpl    %edx, %edi
>       cmovle  %edi, %eax
>       ret
>       .cfi_endproc
>    .LFE0:
>       .size   fclamp, .-fclamp
>       .p2align 4,,15
>       .globl  fclampmm
>       .type   fclampmm, @function
>    fclampmm:
>    .LFB1:
>       .cfi_startproc
>       cmpl    %edi, %esi
>       cmovge  %esi, %edi
>       cmpl    %edi, %edx
>       movl    %edi, %eax
>       cmovle  %edx, %eax
>       ret
>       .cfi_endproc
>    .LFE1:
>       .size   fclampmm, .-fclampmm
>       .ident  "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
>       .section        .note.GNU-stack,"",@progbits
> 
>    Linux mpn-glaptop 3.13.0-29-generic #53~precise1-Ubuntu SMP Wed Jun 4 
> 22:06:25 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
>    gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3
>    Copyright (C) 2011 Free Software Foundation, Inc.
>    This is free software; see the source for copying conditions.  There is NO
>    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
> 
>    -rwx------ 1 mpn eng 51224656 Jun 17 14:15 vmlinux.before
>    -rwx------ 1 mpn eng 51224608 Jun 17 13:57 vmlinux.after
> 
> 48 bytes reduction.  The do_fault_around was a few instruction shorter
> and as far as I can tell saved 12 bytes on the stack, i.e.:
> 
>    $ grep -e rsp -e pop -e push do_fault_around.*
>    do_fault_around.before.s:push   %rbp
>    do_fault_around.before.s:mov    %rsp,%rbp
>    do_fault_around.before.s:push   %r13
>    do_fault_around.before.s:push   %r12
>    do_fault_around.before.s:push   %rbx
>    do_fault_around.before.s:sub    $0x38,%rsp
>    do_fault_around.before.s:add    $0x38,%rsp
>    do_fault_around.before.s:pop    %rbx
>    do_fault_around.before.s:pop    %r12
>    do_fault_around.before.s:pop    %r13
>    do_fault_around.before.s:pop    %rbp
> 
>    do_fault_around.after.s:push   %rbp
>    do_fault_around.after.s:mov    %rsp,%rbp
>    do_fault_around.after.s:push   %r12
>    do_fault_around.after.s:push   %rbx
>    do_fault_around.after.s:sub    $0x30,%rsp
>    do_fault_around.after.s:add    $0x30,%rsp
>    do_fault_around.after.s:pop    %rbx
>    do_fault_around.after.s:pop    %r12
>    do_fault_around.after.s:pop    %rbp
> 
> or here side-by-side:
> 
>    Before                    After
>    push   %rbp               push   %rbp          
>    mov    %rsp,%rbp          mov    %rsp,%rbp     
>    push   %r13                                    
>    push   %r12               push   %r12          
>    push   %rbx               push   %rbx          
>    sub    $0x38,%rsp         sub    $0x30,%rsp    
>    add    $0x38,%rsp         add    $0x30,%rsp    
>    pop    %rbx               pop    %rbx          
>    pop    %r12               pop    %r12          
>    pop    %r13                                    
>    pop    %rbp               pop    %rbp          
> 
> There are also fewer branches:
> 
>    $ grep ^j do_fault_around.*
>    do_fault_around.before.s:jae    ffffffff812079b7
>    do_fault_around.before.s:jmp    ffffffff812079c5
>    do_fault_around.before.s:jmp    ffffffff81207a14
>    do_fault_around.before.s:ja     ffffffff812079f9
>    do_fault_around.before.s:jb     ffffffff81207a10
>    do_fault_around.before.s:jmp    ffffffff81207a63
>    do_fault_around.before.s:jne    ffffffff812079df
> 
>    do_fault_around.after.s:jmp    ffffffff812079fd
>    do_fault_around.after.s:ja     ffffffff812079e2
>    do_fault_around.after.s:jb     ffffffff812079f9
>    do_fault_around.after.s:jmp    ffffffff81207a4c
>    do_fault_around.after.s:jne    ffffffff812079c8
> 
> And here's with allyesconfig on a different machine:
> 
>    $ uname -a; gcc --version; ls -l vmlinux.*
>    Linux erwin 3.14.7-mn #54 SMP Sun Jun 15 11:25:08 CEST 2014 x86_64 AMD 
> Phenom(tm) II X3 710 Processor AuthenticAMD GNU/Linux
>    gcc (GCC) 4.8.3
>    Copyright (C) 2013 Free Software Foundation, Inc.
>    This is free software; see the source for copying conditions.  There is NO
>    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
> 
>    -rwx------ 1 mpn eng 437027411 Jun 20 16:04 vmlinux.before
>    -rwx------ 1 mpn eng 437026881 Jun 20 15:30 vmlinux.after
> 
> 530 bytes reduction.
> 
> Signed-off-by: Michal Nazarewicz <min...@mina86.com>
> Signed-off-by: Hagen Paul Pfeifer <ha...@jauu.net>
> Acked-by: Steven Rostedt <rost...@goodmis.org>
> Cc: Hagen Paul Pfeifer <ha...@jauu.net>
> Cc: David Rientjes <rient...@google.com>
> Signed-off-by: Andrew Morton <a...@linux-foundation.org>
> ---
> 
> include/linux/kernel.h |   32 +++++---------------------------
> 1 file changed, 5 insertions(+), 27 deletions(-)
> 
> diff -puN 
> include/linux/kernel.h~include-kernelh-rewrite-min3-max3-and-clamp-using-min-and-max
>  include/linux/kernel.h
> --- 
> a/include/linux/kernel.h~include-kernelh-rewrite-min3-max3-and-clamp-using-min-and-max
> +++ a/include/linux/kernel.h
> @@ -715,23 +715,8 @@ static inline void ftrace_dump(enum ftra
>       (void) (&_max1 == &_max2);              \
>       _max1 > _max2 ? _max1 : _max2; })
> 
> -#define min3(x, y, z) ({                     \
> -     typeof(x) _min1 = (x);                  \
> -     typeof(y) _min2 = (y);                  \
> -     typeof(z) _min3 = (z);                  \
> -     (void) (&_min1 == &_min2);              \
> -     (void) (&_min1 == &_min3);              \
> -     _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
> -             (_min2 < _min3 ? _min2 : _min3); })
> -
> -#define max3(x, y, z) ({                     \
> -     typeof(x) _max1 = (x);                  \
> -     typeof(y) _max2 = (y);                  \
> -     typeof(z) _max3 = (z);                  \
> -     (void) (&_max1 == &_max2);              \
> -     (void) (&_max1 == &_max3);              \
> -     _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
> -             (_max2 > _max3 ? _max2 : _max3); })
> +#define min3(x, y, z) min((typeof(x))min(x, y), z)
> +#define max3(x, y, z) max((typeof(x))max(x, y), z)
> 
> /**
>  * min_not_zero - return the minimum that is _not_ zero, unless both are zero
> @@ -746,20 +731,13 @@ static inline void ftrace_dump(enum ftra
> /**
>  * clamp - return a value clamped to a given range with strict typechecking
>  * @val: current value
> - * @min: minimum allowable value
> - * @max: maximum allowable value
> + * @lo: lowest allowable value
> + * @hi: highest allowable value
>  *
>  * This macro does strict typechecking of min/max to make sure they are of the
>  * same type as val.  See the unnecessary pointer comparisons.
>  */
> -#define clamp(val, min, max) ({                      \
> -     typeof(val) __val = (val);              \
> -     typeof(min) __min = (min);              \
> -     typeof(max) __max = (max);              \
> -     (void) (&__val == &__min);              \
> -     (void) (&__val == &__max);              \
> -     __val = __val < __min ? __min: __val;   \
> -     __val > __max ? __max: __val; })
> +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
> 
> /*
>  * ..and if you can't take the strict
> _
> 

-- 
Mark Rustad, Networking Division, Intel Corporation

Attachment: signature.asc
Description: Message signed with OpenPGP using GPGMail

Reply via email to