On 3/2/16, Peter Zijlstra <[email protected]> wrote:
> On Wed, Mar 02, 2016 at 04:53:36PM +0100, Sedat Dilek wrote:
>> ffffffff8110f570 <del_timer_sync>:
>> ffffffff8110f570:    55                      push   %rbp
>> ffffffff8110f571:    48 89 e5                mov    %rsp,%rbp
>> ffffffff8110f574:    41 57                   push   %r15
>> ffffffff8110f576:    41 56                   push   %r14
>> ffffffff8110f578:    53                      push   %rbx
>> ffffffff8110f579:    48 83 ec 28             sub    $0x28,%rsp
>
> stack offset is 0x28 bytes [*]
>
>> ffffffff8110f57d:    48 89 fb                mov    %rdi,%rbx
>> ffffffff8110f580:    e8 6b 6e 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f585:    e8 66 6e 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f58a:    e8 61 6e 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f58f:    e8 5c 6e 80 00          callq  ffffffff819163f0 <mcount>
>
> Your compiler is on drugs!
>
>> ffffffff8110f594:    9c                      pushfq
>> ffffffff8110f595:    8f 45 e0                popq   -0x20(%rbp)
>
> Saves flags in -0x20(%rbp)
>
>> ffffffff8110f598:    4c 8b 7d e0             mov    -0x20(%rbp),%r15
>
> And in %r15
>
> /me wonders what's wrong with: pop %r15
>
>> ffffffff8110f59c:    e8 4f 6e 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f5a1:    e8 4a 6e 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f5a6:    fa                      cli
>> ffffffff8110f5a7:    e8 84 cb fc ff          callq  ffffffff810dc130
>> <trace_hardirqs_off>
>> ffffffff8110f5ac:    4c 8d 73 50             lea    0x50(%rbx),%r14
>> ffffffff8110f5b0:    48 c7 04 24 b0 f5 10    movq   
>> $0xffffffff8110f5b0,(%rsp)
>> ffffffff8110f5b7:    81
>> ffffffff8110f5b8:    31 f6                   xor    %esi,%esi
>> ffffffff8110f5ba:    31 d2                   xor    %edx,%edx
>> ffffffff8110f5bc:    31 c9                   xor    %ecx,%ecx
>> ffffffff8110f5be:    41 b8 01 00 00 00       mov    $0x1,%r8d
>> ffffffff8110f5c4:    45 31 c9                xor    %r9d,%r9d
>> ffffffff8110f5c7:    4c 89 f7                mov    %r14,%rdi
>> ffffffff8110f5ca:    e8 c1 e5 fc ff          callq  ffffffff810ddb90
>> <lock_acquire>
>> ffffffff8110f5cf:    be 01 00 00 00          mov    $0x1,%esi
>> ffffffff8110f5d4:    48 c7 c2 cf f5 10 81    mov    $0xffffffff8110f5cf,%rdx
>> ffffffff8110f5db:    4c 89 f7                mov    %r14,%rdi
>> ffffffff8110f5de:    e8 8d 08 fd ff          callq  ffffffff810dfe70
>> <lock_release>
>> ffffffff8110f5e3:    e8 08 6e 80 00          callq  ffffffff819163f0 <mcount>
>
>> ffffffff8110f5e8:    4c 89 f8                mov    %r15,%rax
>> ffffffff8110f5eb:    49 89 c6                mov    %rax,%r14
>
> Moves r15 into r14 through rax
>
>
>> ffffffff8110f5ee:    f6 c4 02                test   $0x2,%ah
>> ffffffff8110f5f1:    75 19                   jne    ffffffff8110f60c
>> <del_timer_sync+0x9c>
>> ffffffff8110f5f3:    e8 f8 6d 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f5f8:    e8 f3 6d 80 00          callq  ffffffff819163f0 <mcount>
>
>
>> ffffffff8110f5fd:    4c 89 75 d0             mov    %r14,-0x30(%rbp)
>> ffffffff8110f601:    ff 75 d0                pushq  -0x30(%rbp)
>> ffffffff8110f604:    9d                      popfq
>
> puts r14 into -0x30(rbp) and pushes/pops that; see [*], this is 8 bytes
> over the stack?!
>
>> ffffffff8110f605:    e8 26 cb fc ff          callq  ffffffff810dc130
>> <trace_hardirqs_off>
>> ffffffff8110f60a:    eb 17                   jmp    ffffffff8110f623
>> <del_timer_sync+0xb3>
>> ffffffff8110f60c:    e8 2f cb fc ff          callq  ffffffff810dc140
>> <trace_hardirqs_on>
>> ffffffff8110f611:    e8 da 6d 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f616:    e8 d5 6d 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f61b:    4c 89 75 d8             mov    %r14,-0x28(%rbp)
>> ffffffff8110f61f:    ff 75 d8                pushq  -0x28(%rbp)
>> ffffffff8110f622:    9d                      popfq
>
> puts r14 into -0x28(rbp) and pushes/pops that
>
>> ffffffff8110f623:    e8 c8 6d 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f628:    65 8b 04 25 d4 ae 00    mov    %gs:0xaed4,%eax
>> ffffffff8110f62f:    00
>> ffffffff8110f630:    a9 00 00 0f 00          test   $0xf0000,%eax
>> ffffffff8110f635:    74 25                   je     ffffffff8110f65c
>> <del_timer_sync+0xec>
>> ffffffff8110f637:    f6 43 2a 20             testb  $0x20,0x2a(%rbx)
>> ffffffff8110f63b:    75 1f                   jne    ffffffff8110f65c
>> <del_timer_sync+0xec>
>> ffffffff8110f63d:    48 c7 c7 04 54 c5 81    mov    $0xffffffff81c55404,%rdi
>> ffffffff8110f644:    be 61 04 00 00          mov    $0x461,%esi
>> ffffffff8110f649:    e8 12 c4 f6 ff          callq  ffffffff8107ba60
>> <warn_slowpath_null>
>> ffffffff8110f64e:    eb 0c                   jmp    ffffffff8110f65c
>> <del_timer_sync+0xec>
>> ffffffff8110f650:    e8 9b 6d 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f655:    e8 96 6d 80 00          callq  ffffffff819163f0 <mcount>
>> ffffffff8110f65a:    f3 90                   pause
>> ffffffff8110f65c:    48 89 df                mov    %rbx,%rdi
>> ffffffff8110f65f:    e8 4c fe ff ff          callq  ffffffff8110f4b0
>> <try_to_del_timer_sync>
>> ffffffff8110f664:    85 c0                   test   %eax,%eax
>> ffffffff8110f666:    78 e8                   js     ffffffff8110f650
>> <del_timer_sync+0xe0>
>> ffffffff8110f668:    48 83 c4 28             add    $0x28,%rsp
>> ffffffff8110f66c:    5b                      pop    %rbx
>> ffffffff8110f66d:    41 5e                   pop    %r14
>> ffffffff8110f66f:    41 5f                   pop    %r15
>> ffffffff8110f671:    5d                      pop    %rbp
>> ffffffff8110f672:    c3                      retq
>> ffffffff8110f673:    66 66 66 66 2e 0f 1f    data32 data32 data32 nopw
>> %cs:0x0(%rax,%rax,1)
>> ffffffff8110f67a:    84 00 00 00 00 00
>
>
> That LLVM generates disgusting code; as a contrast, this is what my GCC
> makes of this:
>
> ffffffff81155f50 <del_timer_sync>:
> ffffffff81155f50:       e8 bb 02 9c 00          callq  ffffffff81b16210
> <__fentry__>
> ffffffff81155f55:       55                      push   %rbp
> ffffffff81155f56:       48 89 e5                mov    %rsp,%rbp
> ffffffff81155f59:       41 55                   push   %r13
> ffffffff81155f5b:       41 54                   push   %r12
> ffffffff81155f5d:       53                      push   %rbx
> ffffffff81155f5e:       48 89 fb                mov    %rdi,%rbx
> ffffffff81155f61:       48 83 ec 08             sub    $0x8,%rsp
> ffffffff81155f65:       9c                      pushfq
> ffffffff81155f66:       41 5c                   pop    %r12
> ffffffff81155f68:       fa                      cli
> ffffffff81155f69:       e8 72 5c fd ff          callq  ffffffff8112bbe0
> <trace_hardirqs_off>
> ffffffff81155f6e:       4c 8d 6b 50             lea    0x50(%rbx),%r13
> ffffffff81155f72:       45 31 c9                xor    %r9d,%r9d
> ffffffff81155f75:       31 c9                   xor    %ecx,%ecx
> ffffffff81155f77:       31 d2                   xor    %edx,%edx
> ffffffff81155f79:       31 f6                   xor    %esi,%esi
> ffffffff81155f7b:       4c 89 ef                mov    %r13,%rdi
> ffffffff81155f7e:       48 c7 04 24 55 5f 15    movq
> $0xffffffff81155f55,(%rsp)
> ffffffff81155f85:       81
> ffffffff81155f86:       41 b8 01 00 00 00       mov    $0x1,%r8d
> ffffffff81155f8c:       e8 3f b3 fd ff          callq  ffffffff811312d0
> <lock_acquire>
> ffffffff81155f91:       48 c7 c2 55 5f 15 81    mov
> $0xffffffff81155f55,%rdx
> ffffffff81155f98:       be 01 00 00 00          mov    $0x1,%esi
> ffffffff81155f9d:       4c 89 ef                mov    %r13,%rdi
> ffffffff81155fa0:       e8 0b b5 fd ff          callq  ffffffff811314b0
> <lock_release>
> ffffffff81155fa5:       41 f7 c4 00 02 00 00    test   $0x200,%r12d
> ffffffff81155fac:       75 52                   jne    ffffffff81156000
> <del_timer_sync+0xb0>
> ffffffff81155fae:       41 54                   push   %r12
> ffffffff81155fb0:       9d                      popfq
> ffffffff81155fb1:       e8 2a 5c fd ff          callq  ffffffff8112bbe0
> <trace_hardirqs_off>
> ffffffff81155fb6:       65 8b 05 5b 66 eb 7e    mov
> %gs:0x7eeb665b(%rip),%eax        # c618 <__preempt_count>
> ffffffff81155fbd:       a9 00 00 0f 00          test   $0xf0000,%eax
> ffffffff81155fc2:       74 25                   je     ffffffff81155fe9
> <del_timer_sync+0x99>
> ffffffff81155fc4:       f6 43 2a 20             testb  $0x20,0x2a(%rbx)
> ffffffff81155fc8:       75 1f                   jne    ffffffff81155fe9
> <del_timer_sync+0x99>
> ffffffff81155fca:       be 61 04 00 00          mov    $0x461,%esi
> ffffffff81155fcf:       48 c7 c7 09 87 f3 81    mov
> $0xffffffff81f38709,%rdi
> ffffffff81155fd6:       e8 15 03 f8 ff          callq  ffffffff810d62f0
> <warn_slowpath_null>
> ffffffff81155fdb:       48 89 df                mov    %rbx,%rdi
> ffffffff81155fde:       e8 fd fe ff ff          callq  ffffffff81155ee0
> <try_to_del_timer_sync>
> ffffffff81155fe3:       85 c0                   test   %eax,%eax
> ffffffff81155fe5:       79 0e                   jns    ffffffff81155ff5
> <del_timer_sync+0xa5>
> ffffffff81155fe7:       f3 90                   pause
> ffffffff81155fe9:       48 89 df                mov    %rbx,%rdi
> ffffffff81155fec:       e8 ef fe ff ff          callq  ffffffff81155ee0
> <try_to_del_timer_sync>
> ffffffff81155ff1:       85 c0                   test   %eax,%eax
> ffffffff81155ff3:       78 f2                   js     ffffffff81155fe7
> <del_timer_sync+0x97>
> ffffffff81155ff5:       48 83 c4 08             add    $0x8,%rsp
> ffffffff81155ff9:       5b                      pop    %rbx
> ffffffff81155ffa:       41 5c                   pop    %r12
> ffffffff81155ffc:       41 5d                   pop    %r13
> ffffffff81155ffe:       5d                      pop    %rbp
> ffffffff81155fff:       c3                      retq
> ffffffff81156000:       e8 4b 8b fd ff          callq  ffffffff8112eb50
> <trace_hardirqs_on>
> ffffffff81156005:       41 54                   push   %r12
> ffffffff81156007:       9d                      popfq
> ffffffff81156008:       eb ac                   jmp    ffffffff81155fb6
> <del_timer_sync+0x66>
> ffffffff8115600a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
>

OK, I cannot say much about your analysis.

The full make-lines look different.

GCC for example has '-mfentry -DCC_USING_FENTRY'.

[ CLANG ]

mycompiler -Wp,-MD,kernel/.workqueue.o.d  -nostdinc -isystem
/opt/llvm-toolchain-3.8.0rc3/bin/../lib/clang/3.8.0/include -nostdinc
-isystem /opt/llvm-toolchain-3.8.0rc3/bin/../lib/clang/3.8.0/include
-I./arch/x86/include -Iarch/x86/include/generated/uapi
-Iarch/x86/include/generated  -Iinclude -I./arch/x86/include/uapi
-Iarch/x86/include/generated/uapi -I./include/uapi
-Iinclude/generated/uapi -include ./include/linux/kconfig.h
-D__KERNEL__ -Qunused-arguments -Wno-unknown-warning-option -Wall
-Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing
-fno-common -Werror-implicit-function-declaration -Wno-format-security
-std=gnu89 -no-integrated-as -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
-mno-avx -m64 -mtune=generic -mno-red-zone -mcmodel=kernel
-funit-at-a-time -DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1
-DCONFIG_AS_CFI_SIGNAL_FRAME=1 -DCONFIG_AS_CFI_SECTIONS=1
-DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1 -DCONFIG_AS_CRC32=1
-DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe -Wno-sign-compare
-fno-asynchronous-unwind-tables -O2 -Wframe-larger-than=1024
-fno-stack-protector -Wno-unused-variable
-Wno-format-invalid-specifier -Wno-gnu -Wno-asm-operand-widths
-Wno-initializer-overrides -fno-builtin -Wno-tautological-compare
-mno-global-merge -fno-omit-frame-pointer -fno-optimize-sibling-calls
-pg -Wdeclaration-after-statement -Wno-pointer-sign
-fno-strict-overflow -Werror=implicit-int -Werror=strict-prototypes
-Werror=date-time -Wno-initializer-overrides -Wno-unused-value
-Wno-format -Wno-unknown-warning-option -Wno-sign-compare
-Wno-format-zero-length -Wno-uninitialized    -D"KBUILD_STR(s)=#s"
-D"KBUILD_BASENAME=KBUILD_STR(workqueue)"
-D"KBUILD_MODNAME=KBUILD_STR(workqueue)" -c -o kernel/.tmp_workqueue.o
kernel/workqueue.c

mycompiler -Wp,-MD,kernel/time/.timer.o.d  -nostdinc -isystem
/opt/llvm-toolchain-3.8.0rc3/bin/../lib/clang/3.8.0/include -nostdinc
-isystem /opt/llvm-toolchain-3.8.0rc3/bin/../lib/clang/3.8.0/include
-I./arch/x86/include -Iarch/x86/include/generated/uapi
-Iarch/x86/include/generated  -Iinclude -I./arch/x86/include/uapi
-Iarch/x86/include/generated/uapi -I./include/uapi
-Iinclude/generated/uapi -include ./include/linux/kconfig.h
-D__KERNEL__ -Qunused-arguments -Wno-unknown-warning-option -Wall
-Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing
-fno-common -Werror-implicit-function-declaration -Wno-format-security
-std=gnu89 -no-integrated-as -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
-mno-avx -m64 -mtune=generic -mno-red-zone -mcmodel=kernel
-funit-at-a-time -DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1
-DCONFIG_AS_CFI_SIGNAL_FRAME=1 -DCONFIG_AS_CFI_SECTIONS=1
-DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1 -DCONFIG_AS_CRC32=1
-DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe -Wno-sign-compare
-fno-asynchronous-unwind-tables -O2 -Wframe-larger-than=1024
-fno-stack-protector -Wno-unused-variable
-Wno-format-invalid-specifier -Wno-gnu -Wno-asm-operand-widths
-Wno-initializer-overrides -fno-builtin -Wno-tautological-compare
-mno-global-merge -fno-omit-frame-pointer -fno-optimize-sibling-calls
-pg -Wdeclaration-after-statement -Wno-pointer-sign
-fno-strict-overflow -Werror=implicit-int -Werror=strict-prototypes
-Werror=date-time -Wno-initializer-overrides -Wno-unused-value
-Wno-format -Wno-unknown-warning-option -Wno-sign-compare
-Wno-format-zero-length -Wno-uninitialized    -D"KBUILD_STR(s)=#s"
-D"KBUILD_BASENAME=KBUILD_STR(timer)"
-D"KBUILD_MODNAME=KBUILD_STR(timer)" -c -o kernel/time/.tmp_timer.o
kernel/time/timer.c

[ GCC ]

mycompiler -Wp,-MD,kernel/.workqueue.o.d  -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -I./arch/x86/include
-Iarch/x86/include/generated/uapi -Iarch/x86/include/generated
-Iinclude -I./arch/x86/include/uapi -Iarch/x86/include/generated/uapi
-I./include/uapi -Iinclude/generated/uapi -include
./include/linux/kconfig.h -D__KERNEL__ -Wall -Wundef
-Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common
-Werror-implicit-function-declaration -Wno-format-security -std=gnu89
-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1
-falign-loops=1 -mno-80387 -mno-fp-ret-in-387
-mpreferred-stack-boundary=3 -mtune=generic -mno-red-zone
-mcmodel=kernel -funit-at-a-time -maccumulate-outgoing-args
-DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1
-DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1
-DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe
-Wno-sign-compare -fno-asynchronous-unwind-tables
-fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0
-Wframe-larger-than=1024 -fno-stack-protector
-Wno-unused-but-set-variable -fno-omit-frame-pointer
-fno-optimize-sibling-calls -fno-var-tracking-assignments -pg -mfentry
-DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign
-fno-strict-overflow -fconserve-stack -Werror=implicit-int
-Werror=strict-prototypes -Werror=date-time -DCC_HAVE_ASM_GOTO
-D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(workqueue)"
-D"KBUILD_MODNAME=KBUILD_STR(workqueue)" -c -o kernel/.tmp_workqueue.o
kernel/workqueue.c

mycompiler -Wp,-MD,kernel/time/.timer.o.d  -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -I./arch/x86/include
-Iarch/x86/include/generated/uapi -Iarch/x86/include/generated
-Iinclude -I./arch/x86/include/uapi -Iarch/x86/include/generated/uapi
-I./include/uapi -Iinclude/generated/uapi -include
./include/linux/kconfig.h -D__KERNEL__ -Wall -Wundef
-Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common
-Werror-implicit-function-declaration -Wno-format-security -std=gnu89
-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1
-falign-loops=1 -mno-80387 -mno-fp-ret-in-387
-mpreferred-stack-boundary=3 -mtune=generic -mno-red-zone
-mcmodel=kernel -funit-at-a-time -maccumulate-outgoing-args
-DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1
-DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1
-DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe
-Wno-sign-compare -fno-asynchronous-unwind-tables
-fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0
-Wframe-larger-than=1024 -fno-stack-protector
-Wno-unused-but-set-variable -fno-omit-frame-pointer
-fno-optimize-sibling-calls -fno-var-tracking-assignments -pg -mfentry
-DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign
-fno-strict-overflow -fconserve-stack -Werror=implicit-int
-Werror=strict-prototypes -Werror=date-time -DCC_HAVE_ASM_GOTO
-D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(timer)"
-D"KBUILD_MODNAME=KBUILD_STR(timer)" -c -o kernel/time/.tmp_timer.o
kernel/time/timer.c

I can try to use the make-line of GCC and re-compile.

- Sedat -

Reply via email to