#4308: LLVM compiles Updates.cmm badly
---------------------------------+------------------------------------------
    Reporter:  dterei            |       Owner:  dterei                 
        Type:  bug               |      Status:  new                    
    Priority:  normal            |   Component:  Compiler (LLVM)        
     Version:  6.13              |    Keywords:                         
    Testcase:                    |   Blockedby:                         
          Os:  Unknown/Multiple  |    Blocking:                         
Architecture:  x86_64 (amd64)    |     Failure:  Runtime performance bug
---------------------------------+------------------------------------------
Description changed by dterei:

Old description:

> Simon M. reported that compiled rts/Updates.cmm on x86-64 with the LLVM
> backend produced some pretty bad code. The ncg produces this:
>
> stg_upd_frame_info:
> .Lco:
>        movq 8(%rbp),%rax
>        addq $16,%rbp
>        movq %rbx,8(%rax)
>        movq $stg_BLACKHOLE_info,0(%rax)
>        movq %rax,%rcx
>        andq $-1048576,%rcx
>        movq %rax,%rdx
>        andq $1044480,%rdx
>        shrq $6,%rdx
>        orq %rcx,%rdx
>        cmpw $0,52(%rdx)
>        jne .Lcf
>        jmp *0(%rbp)
> .Lcf:
>        [...]
>
> The LLVM backend produces this though:
>
> stg_upd_frame_info:                     # @stg_upd_frame_info
> # BB#0:                                 # %co
>        subq    $104, %rsp
>        movq    8(%rbp), %rax
>        movq    %rax, 24(%rsp)          # 8-byte Spill
>        movq    %rbx, 8(%rax)
>        mfence
>        movq    $stg_BLACKHOLE_info, (%rax)
>        movq    %rax, %rcx
>        andq    $-1048576, %rcx         # imm = 0xFFFFFFFFFFF00000
>        andq    $1044480, %rax          # imm = 0xFF000
>        shrq    $6, %rax
>        addq    %rcx, %rax
>        addq    $16, %rbp
>        cmpw    $0, 52(%rax)
>        movsd   %xmm6, 88(%rsp)         # 8-byte Spill
>        movsd   %xmm5, 80(%rsp)         # 8-byte Spill
>        movss   %xmm4, 76(%rsp)         # 4-byte Spill
>        movss   %xmm3, 72(%rsp)         # 4-byte Spill
>        movss   %xmm2, 68(%rsp)         # 4-byte Spill
>        movss   %xmm1, 64(%rsp)         # 4-byte Spill
>        movq    %r9, 56(%rsp)           # 8-byte Spill
>        movq    %r8, 48(%rsp)           # 8-byte Spill
>        movq    %rdi, 40(%rsp)          # 8-byte Spill
>        movq    %rsi, 32(%rsp)          # 8-byte Spill
>        je      .LBB1_4
>
> This has two main problems:
>
>   1. mfence instruction (write barrier) isn't required. (write-write
> barriers aren't required on x86)
>   2. LLVM backend is spilling a lot of stuff unnecessarily.
>
> Both these I think are fairly easy fixes. LLVM is handling write barriers
> quite naively at the moment so 1. is easy. The spilling problem I think
> is related to a previous fix I made where I need to explicitly kill some
> of the stg registers if they aren't live across the call, otherwise LLVM
> rightly thinks they are live since I always pass the stg registers around
> (so live on entry and exit of every function unless I kill them).

New description:

 Simon M. reported that rts/Updates.cmm, when compiled on x86-64 with the
 LLVM backend, produces some pretty bad code. The NCG (native code
 generator) produces this:

 {{{
 stg_upd_frame_info:
 .Lco:
        movq 8(%rbp),%rax
        addq $16,%rbp
        movq %rbx,8(%rax)
        movq $stg_BLACKHOLE_info,0(%rax)
        movq %rax,%rcx
        andq $-1048576,%rcx
        movq %rax,%rdx
        andq $1044480,%rdx
        shrq $6,%rdx
        orq %rcx,%rdx
        cmpw $0,52(%rdx)
        jne .Lcf
        jmp *0(%rbp)
 .Lcf:
        [...]
 }}}

 The LLVM backend produces this though:

 {{{
 stg_upd_frame_info:                     # @stg_upd_frame_info
 # BB#0:                                 # %co
        subq    $104, %rsp
        movq    8(%rbp), %rax
        movq    %rax, 24(%rsp)          # 8-byte Spill
        movq    %rbx, 8(%rax)
        mfence
        movq    $stg_BLACKHOLE_info, (%rax)
        movq    %rax, %rcx
        andq    $-1048576, %rcx         # imm = 0xFFFFFFFFFFF00000
        andq    $1044480, %rax          # imm = 0xFF000
        shrq    $6, %rax
        addq    %rcx, %rax
        addq    $16, %rbp
        cmpw    $0, 52(%rax)
        movsd   %xmm6, 88(%rsp)         # 8-byte Spill
        movsd   %xmm5, 80(%rsp)         # 8-byte Spill
        movss   %xmm4, 76(%rsp)         # 4-byte Spill
        movss   %xmm3, 72(%rsp)         # 4-byte Spill
        movss   %xmm2, 68(%rsp)         # 4-byte Spill
        movss   %xmm1, 64(%rsp)         # 4-byte Spill
        movq    %r9, 56(%rsp)           # 8-byte Spill
        movq    %r8, 48(%rsp)           # 8-byte Spill
        movq    %rdi, 40(%rsp)          # 8-byte Spill
        movq    %rsi, 32(%rsp)          # 8-byte Spill
        je      .LBB1_4
 }}}

 This has two main problems:

   1. The mfence instruction (write barrier) isn't required: write-write
 barriers aren't needed on x86, since it does not reorder stores with
 other stores.
   2. The LLVM backend is spilling a lot of registers unnecessarily.

 I think both of these are fairly easy to fix. The LLVM backend currently
 handles write barriers quite naively, so problem 1 is easy. I think the
 spilling problem is related to a previous fix I made, where I need to
 explicitly kill some of the STG registers if they aren't live across the
 call; otherwise LLVM rightly thinks they are live, since I always pass the
 STG registers around (so they are live on entry to and exit from every
 function unless I kill them).

--

-- 
Ticket URL: <http://hackage.haskell.org/trac/ghc/ticket/4308#comment:2>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler
_______________________________________________
Glasgow-haskell-bugs mailing list
Glasgow-haskell-bugs@haskell.org
http://www.haskell.org/mailman/listinfo/glasgow-haskell-bugs

Reply via email to