#2253: Native code generator could do better
-----------------------------------------+----------------------------------
Reporter: dons | Owner:
Type: run-time performance bug | Status: new
Priority: normal | Component: Compiler (NCG)
Version: 6.8.2 | Severity: normal
Keywords: | Testcase:
Architecture: x86_64 (amd64) | Os: Unknown
-----------------------------------------+----------------------------------
An example set of programs that came up in the ndp library, where the C
backend outperforms
the current native code generator. Logging them here so we don't forget to
check again with
the new backend.
== Program 1 ==
{{{
import Data.Array.Vector
import Data.Bits
main = print . sumU $ zipWith3U (\x y z -> x * y * z)
(enumFromToU 1 (100000000 :: Int))
(enumFromToU 2 (100000001 :: Int))
(enumFromToU 7 (100000008 :: Int))
}}}
Core:
{{{
Main.$s$wfold =
\ (sc_sPH :: Int#)
(sc1_sPI :: Int#)
(sc2_sPJ :: Int#)
(sc3_sPK :: Int#) ->
case ># sc2_sPJ 100000000 of wild_aJo {
False ->
case ># sc1_sPI 100000001 of wild1_XK6 {
False ->
case ># sc_sPH 100000008 of wild2_XKd {
False ->
Main.$s$wfold
(+# sc_sPH 1)
(+# sc1_sPI 1)
(+# sc2_sPJ 1)
(+# sc3_sPK (*# (*# sc2_sPJ sc1_sPI) sc_sPH));
True -> sc3_sPK
};
True -> sc3_sPK
};
True -> sc3_sPK
}
}}}}
Which is great.
C backend:
{{{
Main_zdszdwfold_info:
.text
.p2align 4,,15
.text
.align 8
.type Main_zdszdwfold_info, @function
cmpq $100000000, %r8
jg .L9
cmpq $100000001, %rdi
jg .L9
cmpq $100000008, %rsi
jg .L9
movq %r8, %rdx
incq %r8
imulq %rdi, %rdx
incq %rdi
imulq %rsi, %rdx
incq %rsi
addq %rdx, %r9
jmp Main_zdszdwfold_info
.L5:
.L7:
.p2align 6,,7
.L9:
movq %r9, %rbx
jmp *(%rbp)
}}}
Native code generator:
{{{
Main_zdszdwfold_info:
cmpq $100000000,%r8
jg .LcRP
cmpq $100000001,%rdi
jg .LcRR
cmpq $100000008,%rsi
jg .LcRU
movq %rdi,%rax
imulq %rsi,%rax
movq %r8,%rcx
imulq %rax,%rcx
movq %r9,%rax
addq %rcx,%rax
leaq 1(%r8),%rcx
leaq 1(%rdi),%rdx
incq %rsi
movq %rdx,%rdi
movq %rcx,%r8
movq %rax,%r9
jmp Main_zdszdwfold_info
.LcRP:
movq %r9,%rbx
jmp *(%rbp)
.LcRR:
movq %r9,%rbx
jmp *(%rbp)
.LcRU:
movq %r9,%rbx
jmp *(%rbp)
}}}
Runtime performance:
C backend: 0.269
Asm backend: 0.410s
== Program 2 ==
Source:
{{{
import Data.Array.Vector
import Data.Bits
main = print . sumU . mapU (`shiftL` 2) $
appendU (replicateU 1000000000 (1::Int))
(replicateU 1000000000 (7::Int))
}}}
Core:
{{{
$s$wfold_rPr =
\ (sc_sOw :: Int#) (sc1_sOx :: Int#) ->
case sc_sOw of wild_X1j {
__DEFAULT -> $s$wfold_rPr (+# wild_X1j 1) (+# sc1_sOx 28);
1000000000 -> sc1_sOx
}
}}}
Runtime:
Native backend: 2.637
C backend: 2.365
--
Ticket URL: <http://hackage.haskell.org/trac/ghc/ticket/2253>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler_______________________________________________
Glasgow-haskell-bugs mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/glasgow-haskell-bugs