#2289: Needless reboxing of values when returning from a tight loop
-------------------------------------------+--------------------------------
Reporter: dons | Owner:
Type: run-time performance bug | Status: new
Priority: normal | Component: Compiler
Version: 6.8.2 | Severity: normal
Keywords: boxing, loops, performance | Testcase:
Architecture: Unknown | Os: Unknown
-------------------------------------------+--------------------------------
GHC wants to box up strict values when returning from tight inner loops,
even when they're
immediately taken apart. This leads to redundant instructions in the
bodies of tight loops,
and more code.
It affects, in particular, loops that result from fusion, which need to be
tight, but often return multiple values via unlifted pairs.
Consider this program:
{{{
{-# OPTIONS -fexcess-precision #-}
{-# LANGUAGE TypeOperators #-}
import System.Environment
import Text.Printf
import Data.Array.Vector
mean :: UArr Double -> Double
mean arr = s / fromIntegral l
where
s :*: l = foldlU k (0 :*: 0) arr :: (Double :*: Int)
k (s :*: n) x = s+x :*: n+1
main = do
[d] <- map read `fmap` getArgs
printf "%f\n" (mean (enumFromToFracU 1 d))
}}}
It generates this rather good Core (ghc 6.8.2):
{{{
$s$wfold_s1rB :: Double#
-> Int#
-> Double#
-> (# Double, Int #)
$s$wfold_s1rB =
\ (sc_s1rr :: Double#)
(sc1_s1rs :: Int#)
(sc2_s1rt :: Double#) ->
case >## sc_s1rr y_a1pr of wild4_X1no {
False ->
$s$wfold_s1rB
(+## sc_s1rr 1.0)
(+# sc1_s1rs 1)
(+## sc2_s1rt sc_s1rr);
True -> (# D# sc2_s1rt, I# sc1_s1rs #)
};
} in
case $s$wfold_s1rB 2.0 1 1.0 of ww_s1qg { (# ww1_s1qi, ww2_s1qj #) ->
case ww1_s1qi of wild4_a1mC { D# x_a1mE ->
case ww2_s1qj of wild5_aP6 { I# x1_aP8 ->
case /## x_a1mE (int2Double# x1_aP8)
of wild21_a1mK { __DEFAULT ->
D# wild21_a1mK
}}}
But note, what's this?
{{{
True -> (# D# sc2_s1rt, I# sc1_s1rs #)
};
} in
case $s$wfold_s1rB 2.0 1 1.0 of ww_s1qg { (# ww1_s1qi, ww2_s1qj #) ->
case ww1_s1qi of wild4_a1mC { D# x_a1mE ->
case ww2_s1qj of wild5_aP6 { I# x1_aP8 ->
case /## x_a1mE (int2Double# x1_aP8)
}}}
The return values of what was a strict pair are boxed, placed in an
unboxed tuple,
and then immediately unboxed and the division takes place.
Ok, let's isolate this. Here, the boxed return, from the inner loop:
{{{
mean_s19V :: Double#
-> Int#
-> Double#
-> (# Double, Int #)
mean_s19V =
\ (ds1_dD3 :: Double#)
(ds2_dD4 :: Int#)
(ds3_dD5 :: Double#) ->
case >## ds1_dD3 d#_aoG of wild4_Xw {
False ->
mean_s19V
(+## ds1_dD3 1.0)
(+# ds2_dD4 1)
(+## ds3_dD5 ds1_dD3);
True -> (# D# ds3_dD5, I# ds2_dD4 #)
};
} in
case mean_s19V 2.0 1 1.0 of wild4_Xr { (# ds1_dCV, ds2_dCW #) ->
case ds1_dCV of wild5_Xv { D# x_aoR ->
case ds2_dCW of wild6_Xy { I# y_aoS ->
case /## x_aoR (int2Double# y_aoS) of wild7_XB { __DEFAULT ->
D# wild7_XB
}}}
And the inner loop and exit:
{{{
s1bd_info:
-- what's this stuff?
leaq 32(%r12), %rax
cmpq %r15, %rax
movq %rax, %r12
ja .L17
-- ok, to business:
ucomisd 5(%rbx), %xmm5
ja .L19
movapd %xmm6, %xmm0
leaq -32(%rax), %r12
incq %rsi
addsd %xmm5, %xmm0
addsd .LC1(%rip), %xmm5
movapd %xmm0, %xmm6
jmp s1bd_info
.L19:
movq %rsi, -16(%rax)
movq $base_GHCziBase_Izh_con_info, -24(%rax)
movq $base_GHCziFloat_Dzh_con_info, -8(%rax)
movsd %xmm6, (%rax)
leaq -7(%rax), %rbx
leaq -23(%rax), %rsi
jmp *(%rbp)
}}}
Now, I can avoid the reboxing manually:
{{{
mean_s19R :: Double#
-> Int#
-> Double#
-> (# Double#, Int# #)
mean_s19R =
\ (ds1_dCZ :: Double#)
(ds2_dD0 :: Int#)
(ds3_dD1 :: Double#) ->
case >## ds1_dCZ d#_aoG of wild4_Xw {
False ->
mean_s19R
(+## ds1_dCZ 1.0)
(+# ds2_dD0 1)
(+## ds3_dD1 ds1_dCZ);
True -> (# ds3_dD1, ds2_dD0 #)
};
} in
case mean_s19R 2.0 1 1.0 of wild4_Xr { (# x_aoR, y_aoS #) ->
case /## x_aoR (int2Double# y_aoS) of wild5_Xv { __DEFAULT ->
D# wild5_Xv
}}}
And we get:
{{{
s1b9_info:
-- hey , our junk is gone!
ucomisd 5(%rbx), %xmm5
ja .L17
movapd %xmm6, %xmm0
incq %rsi
addsd %xmm5, %xmm0
addsd .LC1(%rip), %xmm5
movapd %xmm0, %xmm6
jmp s1b9_info
-- cool, that was it, let's go home:
.L17:
movapd %xmm6, %xmm5
movq %rsi, %rbx
jmp *(%rbp)
}}}
Which is a much better result. The loop is tighter.
What can be done here?
--
Ticket URL: <http://hackage.haskell.org/trac/ghc/ticket/2289>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler_______________________________________________
Glasgow-haskell-bugs mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/glasgow-haskell-bugs