Hi again, Still playing with the Mersenne Twister and here is the updated 64 bit version so that there are not so many constructor calls on next64 (together with updated compiling flags).
I was wondering why different runs can have such different run times and the cause was found to be my system: also the C version running times can vary (usually 0.65 but sometimes 0.3). The 64 bit version took usually about 1.1 or 1.2 seconds while 32bit version required only 0.78 (against 0.65 with C for both 32 and 64 bit versions). Since the real work horse here is the next64 function, I took a look of Core. There seems to be an extra case-statement in 64bit version and this might explain the performance drop (about 6 or 7 lines below _DEFAULT text on both versions below). Relevant parts of the Core below, code attached. It is very possible that I'm missing something obvious here. So what is happening here? :) Thanks again for any comments! br, Isto -------------------------------------------- Core (32 and 64 nexts) Rec { Mersenne.$wnext64 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word64 -> GHC.Prim.Int# -> GHC.Prim.State# GHC.Prim.RealWorld -> (# GHC.Prim.State# GHC.Prim.RealWorld, (GHC.Word.Word64, GHC.Base.Int) #) [GlobalId] [Arity 3 Str: DmdType LLL] Mersenne.$wnext64 = \ (w_s2Zq :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word64) (ww_s2Zt :: GHC.Prim.Int#) (w1_s2Zv :: GHC.Prim.State# GHC.Prim.RealWorld) -> case ww_s2Zt of ds_X2F1 { __DEFAULT -> case w_s2Zq of wild_a2Pp { Data.Array.Base.STUArray ds2_a2Pr ds3_a2Ps marr#_a2Pt -> case GHC.Prim.readWord64Array# @ GHC.Prim.RealWorld marr#_a2Pt ds_X2F1 w1_s2Zv of wild2_a2PA { (# s2#_a2PC, e#_a2PD #) -> (# s2#_a2PC, ((case lit_r34C of wild1_a2Ol { GHC.Word.W64# y#_a2On -> let { ww1_a2NY [Just L] :: GHC.Prim.Word# [Str: DmdType] ww1_a2NY = GHC.Prim.xor# e#_a2PD (GHC.Prim.and# (GHC.Prim.uncheckedShiftRL# e#_a2PD 29) __word 6148914691236517205) } in let { ww2_X2Q0 [Just L] :: GHC.Prim.Word# [Str: DmdType] ww2_X2Q0 = GHC.Prim.xor# ww1_a2NY (GHC.Prim.and# (GHC.Prim.uncheckedShiftL# ww1_a2NY 17) __word 8202884508482404352) } in let { ww3_X2QE [Just L] :: GHC.Prim.Word# [Str: DmdType] ww3_X2QE = GHC.Prim.xor# ww2_X2Q0 (GHC.Prim.and# (GHC.Prim.uncheckedShiftL# ww2_X2Q0 37) y#_a2On) } in GHC.Word.W64# (GHC.Prim.xor# ww3_X2QE (GHC.Prim.uncheckedShiftRL# ww3_X2QE 43)) }), (GHC.Base.I# (GHC.Prim.+# ds_X2F1 1))) #) } }; 312 -> case Mersenne.generateNumbers64 w_s2Zq w1_s2Zv of wild_a2DL { (# new_s_a2DN, a87_a2DO #) -> case Mersenne.$wnext64 w_s2Zq 0 new_s_a2DN of wild1_X2Fy { (# new_s1_X2FB, a871_X2FD #) -> case a871_X2FD of wild2_Xar { (w2_aU2, iN_aU3) -> (# new_s1_X2FB, wild2_Xar #) } } } } end Rec } Mersenne.next64 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word64 -> GHC.Base.Int -> GHC.IOBase.IO (GHC.Word.Word64, GHC.Base.Int) [GlobalId] [Arity 3 Worker Mersenne.$wnext64 Str: DmdType LU(L)L] Mersenne.next64 = __inline_me (\ (w_s2Zq :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word64) (w1_s2Zr :: GHC.Base.Int) (w2_s2Zv :: GHC.Prim.State# GHC.Prim.RealWorld) -> case w1_s2Zr of w3_X30R { GHC.Base.I# ww_s2Zt -> Mersenne.$wnext64 w_s2Zq ww_s2Zt w2_s2Zv }) Rec { Mersenne.$wnext32 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word32 -> GHC.Prim.Int# -> GHC.Prim.State# GHC.Prim.RealWorld -> (# GHC.Prim.State# GHC.Prim.RealWorld, (GHC.Word.Word32, GHC.Base.Int) #) [GlobalId] [Arity 3 NoCafRefs Str: DmdType LLL] Mersenne.$wnext32 = \ (w_s2YJ :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word32) (ww_s2YM :: GHC.Prim.Int#) (w1_s2YO :: GHC.Prim.State# GHC.Prim.RealWorld) -> case ww_s2YM of ds_X2CS { __DEFAULT -> case w_s2YJ of wild_a2Hd { Data.Array.Base.STUArray ds2_a2Hf ds3_a2Hj marr#_a2Hk -> case GHC.Prim.readWord32Array# @ GHC.Prim.RealWorld marr#_a2Hk ds_X2CS w1_s2YO of wild2_a2Hr { (# s2#_a2Ht, e#_a2Hu #) -> (# s2#_a2Ht, ((let { ww1_a2Fr [Just L] :: GHC.Prim.Word# [Str: DmdType] ww1_a2Fr = GHC.Prim.xor# e#_a2Hu (GHC.Prim.uncheckedShiftRL# e#_a2Hu 11) } in let { ww2_X2GX [Just L] :: GHC.Prim.Word# [Str: DmdType] ww2_X2GX = GHC.Prim.xor# ww1_a2Fr (GHC.Prim.and# (GHC.Prim.narrow32Word# (GHC.Prim.uncheckedShiftL# ww1_a2Fr 7)) __word 2636928640) } in let { ww3_X2Hp [Just L] :: GHC.Prim.Word# [Str: DmdType] ww3_X2Hp = GHC.Prim.xor# ww2_X2GX (GHC.Prim.and# (GHC.Prim.narrow32Word# (GHC.Prim.uncheckedShiftL# ww2_X2GX 15)) __word 4022730752) } in GHC.Word.W32# (GHC.Prim.xor# ww3_X2Hp (GHC.Prim.uncheckedShiftRL# ww3_X2Hp 18))), (GHC.Base.I# (GHC.Prim.+# ds_X2CS 1))) #) } }; 624 -> case Mersenne.generateNumbers32 w_s2YJ w1_s2YO of wild_a2DL { (# new_s_a2DN, a87_a2DO #) -> case Mersenne.$wnext32 w_s2YJ 0 new_s_a2DN of wild1_X2F2 { (# new_s1_X2F5, a871_X2F7 #) -> case a871_X2F7 of wild2_X80 { (w2_aSH, iN_aSI) -> (# new_s1_X2F5, wild2_X80 #) } } } } end Rec } Mersenne.next32 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word32 -> GHC.Base.Int -> GHC.IOBase.IO (GHC.Word.Word32, GHC.Base.Int) [GlobalId] [Arity 3 Worker Mersenne.$wnext32 NoCafRefs Str: DmdType LU(L)L] Mersenne.next32 = __inline_me (\ (w_s2YJ :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word32) (w1_s2YK :: GHC.Base.Int) (w2_s2YO :: GHC.Prim.State# GHC.Prim.RealWorld) -> case w1_s2YK of w3_X2ZO { GHC.Base.I# ww_s2YM -> Mersenne.$wnext32 w_s2YJ ww_s2YM w2_s2YO })
module Mersenne where import Data.Bits import Data.Word import Data.Array.Base import Data.Array.MArray import Data.Array.IO data MT32 = MT32 (IOUArray Int Word32) Int data MT64 = MT64 (IOUArray Int Word64) Int last32bitsof :: Word32 -> Word32 last32bitsof a = a .&. 0xffffffff -- == (2^32-1) lm32 = 0x7fffffff :: Word32 um32 = 0x80000000 :: Word32 mA32 = 0x9908b0df :: Word32 -- == 2567483615 -- Array of length 624. initialiseGenerator32 :: Int -> IO MT32 initialiseGenerator32 seed = do let s = last32bitsof (fromIntegral seed)::Word32 mt <- newArray (0,623) (0::Word32) unsafeWrite mt 0 s mtLoop32 mt s 1 generateNumbers32 mt return (MT32 mt 0) mtLoop32 :: (IOUArray Int Word32) -> Word32 -> Int -> IO () mtLoop32 mt lastNro n = loop lastNro n where loop :: Word32 -> Int -> IO () loop lastNro 624 = return () loop lastNro n = do let n1 = lastNro `xor` (shiftR lastNro 30) new = (1812433253 * n1 + (fromIntegral n)::Word32) unsafeWrite mt n new loop new $! (n+1) generateNumbers32 :: (IOUArray Int Word32) -> IO () generateNumbers32 mt = do gLoop1 0 gLoop2 227 wL <- unsafeRead mt 623 w0 <- unsafeRead mt 0 w396 <- unsafeRead mt 396 let y = (wL .&. um32) .|. (w0 .&. lm32) :: Word32 if even y then unsafeWrite mt 623 (w396 `xor` (shiftR y 1)) else unsafeWrite mt 623 (w396 `xor` (shiftR y 1) `xor` mA32) return () where gLoop1 :: Int -> IO () gLoop1 227 = return () gLoop1 i = do wi <- unsafeRead mt i wi1 <- unsafeRead mt (i+1) w3 <- unsafeRead mt (i+397) let y = (wi .&. um32) .|. (wi1 .&. lm32) if even y then unsafeWrite mt i (w3 `xor` (shiftR y 1)) else unsafeWrite mt i (w3 `xor` (shiftR y 1) `xor` mA32) gLoop1 $! (i+1) gLoop2 :: Int -> IO () gLoop2 623 = return () gLoop2 i = do wi <- unsafeRead mt i wi1 <- unsafeRead mt (i+1) w3 <- unsafeRead mt (i-227) let y = (wi .&. um32) .|. (wi1 .&. lm32) if even y then unsafeWrite mt i (w3 `xor` (shiftR y 1)) else unsafeWrite mt i (w3 `xor` (shiftR y 1) `xor` mA32) gLoop2 $! (i+1) next32 :: IOUArray Int Word32 -> Int -> IO (Word32, Int) next32 mt 624 = do generateNumbers32 mt (w,iN) <- next32 mt 0 return (w,iN) next32 mt i = do y <- unsafeRead mt i let y1 = y `xor` (shiftR y 11) y2 = y1 `xor` ((shiftL y1 7 ) .&. 0x9d2c5680) -- == 2636928640 y3 = y2 `xor` ((shiftL y2 15) .&. 0xefc60000) -- == 4022730752 y4 = y3 `xor` (shiftR y3 18) return $ (y4, (i+1)) mA64 = 0xB5026F5AA96619E9 :: Word64 um64 = 0xFFFFFFFF80000000 :: Word64 lm64 = 0x7FFFFFFF :: Word64 initialiseGenerator64 :: Int -> IO (MT64) initialiseGenerator64 seed = do let s = (fromIntegral seed)::Word64 mt <- newArray (0,311) (0::Word64) unsafeWrite mt 0 s mtLoop64 mt s 1 generateNumbers64 mt return (MT64 mt 0) mtLoop64 :: (IOUArray Int Word64) -> Word64 -> Int -> IO () mtLoop64 mt lastNro n = loop lastNro n where loop :: Word64 -> Int -> IO () loop lastNro 312 = return () loop lastNro n = do let n1 = lastNro `xor` (shiftR lastNro 62) new = (6364136223846793005 * n1 + (fromIntegral n)::Word64) unsafeWrite mt n new loop new $! (n+1) generateNumbers64 :: (IOUArray Int Word64) -> IO () generateNumbers64 mt = do gLoop1 0 gLoop2 156 wL <- unsafeRead mt 311 w0 <- unsafeRead mt 0 w155 <- unsafeRead mt 155 let y = (wL .&. um64) .|. (w0 .&. lm64) :: Word64 if even y then unsafeWrite mt 311 (w155 `xor` (shiftR y 1)) else unsafeWrite mt 311 (w155 `xor` (shiftR y 1) `xor` mA64) return () where gLoop1 :: Int -> IO () gLoop1 156 = return () gLoop1 i = do wi <- unsafeRead mt i wi1 <- unsafeRead mt (i+1) w3 <- unsafeRead mt (i+156) let y = (wi .&. um64) .|. (wi1 .&. lm64) if even y then unsafeWrite mt i (w3 `xor` (shiftR y 1)) else unsafeWrite mt i (w3 `xor` (shiftR y 1) `xor` mA64) gLoop1 $! (i+1) gLoop2 :: Int -> IO () gLoop2 311 = return () gLoop2 i = do wi <- unsafeRead mt i wi1 <- unsafeRead mt (i+1) w3 <- unsafeRead mt (i-156) let y = (wi .&. um64) .|. (wi1 .&. lm64) if even y then unsafeWrite mt i (w3 `xor` (shiftR y 1)) else unsafeWrite mt i (w3 `xor` (shiftR y 1) `xor` mA64) gLoop2 $! (i+1) next64 :: IOUArray Int Word64 -> Int -> IO (Word64, Int) next64 mt 312 = do generateNumbers64 mt (w,iN) <- next64 mt 0 return (w,iN) next64 mt i = do y <- unsafeRead mt i let y1 = y `xor` ((shiftR y 29) .&. 0x5555555555555555) y2 = y1 `xor` ((shiftL y1 17) .&. 0x71D67FFFEDA60000) y3 = y2 `xor` ((shiftL y2 37) .&. 0xFFF7EEE000000000) y4 = y3 `xor` (shiftR y3 43) return $! (y4, (i+1))
module Main where -- Compile eg with -- ghc -O3 -optc-O3 -optc-ffast-math -fexcess-precision -funfolding-use-threshold=16 --make testMT -- -- -ddump-simpl makes the Core. import Mersenne genRNums32 :: MT32 -> Int -> IO MT32 genRNums32 (MT32 mt pos) nCnt = gRN nCnt pos where gRN :: Int -> Int -> IO MT32 gRN 1 iCurr = do (r,iNew) <- next32 mt iCurr putStrLn $ (show r) return (MT32 mt iNew) gRN nCnt iCurr = do (_,iNew) <- next32 mt iCurr gRN (nCnt-1) $! iNew genRNums64 :: MT64 -> Int -> IO MT64 genRNums64 (MT64 mt pos) nCnt = gRN nCnt pos where gRN :: Int -> Int -> IO MT64 gRN 1 iCurr = do (r,iNew) <- next64 mt iCurr putStrLn $ (show r) return (MT64 mt iNew) gRN nCnt iCurr = do (_,iNew) <- next64 mt iCurr gRN (nCnt-1) $! iNew main = do putStrLn "Testing Mersenne Twister." -- mt32 <- initialiseGenerator32 100 -- genRNums32 mt32 10000000 mt64 <- initialiseGenerator64 100 genRNums64 mt64 10000000
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe