This is a work in progress; see the comments for notable infelicities. But since it's good to get continuous feedback, I've sent a copy of the patch via mail.
Signed-off-by: Edward Z. Yang <[email protected]>
---
 compiler/cmm/CmmCPS.hs         |    5 +-
 compiler/cmm/CmmExpr.hs        |    2 +
 compiler/cmm/CmmSpillReload.hs |  256 +++++++++++++++++++++++++---------------
 compiler/utils/UniqFM.lhs      |    6 +-
 4 files changed, 171 insertions(+), 98 deletions(-)

diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmCPS.hs
index b9f6db3..d386d9f 100644
--- a/compiler/cmm/CmmCPS.hs
+++ b/compiler/cmm/CmmCPS.hs
@@ -95,9 +95,8 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
               (dualLivenessWithInsertion procPoints) g
               -- Insert spills at defns; reloads at return points
        g <-
-           -- pprTrace "pre insertLateReloads" (ppr g) $
-           runOptimization $ insertLateReloads g -- Duplicate reloads just before uses
-       dump Opt_D_dump_cmmz "Post late reloads" g
+           runOptimization $ insertLateAssignments g
+       dump Opt_D_dump_cmmz "Post late assignments" g
        g <- -- pprTrace "post insertLateReloads" (ppr g) $
            dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination"
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 3ae2996..c1be6ba 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -124,6 +124,8 @@ cmmExprType (CmmReg reg) = cmmRegType reg
 cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args)
 cmmExprType (CmmRegOff reg _) = cmmRegType reg
 cmmExprType (CmmStackSlot _ _) = bWord -- an address
+-- Careful though: what is stored at the stack slot may be bigger than
+-- an address
 
 cmmLitType :: CmmLit -> CmmType
 cmmLitType (CmmInt _ width) = cmmBits width
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
index 4e2dd38..3cceee1 100644
--- a/compiler/cmm/CmmSpillReload.hs
+++ b/compiler/cmm/CmmSpillReload.hs
@@ -14,9 +14,7 @@ module CmmSpillReload
   --, insertSpillsAndReloads --- XXX todo check live-in at entry against formals
   , dualLivenessWithInsertion
 
-  , availRegsLattice
-  , cmmAvailableReloads
-  , insertLateReloads
+  , insertLateAssignments
   , removeDeadAssignmentsAndReloads
   )
 where
@@ -31,6 +29,7 @@ import Control.Monad
 import Outputable hiding (empty)
 import qualified Outputable as PP
 import UniqSet
+import UniqFM
 import Compiler.Hoopl
 
 import Data.Maybe
@@ -188,91 +187,6 @@ spill, reload :: LocalReg -> CmmNode O O
 spill  r = CmmStore  (regSlot r) (CmmReg $ CmmLocal r)
 reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r)
 
-----------------------------------------------------------------
---- sinking reloads
-
--- The idea is to compute at each point the set of registers such that
--- on every path to the point, the register is defined by a Reload
--- instruction.  Then, if a use appears at such a point, we can safely
--- insert a Reload right before the use.  Finally, we can eliminate
--- the early reloads along with other dead assignments.
-
-data AvailRegs = UniverseMinus RegSet
-               | AvailRegs RegSet
-
-
-availRegsLattice :: DataflowLattice AvailRegs
-availRegsLattice = DataflowLattice "register gotten from reloads" empty add
-    where empty = UniverseMinus emptyRegSet
-          -- | compute in the Tx monad to track whether anything has changed
-          add _ (OldFact old) (NewFact new) =
-            if join `smallerAvail` old then (SomeChange, join) else (NoChange, old)
-            where join = interAvail new old
-
-
-interAvail :: AvailRegs -> AvailRegs -> AvailRegs
-interAvail (UniverseMinus s) (UniverseMinus s') = UniverseMinus (s `plusRegSet` s')
-interAvail (AvailRegs s)     (AvailRegs s')     = AvailRegs (s `timesRegSet` s')
-interAvail (AvailRegs s)     (UniverseMinus s') = AvailRegs (s `minusRegSet` s')
-interAvail (UniverseMinus s) (AvailRegs s')     = AvailRegs (s' `minusRegSet` s)
-
-smallerAvail :: AvailRegs -> AvailRegs -> Bool
-smallerAvail (AvailRegs _)     (UniverseMinus _)  = True
-smallerAvail (UniverseMinus _) (AvailRegs _)      = False
-smallerAvail (AvailRegs s)     (AvailRegs s')     = sizeUniqSet s < sizeUniqSet s'
-smallerAvail (UniverseMinus s) (UniverseMinus s') = sizeUniqSet s > sizeUniqSet s'
-
-extendAvail :: AvailRegs -> LocalReg -> AvailRegs
-extendAvail (UniverseMinus s) r = UniverseMinus (deleteFromRegSet s r)
-extendAvail (AvailRegs s)     r = AvailRegs (extendRegSet s r)
-
-delFromAvail :: AvailRegs -> LocalReg -> AvailRegs
-delFromAvail (UniverseMinus s) r = UniverseMinus (extendRegSet s r)
-delFromAvail (AvailRegs s)     r = AvailRegs (deleteFromRegSet s r)
-
-elemAvail :: AvailRegs -> LocalReg -> Bool
-elemAvail (UniverseMinus s) r = not $ elemRegSet r s
-elemAvail (AvailRegs s)     r = elemRegSet r s
-
-cmmAvailableReloads :: CmmGraph -> FuelUniqSM (BlockEnv AvailRegs)
-cmmAvailableReloads g =
-  liftM snd $ dataflowPassFwd g [(g_entry g, fact_bot availRegsLattice)] $
-              analFwd availRegsLattice availReloadsTransfer
-
-availReloadsTransfer :: FwdTransfer CmmNode AvailRegs
-availReloadsTransfer = mkFTransfer3 (flip const) middleAvail ((mkFactBase availRegsLattice .) . lastAvail)
-
-middleAvail :: CmmNode O O -> AvailRegs -> AvailRegs
-middleAvail (CmmAssign (CmmLocal r) (CmmLoad l _)) avail
-    | l `isStackSlotOf` r = extendAvail avail r
-middleAvail (CmmAssign lhs _) avail = foldRegsDefd delFromAvail avail lhs
-middleAvail (CmmStore l (CmmReg (CmmLocal r))) avail
-    | l `isStackSlotOf` r = avail
-middleAvail (CmmStore (CmmStackSlot (RegSlot r) _) _) avail = delFromAvail avail r
-middleAvail (CmmStore {})             avail = avail
-middleAvail (CmmUnsafeForeignCall {}) _     = AvailRegs emptyRegSet
-middleAvail (CmmComment {})           avail = avail
-
-lastAvail :: CmmNode O C -> AvailRegs -> [(Label, AvailRegs)]
-lastAvail (CmmCall _ (Just k) _ _ _) _ = [(k, AvailRegs emptyRegSet)]
-lastAvail (CmmForeignCall {succ=k})  _ = [(k, AvailRegs emptyRegSet)]
-lastAvail l avail = map (\id -> (id, avail)) $ successors l
-
-insertLateReloads :: CmmGraph -> FuelUniqSM CmmGraph
-insertLateReloads g =
-  liftM fst $ dataflowPassFwd g [(g_entry g, fact_bot availRegsLattice)] $
-              analRewFwd availRegsLattice availReloadsTransfer rewrites
-  where rewrites = mkFRewrite3 first middle last
-        first _ _ = return Nothing
-        middle m avail = return $ maybe_reload_before avail m (mkMiddle m)
-        last   l avail = return $ maybe_reload_before avail l (mkLast l)
-        maybe_reload_before avail node tail =
-            let used = filterRegsUsed (elemAvail avail) node
-            in if isEmptyUniqSet used then Nothing
-               else Just $ reloadTail used tail
-        reloadTail regset t = foldl rel t $ uniqSetToList regset
-          where rel t r = mkMiddle (reload r) <*> t
-
 removeDeadAssignmentsAndReloads :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph
 removeDeadAssignmentsAndReloads procPoints g =
    liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice
@@ -287,6 +201,166 @@ removeDeadAssignmentsAndReloads procPoints g =
 
         nothing _ _ = return Nothing
 
+----------------------------------------------------------------
+--- sinking arbitrary loads
+
+-- The idea is to compute at each point an expression that could be used
+-- to calculate the contents of a register.  Then, if a use appears at
+-- such a point, we can safely insert an assignment for that register
+-- right before the use.  Finally, we can eliminate the earlier
+-- assignment along with other dead assignments.
+--
+-- XXX This code currently performs what is essentially unbounded
+-- inlining, which is not necessarily a good idea.  We should associate
+-- a cost with all expressions, and only inline if the cost is not too
+-- high.
+--
+-- XXX Interacts poorly with the CmmOpt inliner, which only inlines
+-- if there's a single use of the register.  Here, the register is
+-- assigned multiple times, so it won't actually get inlined...
+
+type AssignmentMap = UniqFM (LocalReg, WithTop CmmExpr)
+
+-- ToDo: Move this into somewhere more general (UniqFM?  That will
+-- introduce a Hoopl dependency on that file.)
+joinUFM :: JoinFun v -> JoinFun (UniqFM v)
+joinUFM eltJoin l (OldFact old) (NewFact new) = foldUFM_Directly add (NoChange, old) new
+    where add k new_v (ch, joinmap) =
+            case lookupUFM_Directly joinmap k of
+                Nothing    -> (SomeChange, addToUFM_Directly joinmap k new_v)
+                Just old_v -> case eltJoin l (OldFact old_v) (NewFact new_v) of
+                                (SomeChange, v') -> (SomeChange, addToUFM_Directly joinmap k v')
+                                (NoChange,   _)  -> (ch, joinmap)
+
+assignmentLattice :: DataflowLattice AssignmentMap
+assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
+    where -- ToDo: Factor out 'add', which is useful for any lattice map
+          -- that manually tracks registers
+#if DEBUG
+          add l (OldFact x@(r, old)) (NewFact (r', new))
+            | r /= r' = panic "assignmentLattice: distinct registers in same mapping"
+#else
+          add l (OldFact x@(r, old)) (NewFact (_, new))
+#endif
+            | otherwise =
+                case extendJoinDomain add' l (OldFact old) (NewFact new) of
+                    (NoChange,   _) -> (NoChange, x)
+                    (SomeChange, v) -> (SomeChange, (r, v))
+          add' _ (OldFact old) (NewFact new)
+            | old == new = (NoChange,   PElem old)
+            | otherwise  = (SomeChange, Top)
+-- ToDo: I feel like there's a monad squirreled away here somewhere...
+
+middleAssignment :: CmmNode O O -> AssignmentMap -> AssignmentMap
+-- Stack slots for registers are treated specially: we maintain
+-- the invariant that the stack slot will always accurately reflect the
+-- contents of a variable.  This invariant only holds because we only
+-- generate register slots during the spilling phase, and the spilling
+-- phase always spills when the register changes, so we will never see a
+-- lone 'abc = ...' (where ... is not a reload) without a subsequent
+-- spill.  (We could probably write another quick analysis to check that
+-- this invariant holds.)  If this invariant holds, then any reload
+-- (that's what an assignment to the register from a stack slot is)
+-- simply adds a register to the available assignments without
+-- invalidating any existing references to the register.
+middleAssignment (CmmAssign (CmmLocal r) e@(CmmLoad l _)) assign
+    | l `isStackSlotOf` r = addToUFM assign r (r, PElem e)
+-- When we assign anything else to a register, we invalidate all current
+-- assignments that contain a reference to that register, and then, if
+-- it's a local assignment, add this assignment to our map.  (Note: We
+-- could do something clever here for simple Hp = Hp + 8 style
+-- assignments by simply aggressively inlining that addition.)
+middleAssignment (CmmAssign reg e) assign
+    = f (mapUFM p assign)
+    where p (r', PElem e') | reg `regUsedIn` e' = (r', Top)
+          p old = old
+          -- A self-referential assignment (e.g. r = r + 1) cannot be
+          -- recorded: its right-hand side reads the old value of r.
+          f m | (CmmLocal r) <- reg, reg `regUsedIn` e = addToUFM m r (r, Top)
+              | (CmmLocal r) <- reg                    = addToUFM m r (r, PElem e)
+              | otherwise                              = m
+-- Once again, treat stores of registers to register slots specially
+middleAssignment (CmmStore l (CmmReg (CmmLocal r))) assign
+    | l `isStackSlotOf` r = assign
+-- When a store occurs, invalidate all current assignments that rely on
+-- the memory location that got clobbered.  Note that stack slots have
+-- already been handled.
+middleAssignment (CmmStore lhs rhs) assign
+    = mapUFM p assign
+    where p (r, PElem e) | (lhs, rhs) `clobbers` (r, e) = (r, Top)
+          p old = old
+-- This is tricky: how much an unsafe foreign call can clobber depends
+-- on how far along the pipeline we are.  The current choice is
+-- conservative: assume it invalidates everything.
+middleAssignment (CmmUnsafeForeignCall {}) assign = mapUFM (\(r,_) -> (r,Top)) assign
+middleAssignment (CmmComment {}) assign = assign
+
+-- Assumptions:
+--  * Stack slots do not overlap with any other memory locations
+--  * Non stack-slot stores always conflict with each other.  (This is
+--    not always the case; we could probably do something special for Hp)
+--  * Stack slots for different areas do not overlap
+--  * Stack slots within the same area and different offsets may
+--    overlap; we need to do a size check (see 'overlaps').
+clobbers :: (CmmExpr, CmmExpr) -> (LocalReg, CmmExpr) -> Bool
+clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (r', CmmLoad (ss'@CmmStackSlot{}) _)
+    | r == r', ss == ss' = False -- No-op on the stack slot
+clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
+    where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
+            = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
+          f (CmmLoad e _)    = containsStackSlot e
+          f (CmmMachOp _ es) = or (map f es)
+          f _                = False
+          -- Maybe there's an invariant broken if this actually ever
+          -- returns True
+          containsStackSlot (CmmLoad{})      = True -- load of a load, all bets off
+          containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
+          containsStackSlot (CmmStackSlot{}) = True
+          containsStackSlot _                = False
+clobbers _ (_, e) = f e
+    where f (CmmLoad (CmmStackSlot _ _) _) = False
+          f (CmmLoad{})                    = True -- conservative
+          f (CmmMachOp _ es)               = or (map f es)
+          f _                              = False
+
+-- Diagram:
+--          4    8   12
+--           s  -w-  o
+--          [ I32  ]
+--      [     F64      ]
+--      s'    -w'-     o'
+type CallSubArea = (AreaId, Int, Int) -- area, offset, width
+overlaps :: CallSubArea -> CallSubArea -> Bool
+overlaps (a, _, _) (a', _, _) | a /= a' = False
+overlaps (_, o, w) (_, o', w') =
+    let s  = o  - w
+        s' = o' - w'
+    in (s' < o) && (s < o') -- the half-open ranges [s,o) and [s',o') intersect
+
+lastAssignment :: CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
+-- Also very conservative choices
+lastAssignment (CmmCall _ (Just k) _ _ _) assign = [(k, mapUFM (\(r,_) -> (r,Top)) assign)]
+lastAssignment (CmmForeignCall {succ=k})  assign = [(k, mapUFM (\(r,_) -> (r,Top)) assign)]
+lastAssignment l assign = map (\id -> (id, assign)) $ successors l
+
+assignmentTransfer :: FwdTransfer CmmNode AssignmentMap
+assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
+
+insertLateAssignments :: CmmGraph -> FuelUniqSM CmmGraph
+insertLateAssignments g =
+  liftM fst $ dataflowPassFwd g [(g_entry g, fact_bot assignmentLattice)] $
+              analRewFwd assignmentLattice assignmentTransfer rewrites
+  where rewrites = mkFRewrite3 first middle last
+        first _ _ = return Nothing
+        middle m assign = return $ maybe_assign_before assign m (mkMiddle m)
+        last   l assign = return $ maybe_assign_before assign l (mkLast l)
+        maybe_assign_before assign node tail =
+            let used = foldRegsUsed f emptyUFM node :: UniqFM (LocalReg, CmmExpr)
+                f t r = case lookupUFM assign r of
+                            Just (r, PElem e) -> addToUFM t r (r, e)
+                            _ -> t
+            in if isNullUFM used then Nothing
+               else Just $ lateAssign used tail
+        -- XXX need to do a cost check
+        lateAssign regset t = foldl rel t $ eltsUFM regset
+          where rel t (r, e) = mkMiddle (CmmAssign (CmmLocal r) e) <*> t
 
 ---------------------
 -- prettyprinting
@@ -305,12 +379,6 @@ instance Outputable DualLive where
                          if isEmptyUniqSet stack then PP.empty
                          else (ppr_regs "live on stack =" stack)]
 
-instance Outputable AvailRegs where
-  ppr (UniverseMinus s) = if isEmptyUniqSet s then text "<everything available>"
-                          else ppr_regs "available = all but" s
-  ppr (AvailRegs s)     = if isEmptyUniqSet s then text "<nothing available>"
-                          else ppr_regs "available = " s
-
 my_trace :: String -> SDoc -> a -> a
 my_trace = if False then pprTrace else \_ _ a -> a
 
diff --git a/compiler/utils/UniqFM.lhs b/compiler/utils/UniqFM.lhs
index 293e48e..63b1724 100644
--- a/compiler/utils/UniqFM.lhs
+++ b/compiler/utils/UniqFM.lhs
@@ -20,6 +20,7 @@ and ``addToUFM\_C'' and ``Data.IntMap.insertWith'' differ in the order
 of arguments of combining function.
 
 \begin{code}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# OPTIONS -Wall #-}
 module UniqFM (
        -- * Unique-keyed mappings
@@ -45,7 +46,7 @@ module UniqFM (
        intersectUFM,
        intersectUFM_C,
        foldUFM, foldUFM_Directly,
-       mapUFM,
+       mapUFM, mapUFM_Directly,
        elemUFM, elemUFM_Directly,
        filterUFM, filterUFM_Directly,
        sizeUFM,
@@ -122,6 +123,7 @@ intersectUFM_C :: (elt1 -> elt2 -> elt3)
 foldUFM         :: (elt -> a -> a) -> a -> UniqFM elt -> a
 foldUFM_Directly:: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
 mapUFM          :: (elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2
+mapUFM_Directly :: (Unique -> elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2
 filterUFM       :: (elt -> Bool) -> UniqFM elt -> UniqFM elt
 filterUFM_Directly :: (Unique -> elt -> Bool) -> UniqFM elt -> UniqFM elt
@@ -153,6 +155,7 @@ ufmToList :: UniqFM elt -> [(Unique, elt)]
 
 \begin{code}
 newtype UniqFM ele = UFM (M.IntMap ele)
+       deriving (Eq)
 
 emptyUFM = UFM M.empty
 isNullUFM (UFM m) = M.null m
@@ -188,6 +191,7 @@ intersectUFM_C f (UFM x) (UFM y) = UFM (M.intersectionWith f x y)
 foldUFM k z (UFM m) = M.fold k z m
 foldUFM_Directly k z (UFM m) = M.foldWithKey (k . getUnique) z m
 mapUFM f (UFM m) = UFM (M.map f m)
+mapUFM_Directly f (UFM m) = UFM (M.mapWithKey (f . getUnique) m)
 filterUFM p (UFM m) = UFM (M.filter p m)
 filterUFM_Directly p (UFM m) = UFM (M.filterWithKey (p . getUnique) m)
-- 
1.7.4.2
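
A few illustrative sketches for reviewers follow; none of this is part of the patch, and the names in them (Expr, Stmt, sink, and so on) are invented for exposition.

First, a minimal model of what insertLateAssignments does, over a toy straight-line IR instead of Cmm/Hoopl. The map plays the role of AssignmentMap: each assignment records its right-hand side, any assignment invalidates recorded expressions that mention the overwritten register, and a use re-materializes the recorded assignment just before itself, leaving the earlier copy for dead-assignment elimination:

    import qualified Data.Map as M

    data Expr = Lit Int | Reg String | Add Expr Expr deriving (Eq, Show)
    data Stmt = Assign String Expr | Use String      deriving (Eq, Show)

    mentions :: String -> Expr -> Bool
    mentions r (Reg r')  = r == r'
    mentions r (Add a b) = mentions r a || mentions r b
    mentions _ (Lit _)   = False

    -- Forward pass: remember the last expression assigned to each
    -- register, and re-materialize it just before each use.
    sink :: [Stmt] -> [Stmt]
    sink = go M.empty
      where
        go _ [] = []
        go env (s@(Assign r e) : rest) =
            -- Drop every fact that reads or defines r; record the new
            -- fact unless it is self-referential (e.g. r = r + 1),
            -- since its RHS reads the old value of r.
            let pruned = M.filter (not . mentions r) (M.delete r env)
                env' | mentions r e = pruned
                     | otherwise    = M.insert r e pruned
            in s : go env' rest
        go env (s@(Use r) : rest) =
            case M.lookup r env of
                Just e  -> Assign r e : s : go env rest  -- the late assignment
                Nothing -> s : go env rest

For instance, sink [Assign "x" (Add (Reg "y") (Lit 8)), Use "x"] duplicates the assignment right before the use; in the real pipeline the first copy is then cleaned up by removeDeadAssignmentsAndReloads.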
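Second, joinUFM is just the pointwise lifting of an element join to whole maps. The same construction over Data.IntMap, together with the flat element join that assignmentLattice uses (equal facts survive a merge, unequal facts degrade to Top), looks like this; Change, Flat, joinFlat, and joinMap are made-up stand-ins, and Hoopl's block-label argument is dropped:

    import qualified Data.IntMap as IM

    data Change = NoChange | SomeChange deriving (Eq, Show)
    data Flat a = Elem a | Top          deriving (Eq, Show)

    -- Element join: agreeing facts survive, disagreeing facts go to Top.
    joinFlat :: Eq a => Flat a -> Flat a -> (Change, Flat a)
    joinFlat Top      _                 = (NoChange, Top)
    joinFlat (Elem x) (Elem y) | x == y = (NoChange, Elem x)
    joinFlat _        _                 = (SomeChange, Top)

    -- Pointwise lifting: keys only in the new map are added, keys in
    -- both are joined, and a change at any key changes the whole map.
    joinMap :: (v -> v -> (Change, v))
            -> IM.IntMap v -> IM.IntMap v -> (Change, IM.IntMap v)
    joinMap eltJoin old new = IM.foldrWithKey add (NoChange, old) new
      where
        add k newV (ch, acc) = case IM.lookup k acc of
            Nothing   -> (SomeChange, IM.insert k newV acc)
            Just oldV -> case eltJoin oldV newV of
                (SomeChange, v') -> (SomeChange, IM.insert k v' acc)
                (NoChange,  _)   -> (ch, acc)

Reporting NoChange when nothing actually grew is what lets the dataflow fixed-point computation terminate.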
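Finally, the overlap check is ordinary half-open interval intersection: a slot (area, o, w) occupies the byte range [o - w, o). A standalone version, where SubArea and subAreasOverlap are stand-ins for CallSubArea and overlaps:

    type SubArea = (Int, Int, Int)   -- (area id, offset, width)

    -- Slots in different areas never overlap; within one area, two
    -- intervals intersect exactly when each one starts before the
    -- other ends.  Strict (<) keeps adjacent slots such as [4,8) and
    -- [8,12) disjoint.
    subAreasOverlap :: SubArea -> SubArea -> Bool
    subAreasOverlap (a, _, _) (a', _, _) | a /= a' = False
    subAreasOverlap (_, o, w) (_, o', w') =
        let s  = o  - w
            s' = o' - w'
        in s' < o && s < o'

In the diagram in the patch, the I32 occupying [4,8) overlaps the F64 occupying [4,12), i.e. subAreasOverlap (0, 8, 4) (0, 12, 8) is True, but it would not overlap a second I32 occupying [8,12).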
