This is a work in progress; see the comments for notable infelicities.
But since it's good to get continuous feedback, I've sent a copy of the
patch via mail.

Signed-off-by: Edward Z. Yang <[email protected]>
---
 compiler/cmm/CmmCPS.hs         |    5 +-
 compiler/cmm/CmmExpr.hs        |    2 +
 compiler/cmm/CmmSpillReload.hs |  256 +++++++++++++++++++++++++---------------
 compiler/utils/UniqFM.lhs      |    6 +-
 4 files changed, 171 insertions(+), 98 deletions(-)

diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmCPS.hs
index b9f6db3..d386d9f 100644
--- a/compiler/cmm/CmmCPS.hs
+++ b/compiler/cmm/CmmCPS.hs
@@ -95,9 +95,8 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
                              (dualLivenessWithInsertion procPoints) g
                     -- Insert spills at defns; reloads at return points
        g     <-
-              -- pprTrace "pre insertLateReloads" (ppr g) $
-                runOptimization $ insertLateReloads g -- Duplicate reloads just before uses
-       dump Opt_D_dump_cmmz "Post late reloads" g
+                runOptimization $ insertLateAssignments g
+       dump Opt_D_dump_cmmz "Post late assignments" g
        g     <-
                -- pprTrace "post insertLateReloads" (ppr g) $
                dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination"
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 3ae2996..c1be6ba 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -124,6 +124,8 @@ cmmExprType (CmmReg reg)            = cmmRegType reg
 cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args)
 cmmExprType (CmmRegOff reg _)   = cmmRegType reg
 cmmExprType (CmmStackSlot _ _)  = bWord -- an address
+-- Careful though: what is stored at the stack slot may be bigger than
+-- an address
 
 cmmLitType :: CmmLit -> CmmType
 cmmLitType (CmmInt _ width)     = cmmBits  width
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
index 4e2dd38..3cceee1 100644
--- a/compiler/cmm/CmmSpillReload.hs
+++ b/compiler/cmm/CmmSpillReload.hs
@@ -14,9 +14,7 @@ module CmmSpillReload
  --, insertSpillsAndReloads  --- XXX todo check live-in at entry against formals
   , dualLivenessWithInsertion
 
-  , availRegsLattice
-  , cmmAvailableReloads
-  , insertLateReloads
+  , insertLateAssignments
   , removeDeadAssignmentsAndReloads
   )
 where
@@ -31,6 +29,7 @@ import Control.Monad
 import Outputable hiding (empty)
 import qualified Outputable as PP
 import UniqSet
+import UniqFM
 
 import Compiler.Hoopl
 import Data.Maybe
@@ -188,91 +187,6 @@ spill, reload :: LocalReg -> CmmNode O O
 spill  r = CmmStore  (regSlot r) (CmmReg $ CmmLocal r)
 reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r)
 
-----------------------------------------------------------------
---- sinking reloads
-
--- The idea is to compute at each point the set of registers such that
--- on every path to the point, the register is defined by a Reload
--- instruction.  Then, if a use appears at such a point, we can safely
--- insert a Reload right before the use.  Finally, we can eliminate
--- the early reloads along with other dead assignments.
-
-data AvailRegs = UniverseMinus RegSet
-               | AvailRegs     RegSet
-
-
-availRegsLattice :: DataflowLattice AvailRegs
-availRegsLattice = DataflowLattice "register gotten from reloads" empty add
-    where empty = UniverseMinus emptyRegSet
-          -- | compute in the Tx monad to track whether anything has changed
-          add _ (OldFact old) (NewFact new) =
-            if join `smallerAvail` old then (SomeChange, join) else (NoChange, old)
-            where join = interAvail new old
-
-
-interAvail :: AvailRegs -> AvailRegs -> AvailRegs
-interAvail (UniverseMinus s) (UniverseMinus s') = UniverseMinus (s `plusRegSet`  s')
-interAvail (AvailRegs     s) (AvailRegs     s') = AvailRegs (s `timesRegSet` s')
-interAvail (AvailRegs     s) (UniverseMinus s') = AvailRegs (s  `minusRegSet` s')
-interAvail (UniverseMinus s) (AvailRegs     s') = AvailRegs (s' `minusRegSet` s )
-
-smallerAvail :: AvailRegs -> AvailRegs -> Bool
-smallerAvail (AvailRegs     _) (UniverseMinus _)  = True
-smallerAvail (UniverseMinus _) (AvailRegs     _)  = False
-smallerAvail (AvailRegs     s) (AvailRegs    s')  = sizeUniqSet s < sizeUniqSet s'
-smallerAvail (UniverseMinus s) (UniverseMinus s') = sizeUniqSet s > sizeUniqSet s'
-
-extendAvail :: AvailRegs -> LocalReg -> AvailRegs
-extendAvail (UniverseMinus s) r = UniverseMinus (deleteFromRegSet s r)
-extendAvail (AvailRegs     s) r = AvailRegs (extendRegSet s r)
-
-delFromAvail :: AvailRegs -> LocalReg -> AvailRegs
-delFromAvail (UniverseMinus s) r = UniverseMinus (extendRegSet s r)
-delFromAvail (AvailRegs     s) r = AvailRegs (deleteFromRegSet s r)
-
-elemAvail :: AvailRegs -> LocalReg -> Bool
-elemAvail (UniverseMinus s) r = not $ elemRegSet r s
-elemAvail (AvailRegs     s) r = elemRegSet r s
-
-cmmAvailableReloads :: CmmGraph -> FuelUniqSM (BlockEnv AvailRegs)
-cmmAvailableReloads g =
-  liftM snd $ dataflowPassFwd g [(g_entry g, fact_bot availRegsLattice)] $
-                              analFwd availRegsLattice availReloadsTransfer
-
-availReloadsTransfer :: FwdTransfer CmmNode AvailRegs
-availReloadsTransfer = mkFTransfer3 (flip const) middleAvail ((mkFactBase availRegsLattice .) . lastAvail)
-
-middleAvail :: CmmNode O O -> AvailRegs -> AvailRegs
-middleAvail (CmmAssign (CmmLocal r) (CmmLoad l _)) avail
-               | l `isStackSlotOf` r = extendAvail avail r
-middleAvail (CmmAssign lhs _)        avail = foldRegsDefd delFromAvail avail lhs
-middleAvail (CmmStore l (CmmReg (CmmLocal r))) avail
-               | l `isStackSlotOf` r = avail
-middleAvail (CmmStore (CmmStackSlot (RegSlot r) _) _) avail = delFromAvail avail r
-middleAvail (CmmStore {})            avail = avail
-middleAvail (CmmUnsafeForeignCall {}) _    = AvailRegs emptyRegSet
-middleAvail (CmmComment {})          avail = avail
-
-lastAvail :: CmmNode O C -> AvailRegs -> [(Label, AvailRegs)]
-lastAvail (CmmCall _ (Just k) _ _ _) _ = [(k, AvailRegs emptyRegSet)]
-lastAvail (CmmForeignCall {succ=k})  _ = [(k, AvailRegs emptyRegSet)]
-lastAvail l avail = map (\id -> (id, avail)) $ successors l
-
-insertLateReloads :: CmmGraph -> FuelUniqSM CmmGraph
-insertLateReloads g =
-  liftM fst $ dataflowPassFwd g [(g_entry g, fact_bot availRegsLattice)] $
-                              analRewFwd availRegsLattice availReloadsTransfer rewrites
-  where rewrites = mkFRewrite3 first middle last
-        first _ _ = return Nothing
-        middle m avail = return $ maybe_reload_before avail m (mkMiddle m)
-        last   l avail = return $ maybe_reload_before avail l (mkLast l)
-        maybe_reload_before avail node tail =
-            let used = filterRegsUsed (elemAvail avail) node
-            in  if isEmptyUniqSet used then Nothing
-                                       else Just $ reloadTail used tail
-        reloadTail regset t = foldl rel t $ uniqSetToList regset
-          where rel t r = mkMiddle (reload r) <*> t
-
 removeDeadAssignmentsAndReloads :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph
 removeDeadAssignmentsAndReloads procPoints g =
    liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice
@@ -287,6 +201,166 @@ removeDeadAssignmentsAndReloads procPoints g =
 
          nothing _ _ = return Nothing
 
+----------------------------------------------------------------
+--- sinking arbitrary loads
+
+-- The idea is to compute at each point an expression that could be used
+-- to calculate the contents of a register.  Then, if a use appears at
+-- such a point, we can safely insert an assignment for that register
+-- right before the use.  Finally, we can eliminate the earlier
+-- assignment along with other dead assignments.
+--
+-- XXX This code currently performs what is essentially unbounded
+-- inlining, which is not necessarily a good idea.  We should associate
+-- a cost with all expressions, and only inline if the cost is not too
+-- high.
+--
+-- XXX Interacts poorly with the CmmOpt inliner, which only inlines
+-- if there's a single use of the register.  Here, the register is
+-- assigned multiple times, so it won't actually get inlined...
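
For intuition, the transformation on straight-line code looks like the
following toy model (list-based, with made-up types; the real pass works
on Hoopl graphs over CmmNode):

    data Stmt' = Assign' String Expr' | Other' | Use' String
      deriving (Eq, Show)
    data Expr' = Reg' String | Load' String   -- Load' names a stack slot
      deriving (Eq, Show)

    -- If register r is known to hold e on every path to a use, we can
    -- re-materialize the assignment right in front of that use; the
    -- early assignment then becomes dead and a later pass removes it.
    sinkOne :: (String, Expr') -> [Stmt'] -> [Stmt']
    sinkOne (r, e) = concatMap before
      where before s@(Use' r') | r == r' = [Assign' r e, s]
            before s                     = [s]

For example, sinkOne ("x", Load' "x_slot") rewrites
[Assign' "x" (Load' "x_slot"), Other', Use' "x"] to
[Assign' "x" (Load' "x_slot"), Other', Assign' "x" (Load' "x_slot"),
Use' "x"], after which dead-assignment elimination drops the first
assignment.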
+
+type AssignmentMap = UniqFM (LocalReg, WithTop CmmExpr)
+
+-- ToDo: Move this somewhere more general (UniqFM? That would
+-- introduce a Hoopl dependency on that file.)
+joinUFM :: JoinFun v -> JoinFun (UniqFM v)
+joinUFM eltJoin l (OldFact old) (NewFact new) = foldUFM_Directly add (NoChange, old) new
+    where add k new_v (ch, joinmap) =
+            case lookupUFM_Directly joinmap k of
+                Nothing -> (SomeChange, addToUFM_Directly joinmap k new_v)
+                Just old_v -> case eltJoin l (OldFact old_v) (NewFact new_v) of
+                                (SomeChange, v') -> (SomeChange, addToUFM_Directly joinmap k v')
+                                (NoChange, _) -> (ch, joinmap)
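
For intuition, here is a standalone model of joinUFM over Data.Map
instead of UniqFM (the names are illustrative stand-ins, not Hoopl's):

    import qualified Data.Map as M

    data ChangeFlag' = NoChange' | SomeChange' deriving (Eq, Show)

    -- Lift an element-wise join to whole maps: fold the new facts into
    -- the old map, joining pointwise, and record whether anything grew.
    joinMap :: Ord k => (v -> v -> (ChangeFlag', v))
            -> M.Map k v -> M.Map k v -> (ChangeFlag', M.Map k v)
    joinMap eltJoin old new = M.foldrWithKey add (NoChange', old) new
      where add k new_v (ch, acc) =
              case M.lookup k acc of
                Nothing    -> (SomeChange', M.insert k new_v acc)
                Just old_v -> case eltJoin old_v new_v of
                  (SomeChange', v') -> (SomeChange', M.insert k v' acc)
                  (NoChange',   _)  -> (ch, acc)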
+
+assignmentLattice :: DataflowLattice AssignmentMap
+assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
+    where -- ToDo: Factor out 'add', which is useful for any lattice map
+          -- that manually tracks registers
+#if DEBUG
+          add l (OldFact x@(r, old)) (NewFact (r', new))
+            | r /= r' = panic "assignmentLattice: distinct registers in same mapping"
+#else
+          add l (OldFact x@(r, old)) (NewFact (_, new))
+#endif
+            | otherwise =
+                case extendJoinDomain add' l (OldFact old) (NewFact new) of
+                    (NoChange,   _) -> (NoChange, x)
+                    (SomeChange, v) -> (SomeChange, (r, v))
+          add' _ (OldFact old) (NewFact new)
+            | old == new = (NoChange,   PElem old)
+            | otherwise  = (SomeChange, Top)
+-- ToDo: I feel like there's a monad squirreled away here somewhere...
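
Concretely, the element-level join behaves like this sketch (WithTop' is
an illustrative stand-in for Hoopl's WithTop):

    data WithTop' e = PElem' e | Top' deriving (Eq, Show)

    -- Join two facts for the same register at a control-flow merge:
    -- agreeing expressions survive, disagreeing ones go to Top'.
    joinElt :: Eq e => WithTop' e -> WithTop' e -> WithTop' e
    joinElt (PElem' a) (PElem' b) | a == b = PElem' a
    joinElt _ _ = Top'   -- otherwise there is no single known assignment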
+
+middleAssignment :: CmmNode O O -> AssignmentMap -> AssignmentMap
+-- Stack slots for registers are treated specially: we maintain
+-- the invariant that the stack slot will always accurately reflect the
+-- contents of a variable.  This invariant only holds because we only
+-- generate register slots during the spilling phase, and the spilling
+-- phase always spills when the register changes, so we will never see a
+-- lone 'abc = ...' (where ... is not a reload) without a subsequent
+-- spill.  (We could probably write another quick analysis to check that
+-- this invariant holds.) If this invariant holds, then any reload
+-- (that's what an assignment to the register from a stack slot is)
+-- simply adds a register to the available assignments without
+-- invalidating any existing references to the register.
+middleAssignment (CmmAssign (CmmLocal r) e@(CmmLoad l _)) assign
+    | l `isStackSlotOf` r = addToUFM assign r (r, PElem e)
+-- When we assign anything else to a register, we invalidate all current
+-- assignments whose expression mentions that register, and then, if
+-- it's a local assignment, add this assignment to our map.  The one
+-- exception is an assignment whose right-hand side mentions the register
+-- itself, like x = x + 1: its value cannot be recomputed later, so all
+-- we can record for it is Top.  (Note: We could do something clever here
+-- for simple Hp = Hp + 8 style assignments by simply aggressively
+-- inlining that addition.)
+middleAssignment (CmmAssign reg e) assign
+    = f (mapUFM p assign)
+      where p (r', PElem e') | reg `regUsedIn` e' = (r', Top)
+            p old = old
+            f m | (CmmLocal r) <- reg = addToUFM m r (r, new)
+                | otherwise = m
+            new | reg `regUsedIn` e = Top
+                | otherwise         = PElem e
+-- Once again, treat stores of registers to register slots specially
+middleAssignment (CmmStore l (CmmReg (CmmLocal r))) assign
+    | l `isStackSlotOf` r = assign
+-- When a store occurs, invalidate all current assignments that rely on
+-- the memory location that got clobbered.  Note that stack slots have
+-- already been handled.
+middleAssignment (CmmStore lhs rhs) assign
+    = mapUFM p assign
+      where p (r, PElem e) | (lhs, rhs) `clobbers` (r, e) = (r, Top)
+            p old = old
+-- This is tricky, because what an unsafe foreign call may clobber
+-- depends on how far along the pipeline we are.  The current choice is
+-- maximally conservative: invalidate everything.
+middleAssignment (CmmUnsafeForeignCall {}) assign = mapUFM (\(r,_) -> (r,Top)) assign
+middleAssignment (CmmComment {})           assign = assign
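
The invalidation story above can be pictured with another toy model
(reusing WithTop' and the Data.Map import from the sketches above; Expr
and string-named registers are made up):

    data Expr = ERef String | ELit Int | EAdd Expr Expr
      deriving (Eq, Show)

    usesReg :: String -> Expr -> Bool
    usesReg r (ERef r')  = r == r'
    usesReg _ (ELit _)   = False
    usesReg r (EAdd a b) = usesReg r a || usesReg r b

    -- An assignment r := e kills every fact whose expression mentions r,
    -- then records the new fact for r itself (Top' if e mentions r).
    assignStep :: String -> Expr -> M.Map String (WithTop' Expr)
               -> M.Map String (WithTop' Expr)
    assignStep r e m = M.insert r new (M.map kill m)
      where kill (PElem' e') | r `usesReg` e' = Top'
            kill old = old
            new | r `usesReg` e = Top'
                | otherwise     = PElem' e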
+
+-- Assumptions:
+--  * Stack slots do not overlap with any other memory locations
+--  * Non stack-slot stores always conflict with each other.  (This is
+--    not always the case; we could probably do something special for Hp)
+--  * Stack slots for different areas do not overlap
+--  * Stack slots within the same area and different offsets may
+--    overlap; we need to do a size check (see 'overlaps').
+clobbers :: (CmmExpr, CmmExpr) -> (LocalReg, CmmExpr) -> Bool
+clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (r', CmmLoad (ss'@CmmStackSlot{}) _)
+    | r == r', ss == ss' = False -- No-op on the stack slot
+clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
+    where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
+            = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
+          f (CmmLoad e _)      = containsStackSlot e
+          f (CmmMachOp _ es) = or (map f es)
+          f _                = False
+          -- Maybe there's an invariant broken if this actually ever
+          -- returns True
+          containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off
+          containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
+          containsStackSlot (CmmStackSlot{}) = True
+          containsStackSlot _ = False
+clobbers _ (_, e) = f e
+    where f (CmmLoad (CmmStackSlot _ _) _) = False
+          f (CmmLoad{}) = True -- conservative
+          f (CmmMachOp _ es) = or (map f es)
+          f _ = False
+
+-- Diagram:
+--      4      8     12
+--      s -w-  o
+--      [ I32  ]
+--      [    F64     ]
+--      s'   -w'-    o'
+type CallSubArea = (AreaId, Int, Int) -- area, offset, width
+overlaps :: CallSubArea -> CallSubArea -> Bool
+overlaps (a, _, _) (a', _, _) | a /= a' = False
+overlaps (_, o, w) (_, o', w') =
+    let s  = o  - w
+        s' = o' - w'
+    in (s' <= o) && (s <= o')
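
Worked through on the diagram's numbers, with a standalone model over
(offset, width) pairs in a single area (illustrative only):

    -- Slot [o-w, o] against slot [o'-w', o'], closed intervals.
    overlapsIn :: (Int, Int) -> (Int, Int) -> Bool
    overlapsIn (o, w) (o', w') =
      let s = o - w; s' = o' - w'
      in (s' <= o) && (s <= o')

    -- overlapsIn (8, 4) (12, 8) == True   -- the I32/F64 picture above
    -- overlapsIn (8, 4) (16, 4) == False  -- disjoint slots
    -- overlapsIn (8, 4) (12, 4) == True   -- merely touching slots are
    --                                     -- conservatively flagged by <=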
+
+lastAssignment :: CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
+-- Also very conservative choices
+lastAssignment (CmmCall _ (Just k) _ _ _) assign = [(k, mapUFM (\(r,_) -> (r,Top)) assign)]
+lastAssignment (CmmForeignCall {succ=k})  assign = [(k, mapUFM (\(r,_) -> (r,Top)) assign)]
+lastAssignment l assign = map (\id -> (id, assign)) $ successors l
+
+assignmentTransfer :: FwdTransfer CmmNode AssignmentMap
+assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
+
+insertLateAssignments :: CmmGraph -> FuelUniqSM CmmGraph
+insertLateAssignments g =
+  liftM fst $ dataflowPassFwd g [(g_entry g, fact_bot assignmentLattice)] $
+                              analRewFwd assignmentLattice assignmentTransfer rewrites
+  where rewrites = mkFRewrite3 first middle last
+        first _ _ = return Nothing
+        middle m assign = return $ maybe_assign_before assign m (mkMiddle m)
+        last   l assign = return $ maybe_assign_before assign l (mkLast l)
+        maybe_assign_before assign node tail =
+            let used = foldRegsUsed f emptyUFM node :: UniqFM (LocalReg, CmmExpr)
+                f t r = case lookupUFM assign r of
+                            Just (r, (PElem e)) -> addToUFM t r (r, e)
+                            _ -> t
+            in if isNullUFM used then Nothing
+                                 else Just $ lateAssign used tail
+        -- XXX need to do a cost check
+        lateAssign regset t = foldl rel t $ eltsUFM regset
+          where rel t (r, e) = mkMiddle (CmmAssign (CmmLocal r) e) <*> t
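
The selection step in maybe_assign_before can be modelled the same way
(again with the illustrative Data.Map types from the sketches above):

    -- Collect the used registers with a known (non-Top') assignment;
    -- these are exactly the ones re-materialized in front of the node.
    lateAssignsFor :: [String] -> M.Map String (WithTop' Expr)
                   -> [(String, Expr)]
    lateAssignsFor used fact =
      [ (r, e) | r <- used, Just (PElem' e) <- [M.lookup r fact] ]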
 
 ---------------------
 -- prettyprinting
@@ -305,12 +379,6 @@ instance Outputable DualLive where
                          if isEmptyUniqSet stack then PP.empty
                          else (ppr_regs "live on stack =" stack)]
 
-instance Outputable AvailRegs where
-  ppr (UniverseMinus s) = if isEmptyUniqSet s then text "<everything available>"
-                          else ppr_regs "available = all but" s
-  ppr (AvailRegs     s) = if isEmptyUniqSet s then text "<nothing available>"
-                          else ppr_regs "available = " s
-
 my_trace :: String -> SDoc -> a -> a
 my_trace = if False then pprTrace else \_ _ a -> a
 
diff --git a/compiler/utils/UniqFM.lhs b/compiler/utils/UniqFM.lhs
index 293e48e..63b1724 100644
--- a/compiler/utils/UniqFM.lhs
+++ b/compiler/utils/UniqFM.lhs
@@ -20,6 +20,7 @@ and ``addToUFM\_C'' and ``Data.IntMap.insertWith'' differ in the order
 of arguments of combining function.
 
 \begin{code}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# OPTIONS -Wall #-}
 module UniqFM (
        -- * Unique-keyed mappings
@@ -45,7 +46,7 @@ module UniqFM (
        intersectUFM,
        intersectUFM_C,
        foldUFM, foldUFM_Directly,
-       mapUFM,
+       mapUFM, mapUFM_Directly,
        elemUFM, elemUFM_Directly,
        filterUFM, filterUFM_Directly,
        sizeUFM,
@@ -122,6 +123,7 @@ intersectUFM_C      :: (elt1 -> elt2 -> elt3)
 foldUFM                :: (elt -> a -> a) -> a -> UniqFM elt -> a
 foldUFM_Directly:: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
 mapUFM         :: (elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2
+mapUFM_Directly :: (Unique -> elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2
 filterUFM      :: (elt -> Bool) -> UniqFM elt -> UniqFM elt
 filterUFM_Directly :: (Unique -> elt -> Bool) -> UniqFM elt -> UniqFM elt
 
@@ -153,6 +155,7 @@ ufmToList   :: UniqFM elt -> [(Unique, elt)]
 
 \begin{code}
 newtype UniqFM ele = UFM (M.IntMap ele)
+    deriving (Eq)
 
 emptyUFM = UFM M.empty
 isNullUFM (UFM m) = M.null m
@@ -188,6 +191,7 @@ intersectUFM_C f (UFM x) (UFM y) = UFM (M.intersectionWith f x y)
 foldUFM k z (UFM m) = M.fold k z m
 foldUFM_Directly k z (UFM m) = M.foldWithKey (k . getUnique) z m
 mapUFM f (UFM m) = UFM (M.map f m)
+mapUFM_Directly f (UFM m) = UFM (M.mapWithKey (f . getUnique) m)
 filterUFM p (UFM m) = UFM (M.filter p m)
 filterUFM_Directly p (UFM m) = UFM (M.filterWithKey (p . getUnique) m)
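
A small usage sketch for the new function (a hypothetical helper, shown
only for illustration):

    -- Pair every element with its key's Unique:
    tagWithUnique :: UniqFM elt -> UniqFM (Unique, elt)
    tagWithUnique = mapUFM_Directly (\u e -> (u, e))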
 
-- 
1.7.4.2

