On Tuesday 15 June 2010 23:26:10, Don Stewart wrote: > deliverable: > > Wren -- thanks for the clarification! Someone said that Foldable on > > Trie may not be very efficient -- is that true? > > > > I use ByteString as a node type for the graph; these are Twitter user > > names. Surely it's useful to replace them with Int, which I'll try, > > but Clojure works with Java String fine and it simplifies all kinds of > > exploratory data mining and debugging to keep it as a String, so I'll > > try to get the most mileage from other things before interning. > > bytestring seems appropriate. > > > What's the exact relationship between Trie and Map and their > > respective performance? > > Tries specialized to bytestring keys should outperform the generic Map. >
That would be desirable. I've done some profiling with the sample data, and found that - if we subtract the times for loading and saving the graphs - some 35-40% of the time is spent looking up ByteStrings in Maps. That's far too much for my liking. I'm not sure whether the lookup for e.g. an Int key would be much faster, but I suspect it would be. I've also fiddled a bit with the strictness and removed a bit of unnecessary work, reduced the heap usage by ~20%, MUT times by ~15% and GC times by ~50% (all for the tests on my box with a measly 1GB RAM). It's still a far cry from a racehorse, but at least I can now run the sample data for the entire 35 days without having my box thrashing madly :) The result of my endeavours is attached. Cheers, Daniel
{-# LANGUAGE BangPatterns #-} module SocRun ( UserStats, DCaps, SGraph(..), SocRun(..), socRun, optSocRun ) -- TODO exports where import Graph import Data.Ord (comparing) import Data.List (groupBy,sortBy,foldl1') import Data.Foldable (foldl') import Data.Function (on) import qualified Data.Map as M import Data.Map ((!)) import Data.List (maximum) import System.IO import Debug.Trace import Data.Maybe import Control.Monad ((>=>)) --import Data.Nthable -- generalized fst --import Prelude hiding (fst,snd) -- errln x = -- hPrint stderr x -- hFlush stderr type DCaps = M.Map User (M.Map Day Float) type TalkBalance = M.Map User Int emptyTalk :: TalkBalance emptyTalk = M.empty data UserStats = UserStats { socUS :: !Float, dayUS :: !Int, insUS :: TalkBalance, outsUS :: TalkBalance, totUS :: TalkBalance, balUS :: TalkBalance} newUserStats :: Float -> Int -> UserStats newUserStats soc day = UserStats {socUS = soc, dayUS = day, insUS = emptyTalk, outsUS = emptyTalk, totUS = emptyTalk, balUS = emptyTalk} type UStats = M.Map User UserStats data SocRun = SocRun {alphaSR :: !Float, betaSR :: !Float, gammaSR :: !Float, socInitSR :: !Float, maxDaysSR :: Maybe Int} optSocRun = SocRun 0.00001 0.5 0.5 1.0 Nothing data SGraph = SGraph {drepsSG :: !Graph, dmentsSG :: !Graph, dcapsSG :: !DCaps, ustatsSG :: !UStats} paramSC (SocRun {alphaSR =a, betaSR =b, gammaSR =g}) = (a, b, g) minMax1 (oldMin, oldMax) x = let !newMin = oldMin `min` x !newMax = oldMax `max` x in (newMin, newMax) minMax2 (oldMin, oldMax) (x,y) = let !newMin = oldMin `min` x !newMax = oldMax `max` y in (newMin, newMax) -- find the day range when each user exists in dreps -- PRE: dreps must be sorted in adjacency lists by day!" -- (assert (reps-sorted1? dreps)) dayRanges :: Graph -> M.Map User (Int, Int) dayRanges dreps = M.map doDays dreps where doDays days = case fst $ M.findMin days of !f -> case fst $ M.findMax days of !l -> (f, l) -- let (!start, _) = M.elemAt 0 days -- i.e. "any map entry" -- range@(!f,!l) = foldl' minMax1 (start, start) (M.keys days) in -- range -- merge two day-ranges results -- mergeDayRanges dr1 dr2 = M.unionWith min_max dr1 dr2 -- socRun :: Graph -> Graph -> SocRun -> IO () socRun dreps dments opts = let params = paramSC opts socInit = socInitSR opts dcaps = M.empty -- TODO type ustats = M.empty -- type sgraph = SGraph dreps dments dcaps ustats presMap = M.unionWith minMax2 (dayRanges dreps) (dayRanges dments) !l0 = snd . snd $ M.findMax presMap (!lastDay0, !dstarts) = foldl' updt (l0, M.empty) (M.assocs presMap) updt (!ld, !m) (u,(f,l)) = (max ld l, M.insertWith' (++) f [u] m) !firstDay = fst $ M.findMin dstarts !lastDay = let x' = maybe lastDay0 (\y -> min lastDay0 (firstDay + y - 1)) (maxDaysSR opts) in trace ("doing days from " ++ (show firstDay) ++ " to " ++ (show x')) x' tick sgraph day = -- inject the users first appearing in this cycle let nus = newUserStats socInit day !ustats = ustatsSG sgraph newUsers = let x = dstarts ! day in trace ("adding " ++ (show . length $ x) ++ " new users on day " ++ (show day)) x !ustats' = foldl' insn ustats newUsers insn m u = M.insert u nus m !sgraph' = trace ("now got " ++ show (M.size ustats')) sgraph {ustatsSG = ustats'} in socDay sgraph' params day in foldl' tick sgraph [firstDay..lastDay] -- socDay sgraph params day = undefined safeDivide :: (Fractional a) => a -> a -> a safeDivide x 0 = x safeDivide x y = x / y -- fst3 (x,_,_) = x safeDivide3 (x,y,z) (x',y',z') = let !a = safeDivide x x' !b = safeDivide y y' !c = safeDivide z z' in (a,b,c) socDay sgraph params day = let (!alpha, !beta, !gamma) = params --SGraph {ustatsSG =ustats, dcapsSG =dcaps} = sgraph !ustats = ustatsSG sgraph !dcaps = dcapsSG sgraph -- users = M.keys ustats -- my bangs, needed? tsmap u _ = case socUserDaySum sgraph day u of (Nothing,st) -> st `seq` (Nothing,st) (Just x, st) -> x `seq` st `seq` (Just x, st) !termsStats = {-# SCC "termsStats" #-} M.mapWithKey tsmap ustats -- !termsStats = M.mapWithKey (const . socUserDaySum sgraph day) ustats sumTerms = catMaybes . map fst . M.elems $ termsStats -- norms = foldl1' (zipWith (+)) sumTerms norms@(!a,!b,!c) = {-# SCC "norms" #-} foldl1' (\(!x,!y,!z) (!x',!y',!z') -> (x+x',y+y',z+z')) sumTerms tick user _ = {-# SCC "socDay.tick" #-} let (numers,!stats) = termsStats ! user !soc = socUS stats !soc' = case numers of Just numers -> let (!outs', !insBack', !insAll') = -- map safeDivide numers norms safeDivide3 numers norms in alpha * soc + (1 - alpha) * (beta * outs' + (1 - beta) * (gamma * insBack' + (1 - gamma) * insAll')) Nothing -> alpha * soc !stats' = stats {socUS = soc'} in stats' !ustats' = {-# SCC "ustats'" #-} M.mapWithKey tick ustats -- TODO fold[l/r]WithKey? !dcaps' = {-# SCC "dcaps'" #-} M.foldWithKey updateUser dcaps ustats' where updateUser !user userStats !res = case (dayUS userStats, socUS userStats) of (!day, !soc) -> M.insertWith' (flip M.union) user (M.singleton day soc) res in sgraph {ustatsSG= ustats', dcapsSG= dcaps'} -- socUserDaySum sgraph day user = undefined --getUserDay user day = M.lookup user >=> M.lookup day {-# INLINE getUserDay #-} getUserDay user day m = case {-# SCC "getUserDay.user" #-} M.lookup user m of Just m' -> {-# SCC "getUserDay.day" #-} M.lookup day m' Nothing -> Nothing -- we started writing this and BMeph finished, but Map has findWithDefault already: -- lookupWithDefault d = fromMaybe d . flip M.lookup getSoccap ustats user = case M.lookup user ustats of Just UserStats{socUS =soc} -> soc _ -> 0 socUserDaySum sgraph day user = let SGraph {drepsSG =dreps, dmentsSG =dments, ustatsSG =ustats} = sgraph !stats = {-# SCC "socUDSum.stats" #-} ustats ! user !dr_ = getUserDay user day dreps !dm_ = getUserDay user day dments in if not (isJust dr_ || isJust dm_) then (Nothing, stats) else -- we had edges this cycle -- now let's dance and compute the change! let UserStats {socUS =soc, dayUS =day, insUS =ins, outsUS =outs, totUS =tot, balUS =bal} = stats -- changing order from foldlWithKey to foldWithKey's -- to be able to run under GHC 6.10 socStep pred !to !num !res = {-# SCC "socStep" #-} let !toBal = M.findWithDefault 0 to bal in if not (pred toBal) then 0 else let !toSoc = getSoccap ustats to in if toSoc == 0 then 0 else let toTot = M.findWithDefault 1 to tot !term = fromIntegral (num * toBal * toTot) * toSoc in res + term -- find all those who talked to us in the past to whom we replied now !outSum = {-# SCC "outSum" #-} case dr_ of Nothing -> 0 Just dr -> M.foldWithKey (socStep (<0)) 0 dr !inSumBack = {-# SCC "inSumBack" #-} case dm_ of Nothing -> 0 Just dm -> M.foldWithKey (socStep (>0)) 0 dm !inSumAll = {-# SCC "inSumAll" #-} case dm_ of Nothing -> 0 Just dm -> M.foldWithKey step 0 dm where step to !num !res = let toSoc = getSoccap ustats to in if toSoc == 0 then 0 else let toTot = M.findWithDefault 1 to tot !term = fromIntegral (num * toTot) * toSoc in res + term terms = (outSum, inSumBack, inSumAll) addMaps = M.unionWith (+) subtractMaps = M.unionWith (-) ins' = case dr_ of {Just dr -> addMaps ins dr; _ -> ins} outs' = case dm_ of {Just dm -> addMaps outs dm; _ -> outs} -- ziman: M.unionWith (+) `on` maybe M.empty id (tot', bal') = case (dr_, dm_) of (Just dr, Nothing) -> (addMaps tot dr, addMaps bal dr) (Nothing, Just dm) -> (addMaps tot dm, subtractMaps bal dm) (Just dr, Just dm) -> let t = addMaps dr $ addMaps tot dm b = addMaps dr $ subtractMaps bal dm in (t,b) !stats' = stats {insUS= ins', outsUS= outs', totUS= tot', balUS= bal'} in (Just terms, stats')
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe