Why not call stdDev from Utils.hs in order to eliminate the duplication?
On 11/02/2015 04:59 PM, 'Klaus Aehlig' via ganeti-devel wrote:
Given that we later believe our metric up to values of 1e-12,
we should at least avoid the excessive inaccuracies caused by linearly
summing up values.
Signed-off-by: Klaus Aehlig <[email protected]>
---
src/Ganeti/Utils.hs | 15 +++++----------
src/Ganeti/Utils/Statistics.hs | 17 ++++++++---------
2 files changed, 13 insertions(+), 19 deletions(-)
diff --git a/src/Ganeti/Utils.hs b/src/Ganeti/Utils.hs
index 8db0c71..25f3843 100644
--- a/src/Ganeti/Utils.hs
+++ b/src/Ganeti/Utils.hs
@@ -234,16 +234,11 @@ balancedSum xs = let (ls, rs) = divideList xs
-- | Standard deviation function.
stdDev :: [Double] -> Double
stdDev lst =
- -- first, calculate the list length and sum lst in a single step,
- -- for performance reasons
- let (ll', sx) = foldl' (\(rl, rs) e ->
- let rl' = rl + 1
- rs' = rs + e
- in rl' `seq` rs' `seq` (rl', rs')) (0::Int, 0) lst
- ll = fromIntegral ll'::Double
- mv = sx / ll
- av = foldl' (\accu em -> let d = em - mv in accu + d * d) 0.0 lst
- in sqrt (av / ll) -- stddev
+ let len = fromIntegral $ length lst
+ mean = balancedSum lst / len
+ sqDist x = let d = x - mean in d * d
+ variance = balancedSum (map sqDist lst) / len
+ in sqrt variance
-- * Logical functions
diff --git a/src/Ganeti/Utils/Statistics.hs b/src/Ganeti/Utils/Statistics.hs
index ff91d93..776f65a 100644
--- a/src/Ganeti/Utils/Statistics.hs
+++ b/src/Ganeti/Utils/Statistics.hs
@@ -49,6 +49,8 @@ import qualified Data.Foldable as Foldable
import Data.List (foldl')
import qualified Data.Map as Map
+import Ganeti.Utils (balancedSum)
+
-- | Typeclass describing necessary statistical accumulations functions. Types
-- defining an instance of Stat behave as if the given statistics were computed
-- on the list of values, but they allow a potentially more efficient update of
@@ -88,15 +90,12 @@ instance Stat Double SumStat where
instance Stat Double StdDevStat where
calculate xs =
- let addComponent (n, s) x =
- let !n' = n + 1
- !s' = s + x
- in (n', s')
- (nt, st) = foldl' addComponent (0, 0) xs
- mean = st / nt
- center x = x - mean
- nvar = foldl' (\v x -> let d = center x in v + d * d) 0 xs
- in StdDevStat nt st (nvar / nt)
+ let !n = fromIntegral $ length xs
+ !sx = balancedSum xs
+ !mean = sx / n
+ sqDist x = let d = x - mean in d * d
+ !var = balancedSum (map sqDist xs) / n
+ in StdDevStat n sx var
update (StdDevStat n s var) x x' =
let !ds = x' - x
!dss = x' * x' - x * x