Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package ghc-statistics for openSUSE:Factory checked in at 2022-02-11 23:09:41 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ghc-statistics (Old) and /work/SRC/openSUSE:Factory/.ghc-statistics.new.1956 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-statistics" Fri Feb 11 23:09:41 2022 rev:4 rq:953535 version:0.16.0.1 Changes: -------- --- /work/SRC/openSUSE:Factory/ghc-statistics/ghc-statistics.changes 2021-09-09 23:08:00.848863684 +0200 +++ /work/SRC/openSUSE:Factory/.ghc-statistics.new.1956/ghc-statistics.changes 2022-02-11 23:11:39.851337240 +0100 @@ -1,0 +2,21 @@ +Sat Jan 8 15:39:32 UTC 2022 - Peter Simons <psim...@suse.com> + +- Update statistics to version 0.16.0.1. + ## Changes in 0.16.0.0 + + * Random number generation switched to API introduced in random-1.2 + + * Support of GHC<7.10 is dropped + + * Fix for chi-squared test (#167) which was completely wrong + + * Computation of CDF and quantiles of Cauchy distribution is now numerically + stable. + + * Fix loss of precision in computing of CDF of gamma distribution + + * Log-normal and Weibull distributions added. + + * `DiscreteGen` instance added for `DiscreteUniform` + +------------------------------------------------------------------- Old: ---- statistics-0.15.2.0.tar.gz New: ---- statistics-0.16.0.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ghc-statistics.spec ++++++ --- /var/tmp/diff_new_pack.hr0qpO/_old 2022-02-11 23:11:40.347338675 +0100 +++ /var/tmp/diff_new_pack.hr0qpO/_new 2022-02-11 23:11:40.351338687 +0100 @@ -1,7 +1,7 @@ # # spec file for package ghc-statistics # -# Copyright (c) 2021 SUSE LLC +# Copyright (c) 2022 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -19,7 +19,7 @@ %global pkg_name statistics %bcond_with tests Name: ghc-%{pkg_name} -Version: 0.15.2.0 +Version: 0.16.0.1 Release: 0 Summary: A library of statistical types, data, and functions License: BSD-2-Clause @@ -28,7 +28,6 @@ BuildRequires: ghc-Cabal-devel BuildRequires: ghc-aeson-devel BuildRequires: ghc-async-devel -BuildRequires: ghc-base-orphans-devel BuildRequires: ghc-binary-devel BuildRequires: ghc-data-default-class-devel BuildRequires: ghc-deepseq-devel @@ -37,6 +36,7 @@ BuildRequires: ghc-monad-par-devel BuildRequires: ghc-mwc-random-devel BuildRequires: ghc-primitive-devel +BuildRequires: ghc-random-devel BuildRequires: ghc-rpm-macros BuildRequires: ghc-vector-algorithms-devel BuildRequires: ghc-vector-binary-instances-devel @@ -84,11 +84,10 @@ %prep %autosetup -n %{pkg_name}-%{version} +find . -type f -exec chmod -x {} + %build %ghc_lib_build -chmod a-x README.markdown changelog.md -chmod a-x examples/kde/* %install %ghc_lib_install ++++++ statistics-0.15.2.0.tar.gz -> statistics-0.16.0.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Correlation/Kendall.hs new/statistics-0.16.0.1/Statistics/Correlation/Kendall.hs --- old/statistics-0.15.2.0/Statistics/Correlation/Kendall.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Correlation/Kendall.hs 2001-09-09 03:46:40.000000000 +0200 @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, CPP, FlexibleContexts #-} +{-# LANGUAGE BangPatterns, FlexibleContexts #-} -- | -- Module : Statistics.Correlation.Kendall -- @@ -131,11 +131,6 @@ wroteLow low (iLow+1) high iHigh eHigh (iIns+1) {-# INLINE merge #-} -#if !MIN_VERSION_base(4,6,0) -modifySTRef' :: STRef s a -> (a -> a) -> ST s () -modifySTRef' = modifySTRef -#endif - -- $references -- -- * William R. Knight. (1966) A computer method for calculating Kendall's Tau diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/Binomial.hs new/statistics-0.16.0.1/Statistics/Distribution/Binomial.hs --- old/statistics-0.15.2.0/Statistics/Distribution/Binomial.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution/Binomial.hs 2001-09-09 03:46:40.000000000 +0200 @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} -- | -- Module : Statistics.Distribution.Binomial @@ -31,7 +32,7 @@ import Data.Data (Data, Typeable) import GHC.Generics (Generic) import Numeric.SpecFunctions (choose,logChoose,incompleteBeta,log1p) -import Numeric.MathFunctions.Constants (m_epsilon) +import Numeric.MathFunctions.Constants (m_epsilon,m_tiny) import qualified Statistics.Distribution as D import qualified Statistics.Distribution.Poisson.Internal as I @@ -104,9 +105,16 @@ | n == 0 = 1 -- choose could overflow Double for n >= 1030 so we switch to -- log-domain to calculate probability - | n < 1000 = choose n k * p^k * (1-p)^(n-k) - | otherwise = exp $ logChoose n k + log p * k' + log1p (-p) * nk' + -- + -- We also want to avoid underflow when computing p^k & + -- (1-p)^(n-k). + | n < 1000 + , pK >= m_tiny + , pNK >= m_tiny = choose n k * pK * pNK + | otherwise = exp $ logChoose n k + log p * k' + log1p (-p) * nk' where + pK = p^k + pNK = (1-p)^(n-k) k' = fromIntegral k nk' = fromIntegral $ n - k diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/CauchyLorentz.hs new/statistics-0.16.0.1/Statistics/Distribution/CauchyLorentz.hs --- old/statistics-0.15.2.0/Statistics/Distribution/CauchyLorentz.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution/CauchyLorentz.hs 2001-09-09 03:46:40.000000000 +0200 @@ -94,23 +94,43 @@ instance D.Distribution CauchyDistribution where - cumulative (CD m s) x = 0.5 + atan( (x - m) / s ) / pi + cumulative (CD m s) x + | y < -1 = atan (-1/y) / pi + | otherwise = 0.5 + atan y / pi + where + y = (x - m) / s + complCumulative (CD m s) x + | y > 1 = atan (1/y) / pi + | otherwise = 0.5 - atan y / pi + where + y = (x - m) / s instance D.ContDistr CauchyDistribution where density (CD m s) x = (1 / pi) / (s * (1 + y*y)) where y = (x - m) / s quantile (CD m s) p - | p > 0 && p < 1 = m + s * tan( pi * (p - 0.5) ) - | p == 0 = -1 / 0 - | p == 1 = 1 / 0 - | otherwise = - error $ "Statistics.Distribution.CauchyLorentz.quantile: p must be in [0,1] range. Got: "++show p + | p == 0 = -1 / 0 + | p == 1 = 1 / 0 + | p == 0.5 = m + | p < 0 = err + | p < 0.5 = m - s / tan( pi * p ) + | p < 1 = m + s / tan( pi * (1 - p) ) + | otherwise = err + where + err = error + $ "Statistics.Distribution.CauchyLorentz.quantile: p must be in [0,1] range. Got: "++show p complQuantile (CD m s) p - | p > 0 && p < 1 = m + s * tan( pi * (0.5 - p) ) - | p == 0 = 1 / 0 - | p == 1 = -1 / 0 - | otherwise = - error $ "Statistics.Distribution.CauchyLorentz.complQuantile: p must be in [0,1] range. Got: "++show p + | p == 0 = 1 / 0 + | p == 1 = -1 / 0 + | p == 0.5 = m + | p < 0 = err + | p < 0.5 = m + s / tan( pi * p ) + | p < 1 = m - s / tan( pi * (1 - p) ) + | otherwise = err + where + err = error + $ "Statistics.Distribution.CauchyLorentz.quantile: p must be in [0,1] range. Got: "++show p + instance D.ContGen CauchyDistribution where genContVar = D.genContinuous diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/DiscreteUniform.hs new/statistics-0.16.0.1/Statistics/Distribution/DiscreteUniform.hs --- old/statistics-0.15.2.0/Statistics/Distribution/DiscreteUniform.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution/DiscreteUniform.hs 2001-09-09 03:46:40.000000000 +0200 @@ -28,10 +28,11 @@ , rangeTo ) where -import Control.Applicative ((<$>), (<*>), empty) +import Control.Applicative (empty) import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) import Data.Binary (Binary(..)) import Data.Data (Data, Typeable) +import System.Random.Stateful (uniformRM) import GHC.Generics (Generic) import qualified Statistics.Distribution as D @@ -94,6 +95,12 @@ instance D.MaybeEntropy DiscreteUniform where maybeEntropy = Just . D.entropy +instance D.ContGen DiscreteUniform where + genContVar d = fmap fromIntegral . D.genDiscreteVar d + +instance D.DiscreteGen DiscreteUniform where + genDiscreteVar (U a b) = uniformRM (a,b) + -- | Construct discrete uniform distribution on support {1, ..., n}. -- Range /n/ must be >0. discreteUniform :: Int -- ^ Range diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/FDistribution.hs new/statistics-0.16.0.1/Statistics/Distribution/FDistribution.hs --- old/statistics-0.15.2.0/Statistics/Distribution/FDistribution.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution/FDistribution.hs 2001-09-09 03:46:40.000000000 +0200 @@ -109,15 +109,36 @@ cumulative :: FDistribution -> Double -> Double cumulative (F n m _) x | x <= 0 = 0 - | isInfinite x = 1 -- Only matches +??? - | otherwise = let y = n*x in incompleteBeta (0.5 * n) (0.5 * m) (y / (m + y)) + -- Only matches +??? + | isInfinite x = 1 + -- NOTE: Here we rely on implementation detail of incompleteBeta. It + -- computes using series expansion for sufficiently small x + -- and uses following identity otherwise: + -- + -- I(x; a, b) = 1 - I(1-x; b, a) + -- + -- Point is we can compute 1-x as m/(m+y) without loss of + -- precision for large x. Sadly this switchover point is + -- implementation detail. + | n >= (n+m)*bx = incompleteBeta (0.5 * n) (0.5 * m) bx + | otherwise = 1 - incompleteBeta (0.5 * m) (0.5 * n) bx1 + where + y = n * x + bx = y / (m + y) + bx1 = m / (m + y) complCumulative :: FDistribution -> Double -> Double complCumulative (F n m _) x - | x <= 0 = 1 - | isInfinite x = 0 -- Only matches +??? - | otherwise = let y = n*x - in incompleteBeta (0.5 * m) (0.5 * n) (m / (m + y)) + | x <= 0 = 1 + -- Only matches +??? + | isInfinite x = 0 + -- See NOTE at cumulative + | m >= (n+m)*bx = incompleteBeta (0.5 * m) (0.5 * n) bx + | otherwise = 1 - incompleteBeta (0.5 * n) (0.5 * m) bx1 + where + y = n*x + bx = m / (m + y) + bx1 = y / (m + y) logDensity :: FDistribution -> Double -> Double logDensity (F n m fac) x diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/Gamma.hs new/statistics-0.16.0.1/Statistics/Distribution/Gamma.hs --- old/statistics-0.15.2.0/Statistics/Distribution/Gamma.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution/Gamma.hs 2001-09-09 03:46:40.000000000 +0200 @@ -36,6 +36,7 @@ import Numeric.MathFunctions.Constants (m_pos_inf, m_NaN, m_neg_inf) import Numeric.SpecFunctions (incompleteGamma, invIncompleteGamma, logGamma, digamma) import qualified System.Random.MWC.Distributions as MWC +import qualified Numeric.Sum as Sum import Statistics.Distribution.Poisson.Internal as Poisson import qualified Statistics.Distribution as D @@ -126,7 +127,11 @@ density = density logDensity (GD k theta) x | x <= 0 = m_neg_inf - | otherwise = log x * (k - 1) - (x / theta) - logGamma k - log theta * k + | otherwise = Sum.sum Sum.kbn [ log x * (k - 1) + , - (x / theta) + , - logGamma k + , - log theta * k + ] quantile = quantile instance D.Variance GammaDistribution where diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/Lognormal.hs new/statistics-0.16.0.1/Statistics/Distribution/Lognormal.hs --- old/statistics-0.15.2.0/Statistics/Distribution/Lognormal.hs 1970-01-01 01:00:00.000000000 +0100 +++ new/statistics-0.16.0.1/Statistics/Distribution/Lognormal.hs 2001-09-09 03:46:40.000000000 +0200 @@ -0,0 +1,172 @@ +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} +-- | +-- Module : Statistics.Distribution.Lognormal +-- Copyright : (c) 2020 Ximin Luo +-- License : BSD3 +-- +-- Maintainer : infini...@pwned.gg +-- Stability : experimental +-- Portability : portable +-- +-- The log normal distribution. This is a continuous probability +-- distribution that describes data whose log is clustered around a +-- mean. For example, the multiplicative product of many independent +-- positive random variables. + +module Statistics.Distribution.Lognormal + ( + LognormalDistribution + -- * Constructors + , lognormalDistr + , lognormalDistrErr + , lognormalDistrMeanStddevErr + , lognormalStandard + ) where + +import Data.Aeson (FromJSON, ToJSON) +import Data.Binary (Binary (..)) +import Data.Data (Data, Typeable) +import GHC.Generics (Generic) +import Numeric.MathFunctions.Constants (m_huge, m_sqrt_2_pi) +import Numeric.SpecFunctions (expm1, log1p) +import qualified Data.Vector.Generic as G + +import qualified Statistics.Distribution as D +import qualified Statistics.Distribution.Normal as N +import Statistics.Internal + + +-- | The lognormal distribution. +newtype LognormalDistribution = LND N.NormalDistribution + deriving (Eq, Typeable, Data, Generic) + +instance Show LognormalDistribution where + showsPrec i (LND d) = defaultShow2 "lognormalDistr" m s i + where + m = D.mean d + s = D.stdDev d +instance Read LognormalDistribution where + readPrec = defaultReadPrecM2 "lognormalDistr" $ + (either (const Nothing) Just .) . lognormalDistrErr + +instance ToJSON LognormalDistribution +instance FromJSON LognormalDistribution + +instance Binary LognormalDistribution where + put (LND d) = put m >> put s + where + m = D.mean d + s = D.stdDev d + get = do + m <- get + sd <- get + either fail return $ lognormalDistrErr m sd + +instance D.Distribution LognormalDistribution where + cumulative = cumulative + complCumulative = complCumulative + +instance D.ContDistr LognormalDistribution where + logDensity = logDensity + quantile = quantile + complQuantile = complQuantile + +instance D.MaybeMean LognormalDistribution where + maybeMean = Just . D.mean + +instance D.Mean LognormalDistribution where + mean (LND d) = exp (m + v / 2) + where + m = D.mean d + v = D.variance d + +instance D.MaybeVariance LognormalDistribution where + maybeStdDev = Just . D.stdDev + maybeVariance = Just . D.variance + +instance D.Variance LognormalDistribution where + variance (LND d) = expm1 v * exp (2 * m + v) + where + m = D.mean d + v = D.variance d + +instance D.Entropy LognormalDistribution where + entropy (LND d) = logBase 2 (s * exp (m + 0.5) * m_sqrt_2_pi) + where + m = D.mean d + s = D.stdDev d + +instance D.MaybeEntropy LognormalDistribution where + maybeEntropy = Just . D.entropy + +instance D.ContGen LognormalDistribution where + genContVar d = D.genContinuous d + +-- | Standard log normal distribution with mu 0 and sigma 1. +-- +-- Mean is @sqrt e@ and variance is @(e - 1) * e@. +lognormalStandard :: LognormalDistribution +lognormalStandard = LND N.standard + +-- | Create log normal distribution from parameters. +lognormalDistr + :: Double -- ^ Mu + -> Double -- ^ Sigma + -> LognormalDistribution +lognormalDistr mu sig = either error id $ lognormalDistrErr mu sig + +-- | Create log normal distribution from parameters. +lognormalDistrErr + :: Double -- ^ Mu + -> Double -- ^ Sigma + -> Either String LognormalDistribution +lognormalDistrErr mu sig + | sig >= sqrt (log m_huge - 2 * mu) = Left $ errMsg mu sig + | otherwise = LND <$> N.normalDistrErr mu sig + +errMsg :: Double -> Double -> String +errMsg mu sig = + "Statistics.Distribution.Lognormal.lognormalDistr: sigma must be > 0 && < " + ++ show lim ++ ". Got " ++ show sig + where lim = sqrt (log m_huge - 2 * mu) + +-- | Create log normal distribution from mean and standard deviation. +lognormalDistrMeanStddevErr + :: Double -- ^ Mu + -> Double -- ^ Sigma + -> Either String LognormalDistribution +lognormalDistrMeanStddevErr m sd = LND <$> N.normalDistrErr mu sig + where r = sd / m + sig2 = log1p (r * r) + sig = sqrt sig2 + mu = log m - sig2 / 2 + +-- | Variance is estimated using maximum likelihood method +-- (biased estimation) over the log of the data. +-- +-- Returns @Nothing@ if sample contains less than one element or +-- variance is zero (all elements are equal) +instance D.FromSample LognormalDistribution Double where + fromSample = fmap LND . D.fromSample . G.map log + +logDensity :: LognormalDistribution -> Double -> Double +logDensity (LND d) x + | x > 0 = let lx = log x in D.logDensity d lx - lx + | otherwise = 0 + +cumulative :: LognormalDistribution -> Double -> Double +cumulative (LND d) x + | x > 0 = D.cumulative d $ log x + | otherwise = 0 + +complCumulative :: LognormalDistribution -> Double -> Double +complCumulative (LND d) x + | x > 0 = D.complCumulative d $ log x + | otherwise = 1 + +quantile :: LognormalDistribution -> Double -> Double +quantile (LND d) = exp . D.quantile d + +complQuantile :: LognormalDistribution -> Double -> Double +complQuantile (LND d) = exp . D.complQuantile d diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/Normal.hs new/statistics-0.16.0.1/Statistics/Distribution/Normal.hs --- old/statistics-0.15.2.0/Statistics/Distribution/Normal.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution/Normal.hs 2001-09-09 03:46:40.000000000 +0200 @@ -19,6 +19,7 @@ -- * Constructors , normalDistr , normalDistrE + , normalDistrErr , standard ) where @@ -55,7 +56,7 @@ parseJSON (Object v) = do m <- v .: "mean" sd <- v .: "stdDev" - maybe (fail $ errMsg m sd) return $ normalDistrE m sd + either fail return $ normalDistrErr m sd parseJSON _ = empty instance Binary NormalDistribution where @@ -63,7 +64,7 @@ get = do m <- get sd <- get - maybe (fail $ errMsg m sd) return $ normalDistrE m sd + either fail return $ normalDistrErr m sd instance D.Distribution NormalDistribution where cumulative = cumulative @@ -111,7 +112,7 @@ normalDistr :: Double -- ^ Mean of distribution -> Double -- ^ Standard deviation of distribution -> NormalDistribution -normalDistr m sd = maybe (error $ errMsg m sd) id $ normalDistrE m sd +normalDistr m sd = either error id $ normalDistrErr m sd -- | Create normal distribution from parameters. -- @@ -120,13 +121,20 @@ normalDistrE :: Double -- ^ Mean of distribution -> Double -- ^ Standard deviation of distribution -> Maybe NormalDistribution -normalDistrE m sd - | sd > 0 = Just ND { mean = m - , stdDev = sd - , ndPdfDenom = log $ m_sqrt_2_pi * sd - , ndCdfDenom = m_sqrt_2 * sd - } - | otherwise = Nothing +normalDistrE m sd = either (const Nothing) Just $ normalDistrErr m sd + +-- | Create normal distribution from parameters. +-- +normalDistrErr :: Double -- ^ Mean of distribution + -> Double -- ^ Standard deviation of distribution + -> Either String NormalDistribution +normalDistrErr m sd + | sd > 0 = Right $ ND { mean = m + , stdDev = sd + , ndPdfDenom = log $ m_sqrt_2_pi * sd + , ndCdfDenom = m_sqrt_2 * sd + } + | otherwise = Left $ errMsg m sd errMsg :: Double -> Double -> String errMsg _ sd = "Statistics.Distribution.Normal.normalDistr: standard deviation must be positive. Got " ++ show sd diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/Transform.hs new/statistics-0.16.0.1/Statistics/Distribution/Transform.hs --- old/statistics-0.15.2.0/Statistics/Distribution/Transform.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution/Transform.hs 2001-09-09 03:46:40.000000000 +0200 @@ -18,11 +18,9 @@ ) where import Data.Aeson (FromJSON, ToJSON) -import Control.Applicative ((<*>)) import Data.Binary (Binary) import Data.Binary (put, get) import Data.Data (Data, Typeable) -import Data.Functor ((<$>)) import GHC.Generics (Generic) import qualified Statistics.Distribution as D diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/Uniform.hs new/statistics-0.16.0.1/Statistics/Distribution/Uniform.hs --- old/statistics-0.15.2.0/Statistics/Distribution/Uniform.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution/Uniform.hs 2001-09-09 03:46:40.000000000 +0200 @@ -22,11 +22,11 @@ ) where import Control.Applicative -import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) -import Data.Binary (Binary(..)) -import Data.Data (Data, Typeable) -import GHC.Generics (Generic) -import qualified System.Random.MWC as MWC +import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) +import Data.Binary (Binary(..)) +import Data.Data (Data, Typeable) +import System.Random.Stateful (uniformRM) +import GHC.Generics (Generic) import qualified Statistics.Distribution as D import Statistics.Internal @@ -117,4 +117,4 @@ maybeEntropy = Just . D.entropy instance D.ContGen UniformDistribution where - genContVar (UniformDistribution a b) gen = MWC.uniformR (a,b) gen + genContVar (UniformDistribution a b) = uniformRM (a,b) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution/Weibull.hs new/statistics-0.16.0.1/Statistics/Distribution/Weibull.hs --- old/statistics-0.15.2.0/Statistics/Distribution/Weibull.hs 1970-01-01 01:00:00.000000000 +0100 +++ new/statistics-0.16.0.1/Statistics/Distribution/Weibull.hs 2001-09-09 03:46:40.000000000 +0200 @@ -0,0 +1,224 @@ +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} +-- | +-- Module : Statistics.Distribution.Lognormal +-- Copyright : (c) 2020 Ximin Luo +-- License : BSD3 +-- +-- Maintainer : infini...@pwned.gg +-- Stability : experimental +-- Portability : portable +-- +-- The weibull distribution. This is a continuous probability +-- distribution that describes the occurrence of a single event whose +-- probability changes over time, controlled by the shape parameter. + +module Statistics.Distribution.Weibull + ( + WeibullDistribution + -- * Constructors + , weibullDistr + , weibullDistrErr + , weibullStandard + , weibullDistrApproxMeanStddevErr + ) where + +import Control.Applicative +import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) +import Data.Binary (Binary(..)) +import Data.Data (Data, Typeable) +import GHC.Generics (Generic) +import Numeric.MathFunctions.Constants (m_eulerMascheroni) +import Numeric.SpecFunctions (expm1, log1p, logGamma) +import qualified Data.Vector.Generic as G + +import qualified Statistics.Distribution as D +import qualified Statistics.Sample as S +import Statistics.Internal + + +-- | The weibull distribution. +data WeibullDistribution = WD { + wdShape :: {-# UNPACK #-} !Double + , wdLambda :: {-# UNPACK #-} !Double + } deriving (Eq, Typeable, Data, Generic) + +instance Show WeibullDistribution where + showsPrec i (WD k l) = defaultShow2 "weibullDistr" k l i +instance Read WeibullDistribution where + readPrec = defaultReadPrecM2 "weibullDistr" $ + (either (const Nothing) Just .) . weibullDistrErr + +instance ToJSON WeibullDistribution +instance FromJSON WeibullDistribution where + parseJSON (Object v) = do + k <- v .: "wdShape" + l <- v .: "wdLambda" + either fail return $ weibullDistrErr k l + parseJSON _ = empty + +instance Binary WeibullDistribution where + put (WD k l) = put k >> put l + get = do + k <- get + l <- get + either fail return $ weibullDistrErr k l + +instance D.Distribution WeibullDistribution where + cumulative = cumulative + complCumulative = complCumulative + +instance D.ContDistr WeibullDistribution where + logDensity = logDensity + quantile = quantile + complQuantile = complQuantile + +instance D.MaybeMean WeibullDistribution where + maybeMean = Just . D.mean + +instance D.Mean WeibullDistribution where + mean (WD k l) = l * exp (logGamma (1 + 1 / k)) + +instance D.MaybeVariance WeibullDistribution where + maybeStdDev = Just . D.stdDev + maybeVariance = Just . D.variance + +instance D.Variance WeibullDistribution where + variance (WD k l) = l * l * (exp (logGamma (1 + 2 * invk)) - q * q) + where + invk = 1 / k + q = exp (logGamma (1 + invk)) + +instance D.Entropy WeibullDistribution where + entropy (WD k l) = m_eulerMascheroni * (1 - 1 / k) + log (l / k) + 1 + +instance D.MaybeEntropy WeibullDistribution where + maybeEntropy = Just . D.entropy + +instance D.ContGen WeibullDistribution where + genContVar d = D.genContinuous d + +-- | Standard weibull distribution with scale factor (lambda) 1. +weibullStandard :: Double -> WeibullDistribution +weibullStandard k = weibullDistr k 1.0 + +-- | Create weibull distribution from parameters. +-- +-- If the shape (first) parameter is @1.0@, the distribution is equivalent to a +-- 'Statistics.Distribution.Exponential.ExponentialDistribution' with parameter +-- @1 / lambda@ the scale (second) parameter. +weibullDistr + :: Double -- ^ Shape + -> Double -- ^ Lambda (scale) + -> WeibullDistribution +weibullDistr k l = either error id $ weibullDistrErr k l + +-- | Create weibull distribution from parameters. +-- +-- If the shape (first) parameter is @1.0@, the distribution is equivalent to a +-- 'Statistics.Distribution.Exponential.ExponentialDistribution' with parameter +-- @1 / lambda@ the scale (second) parameter. +weibullDistrErr + :: Double -- ^ Shape + -> Double -- ^ Lambda (scale) + -> Either String WeibullDistribution +weibullDistrErr k l | k <= 0 = Left $ errMsg k l + | l <= 0 = Left $ errMsg k l + | otherwise = Right $ WD k l + +errMsg :: Double -> Double -> String +errMsg k l = + "Statistics.Distribution.Weibull.weibullDistr: both shape and lambda must be positive. Got shape " + ++ show k + ++ " and lambda " + ++ show l + +-- | Create weibull distribution from mean and standard deviation. +-- +-- The algorithm is from "Methods for Estimating Wind Speed Frequency +-- Distributions", C. G. Justus, W. R. Hargreaves, A. Mikhail, D. Graber, 1977. +-- Given the identity: +-- +-- \[ +-- (\frac{\sigma}{\mu})^2 = \frac{\Gamma(1+2/k)}{\Gamma(1+1/k)^2} - 1 +-- \] +-- +-- \(k\) can be approximated by +-- +-- \[ +-- k \approx (\frac{\sigma}{\mu})^{-1.086} +-- \] +-- +-- \(\lambda\) is then calculated straightforwardly via the identity +-- +-- \[ +-- \lambda = \frac{\mu}{\Gamma(1+1/k)} +-- \] +-- +-- Numerically speaking, the approximation for \(k\) is accurate only within a +-- certain range. We arbitrarily pick the range \(0.033 \le \frac{\sigma}{\mu} \le 1.45\) +-- where it is good to ~6%, and will refuse to create a distribution outside of +-- this range. The paper does not cover these details but it is straightforward +-- to check them numerically. +weibullDistrApproxMeanStddevErr + :: Double -- ^ Mean + -> Double -- ^ Stddev + -> Either String WeibullDistribution +weibullDistrApproxMeanStddevErr m s = if r > 1.45 || r < 0.033 + then Left msg + else weibullDistrErr k l + where r = s / m + k = (s / m) ** (-1.086) + l = m / exp (logGamma (1 + 1/k)) + msg = "Statistics.Distribution.Weibull.weibullDistr: stddev-mean ratio " + ++ "outside approximation accuracy range [0.033, 1.45]. Got " + ++ "stddev " ++ show s ++ " and mean " ++ show m + +-- | Uses an approximation based on the mean and standard deviation in +-- 'weibullDistrEstMeanStddevErr', with standard deviation estimated +-- using maximum likelihood method (unbiased estimation). +-- +-- Returns @Nothing@ if sample contains less than one element or +-- variance is zero (all elements are equal), or if the estimated mean +-- and standard-deviation lies outside the range for which the +-- approximation is accurate. +instance D.FromSample WeibullDistribution Double where + fromSample xs + | G.length xs <= 1 = Nothing + | v == 0 = Nothing + | otherwise = either (const Nothing) Just $ + weibullDistrApproxMeanStddevErr m (sqrt v) + where + (m,v) = S.meanVarianceUnb xs + +logDensity :: WeibullDistribution -> Double -> Double +logDensity (WD k l) x + | x < 0 = 0 + | otherwise = log k + (k - 1) * log x - k * log l - (x / l) ** k + +cumulative :: WeibullDistribution -> Double -> Double +cumulative (WD k l) x | x < 0 = 0 + | otherwise = -expm1 (-(x / l) ** k) + +complCumulative :: WeibullDistribution -> Double -> Double +complCumulative (WD k l) x | x < 0 = 1 + | otherwise = exp (-(x / l) ** k) + +quantile :: WeibullDistribution -> Double -> Double +quantile (WD k l) p + | p == 0 = 0 + | p == 1 = inf + | p > 0 && p < 1 = l * (-log1p (-p)) ** (1 / k) + | otherwise = + error $ "Statistics.Distribution.Weibull.quantile: p must be in [0,1] range. Got: " ++ show p + where inf = 1 / 0 + +complQuantile :: WeibullDistribution -> Double -> Double +complQuantile (WD k l) q + | q == 0 = inf + | q == 1 = 0 + | q > 0 && q < 1 = l * (-log q) ** (1 / k) + | otherwise = + error $ "Statistics.Distribution.Weibull.complQuantile: q must be in [0,1] range. Got: " ++ show q + where inf = 1 / 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Distribution.hs new/statistics-0.16.0.1/Statistics/Distribution.hs --- old/statistics-0.15.2.0/Statistics/Distribution.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Distribution.hs 2001-09-09 03:46:40.000000000 +0200 @@ -29,18 +29,15 @@ , ContGen(..) , DiscreteGen(..) , genContinuous - , genContinous -- * Helper functions , findRoot , sumProbabilities ) where -import Control.Applicative ((<$>), Applicative(..)) -import Control.Monad.Primitive (PrimMonad,PrimState) import Prelude hiding (sum) -import Statistics.Function (square) +import Statistics.Function (square) import Statistics.Sample.Internal (sum) -import System.Random.MWC (Gen, uniform) +import System.Random.Stateful (StatefulGen, uniformDouble01M) import qualified Data.Vector.Unboxed as U import qualified Data.Vector.Generic as G @@ -56,7 +53,7 @@ -- > cumulative d +??? = 1 -- > cumulative d -??? = 0 cumulative :: d -> Double -> Double - + cumulative d x = 1 - complCumulative d x -- | One's complement of cumulative distribution: -- -- > complCumulative d x = 1 - cumulative d x @@ -67,17 +64,18 @@ -- encouraged to provide more precise implementation. complCumulative :: d -> Double -> Double complCumulative d x = 1 - cumulative d x + {-# MINIMAL (cumulative | complCumulative) #-} + -- | Discrete probability distribution. class Distribution d => DiscreteDistr d where -- | Probability of n-th outcome. probability :: d -> Int -> Double probability d = exp . logProbability d - -- | Logarithm of probability of n-th outcome logProbability :: d -> Int -> Double logProbability d = log . probability d - + {-# MINIMAL (probability | logProbability) #-} -- | Continuous probability distribution. -- @@ -89,22 +87,20 @@ -- [/x/,/x+/δ/x/) equal to /density(x)/⋅δ/x/ density :: d -> Double -> Double density d = exp . logDensity d - + -- | Natural logarithm of density. + logDensity :: d -> Double -> Double + logDensity d = log . density d -- | Inverse of the cumulative distribution function. The value -- /x/ for which P(/X/≤/x/) = /p/. If probability is outside -- of [0,1] range function should call 'error' quantile :: d -> Double -> Double - + quantile d x = complQuantile d (1 - x) -- | 1-complement of @quantile@: -- -- > complQuantile x ??? quantile (1 - x) complQuantile :: d -> Double -> Double complQuantile d x = quantile d (1 - x) - - -- | Natural logarithm of density. - logDensity :: d -> Double -> Double - logDensity d = log . density d - + {-# MINIMAL (density | logDensity), (quantile | complQuantile) #-} -- | Type class for distributions with mean. 'maybeMean' should return -- 'Nothing' if it's undefined for current value of data @@ -126,9 +122,10 @@ -- Minimal complete definition is 'maybeVariance' or 'maybeStdDev' class MaybeMean d => MaybeVariance d where maybeVariance :: d -> Maybe Double - maybeVariance d = (*) <$> x <*> x where x = maybeStdDev d + maybeVariance = fmap square . maybeStdDev maybeStdDev :: d -> Maybe Double - maybeStdDev = fmap sqrt . maybeVariance + maybeStdDev = fmap sqrt . maybeVariance + {-# MINIMAL (maybeVariance | maybeStdDev) #-} -- | Type class for distributions with variance. If distribution have -- finite variance for all valid parameter values it should be @@ -140,6 +137,8 @@ variance d = square (stdDev d) stdDev :: d -> Double stdDev = sqrt . variance + {-# MINIMAL (variance | stdDev) #-} + -- | Type class for distributions with entropy, meaning Shannon entropy -- in the case of a discrete distribution, or differential entropy in the @@ -161,13 +160,13 @@ -- | Generate discrete random variates which have given -- distribution. class Distribution d => ContGen d where - genContVar :: PrimMonad m => d -> Gen (PrimState m) -> m Double + genContVar :: (StatefulGen g m) => d -> g -> m Double -- | Generate discrete random variates which have given -- distribution. 'ContGen' is superclass because it's always possible -- to generate real-valued variates from integer values class (DiscreteDistr d, ContGen d) => DiscreteGen d where - genDiscreteVar :: PrimMonad m => d -> Gen (PrimState m) -> m Int + genDiscreteVar :: (StatefulGen g m) => d -> g -> m Int -- | Estimate distribution from sample. First parameter in sample is -- distribution type and second is element type. @@ -181,16 +180,11 @@ -- | Generate variates from continuous distribution using inverse -- transform rule. -genContinuous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double +genContinuous :: (ContDistr d, StatefulGen g m) => d -> g -> m Double genContinuous d gen = do - x <- uniform gen + x <- uniformDouble01M gen return $! quantile d x --- | Backwards compatibility with genContinuous. -genContinous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double -genContinous = genContinuous -{-# DEPRECATED genContinous "Use genContinuous" #-} - data P = P {-# UNPACK #-} !Double {-# UNPACK #-} !Double -- | Approximate the value of /X/ for which P(/x/>/X/)=/p/. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Internal.hs new/statistics-0.16.0.1/Statistics/Internal.hs --- old/statistics-0.15.2.0/Statistics/Internal.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Internal.hs 2001-09-09 03:46:40.000000000 +0200 @@ -25,8 +25,6 @@ import Control.Applicative import Control.Monad import Text.Read -import Data.Orphans () - ---------------------------------------------------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Quantile.hs new/statistics-0.16.0.1/Statistics/Quantile.hs --- old/statistics-0.15.2.0/Statistics/Quantile.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Quantile.hs 2001-09-09 03:46:40.000000000 +0200 @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveFunctor #-} @@ -55,7 +54,6 @@ import Data.Aeson (ToJSON,FromJSON) import Data.Data (Data,Typeable) import Data.Default.Class -import Data.Functor import qualified Data.Foldable as F import Data.Vector.Generic ((!)) import qualified Data.Vector as V @@ -196,7 +194,7 @@ | F.any (badQ nQ) qs = modErr "quantiles" "Wrong quantile number" | G.any isNaN xs = modErr "quantiles" "Sample contains NaNs" -- Doesn't matter what we put into empty container - | fnull qs = 0 <$ qs + | null qs = 0 <$ qs | otherwise = fmap (estimateQuantile sortedXs) ks' where ks' = fmap (\q -> toPk param n q nQ) qs @@ -210,14 +208,6 @@ {-# SPECIALIZE quantiles :: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> S.Vector Double -> f Double #-} --- COMPAT -fnull :: F.Foldable f => f a -> Bool -#if !MIN_VERSION_base(4,8,0) -fnull = F.foldr (\_ _ -> False) True -#else -fnull = null -#endif - -- | O(/k??n/??log /n/). Same as quantiles but uses 'G.Vector' container -- instead of 'Foldable' one. quantilesVec :: (G.Vector v Double, G.Vector v Int) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Regression.hs new/statistics-0.16.0.1/Statistics/Regression.hs --- old/statistics-0.15.2.0/Statistics/Regression.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Regression.hs 2001-09-09 03:46:40.000000000 +0200 @@ -13,7 +13,6 @@ , bootstrapRegress ) where -import Control.Applicative ((<$>)) import Control.Concurrent.Async (forConcurrently) import Control.DeepSeq (rnf) import Control.Monad (when) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Resampling.hs new/statistics-0.16.0.1/Statistics/Resampling.hs --- old/statistics-0.15.2.0/Statistics/Resampling.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Resampling.hs 2001-09-09 03:46:40.000000000 +0200 @@ -1,5 +1,4 @@ {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveFunctor #-} @@ -40,7 +39,6 @@ ) where import Data.Aeson (FromJSON, ToJSON) -import Control.Applicative import Control.Concurrent.Async (forConcurrently_) import Control.Monad (forM_, forM, replicateM, liftM2) import Control.Monad.Primitive (PrimMonad(..)) @@ -88,9 +86,7 @@ , resamples :: v a } deriving (Eq, Read, Show , Generic, Functor, T.Foldable, T.Traversable -#if __GLASGOW_HASKELL__ >= 708 , Typeable, Data -#endif ) instance (Binary a, Binary (v a)) => Binary (Bootstrap v a) where diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Sample/Histogram.hs new/statistics-0.16.0.1/Statistics/Sample/Histogram.hs --- old/statistics-0.15.2.0/Statistics/Sample/Histogram.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Sample/Histogram.hs 2001-09-09 03:46:40.000000000 +0200 @@ -75,7 +75,7 @@ go (i+1) write' bins' b !e = GM.write bins' b e len = G.length xs - d = ((hi - lo) * (1 + realToFrac m_epsilon)) / fromIntegral numBins + d = ((hi - lo) / fromIntegral numBins) * (1 + realToFrac m_epsilon) {-# INLINE histogram_ #-} -- | /O(n)/ Compute decent defaults for the lower and upper bounds of diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Test/ChiSquared.hs new/statistics-0.16.0.1/Statistics/Test/ChiSquared.hs --- old/statistics-0.15.2.0/Statistics/Test/ChiSquared.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Test/ChiSquared.hs 2001-09-09 03:46:40.000000000 +0200 @@ -39,7 +39,7 @@ | n > 0 = Just Test { testSignificance = mkPValue $ complCumulative d chi2 , testStatistics = chi2 - , testDistribution = chiSquared ndf + , testDistribution = chiSquared n } | otherwise = Nothing where @@ -66,7 +66,7 @@ | n > 0 = Just Test { testSignificance = mkPValue $ complCumulative d chi2 , testStatistics = chi2 - , testDistribution = chiSquared ndf + , testDistribution = chiSquared n } | otherwise = Nothing where diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Test/KruskalWallis.hs new/statistics-0.16.0.1/Statistics/Test/KruskalWallis.hs --- old/statistics-0.15.2.0/Statistics/Test/KruskalWallis.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Test/KruskalWallis.hs 2001-09-09 03:46:40.000000000 +0200 @@ -17,7 +17,6 @@ ) where import Data.Ord (comparing) -import Data.Foldable (foldMap) import qualified Data.Vector.Unboxed as U import Statistics.Function (sort, sortBy, square) import Statistics.Distribution (complCumulative) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Test/MannWhitneyU.hs new/statistics-0.16.0.1/Statistics/Test/MannWhitneyU.hs --- old/statistics-0.15.2.0/Statistics/Test/MannWhitneyU.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Test/MannWhitneyU.hs 2001-09-09 03:46:40.000000000 +0200 @@ -24,7 +24,6 @@ -- $references ) where -import Control.Applicative ((<$>)) import Data.List (findIndex) import Data.Ord (comparing) import Numeric.SpecFunctions (choose) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Test/WilcoxonT.hs new/statistics-0.16.0.1/Statistics/Test/WilcoxonT.hs --- old/statistics-0.15.2.0/Statistics/Test/WilcoxonT.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Test/WilcoxonT.hs 2001-09-09 03:46:40.000000000 +0200 @@ -39,7 +39,6 @@ -- the sum of negative ranks (the ranks of the differences where the second parameter is higher). -- to the length of the shorter sample. -import Control.Applicative ((<$>)) import Data.Function (on) import Data.List (findIndex) import Data.Ord (comparing) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/Statistics/Types.hs new/statistics-0.16.0.1/Statistics/Types.hs --- old/statistics-0.15.2.0/Statistics/Types.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/Statistics/Types.hs 2001-09-09 03:46:40.000000000 +0200 @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TypeFamilies #-} @@ -72,12 +71,6 @@ import Data.Vector.Unboxed (Unbox) import Data.Vector.Unboxed.Deriving (derivingUnbox) import GHC.Generics (Generic) - -#if __GLASGOW_HASKELL__ == 704 -import qualified Data.Vector.Generic -import qualified Data.Vector.Generic.Mutable -#endif - import Statistics.Internal import Statistics.Types.Internal import Statistics.Distribution @@ -324,9 +317,7 @@ , estError :: !(e a) -- ^ Confidence interval for estimate. } deriving (Eq, Read, Show, Generic -#if __GLASGOW_HASKELL__ >= 708 , Typeable, Data -#endif ) instance (Binary (e a), Binary a) => Binary (Estimate e a) where diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/changelog.md new/statistics-0.16.0.1/changelog.md --- old/statistics-0.15.2.0/changelog.md 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/changelog.md 2001-09-09 03:46:40.000000000 +0200 @@ -1,3 +1,21 @@ +## Changes in 0.16.0.0 + + * Random number generation switched to API introduced in random-1.2 + + * Support of GHC<7.10 is dropped + + * Fix for chi-squared test (#167) which was completely wrong + + * Computation of CDF and quantiles of Cauchy distribution is now numerically + stable. + + * Fix loss of precision in computing of CDF of gamma distribution + + * Log-normal and Weibull distributions added. + + * `DiscreteGen` instance added for `DiscreteUniform` + + ## Changes in 0.15.2.0 * Test suite is finally fixed (#42, #123). It took very-very-very long diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/statistics.cabal new/statistics-0.16.0.1/statistics.cabal --- old/statistics-0.15.2.0/statistics.cabal 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/statistics.cabal 2001-09-09 03:46:40.000000000 +0200 @@ -1,5 +1,5 @@ name: statistics -version: 0.15.2.0 +version: 0.16.0.1 synopsis: A library of statistical types, data, and functions description: This library provides a number of common functions and types useful @@ -24,14 +24,14 @@ license: BSD2 license-file: LICENSE -homepage: https://github.com/bos/statistics -bug-reports: https://github.com/bos/statistics/issues +homepage: https://github.com/haskell/statistics +bug-reports: https://github.com/haskell/statistics/issues author: Bryan O'Sullivan <b...@serpentine.com>, Alexey Khudaykov <alexey.sklad...@gmail.com> -maintainer: Bryan O'Sullivan <b...@serpentine.com>, Alexey Khudaykov <alexey.sklad...@gmail.com> +maintainer: Alexey Khudaykov <alexey.sklad...@gmail.com> copyright: 2009-2014 Bryan O'Sullivan category: Math, Statistics build-type: Simple -cabal-version: >= 1.8 +cabal-version: >= 1.10 extra-source-files: README.markdown benchmark/bench.hs @@ -46,19 +46,17 @@ tests/utils/fftw.c tested-with: - GHC ==7.4.2 - || ==7.6.3 - || ==7.8.4 - || ==7.10.3 - || ==8.0.2 + GHC ==8.0.2 || ==8.2.2 || ==8.4.4 || ==8.6.5 - || ==8.8.1 - , GHCJS ==8.4 - + || ==8.8.4 + || ==8.10.7 + || ==9.0.1 + || ==9.2.1 library + default-language: Haskell2010 exposed-modules: Statistics.Autocorrelation Statistics.ConfidenceInt @@ -76,11 +74,13 @@ Statistics.Distribution.Geometric Statistics.Distribution.Hypergeometric Statistics.Distribution.Laplace + Statistics.Distribution.Lognormal Statistics.Distribution.Normal Statistics.Distribution.Poisson Statistics.Distribution.StudentT Statistics.Distribution.Transform Statistics.Distribution.Uniform + Statistics.Distribution.Weibull Statistics.Function Statistics.Quantile Statistics.Regression @@ -108,11 +108,11 @@ Statistics.Internal Statistics.Test.Internal Statistics.Types.Internal - build-depends: base >= 4.5 && < 5 - , base-orphans >= 0.6 && <0.9 + build-depends: base >= 4.9 && < 5 -- - , math-functions >= 0.3 - , mwc-random >= 0.13.0.0 + , math-functions >= 0.3.4.1 + , mwc-random >= 0.15.0.0 + , random >= 1.2 -- , aeson >= 0.6.0.0 , async >= 2.2.2 && <2.3 @@ -134,7 +134,8 @@ ghc-prim ghc-options: -O2 -Wall -fwarn-tabs -funbox-strict-fields -test-suite tests +test-suite statistics-tests + default-language: Haskell2010 type: exitcode-stdio-1.0 hs-source-dirs: tests main-is: tests.hs @@ -165,7 +166,6 @@ , aeson , ieee754 >= 0.7.3 , math-functions - , mwc-random , primitive , tasty , tasty-hunit @@ -176,4 +176,4 @@ source-repository head type: git - location: https://github.com/bos/statistics + location: https://github.com/haskell/statistics diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/tests/Tests/Distribution.hs new/statistics-0.16.0.1/tests/Tests/Distribution.hs --- old/statistics-0.15.2.0/tests/Tests/Distribution.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/tests/Tests/Distribution.hs 2001-09-09 03:46:40.000000000 +0200 @@ -2,10 +2,10 @@ ViewPatterns #-} module Tests.Distribution (tests) where -import Control.Applicative ((<$), (<$>), (<*>)) import qualified Control.Exception as E import Data.List (find) import Data.Typeable (Typeable) +import Data.Word import Numeric.MathFunctions.Constants (m_tiny,m_huge,m_epsilon) import Numeric.MathFunctions.Comparison import Statistics.Distribution @@ -19,11 +19,13 @@ import Statistics.Distribution.Geometric import Statistics.Distribution.Hypergeometric import Statistics.Distribution.Laplace (LaplaceDistribution) +import Statistics.Distribution.Lognormal (LognormalDistribution) import Statistics.Distribution.Normal (NormalDistribution) import Statistics.Distribution.Poisson (PoissonDistribution) import Statistics.Distribution.StudentT import Statistics.Distribution.Transform (LinearTransform) import Statistics.Distribution.Uniform (UniformDistribution) +import Statistics.Distribution.Weibull (WeibullDistribution) import Statistics.Distribution.DiscreteUniform (DiscreteUniform) import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck (testProperty) @@ -46,8 +48,10 @@ , contDistrTests (T :: T ExponentialDistribution ) , contDistrTests (T :: T GammaDistribution ) , contDistrTests (T :: T LaplaceDistribution ) + , contDistrTests (T :: T LognormalDistribution ) , contDistrTests (T :: T NormalDistribution ) , contDistrTests (T :: T UniformDistribution ) + , contDistrTests (T :: T WeibullDistribution ) , contDistrTests (T :: T StudentT ) , contDistrTests (T :: T (LinearTransform NormalDistribution)) , contDistrTests (T :: T FDistribution ) @@ -71,8 +75,7 @@ contDistrTests t = testGroup ("Tests for: " ++ typeName t) $ cdfTests t ++ [ testProperty "PDF sanity" $ pdfSanityCheck t - ] ++ - [ (if quantileIsInvCDF_enabled t then id else ignoreTest) + , (if quantileIsInvCDF_enabled t then id else ignoreTest) $ testProperty "Quantile is CDF inverse" $ quantileIsInvCDF t , testProperty "quantile fails p<0||p>1" $ quantileShouldFail t , testProperty "log density check" $ logDensityCheck t @@ -94,7 +97,8 @@ cdfTests t = [ testProperty "C.D.F. sanity" $ cdfSanityCheck t , testProperty "CDF limit at +inf" $ cdfLimitAtPosInfinity t - , testProperty "CDF limit at -inf" $ cdfLimitAtNegInfinity t + , (if cdfLimitAtNegInfinity_enabled t then id else ignoreTest) + $ testProperty "CDF limit at -inf" $ cdfLimitAtNegInfinity t , testProperty "CDF at +inf = 1" $ cdfAtPosInfinity t , testProperty "CDF at -inf = 1" $ cdfAtNegInfinity t , testProperty "CDF is nondecreasing" $ cdfIsNondecreasing t @@ -143,11 +147,14 @@ -- CDF's complement is implemented correctly -cdfComplementIsCorrect :: (Distribution d, Param d) => T d -> d -> Double -> Bool +cdfComplementIsCorrect :: (Distribution d, Param d) => T d -> d -> Double -> Property cdfComplementIsCorrect _ d x - = 1 - (cumulative d x + complCumulative d x) <= tol + = counterexample ("err. tolerance = " ++ show tol) + $ counterexample ("difference = " ++ show delta) + $ delta <= tol where - tol = prec_complementCDF d + tol = prec_complementCDF d + delta = 1 - (cumulative d x + complCumulative d x) -- CDF for discrete distribution uses <= for comparison cdfDiscreteIsCorrect :: (Param d, DiscreteDistr d) => T d -> d -> Property @@ -168,29 +175,34 @@ p1 = cumulative d $ fromIntegral i dp = probability d i relerr = ((p1 - p) - dp) / max p1 dp - , not (p == 0 && p1 == 0 && dp == 0) - && relerr > tol + , p > m_tiny || p == 0 + , p1 > m_tiny + , dp > m_tiny + , relerr > tol ] tol = prec_discreteCDF d -logDensityCheck :: (ContDistr d) => T d -> d -> Double -> Property +logDensityCheck :: (Param d, ContDistr d) => T d -> d -> Double -> Property logDensityCheck _ d x = not (isDenorm x) ==> ( counterexample (printf "density = %g" p) $ counterexample (printf "logDensity = %g" logP) $ counterexample (printf "log p = %g" (log p)) - $ counterexample (printf "eps = %g" (abs (logP - log p) / max (abs (log p)) (abs logP))) + $ counterexample (printf "ulps[log] = %i" ulpsLog) + $ counterexample (printf "ulps[lin] = %i" ulpsLin) $ or [ p == 0 && logP == (-1/0) , p <= m_tiny && logP < log m_tiny -- To avoid problems with roundtripping error in case -- when density is computed as exponent of logDensity we -- accept either inequality - , (ulpDistance (log p) logP <= 32) - || (ulpDistance p (exp logP) <= 32) + , (ulpsLog <= n) || (ulpsLin <= n) ]) where - p = density d x - logP = logDensity d x + p = density d x + logP = logDensity d x + n = prec_logDensity d + ulpsLog = ulpDistance (log p) logP + ulpsLin = ulpDistance p (exp logP) -- PDF is positive pdfSanityCheck :: (ContDistr d) => T d -> d -> Double -> Bool @@ -201,6 +213,8 @@ complQuantileCheck _ d (Double01 p) = counterexample (printf "x0 = %g" x0) $ counterexample (printf "x1 = %g" x1) + $ counterexample (printf "abs err = %g" $ abs (x1 - x0)) + $ counterexample (printf "rel err = %g" $ relativeError x1 x0) -- We avoid extreme tails of distributions -- -- FIXME: all parameters are arbitrary at the moment @@ -208,7 +222,7 @@ , p < 0.99 , not $ isInfinite x0 , not $ isInfinite x1 - ] ==> (abs (x1 - x0) < 1e-6) + ] ==> (if x0 < 1e6 then abs (x1 - x0) < 1e-6 else relativeError x1 x0 < 1e-12) where x0 = quantile d (1 - p) x1 = complQuantile d p @@ -273,23 +287,26 @@ p1 = cumulative d (fromIntegral m + 0.5) - cumulative d (fromIntegral n - 0.5) p2 = sum $ map (probability d) [n .. m] -logProbabilityCheck :: (DiscreteDistr d) => T d -> d -> Int -> Property +logProbabilityCheck :: (Param d, DiscreteDistr d) => T d -> d -> Int -> Property logProbabilityCheck _ d x = counterexample (printf "probability = %g" p) $ counterexample (printf "logProbability = %g" logP) $ counterexample (printf "log p = %g" (log p)) - $ counterexample (printf "eps = %g" (abs (logP - log p) / max (abs (log p)) (abs logP))) + $ counterexample (printf "ulps[log] = %i" ulpsLog) + $ counterexample (printf "ulps[lin] = %i" ulpsLin) $ or [ p == 0 && logP == (-1/0) , p < 1e-308 && logP < 609 -- To avoid problems with roundtripping error in case -- when density is computed as exponent of logDensity we -- accept either inequality - , (ulpDistance (log p) logP <= 32) - || (ulpDistance p (exp logP) <= 32) + , (ulpsLog <= n) || (ulpsLin <= n) ] where p = probability d x logP = logProbability d x + n = prec_logDensity d + ulpsLog = ulpDistance (log p) logP + ulpsLin = ulpDistance p (exp logP) -- | Parameters for distribution testing. Some distribution require @@ -298,6 +315,9 @@ -- | Whether quantileIsInvCDF is enabled quantileIsInvCDF_enabled :: T a -> Bool quantileIsInvCDF_enabled _ = True + -- | Whether cdfLimitAtNegInfinity is enabled + cdfLimitAtNegInfinity_enabled :: T a -> Bool + cdfLimitAtNegInfinity_enabled _ = True -- | Precision for 'quantileIsInvCDF' test prec_quantile_CDF :: a -> (Double,Double) prec_quantile_CDF _ = (16,16) @@ -307,33 +327,53 @@ -- | Precision of CDF's complement prec_complementCDF :: a -> Double prec_complementCDF _ = 1e-14 + -- | Precision for logDensity check + prec_logDensity :: a -> Word64 + prec_logDensity _ = 32 instance Param StudentT where -- FIXME: disabled unless incompleteBeta troubles are sorted out quantileIsInvCDF_enabled _ = False + instance Param BetaDistribution where -- FIXME: See https://github.com/bos/statistics/issues/161 for details quantileIsInvCDF_enabled _ = False + instance Param FDistribution where -- FIXME: disabled unless incompleteBeta troubles are sorted out quantileIsInvCDF_enabled _ = False + -- We compute CDF and complement using same method so precision + -- should be very good here. + prec_complementCDF _ = 2 * m_epsilon instance Param ChiSquared where prec_quantile_CDF _ = (32,32) instance Param BinomialDistribution where - prec_discreteCDF _ = 1e-13 -instance Param CauchyDistribution + prec_discreteCDF _ = 1e-12 + prec_logDensity _ = 48 +instance Param CauchyDistribution where + -- Distribution is long-tailed enough that we may never get to zero + cdfLimitAtNegInfinity_enabled _ = False + instance Param DiscreteUniform instance Param ExponentialDistribution -instance Param GammaDistribution +instance Param GammaDistribution where + -- We lose precision near `incompleteGamma 10` because of error + -- introuced by exp . logGamma. This could only be fixed in + -- math-function by implementing gamma + prec_quantile_CDF _ = (24,24) + prec_logDensity _ = 64 instance Param GeometricDistribution instance Param GeometricDistribution0 instance Param HypergeometricDistribution instance Param LaplaceDistribution +instance Param LognormalDistribution where + prec_quantile_CDF _ = (64,64) instance Param NormalDistribution instance Param PoissonDistribution instance Param UniformDistribution +instance Param WeibullDistribution instance Param a => Param (LinearTransform a) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/tests/Tests/Orphanage.hs new/statistics-0.16.0.1/tests/Tests/Orphanage.hs --- old/statistics-0.15.2.0/tests/Tests/Orphanage.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/tests/Tests/Orphanage.hs 2001-09-09 03:46:40.000000000 +0200 @@ -16,11 +16,13 @@ import Statistics.Distribution.Geometric import Statistics.Distribution.Hypergeometric import Statistics.Distribution.Laplace (LaplaceDistribution, laplace) +import Statistics.Distribution.Lognormal (LognormalDistribution, lognormalDistr) import Statistics.Distribution.Normal (NormalDistribution, normalDistr) import Statistics.Distribution.Poisson (PoissonDistribution, poisson) import Statistics.Distribution.StudentT import Statistics.Distribution.Transform (LinearTransform, scaleAround) import Statistics.Distribution.Uniform (UniformDistribution, uniformDistr) +import Statistics.Distribution.Weibull (WeibullDistribution, weibullDistr) import Statistics.Distribution.DiscreteUniform (DiscreteUniform, discreteUniformAB) import Statistics.Types @@ -38,7 +40,7 @@ instance QC.Arbitrary LaplaceDistribution where arbitrary = laplace <$> QC.choose (-10,10) <*> QC.choose (0, 2) instance QC.Arbitrary GammaDistribution where - arbitrary = gammaDistr <$> QC.choose (0.1,10) <*> QC.choose (0.1,10) + arbitrary = gammaDistr <$> QC.choose (0.1,100) <*> QC.choose (0.1,100) instance QC.Arbitrary BetaDistribution where arbitrary = betaDistr <$> QC.choose (1e-3,10) <*> QC.choose (1e-3,10) instance QC.Arbitrary GeometricDistribution where @@ -50,6 +52,9 @@ m <- QC.choose (0,l) k <- QC.choose (1,l) return $ hypergeometric m l k +instance QC.Arbitrary LognormalDistribution where + -- can't choose sigma too big, otherwise goes outside of double-float limit + arbitrary = lognormalDistr <$> QC.choose (-100,100) <*> QC.choose (1e-10, 20) instance QC.Arbitrary NormalDistribution where arbitrary = normalDistr <$> QC.choose (-100,100) <*> QC.choose (1e-3, 1e3) instance QC.Arbitrary PoissonDistribution where @@ -60,6 +65,8 @@ arbitrary = do a <- QC.arbitrary b <- QC.arbitrary `suchThat` (/= a) return $ uniformDistr a b +instance QC.Arbitrary WeibullDistribution where + arbitrary = weibullDistr <$> QC.choose (1e-3,1e3) <*> QC.choose (1e-3, 1e3) instance QC.Arbitrary CauchyDistribution where arbitrary = cauchyDistribution <$> arbitrary diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/tests/Tests/Serialization.hs new/statistics-0.16.0.1/tests/Tests/Serialization.hs --- old/statistics-0.15.2.0/tests/Tests/Serialization.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/tests/Tests/Serialization.hs 2001-09-09 03:46:40.000000000 +0200 @@ -16,11 +16,13 @@ import Statistics.Distribution.Geometric import Statistics.Distribution.Hypergeometric import Statistics.Distribution.Laplace (LaplaceDistribution) +import Statistics.Distribution.Lognormal (LognormalDistribution) import Statistics.Distribution.Normal (NormalDistribution) import Statistics.Distribution.Poisson (PoissonDistribution) import Statistics.Distribution.StudentT import Statistics.Distribution.Transform (LinearTransform) import Statistics.Distribution.Uniform (UniformDistribution) +import Statistics.Distribution.Weibull (WeibullDistribution) import Statistics.Types import Test.Tasty (TestTree, testGroup) @@ -50,8 +52,10 @@ , serializationTests (T :: T ExponentialDistribution ) , serializationTests (T :: T GammaDistribution ) , serializationTests (T :: T LaplaceDistribution ) + , serializationTests (T :: T LognormalDistribution ) , serializationTests (T :: T NormalDistribution ) , serializationTests (T :: T UniformDistribution ) + , serializationTests (T :: T WeibullDistribution ) , serializationTests (T :: T StudentT ) , serializationTests (T :: T (LinearTransform NormalDistribution)) , serializationTests (T :: T FDistribution ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.15.2.0/tests/Tests/Transform.hs new/statistics-0.16.0.1/tests/Tests/Transform.hs --- old/statistics-0.15.2.0/tests/Tests/Transform.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.0.1/tests/Tests/Transform.hs 2001-09-09 03:46:40.000000000 +0200 @@ -8,7 +8,6 @@ import Data.Bits ((.&.), shiftL) import Data.Complex (Complex((:+))) -import Data.Functor ((<$>)) import Numeric.Sum (kbn, sumVector) import Statistics.Function (within) import Statistics.Transform (CD, dct, fft, idct, ifft)