Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package ghc-statistics for openSUSE:Factory checked in at 2023-04-14 13:12:53 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ghc-statistics (Old) and /work/SRC/openSUSE:Factory/.ghc-statistics.new.19717 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-statistics" Fri Apr 14 13:12:53 2023 rev:8 rq:1079161 version:0.16.2.0 Changes: -------- --- /work/SRC/openSUSE:Factory/ghc-statistics/ghc-statistics.changes 2023-04-04 21:23:48.586348577 +0200 +++ /work/SRC/openSUSE:Factory/.ghc-statistics.new.19717/ghc-statistics.changes 2023-04-14 13:13:00.167538008 +0200 @@ -1,0 +2,9 @@ +Fri Apr 7 17:07:55 UTC 2023 - Peter Simons <[email protected]> + +- Update statistics to version 0.16.2.0. + Upstream has edited the change log file since the last release in + a non-trivial way, i.e. they did more than just add a new entry + at the top. You can review the file at: + http://hackage.haskell.org/package/statistics-0.16.2.0/src/changelog.md + +------------------------------------------------------------------- Old: ---- statistics-0.16.1.2.tar.gz New: ---- statistics-0.16.2.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ghc-statistics.spec ++++++ --- /var/tmp/diff_new_pack.vuvI6G/_old 2023-04-14 13:13:00.651540776 +0200 +++ /var/tmp/diff_new_pack.vuvI6G/_new 2023-04-14 13:13:00.655540799 +0200 @@ -20,7 +20,7 @@ %global pkgver %{pkg_name}-%{version} %bcond_with tests Name: ghc-%{pkg_name} -Version: 0.16.1.2 +Version: 0.16.2.0 Release: 0 Summary: A library of statistical types, data, and functions License: BSD-2-Clause ++++++ statistics-0.16.1.2.tar.gz -> statistics-0.16.2.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Distribution/Binomial.hs new/statistics-0.16.2.0/Statistics/Distribution/Binomial.hs --- old/statistics-0.16.1.2/Statistics/Distribution/Binomial.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Distribution/Binomial.hs 2001-09-09 03:46:40.000000000 +0200 @@ -71,6 +71,7 @@ instance D.Distribution BinomialDistribution where cumulative = cumulative + complCumulative = complCumulative instance D.DiscreteDistr BinomialDistribution where probability = probability @@ -127,7 +128,6 @@ k' = fromIntegral k nk' = fromIntegral $ n - k --- Summation from different sides required to reduce roundoff errors cumulative :: BinomialDistribution -> Double -> Double cumulative (BD n p) x | isNaN x = error "Statistics.Distribution.Binomial.cumulative: NaN input" @@ -138,6 +138,16 @@ where k = floor x +complCumulative :: BinomialDistribution -> Double -> Double +complCumulative (BD n p) x + | isNaN x = error "Statistics.Distribution.Binomial.complCumulative: NaN input" + | isInfinite x = if x > 0 then 0 else 1 + | k < 0 = 1 + | k >= n = 0 + | otherwise = incompleteBeta (fromIntegral (k+1)) (fromIntegral (n-k)) p + where + k = floor x + mean :: BinomialDistribution -> Double mean (BD n p) = fromIntegral n * p diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Distribution/Exponential.hs new/statistics-0.16.2.0/Statistics/Distribution/Exponential.hs --- old/statistics-0.16.1.2/Statistics/Distribution/Exponential.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Distribution/Exponential.hs 2001-09-09 03:46:40.000000000 +0200 @@ -33,7 +33,6 @@ import Numeric.SpecFunctions (log1p,expm1) import Numeric.MathFunctions.Constants (m_neg_inf) import qualified System.Random.MWC.Distributions as MWC -import qualified Data.Vector.Generic as G import qualified Statistics.Distribution as D import qualified Statistics.Sample as S @@ -136,11 +135,9 @@ errMsg :: Double -> String errMsg l = "Statistics.Distribution.Exponential.exponential: scale parameter must be positive. Got " ++ show l --- | Create exponential distribution from sample. Returns @Nothing@ if --- sample is empty or contains negative elements. No other tests are --- made to check whether it truly is exponential. +-- | Create exponential distribution from sample. Estimates the rate +-- with the maximum likelihood estimator, which is biased. Returns +-- @Nothing@ if the sample mean does not exist or is not positive. instance D.FromSample ExponentialDistribution Double where - fromSample xs - | G.null xs = Nothing - | G.all (>= 0) xs = Just $! ED (S.mean xs) - | otherwise = Nothing + fromSample xs = let m = S.mean xs + in if m > 0 then Just (ED (1/m)) else Nothing diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Distribution/Geometric.hs new/statistics-0.16.2.0/Statistics/Distribution/Geometric.hs --- old/statistics-0.16.1.2/Statistics/Distribution/Geometric.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Distribution/Geometric.hs 2001-09-09 03:46:40.000000000 +0200 @@ -39,7 +39,7 @@ import Data.Binary (Binary(..)) import Data.Data (Data, Typeable) import GHC.Generics (Generic) -import Numeric.MathFunctions.Constants (m_pos_inf, m_neg_inf) +import Numeric.MathFunctions.Constants (m_neg_inf) import Numeric.SpecFunctions (log1p,expm1) import qualified System.Random.MWC.Distributions as MWC @@ -81,10 +81,11 @@ instance D.DiscreteDistr GeometricDistribution where probability (GD s) n | n < 1 = 0 - | otherwise = s * (1-s) ** (fromIntegral n - 1) + | s >= 0.5 = s * (1 - s)^(n - 1) + | otherwise = s * (exp $ log1p (-s) * (fromIntegral n - 1)) logProbability (GD s) n | n < 1 = m_neg_inf - | otherwise = log s + log (1-s) * (fromIntegral n - 1) + | otherwise = log s + log1p (-s) * (fromIntegral n - 1) instance D.Mean GeometricDistribution where @@ -102,9 +103,8 @@ instance D.Entropy GeometricDistribution where entropy (GD s) - | s == 0 = m_pos_inf | s == 1 = 0 - | otherwise = negate $ (s * log s + (1-s) * log (1-s)) / s + | otherwise = -(s * log s + (1-s) * log1p (-s)) / s instance D.MaybeEntropy GeometricDistribution where maybeEntropy = Just . D.entropy @@ -120,14 +120,18 @@ | x < 1 = 0 | isInfinite x = 1 | isNaN x = error "Statistics.Distribution.Geometric.cumulative: NaN input" - | otherwise = negate $ expm1 $ fromIntegral (floor x :: Int) * log1p (-s) + | s >= 0.5 = 1 - (1 - s)^k + | otherwise = negate $ expm1 $ fromIntegral k * log1p (-s) + where k = floor x :: Int complCumulative :: GeometricDistribution -> Double -> Double complCumulative (GD s) x | x < 1 = 1 | isInfinite x = 0 - | isNaN x = error "Statistics.Distribution.Geometric.cumulative: NaN input" - | otherwise = exp $ fromIntegral (floor x :: Int) * log1p (-s) + | isNaN x = error "Statistics.Distribution.Geometric.complCumulative: NaN input" + | s >= 0.5 = (1 - s)^k + | otherwise = exp $ fromIntegral k * log1p (-s) + where k = floor x :: Int -- | Create geometric distribution. @@ -139,11 +143,11 @@ geometricE :: Double -- ^ Success rate -> Maybe GeometricDistribution geometricE x - | x >= 0 && x <= 1 = Just (GD x) + | x > 0 && x <= 1 = Just (GD x) | otherwise = Nothing errMsg :: Double -> String -errMsg x = "Statistics.Distribution.Geometric.geometric: probability must be in [0,1] range. Got " ++ show x +errMsg x = "Statistics.Distribution.Geometric.geometric: probability must be in (0,1] range. Got " ++ show x ---------------------------------------------------------------- @@ -215,8 +219,8 @@ geometric0E :: Double -- ^ Success rate -> Maybe GeometricDistribution0 geometric0E x - | x >= 0 && x <= 1 = Just (GD0 x) + | x > 0 && x <= 1 = Just (GD0 x) | otherwise = Nothing errMsg0 :: Double -> String -errMsg0 x = "Statistics.Distribution.Geometric.geometric0: probability must be in [0,1] range. Got " ++ show x +errMsg0 x = "Statistics.Distribution.Geometric.geometric0: probability must be in (0,1] range. Got " ++ show x diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Distribution/Hypergeometric.hs new/statistics-0.16.2.0/Statistics/Distribution/Hypergeometric.hs --- old/statistics-0.16.1.2/Statistics/Distribution/Hypergeometric.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Distribution/Hypergeometric.hs 2001-09-09 03:46:40.000000000 +0200 @@ -71,6 +71,7 @@ instance D.Distribution HypergeometricDistribution where cumulative = cumulative + complCumulative = complCumulative instance D.DiscreteDistr HypergeometricDistribution where probability = probability @@ -133,10 +134,10 @@ errMsg :: Int -> Int -> Int -> String errMsg m l k - = "Statistics.Distribution.Hypergeometric.hypergeometric: " - ++ "m=" ++ show m - ++ "l=" ++ show l - ++ "k=" ++ show k + = "Statistics.Distribution.Hypergeometric.hypergeometric:" + ++ " m=" ++ show m + ++ " l=" ++ show l + ++ " k=" ++ show k ++ " should hold: l>0 & m in [0,l] & k in (0,l]" -- Naive implementation @@ -167,4 +168,16 @@ where n = floor x minN = max 0 (mi+ki-li) + maxN = min mi ki + +complCumulative :: HypergeometricDistribution -> Double -> Double +complCumulative d@(HD mi li ki) x + | isNaN x = error "Statistics.Distribution.Hypergeometric.complCumulative: NaN argument" + | isInfinite x = if x > 0 then 0 else 1 + | n < minN = 1 + | n >= maxN = 0 + | otherwise = D.sumProbabilities d (n + 1) maxN + where + n = floor x + minN = max 0 (mi+ki-li) maxN = min mi ki diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Distribution/Laplace.hs new/statistics-0.16.2.0/Statistics/Distribution/Laplace.hs --- old/statistics-0.16.1.2/Statistics/Distribution/Laplace.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Distribution/Laplace.hs 2001-09-09 03:46:40.000000000 +0200 @@ -151,9 +151,9 @@ errMsg _ s = "Statistics.Distribution.Laplace.laplace: scale parameter must be positive. Got " ++ show s --- | Create Laplace distribution from sample. No tests are made to --- check whether it truly is Laplace. Location of distribution --- estimated as median of sample. +-- | Create Laplace distribution from sample. The location is estimated +-- as the median of the sample, and the scale as the mean absolute +-- deviation of the median. instance D.FromSample LaplaceDistribution Double where fromSample xs | G.null xs = Nothing diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Distribution/NegativeBinomial.hs new/statistics-0.16.2.0/Statistics/Distribution/NegativeBinomial.hs --- old/statistics-0.16.1.2/Statistics/Distribution/NegativeBinomial.hs 1970-01-01 01:00:00.000000000 +0100 +++ new/statistics-0.16.2.0/Statistics/Distribution/NegativeBinomial.hs 2001-09-09 03:46:40.000000000 +0200 @@ -0,0 +1,188 @@ +{-# LANGUAGE OverloadedStrings, PatternGuards, + DeriveDataTypeable, DeriveGeneric #-} +-- | +-- Module : Statistics.Distribution.NegativeBinomial +-- Copyright : (c) 2022 Lorenz Minder +-- License : BSD3 +-- +-- Maintainer : [email protected] +-- Stability : experimental +-- Portability : portable +-- +-- The negative binomial distribution. This is the discrete probability +-- distribution of the number of failures in a sequence of independent +-- yes\/no experiments before a specified number of successes /r/. Each +-- Bernoulli trial has success probability /p/ in the range (0, 1]. The +-- parameter /r/ must be positive, but does not have to be integer. + +module Statistics.Distribution.NegativeBinomial ( + NegativeBinomialDistribution + -- * Constructors + , negativeBinomial + , negativeBinomialE + -- * Accessors + , nbdSuccesses + , nbdProbability +) where + +import Control.Applicative +import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) +import Data.Binary (Binary(..)) +import Data.Data (Data, Typeable) +import Data.Foldable (foldl') +import GHC.Generics (Generic) +import Numeric.SpecFunctions (incompleteBeta, log1p) +import Numeric.SpecFunctions.Extra (logChooseFast) +import Numeric.MathFunctions.Constants (m_epsilon, m_tiny) + +import qualified Statistics.Distribution as D +import Statistics.Internal + +-- Math helper functions + +-- | Generalized binomial coefficients. +-- +-- These computes binomial coefficients with the small generalization +-- that the /n/ need not be integer, but can be real. +gChoose :: Double -> Int -> Double +gChoose n k + | k < 0 = 0 + | k' >= 50 = exp $ logChooseFast n k' + | otherwise = foldl' (*) 1 factors + where factors = [ (n - k' + j) / j | j <- [1..k'] ] + k' = fromIntegral k + + +-- Implementation of Negative Binomial + +-- | The negative binomial distribution. +data NegativeBinomialDistribution = NBD { + nbdSuccesses :: {-# UNPACK #-} !Double + -- ^ Number of successes until stop + , nbdProbability :: {-# UNPACK #-} !Double + -- ^ Success probability. + } deriving (Eq, Typeable, Data, Generic) + +instance Show NegativeBinomialDistribution where + showsPrec i (NBD r p) = defaultShow2 "negativeBinomial" r p i +instance Read NegativeBinomialDistribution where + readPrec = defaultReadPrecM2 "negativeBinomial" negativeBinomialE + +instance ToJSON NegativeBinomialDistribution +instance FromJSON NegativeBinomialDistribution where + parseJSON (Object v) = do + r <- v .: "nbdSuccesses" + p <- v .: "nbdProbability" + maybe (fail $ errMsg r p) return $ negativeBinomialE r p + parseJSON _ = empty + +instance Binary NegativeBinomialDistribution where + put (NBD r p) = put r >> put p + get = do + r <- get + p <- get + maybe (fail $ errMsg r p) return $ negativeBinomialE r p + +instance D.Distribution NegativeBinomialDistribution where + cumulative = cumulative + complCumulative = complCumulative + +instance D.DiscreteDistr NegativeBinomialDistribution where + probability = probability + logProbability = logProbability + +instance D.Mean NegativeBinomialDistribution where + mean = mean + +instance D.Variance NegativeBinomialDistribution where + variance = variance + +instance D.MaybeMean NegativeBinomialDistribution where + maybeMean = Just . D.mean + +instance D.MaybeVariance NegativeBinomialDistribution where + maybeStdDev = Just . D.stdDev + maybeVariance = Just . D.variance + +instance D.Entropy NegativeBinomialDistribution where + entropy = directEntropy + +instance D.MaybeEntropy NegativeBinomialDistribution where + maybeEntropy = Just . D.entropy + +-- This could be slow for big n +probability :: NegativeBinomialDistribution -> Int -> Double +probability d@(NBD r p) k + | k < 0 = 0 + -- Switch to log domain for large k + r to avoid overflows. + -- + -- We also want to avoid underflow when computing (1-p)^k & + -- p^r. + | k' + r < 1000 + , pK >= m_tiny + , pR >= m_tiny = gChoose (k' + r - 1) k * pK * pR + | otherwise = exp $ logProbability d k + where + pK = exp $ log1p (-p) * k' + pR = p**r + k' = fromIntegral k + +logProbability :: NegativeBinomialDistribution -> Int -> Double +logProbability (NBD r p) k + | k < 0 = (-1)/0 + | otherwise = logChooseFast (k' + r - 1) k' + + log1p (-p) * k' + + log p * r + where k' = fromIntegral k + +cumulative :: NegativeBinomialDistribution -> Double -> Double +cumulative (NBD r p) x + | isNaN x = error "Statistics.Distribution.NegativeBinomial.cumulative: NaN input" + | isInfinite x = if x > 0 then 1 else 0 + | k < 0 = 0 + | otherwise = incompleteBeta r (fromIntegral (k+1)) p + where + k = floor x :: Integer + +complCumulative :: NegativeBinomialDistribution -> Double -> Double +complCumulative (NBD r p) x + | isNaN x = error "Statistics.Distribution.NegativeBinomial.complCumulative: NaN input" + | isInfinite x = if x > 0 then 0 else 1 + | k < 0 = 1 + | otherwise = incompleteBeta (fromIntegral (k+1)) r (1 - p) + where + k = (floor x)::Integer + +mean :: NegativeBinomialDistribution -> Double +mean (NBD r p) = r * (1 - p)/p + +variance :: NegativeBinomialDistribution -> Double +variance (NBD r p) = r * (1 - p)/(p * p) + +directEntropy :: NegativeBinomialDistribution -> Double +directEntropy d = + negate . sum $ + takeWhile (< -m_epsilon) $ + dropWhile (>= -m_epsilon) $ + [ let x = probability d k in x * log x | k <- [0..]] + +-- | Construct negative binomial distribution. Number of failures /r/ +-- must be positive and probability must be in (0,1] range +negativeBinomial :: Double -- ^ Number of successes. + -> Double -- ^ Success probability. + -> NegativeBinomialDistribution +negativeBinomial r p = maybe (error $ errMsg r p) id $ negativeBinomialE r p + +-- | Construct negative binomial distribution. Number of failures /r/ +-- must be positive and probability must be in (0,1] range +negativeBinomialE :: Double -- ^ Number of successes. + -> Double -- ^ Success probability. + -> Maybe NegativeBinomialDistribution +negativeBinomialE r p + | r > 0 && 0 < p && p <= 1 = Just (NBD r p) + | otherwise = Nothing + +errMsg :: Double -> Double -> String +errMsg r p + = "Statistics.Distribution.NegativeBinomial.negativeBinomial: r=" ++ show r + ++ " p=" ++ show p ++ ", but need r>0 and p in (0,1]" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Distribution/Poisson/Internal.hs new/statistics-0.16.2.0/Statistics/Distribution/Poisson/Internal.hs --- old/statistics-0.16.1.2/Statistics/Distribution/Poisson/Internal.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Distribution/Poisson/Internal.hs 2001-09-09 03:46:40.000000000 +0200 @@ -33,7 +33,7 @@ (m_sqrt_2_pi * sqrt x) -- -- | Compute entropy using Theorem 1 from "Sharp Bounds on the Entropy --- -- of the Poisson Law". This function is unused because 'directEntorpy' +-- -- of the Poisson Law". This function is unused because 'directEntropy' -- -- is just as accurate and is faster by about a factor of 4. -- alyThm1 :: Double -> Double -- alyThm1 lambda = diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Distribution.hs new/statistics-0.16.2.0/Statistics/Distribution.hs --- old/statistics-0.16.1.2/Statistics/Distribution.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Distribution.hs 2001-09-09 03:46:40.000000000 +0200 @@ -171,10 +171,10 @@ -- | Estimate distribution from sample. First parameter in sample is -- distribution type and second is element type. class FromSample d a where - -- | Estimate distribution from sample. Returns nothing is there's - -- not enough data to estimate or sample clearly doesn't come from - -- distribution in question. For example if there's negative - -- samples in exponential distribution. + -- | Estimate distribution from sample. Returns 'Nothing' if there is + -- not enough data, or if no usable fit results from the method + -- used, e.g., the estimated distribution parameters would be + -- invalid or inaccurate. fromSample :: G.Vector v a => v a -> Maybe d diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Sample/Histogram.hs new/statistics-0.16.2.0/Statistics/Sample/Histogram.hs --- old/statistics-0.16.1.2/Statistics/Sample/Histogram.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Sample/Histogram.hs 2001-09-09 03:46:40.000000000 +0200 @@ -1,4 +1,4 @@ -{-# LANGUAGE FlexibleContexts, BangPatterns #-} +{-# LANGUAGE FlexibleContexts, BangPatterns, ScopedTypeVariables #-} -- | -- Module : Statistics.Sample.Histogram @@ -19,6 +19,7 @@ , range ) where +import Control.Monad.ST import Numeric.MathFunctions.Constants (m_epsilon,m_tiny) import Statistics.Function (minMax) import qualified Data.Vector.Generic as G @@ -49,7 +50,7 @@ -- -- Interval (bin) sizes are uniform, based on the supplied upper -- and lower bounds. -histogram_ :: (Num b, RealFrac a, G.Vector v0 a, G.Vector v1 b) => +histogram_ :: forall b a v0 v1. (Num b, RealFrac a, G.Vector v0 a, G.Vector v1 b) => Int -- ^ Number of bins. This value must be positive. A zero -- or negative value will cause an error. @@ -65,6 +66,7 @@ -> v1 b histogram_ numBins lo hi xs0 = G.create (GM.replicate numBins 0 >>= bin xs0) where + bin :: forall s. v0 a -> G.Mutable v1 s b -> ST s (G.Mutable v1 s b) bin xs bins = go 0 where go i | i >= len = return bins diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/Statistics/Test/KolmogorovSmirnov.hs new/statistics-0.16.2.0/Statistics/Test/KolmogorovSmirnov.hs --- old/statistics-0.16.1.2/Statistics/Test/KolmogorovSmirnov.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/Statistics/Test/KolmogorovSmirnov.hs 2001-09-09 03:46:40.000000000 +0200 @@ -21,7 +21,7 @@ , kolmogorovSmirnovCdfD , kolmogorovSmirnovD , kolmogorovSmirnov2D - -- * Probablities + -- * Probabilities , kolmogorovSmirnovProbability -- * References -- $references diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/changelog.md new/statistics-0.16.2.0/changelog.md --- old/statistics-0.16.1.2/changelog.md 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/changelog.md 2001-09-09 03:46:40.000000000 +0200 @@ -1,3 +1,11 @@ +## Changes in 0.16.2.0 + + * Improved precision for `complCumulative` for hypergeometric and binomial + distributions. Precision improvements of geometric distribution + + * Negative binomial distribution added. + + ## Changes in 0.16.1.2 * Fixed bug in `fromSample` for exponential distribudion (#190) @@ -265,7 +273,7 @@ * Bugs in DCT and IDCT are fixed. - * Accesors for uniform distribution are added. + * Accessors for uniform distribution are added. * ContGen instances for all continuous distributions are added. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/statistics.cabal new/statistics-0.16.2.0/statistics.cabal --- old/statistics-0.16.1.2/statistics.cabal 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/statistics.cabal 2001-09-09 03:46:40.000000000 +0200 @@ -1,5 +1,5 @@ name: statistics -version: 0.16.1.2 +version: 0.16.2.0 synopsis: A library of statistical types, data, and functions description: This library provides a number of common functions and types useful @@ -75,6 +75,7 @@ Statistics.Distribution.Hypergeometric Statistics.Distribution.Laplace Statistics.Distribution.Lognormal + Statistics.Distribution.NegativeBinomial Statistics.Distribution.Normal Statistics.Distribution.Poisson Statistics.Distribution.StudentT @@ -142,6 +143,7 @@ Tests.ApproxEq Tests.Correlation Tests.Distribution + Tests.ExactDistribution Tests.Function Tests.Helpers Tests.KDE diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/tests/Tests/Distribution.hs new/statistics-0.16.2.0/tests/Tests/Distribution.hs --- old/statistics-0.16.1.2/tests/Tests/Distribution.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/tests/Tests/Distribution.hs 2001-09-09 03:46:40.000000000 +0200 @@ -20,6 +20,7 @@ import Statistics.Distribution.Hypergeometric import Statistics.Distribution.Laplace (LaplaceDistribution) import Statistics.Distribution.Lognormal (LognormalDistribution) +import Statistics.Distribution.NegativeBinomial (NegativeBinomialDistribution) import Statistics.Distribution.Normal (NormalDistribution) import Statistics.Distribution.Poisson (PoissonDistribution) import Statistics.Distribution.StudentT @@ -35,6 +36,7 @@ import Text.Printf (printf) import Tests.ApproxEq (ApproxEq(..)) +import Tests.ExactDistribution (exactDistributionTests) import Tests.Helpers (T(..), Double01(..), testAssertion, typeName) import Tests.Helpers (monotonicallyIncreasesIEEE,isDenorm) import Tests.Orphanage () @@ -60,9 +62,11 @@ , discreteDistrTests (T :: T GeometricDistribution ) , discreteDistrTests (T :: T GeometricDistribution0 ) , discreteDistrTests (T :: T HypergeometricDistribution ) + , discreteDistrTests (T :: T NegativeBinomialDistribution ) , discreteDistrTests (T :: T PoissonDistribution ) , discreteDistrTests (T :: T DiscreteUniform ) + , exactDistributionTests , unitTests ] @@ -89,7 +93,7 @@ [ testProperty "Prob. sanity" $ probSanityCheck t , testProperty "CDF is sum of prob." $ discreteCDFcorrect t , testProperty "Discrete CDF is OK" $ cdfDiscreteIsCorrect t - , testProperty "log probabilty check" $ logProbabilityCheck t + , testProperty "log probability check" $ logProbabilityCheck t ] -- Tests for distributions which have CDF @@ -370,14 +374,15 @@ instance Param LaplaceDistribution instance Param LognormalDistribution where prec_quantile_CDF _ = (64,64) +instance Param NegativeBinomialDistribution where + prec_discreteCDF _ = 1e-12 + prec_logDensity _ = 48 instance Param NormalDistribution instance Param PoissonDistribution instance Param UniformDistribution instance Param WeibullDistribution instance Param a => Param (LinearTransform a) - - ---------------------------------------------------------------- -- Unit tests ---------------------------------------------------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/tests/Tests/ExactDistribution.hs new/statistics-0.16.2.0/tests/Tests/ExactDistribution.hs --- old/statistics-0.16.1.2/tests/Tests/ExactDistribution.hs 1970-01-01 01:00:00.000000000 +0100 +++ new/statistics-0.16.2.0/tests/Tests/ExactDistribution.hs 2001-09-09 03:46:40.000000000 +0200 @@ -0,0 +1,396 @@ +{-# LANGUAGE BangPatterns, + FlexibleInstances, + FlexibleContexts, + ScopedTypeVariables + #-} +-- | +-- Module : Tests.ExactDistribution +-- Copyright : (c) 2022 Lorenz Minder +-- License : BSD3 +-- +-- Maintainer : [email protected] +-- Stability : experimental +-- Portability : portable +-- +-- Tests comparing distributions to exact versions. +-- +-- This module provides exact versions of some distributions, and tests +-- to compare them to the production implementations in +-- Statistics.Distribution.*. It also contains the functionality to +-- test the production distributions against the exact versions. Errors +-- are flagged if data points are discovered where the probability mass +-- function, the cumulative probability function, or its complement +-- deviates too far (more than a prescribed tolerance) from the exact +-- calculation. +-- +-- The distributions here are implemented with rational integer +-- arithmetic, using pretty much the textbook definitions formulas. +-- Numerical problems like overflow or rounding errors cannot occur with +-- this approach, making them are easy to write, read and verify. They +-- are, of course, substantially slower than the production +-- distributions in Statistics.Distribution.*. This makes them +-- unsuitable for most uses other than testing and debugging. (Also, +-- only a handful of distributions can be implemented exactly with +-- rational arithmetic.) +-- +-- This module has the following sub-components: +-- +-- * Exact (rational) definitions of some distribution functions, +-- including both the probability mass as well as the CDF. +-- +-- * QC.Arbitrary implementations to sample test cases (i.e., +-- distribution parameters and evaluation points). +-- +-- * "Linkage": a mechanism to construct a production distribution +-- corresponding to a test case for an exact distribution. +-- +-- * A set of tests for the distributions derived using all of the above +-- components. +-- +-- This module exports a number symbols which can be useful for +-- debugging and experimentation. For use in a test suite, only the +-- `exactDistributionTests` function is needed. + +module Tests.ExactDistribution ( + -- * Exact math functions + exactChoose + + -- * Exact distributions + , ExactDiscreteDistr(..) + + , ExactBinomialDistr(..) + , ExactDiscreteUniformDistr(..) + , ExactGeometricDistr(..) + , ExactHypergeomDistr(..) + + -- * Linking to production distributions + , ProductionProbFuncs(..) + , productionProbFuncs + , ProductionLinkage + + -- * Individual test routines + , pmfMatch + , cdfMatch + , complCdfMatch + + -- * Test groups + , Tag(..) + , distTests + , exactDistributionTests +) where + +---------------------------------------------------------------- + +import Data.Foldable +import Data.Ratio + +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.QuickCheck (testProperty) +import Test.QuickCheck as QC +import Numeric.MathFunctions.Comparison (relativeError) + +import Statistics.Distribution +import Statistics.Distribution.Binomial +import Statistics.Distribution.DiscreteUniform +import Statistics.Distribution.Geometric +import Statistics.Distribution.Hypergeometric + +---------------------------------------------------------------- +-- +-- Math functions. +-- +-- Used for implementing the distributions below. +-- +---------------------------------------------------------------- + +-- | Exactly compute binomial coefficient. +-- +-- /n/ need not be an integer, can be fractional. +exactChoose :: Ratio Integer -> Integer -> Ratio Integer +exactChoose n k + | k < 0 = 0 + | otherwise = foldl' (*) 1 factors + where factors = [ (n - k' + j) / j | j <- [1..k'] ] + k' = fromInteger k :: Ratio Integer + +---------------------------------------------------------------- +-- +-- Exact distributions. +-- +---------------------------------------------------------------- + +-- | Exact discrete distribution. +class ExactDiscreteDistr a where + -- | Probability mass function. + exactProb :: a -> Integer -> Ratio Integer + exactProb d x = exactCumulative d x - exactCumulative d (x - 1) + + -- | Cumulative distribution function. + exactCumulative :: a -> Integer -> Ratio Integer + +-- | Exact Binomial distribution. +data ExactBinomialDistr = ExactBD Integer (Ratio Integer) + deriving(Show) + +instance ExactDiscreteDistr ExactBinomialDistr where + -- Probability mass, computed with textbook formula. + exactProb (ExactBD n p) k + | k < 0 || k > n = 0 + | otherwise = exactChoose n' k * p^k * (1-p)^(n-k) + where n' = fromIntegral n + -- CDF + -- + -- Computed iteratively by summing up all the probabilities + -- <= /k/. Rather than computing everything from scratch for each + -- probability, we reuse previous results. The meanings of the + -- variables in the "update" function are: + -- + -- bc is the binomial coefficient (n choose j), + -- pj is the term p^j, + -- pnj is the term (1 - p)^(n - j) + -- r is the (partial) sum of the probabilities + -- + exactCumulative (ExactBD n p) k + | k < 0 = 0 + | k >= n = 1 + -- Special case for p = 1, since in the below fold we + -- divide by (1 - p). + | p == 1 = if k == n then 1 else 0 + | otherwise + = result $ foldl' update (1, 1, (1 - p)^n, (1 - p)^n) [1..k] + where update (!bc, !pj, !pnj, !r) !j = + let bc' = bc * (n - j + 1) `div` j + pj' = pj * p + pnj' = pnj / (1 - p) + r' = r + (fromIntegral bc') * pj' * pnj' + in (bc', pj', pnj', r') + result (_, _, _, r) = r + +-- | Exact Discrete Uniform distribution. +data ExactDiscreteUniformDistr = ExactDU Integer Integer + deriving(Show) + +instance ExactDiscreteDistr ExactDiscreteUniformDistr where + exactProb (ExactDU lower upper) k + | k < lower || k > upper = 0 + | otherwise = 1 % (upper - lower + 1) + exactCumulative (ExactDU lower upper) k + | k < lower = 0 + | k > upper = 1 + | otherwise = + let d = (k - lower + 1) + in d % (upper - lower + 1) + +-- | Geometric distribution. +data ExactGeometricDistr = ExactGeom (Ratio Integer) + deriving(Show) + +instance ExactDiscreteDistr ExactGeometricDistr where + exactProb (ExactGeom p) k + | k < 1 = 0 + | otherwise = (1 - p)^(k - 1) * p + + exactCumulative (ExactGeom p) k = 1 - (1 - p)^k + +-- | Hypergeometric distribution. +-- +-- Parameters are /K/, /N/ and /n/, where: +-- - /N/ is the total sample space size. +-- - /K/ is number of "good" objects among /N/. +-- - /n/ is the number of draws without replacement. +data ExactHypergeomDistr = ExactHG Integer Integer Integer + deriving(Show) + +instance ExactDiscreteDistr ExactHypergeomDistr where + exactProb (ExactHG nK nN n) k + | k < 0 = 0 + | k > n || k > nN = 0 + | otherwise = + exactChoose nK' k * exactChoose (nN' - nK') (n - k) + / exactChoose nN' n + where nN' = fromIntegral nN + nK' = fromIntegral nK + + exactCumulative d k = sum [ exactProb d i | i <- [0..k] ] + +---------------------------------------------------------------- +-- +-- TestCase construction. +-- +-- Contains the TestCase data type which encapsulates an instance of an +-- exact distribution together with an evaluation point. +-- +-- Then in contains the QC.Arbitrary implementations for TestCases of +-- the different exact distributions. As a general rule, we try the +-- sampling to be relatively efficient, i.e., we only want to sample +-- valid distribution parameters. The evaluation points are sampled +-- such that most points are within the support of the distribution. +-- +---------------------------------------------------------------- + +-- Divisor to compute a rational number from an integer. +-- +-- We want input parameters to be exactly representable as +-- Double values. This is so that the production distribution does not +-- mismatch the exact one simply because the input values don't exactly +-- match. (This can happen if the derivative of the distribution +-- function is large.) For this reason, the gd value needs to be a +-- power of 2, and <= 2^53, since the mantissa of a Double is 53 bits. +-- +-- A value of 2^53 gives the most accurate and diverse tests, but the +-- cost is increased running times, as the computed numerators and +-- denominators will become quite large. +gd :: Integer +gd = 2^(16 :: Int) + +-- TestCase +-- +-- Combination of an exact distribution together with an evaluation point. +data TestCase a = TestCase a Integer deriving (Show) + +instance QC.Arbitrary (TestCase ExactBinomialDistr) where + arbitrary = do + -- This somewhat odd sampling of /n/ is done so that lower + -- values (<1000) are more often represented as the larger ones. + n <- (*) <$> chooseInteger (1,1000) <*> chooseInteger(1,2) + p <- (% gd) <$> chooseInteger (0, gd) + k <- chooseInteger (-1, n + 1) + return $ TestCase (ExactBD n p) k + shrink _ = [] + +instance QC.Arbitrary (TestCase ExactDiscreteUniformDistr) where + arbitrary = do + a <- chooseInteger (-1000, 1000) + sz <- chooseInteger (1, 1000) + let b = a + sz + k <- chooseInteger (a - 10, b + 10) + return $ TestCase (ExactDU a b) k + shrink _ = [] + +instance QC.Arbitrary (TestCase ExactGeometricDistr) where + arbitrary = do + p <- (% gd) <$> chooseInteger (1, gd) + let lim = (floor $ 100 / p) :: Integer + k <- chooseInteger (0, lim) + return $ TestCase (ExactGeom p) k + shrink _ = [] + +instance QC.Arbitrary (TestCase ExactHypergeomDistr) where + arbitrary = do + nN <- chooseInteger (1, 100) -- XXX lower bound should be 0 + nK <- chooseInteger (0, nN) + n <- chooseInteger (1, nN) -- XXX lower bound should be 0 + k <- chooseInteger (0, min n nK) + return $ TestCase (ExactHG nK nN n) k + shrink _ = [] + +---------------------------------------------------------------- +-- +-- Linking to the production distributions +-- +-- This section contains the ProductionLinkage typeclass and +-- implementation, that allows to obtain a functions for evaluating +-- the production distribution functions for a corresponding exact +-- distribution. +-- +---------------------------------------------------------------- + +-- | Distribution evaluation functions. +-- +-- This is used to store a +data ProductionProbFuncs = ProductionProbFuncs { + prodProb :: Int -> Double + , prodCumulative :: Double -> Double + , prodComplCumulative :: Double -> Double + } + +productionProbFuncs :: (DiscreteDistr a) => a -> ProductionProbFuncs +productionProbFuncs d = ProductionProbFuncs { + prodProb = probability d + , prodCumulative = cumulative d + , prodComplCumulative = complCumulative d + } + +class (ExactDiscreteDistr a) => ProductionLinkage a where + productionLinkage :: a -> ProductionProbFuncs + +instance ProductionLinkage ExactBinomialDistr where + productionLinkage (ExactBD n p) = + let d = binomial (fromIntegral n) (fromRational p) + in productionProbFuncs d + +instance ProductionLinkage ExactDiscreteUniformDistr where + productionLinkage (ExactDU lower upper) = + let d = discreteUniformAB (fromIntegral lower) (fromIntegral upper) + in productionProbFuncs d + +instance ProductionLinkage ExactGeometricDistr where + productionLinkage (ExactGeom p) = + let d = geometric $ fromRational p + in productionProbFuncs d + +instance ProductionLinkage ExactHypergeomDistr where + productionLinkage (ExactHG nK nN n) = + let d = hypergeometric (fromIntegral nK) (fromIntegral nN) (fromIntegral n) + in productionProbFuncs d + +---------------------------------------------------------------- +-- Tests +---------------------------------------------------------------- + +-- Check production probability mass function accuracy. +-- +-- Inputs: tolerance (max relative error) and test case +pmfMatch :: (Show a, ProductionLinkage a) => Double -> TestCase a -> Bool +pmfMatch tol (TestCase dExact k) = + let dProd = productionLinkage dExact + pe = fromRational $ exactProb dExact k + pa = prodProb dProd k' + k' = fromIntegral k + in relativeError pe pa < tol + +-- Check production cumulative probability function accuracy. +-- +-- Inputs: tolerance (max relative error) and test case. +cdfMatch :: (Show a, ProductionLinkage a) => Double -> TestCase a -> Bool +cdfMatch tol (TestCase dExact k) = + let dProd = productionLinkage dExact + pe = fromRational $ exactCumulative dExact k + pa = prodCumulative dProd k' + k' = fromIntegral k + in relativeError pe pa < tol + +-- Check production complement cumulative function accuracy. +-- +-- Inputs: tolerance (max relative error) and test case. +complCdfMatch :: (Show a, ProductionLinkage a) => Double -> TestCase a -> Bool +complCdfMatch tol (TestCase dExact k) = + let dProd = productionLinkage dExact + pe = fromRational $ 1 - exactCumulative dExact k + pa = prodComplCumulative dProd k' + k' = fromIntegral k + in relativeError pe pa < tol + +-- Phantom type to encode an exact distribution. +data Tag a = Tag + +distTests :: (Show a, ProductionLinkage a, Arbitrary (TestCase a)) => + Tag a -> String -> Double -> TestTree +distTests (Tag :: Tag a) name tol = + testGroup ("Exact tests for " ++ name) [ + testProperty "PMF match" $ ((pmfMatch tol) :: TestCase a -> Bool) + , testProperty "CDF match" $ ((cdfMatch tol) :: TestCase a -> Bool) + , testProperty "1 - CDF match" $ ((complCdfMatch tol) :: TestCase a -> Bool) + ] + + +-- Test driver ------------------------------------------------- + +exactDistributionTests :: TestTree +exactDistributionTests = testGroup "Test distributions against exact" + [ + distTests (Tag :: Tag ExactBinomialDistr) "Binomial" 1.0e-12 + , distTests (Tag :: Tag ExactDiscreteUniformDistr) "DiscreteUniform" 1.0e-12 + , distTests (Tag :: Tag ExactGeometricDistr) "Geometric" 1.0e-13 + , distTests (Tag :: Tag ExactHypergeomDistr) "Hypergeometric" 1.0e-12 + ] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/tests/Tests/Orphanage.hs new/statistics-0.16.2.0/tests/Tests/Orphanage.hs --- old/statistics-0.16.1.2/tests/Tests/Orphanage.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/tests/Tests/Orphanage.hs 2001-09-09 03:46:40.000000000 +0200 @@ -17,6 +17,7 @@ import Statistics.Distribution.Hypergeometric import Statistics.Distribution.Laplace (LaplaceDistribution, laplace) import Statistics.Distribution.Lognormal (LognormalDistribution, lognormalDistr) +import Statistics.Distribution.NegativeBinomial (NegativeBinomialDistribution, negativeBinomial) import Statistics.Distribution.Normal (NormalDistribution, normalDistr) import Statistics.Distribution.Poisson (PoissonDistribution, poisson) import Statistics.Distribution.StudentT @@ -30,7 +31,7 @@ ---------------------------------------------------------------- --- Arbitrary instances for ditributions +-- Arbitrary instances for distributions ---------------------------------------------------------------- instance QC.Arbitrary BinomialDistribution where @@ -44,9 +45,9 @@ instance QC.Arbitrary BetaDistribution where arbitrary = betaDistr <$> QC.choose (1e-3,10) <*> QC.choose (1e-3,10) instance QC.Arbitrary GeometricDistribution where - arbitrary = geometric <$> QC.choose (0,1) + arbitrary = geometric <$> QC.choose (1e-10,1) instance QC.Arbitrary GeometricDistribution0 where - arbitrary = geometric0 <$> QC.choose (0,1) + arbitrary = geometric0 <$> QC.choose (1e-10,1) instance QC.Arbitrary HypergeometricDistribution where arbitrary = do l <- QC.choose (1,20) m <- QC.choose (0,l) @@ -55,6 +56,8 @@ instance QC.Arbitrary LognormalDistribution where -- can't choose sigma too big, otherwise goes outside of double-float limit arbitrary = lognormalDistr <$> QC.choose (-100,100) <*> QC.choose (1e-10, 20) +instance QC.Arbitrary NegativeBinomialDistribution where + arbitrary = negativeBinomial <$> QC.choose (1,100) <*> QC.choose (1e-10,1) instance QC.Arbitrary NormalDistribution where arbitrary = normalDistr <$> QC.choose (-100,100) <*> QC.choose (1e-3, 1e3) instance QC.Arbitrary PoissonDistribution where diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/statistics-0.16.1.2/tests/Tests/Serialization.hs new/statistics-0.16.2.0/tests/Tests/Serialization.hs --- old/statistics-0.16.1.2/tests/Tests/Serialization.hs 2001-09-09 03:46:40.000000000 +0200 +++ new/statistics-0.16.2.0/tests/Tests/Serialization.hs 2001-09-09 03:46:40.000000000 +0200 @@ -17,6 +17,7 @@ import Statistics.Distribution.Hypergeometric import Statistics.Distribution.Laplace (LaplaceDistribution) import Statistics.Distribution.Lognormal (LognormalDistribution) +import Statistics.Distribution.NegativeBinomial (NegativeBinomialDistribution) import Statistics.Distribution.Normal (NormalDistribution) import Statistics.Distribution.Poisson (PoissonDistribution) import Statistics.Distribution.StudentT @@ -53,6 +54,7 @@ , serializationTests (T :: T GammaDistribution ) , serializationTests (T :: T LaplaceDistribution ) , serializationTests (T :: T LognormalDistribution ) + , serializationTests (T :: T NegativeBinomialDistribution ) , serializationTests (T :: T NormalDistribution ) , serializationTests (T :: T UniformDistribution ) , serializationTests (T :: T WeibullDistribution )
