Hello community,

here is the log from the commit of package ghc-clustering for openSUSE:Factory 
checked in at 2017-03-03 17:48:37
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/ghc-clustering (Old)
 and      /work/SRC/openSUSE:Factory/.ghc-clustering.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "ghc-clustering"

Fri Mar  3 17:48:37 2017 rev:2 rq:461614 version:0.3.1

Changes:
--------
--- /work/SRC/openSUSE:Factory/ghc-clustering/ghc-clustering.changes    
2016-12-11 13:26:57.286826588 +0100
+++ /work/SRC/openSUSE:Factory/.ghc-clustering.new/ghc-clustering.changes       
2017-03-03 17:48:38.350337189 +0100
@@ -1,0 +2,5 @@
+Sun Feb 12 14:11:44 UTC 2017 - [email protected]
+
+- Update to version 0.3.1 with cabal2obs.
+
+-------------------------------------------------------------------

Old:
----
  clustering-0.2.1.tar.gz

New:
----
  clustering-0.3.1.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ ghc-clustering.spec ++++++
--- /var/tmp/diff_new_pack.Zy6y5L/_old  2017-03-03 17:48:39.110229871 +0100
+++ /var/tmp/diff_new_pack.Zy6y5L/_new  2017-03-03 17:48:39.114229307 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package ghc-clustering
 #
-# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -19,15 +19,14 @@
 %global pkg_name clustering
 %bcond_with tests
 Name:           ghc-%{pkg_name}
-Version:        0.2.1
+Version:        0.3.1
 Release:        0
 Summary:        High performance clustering algorithms
 License:        MIT
-Group:          System/Libraries
+Group:          Development/Languages/Other
 Url:            https://hackage.haskell.org/package/%{pkg_name}
 Source0:        
https://hackage.haskell.org/package/%{pkg_name}-%{version}/%{pkg_name}-%{version}.tar.gz
 BuildRequires:  ghc-Cabal-devel
-# Begin cabal-rpm deps:
 BuildRequires:  ghc-binary-devel
 BuildRequires:  ghc-containers-devel
 BuildRequires:  ghc-matrices-devel
@@ -35,6 +34,7 @@
 BuildRequires:  ghc-parallel-devel
 BuildRequires:  ghc-primitive-devel
 BuildRequires:  ghc-rpm-macros
+BuildRequires:  ghc-unordered-containers-devel
 BuildRequires:  ghc-vector-devel
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 %if %{with tests}
@@ -45,7 +45,6 @@
 BuildRequires:  ghc-tasty-hunit-devel
 BuildRequires:  ghc-tasty-quickcheck-devel
 %endif
-# End cabal-rpm deps
 
 %description
 Following clutering methods are included in this library:
@@ -70,20 +69,14 @@
 %prep
 %setup -q -n %{pkg_name}-%{version}
 
-
 %build
 %ghc_lib_build
 
-
 %install
 %ghc_lib_install
 
-
 %check
-%if %{with tests}
-%{cabal} test
-%endif
-
+%cabal_test
 
 %post devel
 %ghc_pkg_recache

++++++ clustering-0.2.1.tar.gz -> clustering-0.3.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/clustering-0.2.1/benchmarks/Bench/KMeans.hs 
new/clustering-0.3.1/benchmarks/Bench/KMeans.hs
--- old/clustering-0.2.1/benchmarks/Bench/KMeans.hs     2015-06-28 
23:44:21.000000000 +0200
+++ new/clustering-0.3.1/benchmarks/Bench/KMeans.hs     2016-11-11 
21:02:16.000000000 +0100
@@ -1,18 +1,20 @@
 module Bench.KMeans
     ( benchKMeans ) where
 
-import Criterion.Main
-import qualified Data.Matrix.Unboxed as MU
-import qualified Data.Vector.Unboxed as U
-import System.Random.MWC
-import System.IO.Unsafe
-
-import AI.Clustering.KMeans
-
-import Bench.Utils
-
-gen :: GenIO
-gen = unsafePerformIO createSystemRandom
+import           Criterion.Main
+import qualified Data.Matrix.Unboxed  as MU
+import qualified Data.Vector.Unboxed  as U
+import           Data.Word
+import           System.IO.Unsafe
+import           System.Random.MWC
+
+import           AI.Clustering.KMeans
+import           Bench.Utils
+
+gen :: U.Vector Word32
+gen = unsafePerformIO $ do
+    g <- createSystemRandom
+    fmap fromSeed $ save g
 
 dat :: MU.Matrix Double
 dat = unsafePerformIO $ fmap MU.fromRows $ randVectors 1000 10
@@ -21,11 +23,12 @@
 benchKMeans = bgroup "KMeans clustering"
     [ bgroup "AI.Clustering.KMeans"
         [ bench "k-means++ (n = 1000, k = 7)" $
-            whnfIO $ kmeans' gen KMeansPP 7 dat
+            whnf ( \x -> membership $ kmeans 7 x defaultKMeansOpts
+                { kmeansMethod = KMeansPP
+                , kmeansSeed = gen } ) dat
         , bench "forgy (n = 1000, k = 7)" $
-            whnfIO $ kmeans' gen Forgy 7 dat
+            whnf ( \x -> membership $ kmeans 7 x defaultKMeansOpts
+                { kmeansMethod = KMeansPP
+                , kmeansSeed = gen } ) dat
         ]
     ]
-
-kmeans' :: GenIO -> Method -> Int -> MU.Matrix Double -> IO (U.Vector Int)
-kmeans' g method k = fmap _clusters . kmeans g method k
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/clustering-0.2.1/clustering.cabal 
new/clustering-0.3.1/clustering.cabal
--- old/clustering-0.2.1/clustering.cabal       2015-06-28 23:44:21.000000000 
+0200
+++ new/clustering-0.3.1/clustering.cabal       2016-11-11 21:04:13.000000000 
+0100
@@ -1,8 +1,5 @@
--- Initial fastcluster.cabal generated by cabal init.  For further 
--- documentation, see http://haskell.org/cabal/users-guide/
-
 name:                clustering
-version:             0.2.1
+version:             0.3.1
 synopsis:            High performance clustering algorithms
 description:
   Following clutering methods are included in this library:
@@ -20,11 +17,10 @@
 copyright:           (c) 2015 Kai Zhang
 category:            Math
 build-type:          Simple
--- extra-source-files:  
 cabal-version:       >=1.10
 
 library
-  exposed-modules:     
+  exposed-modules:
     AI.Clustering.Hierarchical
     AI.Clustering.Hierarchical.Internal
     AI.Clustering.Hierarchical.Types
@@ -33,8 +29,6 @@
     AI.Clustering.KMeans.Types
     AI.Clustering.Utils
 
---  other-modules:       
-
   build-depends:
       base >=4.0 && <5.0
     , binary
@@ -43,6 +37,7 @@
     , mwc-random
     , parallel
     , primitive
+    , unordered-containers
     , vector
 
   hs-source-dirs:      src
@@ -59,7 +54,7 @@
     Test.Utils
 
   default-language:    Haskell2010
-  build-depends: 
+  build-depends:
       base
     , binary
     , mwc-random
@@ -84,7 +79,7 @@
     Bench.Utils
 
   default-language:    Haskell2010
-  build-depends: 
+  build-depends:
       base
     , criterion
     , mwc-random
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/clustering-0.2.1/src/AI/Clustering/KMeans/Internal.hs 
new/clustering-0.3.1/src/AI/Clustering/KMeans/Internal.hs
--- old/clustering-0.2.1/src/AI/Clustering/KMeans/Internal.hs   2015-06-28 
23:44:21.000000000 +0200
+++ new/clustering-0.3.1/src/AI/Clustering/KMeans/Internal.hs   2016-11-10 
09:03:18.000000000 +0100
@@ -1,17 +1,6 @@
-{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BangPatterns     #-}
 {-# LANGUAGE FlexibleContexts #-}
---------------------------------------------------------------------------------
--- |
--- Module      :  AI.Clustering.KMeans.Internal
--- Copyright   :  (c) 2015 Kai Zhang
--- License     :  MIT
---
--- Maintainer  :  [email protected]
--- Stability   :  experimental
--- Portability :  portable
---
--- <module description starting at first column>
---------------------------------------------------------------------------------
+
 module AI.Clustering.KMeans.Internal
 {-# WARNING "To be used by developer only" #-}
     ( forgy
@@ -19,38 +8,39 @@
     , sumSquares
     ) where
 
-import Control.Monad (forM_)
-import Control.Monad.Primitive (PrimMonad, PrimState)
-import Data.List (nub)
-import qualified Data.Matrix.Unboxed as MU
-import qualified Data.Vector.Generic as G
-import qualified Data.Vector.Unboxed as U
-import qualified Data.Vector.Unboxed.Mutable as UM
-import System.Random.MWC (uniformR, Gen)
+import           Control.Monad.Primitive         (PrimMonad, PrimState)
+import qualified Data.HashSet                    as S
+import           Data.List                       (nub)
+import qualified Data.Matrix.Unboxed             as MU
+import qualified Data.Vector.Generic             as G
+import qualified Data.Vector.Unboxed             as U
+import           System.Random.MWC               (Gen, uniformR)
+import           System.Random.MWC.Distributions (categorical)
+
 
 forgy :: (PrimMonad m, G.Vector v a)
       => Gen (PrimState m)
-      -> Int                 -- number of clusters
-      -> v a                 -- data
-      -> (a -> U.Vector Double)
+      -> Int                 -- ^ The number of clusters
+      -> v a                   -- ^ Input data
+      -> (a -> U.Vector Double)  -- ^ Feature extraction function
       -> m (MU.Matrix Double)
 forgy g k dat fn | k > n = error "k is larger than sample size"
-                 | otherwise = iter
+                 | otherwise = loop
   where
-    iter = do
-        vec <- randN g k . U.enumFromN 0 $ n
-        let xs = map (\i -> fn $ dat `G.unsafeIndex` i) . U.toList $ vec
+    loop = do
+        vec <- uniformRN (0, n-1) k g
+        let xs = map (fn . G.unsafeIndex dat) vec
         if length (nub xs) == length xs
-           then return . MU.fromRows $ xs
-           else iter
+           then return $ MU.fromRows xs
+           else loop
     n = G.length dat
 {-# INLINE forgy #-}
 
 kmeansPP :: (PrimMonad m, G.Vector v a)
          => Gen (PrimState m)
-         -> Int
-         -> v a
-         -> (a -> U.Vector Double)
+         -> Int                     -- ^ The number of clusters
+         -> v a                     -- ^ Input data
+         -> (a -> U.Vector Double)  -- ^ Feature extraction function
          -> m (MU.Matrix Double)
 kmeansPP g k dat fn
     | k > n = error "k is larger than sample size"
@@ -59,44 +49,27 @@
         loop [c1] 1
   where
     loop centers !k'
-        | k' == k = return $ MU.fromRows $ map (\i -> fn $ dat `G.unsafeIndex` 
i) centers
+        | k' == k = return $ MU.fromRows $ map (fn . G.unsafeIndex dat) centers
         | otherwise = do
-            c' <- chooseWithProb g $ U.map (shortestDist centers) rowIndices
+            c' <- flip categorical g $ U.generate n $ \i -> minimum $
+                map (\c -> sumSquares (fn $ G.unsafeIndex dat i) (fn $ 
G.unsafeIndex dat c))
+                centers
             loop (c':centers) (k'+1)
-
     n = G.length dat
-    rowIndices = U.enumFromN 0 n
-    shortestDist centers x = minimum $ map (\i ->
-        sumSquares (fn $ dat `G.unsafeIndex` x) (fn $ dat `G.unsafeIndex` i)) 
centers
 {-# INLINE kmeansPP #-}
 
-chooseWithProb :: PrimMonad m
-               => Gen (PrimState m)
-               -> U.Vector Double    -- ^ weights, may not be normalized
-               -> m Int              -- ^ result/index
-chooseWithProb g ws = do
-    x <- uniformR (0,sum') g
-    return $ loop x 0 0
-  where
-    loop v !cdf !i | cdf' >= v = i
-                   | otherwise = loop v cdf' (i+1)
-      where cdf' = cdf + ws `U.unsafeIndex` i
-
-    sum' = U.sum ws
-{-# INLINE chooseWithProb #-}
-
--- | Random select k samples from a population
-randN :: PrimMonad m => Gen (PrimState m) -> Int -> U.Vector Int -> m 
(U.Vector Int)
-randN g k xs = do
-    v <- U.thaw xs
-    forM_ [0..k-1] $ \i -> do
-        j <- uniformR (i, lst) g
-        UM.unsafeSwap v i j
-    U.unsafeFreeze . UM.take k $ v
-  where
-    lst = U.length xs - 1
-{-# INLINE randN #-}
-
 sumSquares :: U.Vector Double -> U.Vector Double -> Double
 sumSquares xs = U.sum . U.zipWith (\x y -> (x - y)**2) xs
 {-# INLINE sumSquares #-}
+
+-- | Generate N non-duplicated uniformly distributed random variables in a 
given range.
+uniformRN :: PrimMonad m => (Int, Int) -> Int -> Gen (PrimState m) -> m [Int]
+uniformRN (lo, hi) n g | hi - lo + 1 < n = error "Range is too narrow!"
+                       | otherwise = loop S.empty
+  where
+    loop m | S.size m >= n = return $ S.toList m
+           | otherwise = do
+               x <- uniformR (lo,hi) g
+               if x `S.member` m
+                   then loop m
+                   else loop $ S.insert x m
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/clustering-0.2.1/src/AI/Clustering/KMeans/Types.hs 
new/clustering-0.3.1/src/AI/Clustering/KMeans/Types.hs
--- old/clustering-0.2.1/src/AI/Clustering/KMeans/Types.hs      2015-06-28 
23:44:21.000000000 +0200
+++ new/clustering-0.3.1/src/AI/Clustering/KMeans/Types.hs      2016-11-10 
18:21:34.000000000 +0100
@@ -11,19 +11,36 @@
 -- <module description starting at first column>
 
--------------------------------------------------------------------------------
 module AI.Clustering.KMeans.Types
-    ( KMeans(..)
+    ( KMeansOpts(..)
+    , defaultKMeansOpts
+    , KMeans(..)
     , Method(..)
     ) where
 
 import qualified Data.Matrix.Unboxed as MU
 import qualified Data.Vector.Unboxed as U
+import Data.Word (Word32)
+
+data KMeansOpts = KMeansOpts
+    { kmeansMethod :: Method
+    , kmeansSeed :: (U.Vector Word32)   -- ^ Seed for random number generation
+    , kmeansClusters :: Bool   -- ^ Wether to return clusters, may use a lot 
memory
+    }
+
+defaultKMeansOpts :: KMeansOpts
+defaultKMeansOpts = KMeansOpts
+    { kmeansMethod = KMeansPP
+    , kmeansSeed = U.fromList [1,2,3,4,5,6,7]
+    , kmeansClusters = True
+    }
 
 -- | Results from running kmeans
-data KMeans = KMeans
-    { _clusters :: U.Vector Int     -- ^ A vector of integers (0 ~ k-1)
+data KMeans a = KMeans
+    { membership :: U.Vector Int     -- ^ A vector of integers (0 ~ k-1)
                                     -- indicating the cluster to which each
                                     -- point is allocated.
-    , _centers :: MU.Matrix Double  -- ^ A matrix of cluster centers.
+    , centers :: MU.Matrix Double  -- ^ A matrix of cluster centers.
+    , clusters :: Maybe [[a]]
     } deriving (Show)
 
 -- | Different initialization methods
@@ -31,3 +48,4 @@
                        -- observations from the data set and uses these
                        -- as the initial means.
             | KMeansPP -- ^ K-means++ algorithm.
+            | Centers (MU.Matrix Double)   -- ^ Provide a set of k centroids
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/clustering-0.2.1/src/AI/Clustering/KMeans.hs 
new/clustering-0.3.1/src/AI/Clustering/KMeans.hs
--- old/clustering-0.2.1/src/AI/Clustering/KMeans.hs    2015-06-28 
23:44:21.000000000 +0200
+++ new/clustering-0.3.1/src/AI/Clustering/KMeans.hs    2016-11-11 
20:59:09.000000000 +0100
@@ -1,28 +1,15 @@
 {-# LANGUAGE FlexibleContexts #-}
---------------------------------------------------------------------------------
--- |
--- Module      :  AI.Clustering.KMeans
--- Copyright   :  (c) 2015 Kai Zhang
--- License     :  MIT
--- Maintainer  :  [email protected]
--- Stability   :  experimental
--- Portability :  portable
---
--- Kmeans clustering
---------------------------------------------------------------------------------
+
 module AI.Clustering.KMeans
     ( KMeans(..)
+    , KMeansOpts(..)
+    , defaultKMeansOpts
     , kmeans
     , kmeansBy
-    , kmeansWith
 
     -- * Initialization methods
     , Method(..)
 
-    -- * Useful functions
-    , decode
-    , withinSS
-
     -- * References
     -- $references
     ) where
@@ -30,7 +17,6 @@
 import Control.Monad (forM_)
 import Control.Monad.Primitive (PrimMonad, PrimState)
 import qualified Data.Matrix.Unboxed as MU
-import qualified Data.Matrix.Generic as MG
 import qualified Data.Matrix.Unboxed.Mutable as MM
 import Data.Ord (comparing)
 import qualified Data.Vector as V
@@ -39,46 +25,63 @@
 import qualified Data.Vector.Unboxed as U
 import qualified Data.Vector.Unboxed.Mutable as UM
 import Data.List (minimumBy, foldl')
-import System.Random.MWC (Gen)
+import System.Random.MWC (Gen, initialize)
+import Control.Monad.ST (runST)
 
-import AI.Clustering.KMeans.Types (KMeans(..), Method(..))
+import AI.Clustering.KMeans.Types
 import AI.Clustering.KMeans.Internal (sumSquares, forgy, kmeansPP)
 
 -- | Perform K-means clustering
-kmeans :: (PrimMonad m, MG.Matrix mat U.Vector Double)
-       => Gen (PrimState m)
-       -> Method
-       -> Int
-       -> mat U.Vector Double
-       -> m KMeans
-kmeans g method k mat = kmeansBy g method k dat (MG.takeRow mat)
+kmeans :: Int                -- ^ The number of clusters
+       -> MU.Matrix Double   -- ^ Input data stored as rows in a matrix
+       -> KMeansOpts
+       -> KMeans (U.Vector Double)
+kmeans k mat opts = KMeans member cs grps
   where
-    dat = U.enumFromN 0 $ MG.rows mat
+    (member, cs) = kmeans' initial dat fn
+    grps = if kmeansClusters opts
+        then Just $ decode member $ MU.toRows mat
+        else Nothing
+    dat = U.enumFromN 0 $ MU.rows mat
+    fn = MU.takeRow mat
+    initial = runST $ do
+        gen <- initialize $ kmeansSeed opts
+        case kmeansMethod opts of
+            Forgy -> forgy gen k dat fn
+            KMeansPP -> kmeansPP gen k dat fn
+            Centers c -> return c
 {-# INLINE kmeans #-}
 
--- | K-means algorithm
-kmeansBy :: (PrimMonad m, G.Vector v a)
-         => Gen (PrimState m)
-         -> Method
-         -> Int                   -- ^ number of clusters
-         -> v a                   -- ^ data stores in rows
+-- | Perform K-means clustering, using a feature extraction function
+kmeansBy :: G.Vector v a
+         => Int                   -- ^ The number of clusters
+         -> v a                   -- ^ Input data
          -> (a -> U.Vector Double)
-         -> m KMeans
-kmeansBy g method k dat fn = do
-    initial <- case method of
-        Forgy -> forgy g k dat fn
-        KMeansPP -> kmeansPP g k dat fn
-    return $ kmeansWith initial dat fn
+         -> KMeansOpts
+         -> KMeans a
+kmeansBy k dat fn opts = KMeans member cs grps
+  where
+    (member, cs) = kmeans' initial dat fn
+    grps = if kmeansClusters opts
+        then Just $ decode member $ G.toList dat
+        else Nothing
+    initial = runST $ do
+        gen <- initialize $ kmeansSeed opts
+        case kmeansMethod opts of
+            Forgy -> forgy gen k dat fn
+            KMeansPP -> kmeansPP gen k dat fn
+            Centers c -> return c
 {-# INLINE kmeansBy #-}
 
 -- | K-means algorithm
-kmeansWith :: G.Vector v a
-           => MU.Matrix Double   -- ^ initial set of k centroids
-           -> v a                -- ^ each row represents a point
-           -> (a -> U.Vector Double)
-           -> KMeans
-kmeansWith initial dat fn | d /= MU.cols initial || k > n = error "check input"
-                          | otherwise = KMeans member centers
+kmeans' :: G.Vector v a
+        => MU.Matrix Double         -- ^ Initial set of k centroids
+        -> v a                      -- ^ Input data
+        -> (a -> U.Vector Double)   -- ^ Feature extraction function
+        -> (U.Vector Int, MU.Matrix Double)
+kmeans' initial dat fn
+    | U.length (fn $ G.head dat) /= d = error "Dimension mismatched."
+    | otherwise = (member, centers)
   where
     (member, centers) = loop initial U.empty
     loop means membership
@@ -109,34 +112,35 @@
             forM_ [0..d-1] $ \j ->
                 MM.unsafeRead m (i,j) >>= MM.unsafeWrite m (i,j) . 
(/fromIntegral c)
         return m
-
     n = G.length dat
     k = MU.rows initial
     d = MU.cols initial
-{-# INLINE kmeansWith #-}
+{-# INLINE kmeans' #-}
 
 -- | Assign data to clusters based on KMeans result
-decode :: KMeans -> [a] -> [[a]]
-decode result xs = V.toList $ V.create $ do
-    v <- VM.replicate n [] 
-    forM_ (zip (U.toList membership) xs) $ \(i,x) ->
+decode :: U.Vector Int -> [a] -> [[a]]
+decode member xs = V.toList $ V.create $ do
+    v <- VM.replicate n []
+    forM_ (zip (U.toList member) xs) $ \(i,x) ->
         VM.unsafeRead v i >>= VM.unsafeWrite v i . (x:)
     return v
   where
-    membership = _clusters result
-    n = U.maximum membership + 1
+    n = U.maximum member + 1
+{-# INLINE decode #-}
 
--- | Compute within-cluster sum of squares
+{-
+-- Compute within-cluster sum of squares
 withinSS :: KMeans -> MU.Matrix Double -> [Double]
 withinSS result mat = zipWith f (decode result [0 .. MU.rows mat-1]) .
                           MU.toRows . _centers $ result
   where
     f c center = foldl' (+) 0 $ map (sumSquares center . MU.takeRow mat) c
+    -}
 
 
 -- $references
 --
--- Arthur, D. and Vassilvitskii, S. (2007). k-means++: the advantages of 
careful 
--- seeding. Proceedings of the eighteenth annual ACM-SIAM symposium on 
Discrete 
--- algorithms. Society for Industrial and Applied Mathematics Philadelphia, 
PA, 
+-- Arthur, D. and Vassilvitskii, S. (2007). k-means++: the advantages of 
careful
+-- seeding. Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete
+-- algorithms. Society for Industrial and Applied Mathematics Philadelphia, PA,
 -- USA. pp. 1027–1035.


Reply via email to