Here's the new UncommonDistributions class, which seems to work. I reworked the gamma method from the Blog distribution and included its copyright.

Jeff

package org.apache.mahout.clustering.dirichlet;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
import org.uncommons.maths.random.GaussianGenerator;

public class UncommonDistributions implements Distributions {

 static final double sqrt2pi = Math.sqrt(2 * Math.PI);

 static final Random random = new Random();

 public UncommonDistributions() {
   super();
 }

 /**
  * Initialize the random number generator
  */
 public static void init() {
 }

/* Copyright (c) 2005, Regents of the University of California
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
* distribution. *
 * * Neither the name of the University of California, Berkeley nor
 *   the names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 /**
  * Returns a double sampled according to this distribution.  Uniformly
  * fast for all k > 0.  (Reference: Non-Uniform Random Variate Generation,
  * Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)  Uses Cheng's
  * rejection algorithm (GB) for k>=1, rejection from Weibull distribution
  * for 0 < k < 1.
  */
 public double gamma(double k, double lambda) {
   boolean accept = false;
   if (k >= 1) {
     //Cheng's algorithm
     double b = (k - Math.log(4));
     double c = (k + Math.sqrt(2 * k - 1));
     double lam = Math.sqrt(2 * k - 1);
     double cheng = (1 + Math.log(4.5));
     double u, v, x, y, z, r;
     do {
       u = random.nextDouble();
       v = random.nextDouble();
       y = ((1 / lam) * Math.log(v / (1 - v)));
       x = (k * Math.exp(y));
       z = (u * v * v);
       r = (b + (c * y) - x);
       if ((r >= ((4.5 * z) - cheng)) || (r >= Math.log(z))) {
         accept = true;
       }
     } while (!accept);
     return new Double(x / lambda);
   } else {
     //Weibull algorithm
     double c = (1 / k);
     double d = ((1 - k) * Math.pow(k, (k / (1 - k))));
     double u, v, z, e, x;
     do {
       u = random.nextDouble();
       v = random.nextDouble();
       z = -Math.log(u); //generating random exponential variates
       e = -Math.log(v);
       x = Math.pow(z, c);
       if ((z + e) >= (d + x)) {
         accept = true;
       }
     } while (!accept);
     return new Double(x / lambda);
   }
 }

 /**
  * Returns a random sample from a beta distribution with
  * the given shapes
  *
  * @param shape1 a double representing shape1
  * @param shape2 a double representing shape2
  * @return a Vector of samples
  */
 public double rbeta(double shape1, double shape2) {
   double gam1 = gamma(1, shape1);
   double gam2 = gamma(1, shape2);
   return gam1 / (gam1 + gam2);

 }

 /**
  * Returns a vector of random samples from a beta distribution with
  * the given shapes
  *
  * @param K the number of samples to return
  * @param shape1 a double representing shape1
  * @param shape2 a double representing shape2
  * @return a Vector of samples
  */
 public Vector rbeta(int K, double shape1, double shape2) {
   List<Double> params = new ArrayList<Double>(2);
   params.add(shape1);
   params.add(Math.max(0, shape2));
   Vector result = new DenseVector(K);
   for (int i = 0; i < K; i++)
     result.set(i, rbeta(shape1, shape2));
   return result;
 }

 /**
* Return a random sample from the chi-squared (chi^2) distribution with df
  * degrees of freedom.
  * @param df
  * @return a double sample
  */
 public double rchisq(double df) {
   double result = 0;
   for (int i = 0; i < df; i++) {
     double sample = rnorm(0, 1);
     result += sample * sample;
   }
   return result;
 }

 /**
* Return a random value from a normal distribution with the given mean and
  * standard deviation
  *
  * @param mean a double mean value
  * @param sd a double standard deviation
  * @return a double sample
  */
 public double rnorm(double mean, double sd) {
   GaussianGenerator dist = new GaussianGenerator(mean, sd, random);
   return dist.nextValue();
 }

 /**
  * Return the normal density function value for the sample x
  *
  * pdf = 1/[sqrt(2*p)*s] * e^{-1/2*[(x-m)/s]^2}
* *
  * @param x a double sample value
  * @param m a double mean value
  * @param s a double standard deviation
  * @return a double probability value
  */
 public double dnorm(double x, double m, double s) {
   double xms = (x - m) / s;
   double ex = (xms * xms) / 2;
   double exp = Math.exp(-ex);
   double result = exp / (sqrt2pi * s);
   return result;
 }

 /**
 * Returns one sample from a multinomial.
 */
 public int rmultinom(Vector probabilities) {
   // our probability argument may not be normalized.
   double total = probabilities.zSum();
   double nextDouble = random.nextDouble();
   double p = nextDouble * total;
   for (int i = 0; i < probabilities.cardinality(); i++) {
     double p_i = probabilities.get(i);
     if (p < p_i) {
       return i;
     } else {
       p -= p_i;
     }
   }
// can't happen except for round-off error so we don't care what we return here
   return 0;
 }

 /**
  * Returns a multinomial vector sampled from the given probabilities
  *
  * rmultinom should be implemented as successive binomial sampling.
  *
  *Keep a normalizing amount that starts with 1 (I call it total).
  *
  * For each i
  *  k[i] = rbinom(p[i] / total, size);
  *  total -= p[i];
  *  size -= k[i];
  *
  * @param size the size parameter of the binomial distribution
  * @param probabilities a Vector of probabilities
  *
  * @return a multinomial distribution Vector
  */
 public Vector rmultinom(int size, Vector probabilities) {
   // our probability argument may not be normalized.
   double total = probabilities.zSum();
   int cardinality = probabilities.cardinality();
   Vector result = new DenseVector(cardinality);
   for (int i = 0; total > 0 && i < cardinality; i++) {
     double p = probabilities.get(i);
     int ki = rbinomial(size, p / total);
     total -= p;
     size -= ki;
     result.set(i, ki);
   }
   return result;
 }

 /**
  * Returns an integer sampled according to this distribution.  Takes time
  * proprotional to np + 1.  (Reference: Non-Uniform Random Variate
  * Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)
  * Second time-waiting algorithm.
  */
 public int rbinomial(int n, double p) {
   if (p >= 1)
     return n; // needed to avoid infinite loops and negative results
   double q = -Math.log(1 - p);
   double sum = 0;
   int x = 0;
   double u, e;
   while (sum <= q) {
     u = random.nextDouble();
     e = -Math.log(u); //exponential random variate
     sum += (e / (n - x));
     x += 1;
   }
   if (x == 0)
     return 0;
   return x - 1;
 }

}

Attachment: PGP.sig
Description: PGP signature

Reply via email to