Here's the new UncommonDistributions class, which seems to work. I
reworked the gamma method from the Blog distribution and included
its copyright.
Jeff
package org.apache.mahout.clustering.dirichlet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License,
Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
import org.uncommons.maths.random.GaussianGenerator;
public class UncommonDistributions implements Distributions {
static final double sqrt2pi = Math.sqrt(2 * Math.PI);
static final Random random = new Random();
public UncommonDistributions() {
super();
}
/**
* Initialize the random number generator
*/
public static void init() {
}
/* Copyright (c) 2005, Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution. *
* * Neither the name of the University of California, Berkeley nor
* the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Returns a double sampled according to this distribution. Uniformly
* fast for all k > 0. (Reference: Non-Uniform Random Variate
Generation,
* Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Uses
Cheng's
* rejection algorithm (GB) for k>=1, rejection from Weibull
distribution
* for 0 < k < 1.
*/
public double gamma(double k, double lambda) {
boolean accept = false;
if (k >= 1) {
//Cheng's algorithm
double b = (k - Math.log(4));
double c = (k + Math.sqrt(2 * k - 1));
double lam = Math.sqrt(2 * k - 1);
double cheng = (1 + Math.log(4.5));
double u, v, x, y, z, r;
do {
u = random.nextDouble();
v = random.nextDouble();
y = ((1 / lam) * Math.log(v / (1 - v)));
x = (k * Math.exp(y));
z = (u * v * v);
r = (b + (c * y) - x);
if ((r >= ((4.5 * z) - cheng)) || (r >= Math.log(z))) {
accept = true;
}
} while (!accept);
return new Double(x / lambda);
} else {
//Weibull algorithm
double c = (1 / k);
double d = ((1 - k) * Math.pow(k, (k / (1 - k))));
double u, v, z, e, x;
do {
u = random.nextDouble();
v = random.nextDouble();
z = -Math.log(u); //generating random exponential variates
e = -Math.log(v);
x = Math.pow(z, c);
if ((z + e) >= (d + x)) {
accept = true;
}
} while (!accept);
return new Double(x / lambda);
}
}
/**
* Returns a random sample from a beta distribution with
* the given shapes
*
* @param shape1 a double representing shape1
* @param shape2 a double representing shape2
* @return a Vector of samples
*/
public double rbeta(double shape1, double shape2) {
double gam1 = gamma(1, shape1);
double gam2 = gamma(1, shape2);
return gam1 / (gam1 + gam2);
}
/**
* Returns a vector of random samples from a beta distribution with
* the given shapes
*
* @param K the number of samples to return
* @param shape1 a double representing shape1
* @param shape2 a double representing shape2
* @return a Vector of samples
*/
public Vector rbeta(int K, double shape1, double shape2) {
List<Double> params = new ArrayList<Double>(2);
params.add(shape1);
params.add(Math.max(0, shape2));
Vector result = new DenseVector(K);
for (int i = 0; i < K; i++)
result.set(i, rbeta(shape1, shape2));
return result;
}
/**
* Return a random sample from the chi-squared (chi^2) distribution
with df
* degrees of freedom.
* @param df
* @return a double sample
*/
public double rchisq(double df) {
double result = 0;
for (int i = 0; i < df; i++) {
double sample = rnorm(0, 1);
result += sample * sample;
}
return result;
}
/**
* Return a random value from a normal distribution with the given
mean and
* standard deviation
*
* @param mean a double mean value
* @param sd a double standard deviation
* @return a double sample
*/
public double rnorm(double mean, double sd) {
GaussianGenerator dist = new GaussianGenerator(mean, sd, random);
return dist.nextValue();
}
/**
* Return the normal density function value for the sample x
*
* pdf = 1/[sqrt(2*p)*s] * e^{-1/2*[(x-m)/s]^2}
* *
* @param x a double sample value
* @param m a double mean value
* @param s a double standard deviation
* @return a double probability value
*/
public double dnorm(double x, double m, double s) {
double xms = (x - m) / s;
double ex = (xms * xms) / 2;
double exp = Math.exp(-ex);
double result = exp / (sqrt2pi * s);
return result;
}
/**
* Returns one sample from a multinomial.
*/
public int rmultinom(Vector probabilities) {
// our probability argument may not be normalized.
double total = probabilities.zSum();
double nextDouble = random.nextDouble();
double p = nextDouble * total;
for (int i = 0; i < probabilities.cardinality(); i++) {
double p_i = probabilities.get(i);
if (p < p_i) {
return i;
} else {
p -= p_i;
}
}
// can't happen except for round-off error so we don't care what
we return here
return 0;
}
/**
* Returns a multinomial vector sampled from the given probabilities
*
* rmultinom should be implemented as successive binomial sampling.
*
*Keep a normalizing amount that starts with 1 (I call it total).
*
* For each i
* k[i] = rbinom(p[i] / total, size);
* total -= p[i];
* size -= k[i];
*
* @param size the size parameter of the binomial distribution
* @param probabilities a Vector of probabilities
*
* @return a multinomial distribution Vector
*/
public Vector rmultinom(int size, Vector probabilities) {
// our probability argument may not be normalized.
double total = probabilities.zSum();
int cardinality = probabilities.cardinality();
Vector result = new DenseVector(cardinality);
for (int i = 0; total > 0 && i < cardinality; i++) {
double p = probabilities.get(i);
int ki = rbinomial(size, p / total);
total -= p;
size -= ki;
result.set(i, ki);
}
return result;
}
/**
* Returns an integer sampled according to this distribution. Takes
time
* proprotional to np + 1. (Reference: Non-Uniform Random Variate
* Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)
* Second time-waiting algorithm.
*/
public int rbinomial(int n, double p) {
if (p >= 1)
return n; // needed to avoid infinite loops and negative results
double q = -Math.log(1 - p);
double sum = 0;
int x = 0;
double u, e;
while (sum <= q) {
u = random.nextDouble();
e = -Math.log(u); //exponential random variate
sum += (e / (n - x));
x += 1;
}
if (x == 0)
return 0;
return x - 1;
}
}