Jeff
package org.apache.mahout.clustering.dirichlet; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.ArrayList; import java.util.List; import java.util.Random; import org.apache.mahout.matrix.DenseVector; import org.apache.mahout.matrix.Vector; import org.uncommons.maths.random.GaussianGenerator; public class UncommonDistributions implements Distributions { static final double sqrt2pi = Math.sqrt(2 * Math.PI); static final Random random = new Random(); public UncommonDistributions() { super(); } /** * Initialize the random number generator */ public static void init() { } /* Copyright (c) 2005, Regents of the University of California * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the* distribution. *
* * Neither the name of the University of California, Berkeley nor * the names of its contributors may be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * Returns a double sampled according to this distribution. Uniformly * fast for all k > 0. (Reference: Non-Uniform Random Variate Generation, * Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Uses Cheng's * rejection algorithm (GB) for k>=1, rejection from Weibull distribution * for 0 < k < 1. */ public double gamma(double k, double lambda) { boolean accept = false; if (k >= 1) { //Cheng's algorithm double b = (k - Math.log(4)); double c = (k + Math.sqrt(2 * k - 1)); double lam = Math.sqrt(2 * k - 1); double cheng = (1 + Math.log(4.5)); double u, v, x, y, z, r; do { u = random.nextDouble(); v = random.nextDouble(); y = ((1 / lam) * Math.log(v / (1 - v))); x = (k * Math.exp(y)); z = (u * v * v); r = (b + (c * y) - x); if ((r >= ((4.5 * z) - cheng)) || (r >= Math.log(z))) { accept = true; } } while (!accept); return new Double(x / lambda); } else { //Weibull algorithm double c = (1 / k); double d = ((1 - k) * Math.pow(k, (k / (1 - k)))); double u, v, z, e, x; do { u = random.nextDouble(); v = random.nextDouble(); z = -Math.log(u); //generating random exponential variates e = -Math.log(v); x = Math.pow(z, c); if ((z + e) >= (d + x)) { accept = true; } } while (!accept); return new Double(x / lambda); } } /** * Returns a random sample from a beta distribution with * the given shapes * * @param shape1 a double representing shape1 * @param shape2 a double representing shape2 * @return a Vector of samples */ public double rbeta(double shape1, double shape2) { double gam1 = gamma(1, shape1); double gam2 = gamma(1, shape2); return gam1 / (gam1 + gam2); } /** * Returns a vector of random samples from a beta distribution with * the given shapes * * @param K the number of samples to return * @param shape1 a double representing shape1 * @param shape2 a double representing shape2 * @return a Vector of samples */ public Vector rbeta(int K, double shape1, double shape2) { List<Double> params = new ArrayList<Double>(2); params.add(shape1); params.add(Math.max(0, shape2)); Vector result = new DenseVector(K); for (int i = 0; i < K; i++) result.set(i, rbeta(shape1, shape2)); return result; } /*** Return a random sample from the chi-squared (chi^2) distribution with df
* degrees of freedom.
* @param df
* @return a double sample
*/
public double rchisq(double df) {
double result = 0;
for (int i = 0; i < df; i++) {
double sample = rnorm(0, 1);
result += sample * sample;
}
return result;
}
/**
* Return a random value from a normal distribution with the given
mean and
* standard deviation
*
* @param mean a double mean value
* @param sd a double standard deviation
* @return a double sample
*/
public double rnorm(double mean, double sd) {
GaussianGenerator dist = new GaussianGenerator(mean, sd, random);
return dist.nextValue();
}
/**
* Return the normal density function value for the sample x
*
* pdf = 1/[sqrt(2*p)*s] * e^{-1/2*[(x-m)/s]^2}
*
*
* @param x a double sample value
* @param m a double mean value
* @param s a double standard deviation
* @return a double probability value
*/
public double dnorm(double x, double m, double s) {
double xms = (x - m) / s;
double ex = (xms * xms) / 2;
double exp = Math.exp(-ex);
double result = exp / (sqrt2pi * s);
return result;
}
/**
* Returns one sample from a multinomial.
*/
public int rmultinom(Vector probabilities) {
// our probability argument may not be normalized.
double total = probabilities.zSum();
double nextDouble = random.nextDouble();
double p = nextDouble * total;
for (int i = 0; i < probabilities.cardinality(); i++) {
double p_i = probabilities.get(i);
if (p < p_i) {
return i;
} else {
p -= p_i;
}
}
// can't happen except for round-off error so we don't care what we
return here
return 0;
}
/**
* Returns a multinomial vector sampled from the given probabilities
*
* rmultinom should be implemented as successive binomial sampling.
*
*Keep a normalizing amount that starts with 1 (I call it total).
*
* For each i
* k[i] = rbinom(p[i] / total, size);
* total -= p[i];
* size -= k[i];
*
* @param size the size parameter of the binomial distribution
* @param probabilities a Vector of probabilities
*
* @return a multinomial distribution Vector
*/
public Vector rmultinom(int size, Vector probabilities) {
// our probability argument may not be normalized.
double total = probabilities.zSum();
int cardinality = probabilities.cardinality();
Vector result = new DenseVector(cardinality);
for (int i = 0; total > 0 && i < cardinality; i++) {
double p = probabilities.get(i);
int ki = rbinomial(size, p / total);
total -= p;
size -= ki;
result.set(i, ki);
}
return result;
}
/**
* Returns an integer sampled according to this distribution. Takes time
* proprotional to np + 1. (Reference: Non-Uniform Random Variate
* Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)
* Second time-waiting algorithm.
*/
public int rbinomial(int n, double p) {
if (p >= 1)
return n; // needed to avoid infinite loops and negative results
double q = -Math.log(1 - p);
double sum = 0;
int x = 0;
double u, e;
while (sum <= q) {
u = random.nextDouble();
e = -Math.log(u); //exponential random variate
sum += (e / (n - x));
x += 1;
}
if (x == 0)
return 0;
return x - 1;
}
}
PGP.sig
Description: PGP signature
