OK, so it is BSD licensed. I'm not entirely sure how that should be handled. I would suggest asking on legal-discuss@ by saying: In Mahout, we wish to use one method from a library that is BSD licensed. How should we include it in our code? Do we make a really small JAR file containing just that code or can we just include the method in some class of ours and declare that method as BSD licensed? Or, do we need to include the whole JAR?

-Grant

On Mar 3, 2009, at 10:53 PM, Jeff Eastman wrote:

Here's the new UncommonDistributions class, which seems to work. I reworked the gamma method from the Blog distribution and included its copyright.

Jeff

package org.apache.mahout.clustering.dirichlet;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
import org.uncommons.maths.random.GaussianGenerator;

public class UncommonDistributions implements Distributions {

static final double sqrt2pi = Math.sqrt(2 * Math.PI);

static final Random random = new Random();

public UncommonDistributions() {
  super();
}

/**
 * Initialize the random number generator
 */
public static void init() {
}

/* Copyright (c) 2005, Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
*   notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
*   notice, this list of conditions and the following disclaimer in
*   the documentation and/or other materials provided with the
*   distribution.  *
* * Neither the name of the University of California, Berkeley nor
*   the names of its contributors may be used to endorse or promote
*   products derived from this software without specific prior
*   written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Returns a double sampled according to this distribution.  Uniformly
* fast for all k > 0. (Reference: Non-Uniform Random Variate Generation, * Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Uses Cheng's * rejection algorithm (GB) for k>=1, rejection from Weibull distribution
 * for 0 < k < 1.
 */
public double gamma(double k, double lambda) {
  boolean accept = false;
  if (k >= 1) {
    //Cheng's algorithm
    double b = (k - Math.log(4));
    double c = (k + Math.sqrt(2 * k - 1));
    double lam = Math.sqrt(2 * k - 1);
    double cheng = (1 + Math.log(4.5));
    double u, v, x, y, z, r;
    do {
      u = random.nextDouble();
      v = random.nextDouble();
      y = ((1 / lam) * Math.log(v / (1 - v)));
      x = (k * Math.exp(y));
      z = (u * v * v);
      r = (b + (c * y) - x);
      if ((r >= ((4.5 * z) - cheng)) || (r >= Math.log(z))) {
        accept = true;
      }
    } while (!accept);
    return new Double(x / lambda);
  } else {
    //Weibull algorithm
    double c = (1 / k);
    double d = ((1 - k) * Math.pow(k, (k / (1 - k))));
    double u, v, z, e, x;
    do {
      u = random.nextDouble();
      v = random.nextDouble();
      z = -Math.log(u); //generating random exponential variates
      e = -Math.log(v);
      x = Math.pow(z, c);
      if ((z + e) >= (d + x)) {
        accept = true;
      }
    } while (!accept);
    return new Double(x / lambda);
  }
}

/**
 * Returns a random sample from a beta distribution with
 * the given shapes
 *
 * @param shape1 a double representing shape1
 * @param shape2 a double representing shape2
 * @return a Vector of samples
 */
public double rbeta(double shape1, double shape2) {
  double gam1 = gamma(1, shape1);
  double gam2 = gamma(1, shape2);
  return gam1 / (gam1 + gam2);

}

/**
 * Returns a vector of random samples from a beta distribution with
 * the given shapes
 *
 * @param K the number of samples to return
 * @param shape1 a double representing shape1
 * @param shape2 a double representing shape2
 * @return a Vector of samples
 */
public Vector rbeta(int K, double shape1, double shape2) {
  List<Double> params = new ArrayList<Double>(2);
  params.add(shape1);
  params.add(Math.max(0, shape2));
  Vector result = new DenseVector(K);
  for (int i = 0; i < K; i++)
    result.set(i, rbeta(shape1, shape2));
  return result;
}

/**
* Return a random sample from the chi-squared (chi^2) distribution with df
 * degrees of freedom.
 * @param df
 * @return a double sample
 */
public double rchisq(double df) {
  double result = 0;
  for (int i = 0; i < df; i++) {
    double sample = rnorm(0, 1);
    result += sample * sample;
  }
  return result;
}

/**
* Return a random value from a normal distribution with the given mean and
 * standard deviation
 *
 * @param mean a double mean value
 * @param sd a double standard deviation
 * @return a double sample
 */
public double rnorm(double mean, double sd) {
  GaussianGenerator dist = new GaussianGenerator(mean, sd, random);
  return dist.nextValue();
}

/**
 * Return the normal density function value for the sample x
 *
 * pdf = 1/[sqrt(2*p)*s] * e^{-1/2*[(x-m)/s]^2}
 *   *
 * @param x a double sample value
 * @param m a double mean value
 * @param s a double standard deviation
 * @return a double probability value
 */
public double dnorm(double x, double m, double s) {
  double xms = (x - m) / s;
  double ex = (xms * xms) / 2;
  double exp = Math.exp(-ex);
  double result = exp / (sqrt2pi * s);
  return result;
}

/**
* Returns one sample from a multinomial.
*/
public int rmultinom(Vector probabilities) {
  // our probability argument may not be normalized.
  double total = probabilities.zSum();
  double nextDouble = random.nextDouble();
  double p = nextDouble * total;
  for (int i = 0; i < probabilities.cardinality(); i++) {
    double p_i = probabilities.get(i);
    if (p < p_i) {
      return i;
    } else {
      p -= p_i;
    }
  }
// can't happen except for round-off error so we don't care what we return here
  return 0;
}

/**
 * Returns a multinomial vector sampled from the given probabilities
 *
 * rmultinom should be implemented as successive binomial sampling.
 *
 *Keep a normalizing amount that starts with 1 (I call it total).
 *
 * For each i
 *  k[i] = rbinom(p[i] / total, size);
 *  total -= p[i];
 *  size -= k[i];
 *
 * @param size the size parameter of the binomial distribution
 * @param probabilities a Vector of probabilities
 *
 * @return a multinomial distribution Vector
 */
public Vector rmultinom(int size, Vector probabilities) {
  // our probability argument may not be normalized.
  double total = probabilities.zSum();
  int cardinality = probabilities.cardinality();
  Vector result = new DenseVector(cardinality);
  for (int i = 0; total > 0 && i < cardinality; i++) {
    double p = probabilities.get(i);
    int ki = rbinomial(size, p / total);
    total -= p;
    size -= ki;
    result.set(i, ki);
  }
  return result;
}

/**
* Returns an integer sampled according to this distribution. Takes time
 * proprotional to np + 1.  (Reference: Non-Uniform Random Variate
 * Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)
 * Second time-waiting algorithm.
 */
public int rbinomial(int n, double p) {
  if (p >= 1)
    return n; // needed to avoid infinite loops and negative results
  double q = -Math.log(1 - p);
  double sum = 0;
  int x = 0;
  double u, e;
  while (sum <= q) {
    u = random.nextDouble();
    e = -Math.log(u); //exponential random variate
    sum += (e / (n - x));
    x += 1;
  }
  if (x == 0)
    return 0;
  return x - 1;
}

}


--------------------------
Grant Ingersoll
http://www.lucidimagination.com/

Search the Lucene ecosystem (Lucene/Solr/Nutch/Mahout/Tika/Droids) using Solr/Lucene:
http://www.lucidimagination.com/search

Reply via email to