http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/math/Polynomial.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/math/Polynomial.java b/core/src/main/java/org/apache/mahout/math/jet/math/Polynomial.java new file mode 100644 index 0000000..723e7d0 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/math/Polynomial.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.math; + +/** + * Polynomial functions. + */ +public final class Polynomial { + + private Polynomial() { + } + + /** + * Evaluates the given polynomial of degree <tt>N</tt> at <tt>x</tt>, assuming coefficient of N is 1.0. Otherwise same + * as <tt>polevl()</tt>. + * <pre> + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * where C = 1 and hence is omitted from the array. + * N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N-1] = C . + * N-1 0 + * + * Calling arguments are otherwise the same as polevl(). + * </pre> + * In the interest of speed, there are no checks for out of bounds arithmetic. + * + * @param x argument to the polynomial. + * @param coef the coefficients of the polynomial. + * @param N the degree of the polynomial. + */ + public static double p1evl(double x, double[] coef, int N) { + + double ans = x + coef[0]; + + for (int i = 1; i < N; i++) { + ans = ans * x + coef[i]; + } + + return ans; + } + + /** + * Evaluates the given polynomial of degree <tt>N</tt> at <tt>x</tt>. + * <pre> + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * </pre> + * In the interest of speed, there are no checks for out of bounds arithmetic. + * + * @param x argument to the polynomial. + * @param coef the coefficients of the polynomial. + * @param N the degree of the polynomial. + */ + public static double polevl(double x, double[] coef, int N) { + double ans = coef[0]; + + for (int i = 1; i <= N; i++) { + ans = ans * x + coef[i]; + } + + return ans; + } +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/math/package-info.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/math/package-info.java b/core/src/main/java/org/apache/mahout/math/jet/math/package-info.java new file mode 100644 index 0000000..3cda850 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/math/package-info.java @@ -0,0 +1,5 @@ +/** + * Tools for basic and advanced mathematics: Arithmetics and Algebra, Polynomials and Chebyshev series, Bessel and Airy + * functions, Function Objects for generic function evaluation, etc. + */ +package org.apache.mahout.math.jet.math; http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/AbstractContinousDistribution.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/AbstractContinousDistribution.java b/core/src/main/java/org/apache/mahout/math/jet/random/AbstractContinousDistribution.java new file mode 100644 index 0000000..8ca03d0 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/AbstractContinousDistribution.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +/** + * Abstract base class for all continuous distributions. Continuous distributions have + * probability density and a cumulative distribution functions. + * + */ +public abstract class AbstractContinousDistribution extends AbstractDistribution { + public double cdf(double x) { + throw new UnsupportedOperationException("Can't compute pdf for " + this.getClass().getName()); + } + + public double pdf(double x) { + throw new UnsupportedOperationException("Can't compute pdf for " + this.getClass().getName()); + } + + /** + * @return A random number from the distribution; returns <tt>(int) Math.round(nextDouble())</tt>. + * Override this method if necessary. + */ + @Override + public int nextInt() { + return (int) Math.round(nextDouble()); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/AbstractDiscreteDistribution.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/AbstractDiscreteDistribution.java b/core/src/main/java/org/apache/mahout/math/jet/random/AbstractDiscreteDistribution.java new file mode 100644 index 0000000..d93d76c --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/AbstractDiscreteDistribution.java @@ -0,0 +1,27 @@ +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +/** + * Abstract base class for all discrete distributions. + * + */ +public abstract class AbstractDiscreteDistribution extends AbstractDistribution { + + /** Makes this class non instantiable, but still let's others inherit from it. */ + protected AbstractDiscreteDistribution() { + } + + /** Returns a random number from the distribution; returns <tt>(double) nextInt()</tt>. */ + @Override + public double nextDouble() { + return nextInt(); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/AbstractDistribution.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/AbstractDistribution.java b/core/src/main/java/org/apache/mahout/math/jet/random/AbstractDistribution.java new file mode 100644 index 0000000..8e9cb0e --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/AbstractDistribution.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +import java.util.Random; + +import org.apache.mahout.math.function.DoubleFunction; +import org.apache.mahout.math.function.IntFunction; + +public abstract class AbstractDistribution extends DoubleFunction implements IntFunction { + + private Random randomGenerator; + + /** Makes this class non instantiable, but still let's others inherit from it. */ + protected AbstractDistribution() { + } + + protected Random getRandomGenerator() { + return randomGenerator; + } + + protected double randomDouble() { + return randomGenerator.nextDouble(); + } + + /** + * Equivalent to <tt>nextDouble()</tt>. This has the effect that distributions can now be used as function objects, + * returning a random number upon function evaluation. + */ + @Override + public double apply(double dummy) { + return nextDouble(); + } + + /** + * Equivalent to <tt>nextInt()</tt>. This has the effect that distributions can now be used as function objects, + * returning a random number upon function evaluation. + */ + @Override + public int apply(int dummy) { + return nextInt(); + } + + /** + * Returns a random number from the distribution. + * @return A new sample from this distribution. + */ + public abstract double nextDouble(); + + /** + * @return + * A random number from the distribution; returns <tt>(int) Math.round(nextDouble())</tt>. Override this + * method if necessary. + */ + public abstract int nextInt(); + + /** + * Sets the uniform random generator internally used. + * @param randomGenerator the new PRNG + */ + public void setRandomGenerator(Random randomGenerator) { + this.randomGenerator = randomGenerator; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/Exponential.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/Exponential.java b/core/src/main/java/org/apache/mahout/math/jet/random/Exponential.java new file mode 100644 index 0000000..06472c2 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/Exponential.java @@ -0,0 +1,81 @@ +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +import java.util.Locale; +import java.util.Random; + +public class Exponential extends AbstractContinousDistribution { + // rate parameter for the distribution. Mean is 1/lambda. + private double lambda; + + /** + * Provides a negative exponential distribution given a rate parameter lambda and an underlying + * random number generator. The mean of this distribution will be equal to 1/lambda. + * + * @param lambda The rate parameter of the distribution. + * @param randomGenerator The PRNG that is used to generate values. + */ + public Exponential(double lambda, Random randomGenerator) { + setRandomGenerator(randomGenerator); + this.lambda = lambda; + } + + /** + * Returns the cumulative distribution function. + * @param x The point at which the cumulative distribution function is to be evaluated. + * @return Returns the integral from -infinity to x of the PDF, also known as the cumulative distribution + * function. + */ + @Override + public double cdf(double x) { + if (x <= 0.0) { + return 0.0; + } + return 1.0 - Math.exp(-x * lambda); + } + + /** + * Returns a random number from the distribution. + */ + @Override + public double nextDouble() { + return -Math.log1p(-randomDouble()) / lambda; + } + + /** + * Returns the value of the probability density function at a particular point. + * @param x The point at which the probability density function is to be evaluated. + * @return The value of the probability density function at the specified point. + */ + @Override + public double pdf(double x) { + if (x < 0.0) { + return 0.0; + } + return lambda * Math.exp(-x * lambda); + } + + /** + * Sets the rate parameter. + * @param lambda The new value of the rate parameter. + */ + public void setState(double lambda) { + this.lambda = lambda; + } + + /** + * Returns a String representation of the receiver. + */ + @Override + public String toString() { + return String.format(Locale.ENGLISH, "%s(%.4f)", this.getClass().getName(), lambda); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/Gamma.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/Gamma.java b/core/src/main/java/org/apache/mahout/math/jet/random/Gamma.java new file mode 100644 index 0000000..914157b --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/Gamma.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +import org.apache.mahout.math.jet.stat.Probability; + +import java.util.Random; + +public class Gamma extends AbstractContinousDistribution { + // shape + private final double alpha; + + // rate + private final double rate; + + /** + * Constructs a Gamma distribution with a given shape (alpha) and rate (beta). + * + * @param alpha The shape parameter. + * @param rate The rate parameter. + * @param randomGenerator The random number generator that generates bits for us. + * @throws IllegalArgumentException if <tt>alpha <= 0.0 || alpha <= 0.0</tt>. + */ + public Gamma(double alpha, double rate, Random randomGenerator) { + this.alpha = alpha; + this.rate = rate; + setRandomGenerator(randomGenerator); + } + + /** + * Returns the cumulative distribution function. + * @param x The end-point where the cumulation should end. + */ + @Override + public double cdf(double x) { + return Probability.gamma(alpha, rate, x); + } + + /** Returns a random number from the distribution. */ + @Override + public double nextDouble() { + return nextDouble(alpha, rate); + } + + /** Returns a random number from the distribution; bypasses the internal state. + * * + * Gamma Distribution - Acceptance Rejection combined with * + * Acceptance Complement * + * * + ****************************************************************** + * * + * FUNCTION: - gds samples a random number from the standard * + * gamma distribution with parameter a > 0. * + * Acceptance Rejection gs for a < 1 , * + * Acceptance Complement gd for a >= 1 . * + * REFERENCES: - J.H. Ahrens, U. Dieter (1974): Computer methods * + * for sampling from gamma, beta, Poisson and * + * binomial distributions, Computing 12, 223-246. * + * - J.H. Ahrens, U. Dieter (1982): Generating gamma * + * variates by a modified rejection technique, * + * Communications of the ACM 25, 47-54. * + * SUBPROGRAMS: - drand(seed) ... (0,1)-Uniform generator with * + * unsigned long integer *seed * + * - NORMAL(seed) ... Normal generator N(0,1). * + * * + * @param alpha Shape parameter. + * @param rate Rate parameter (=1/scale). + * @return A gamma distributed sample. + */ + public double nextDouble(double alpha, double rate) { + if (alpha <= 0.0) { + throw new IllegalArgumentException(); + } + if (rate <= 0.0) { + throw new IllegalArgumentException(); + } + + double gds; + double b = 0.0; + if (alpha < 1.0) { // CASE A: Acceptance rejection algorithm gs + b = 1.0 + 0.36788794412 * alpha; // Step 1 + while (true) { + double p = b * randomDouble(); + if (p <= 1.0) { // Step 2. Case gds <= 1 + gds = Math.exp(Math.log(p) / alpha); + if (Math.log(randomDouble()) <= -gds) { + return gds / rate; + } + } else { // Step 3. Case gds > 1 + gds = -Math.log((b - p) / alpha); + if (Math.log(randomDouble()) <= (alpha - 1.0) * Math.log(gds)) { + return gds / rate; + } + } + } + } else { // CASE B: Acceptance complement algorithm gd (gaussian distribution, box muller transformation) + double ss = 0.0; + double s = 0.0; + double d = 0.0; + if (alpha != -1.0) { // Step 1. Preparations + ss = alpha - 0.5; + s = Math.sqrt(ss); + d = 5.656854249 - 12.0 * s; + } + // Step 2. Normal deviate + double v12; + double v1; + do { + v1 = 2.0 * randomDouble() - 1.0; + double v2 = 2.0 * randomDouble() - 1.0; + v12 = v1 * v1 + v2 * v2; + } while (v12 > 1.0); + double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12); + double x = s + 0.5 * t; + gds = x * x; + if (t >= 0.0) { + return gds / rate; + } // Immediate acceptance + + double u = randomDouble(); + if (d * u <= t * t * t) { + return gds / rate; + } // Squeeze acceptance + + double q0 = 0.0; + double si = 0.0; + double c = 0.0; + if (alpha != -1.0) { // Step 4. Set-up for hat case + double r = 1.0 / alpha; + double q9 = 0.0001710320; + double q8 = -0.0004701849; + double q7 = 0.0006053049; + double q6 = 0.0003340332; + double q5 = -0.0003349403; + double q4 = 0.0015746717; + double q3 = 0.0079849875; + double q2 = 0.0208333723; + double q1 = 0.0416666664; + q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r; + if (alpha > 3.686) { + if (alpha > 13.022) { + b = 1.77; + si = 0.75; + c = 0.1515 / s; + } else { + b = 1.654 + 0.0076 * ss; + si = 1.68 / s + 0.275; + c = 0.062 / s + 0.024; + } + } else { + b = 0.463 + s - 0.178 * ss; + si = 1.235; + c = 0.195 / s - 0.079 + 0.016 * s; + } + } + double v; + double q; + double a9 = 0.104089866; + double a8 = -0.112750886; + double a7 = 0.110368310; + double a6 = -0.124385581; + double a5 = 0.142873973; + double a4 = -0.166677482; + double a3 = 0.199999867; + double a2 = -0.249999949; + double a1 = 0.333333333; + if (x > 0.0) { // Step 5. Calculation of q + v = t / (s + s); // Step 6. + if (Math.abs(v) > 0.25) { + q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log1p(v); + } else { + q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) + * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v; + } // Step 7. Quotient acceptance + if (Math.log1p(-u) <= q) { + return gds / rate; + } + } + + double e7 = 0.000247453; + double e6 = 0.001353826; + double e5 = 0.008345522; + double e4 = 0.041664508; + double e3 = 0.166666848; + double e2 = 0.499999994; + double e1 = 1.000000000; + while (true) { // Step 8. Double exponential deviate t + double sign_u; + double e; + do { + e = -Math.log(randomDouble()); + u = randomDouble(); + u = u + u - 1.0; + sign_u = u > 0 ? 1.0 : -1.0; + t = b + e * si * sign_u; + } while (t <= -0.71874483771719); // Step 9. Rejection of t + v = t / (s + s); // Step 10. New q(t) + if (Math.abs(v) > 0.25) { + q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log1p(v); + } else { + q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) + * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v; + } + if (q <= 0.0) { + continue; + } // Step 11. + double w; + if (q > 0.5) { + w = Math.exp(q) - 1.0; + } else { + w = ((((((e7 * q + e6) * q + e5) * q + e4) * q + e3) * q + e2) * q + e1) * q; + } // Step 12. Hat acceptance + if (c * u * sign_u <= w * Math.exp(e - 0.5 * t * t)) { + x = s + 0.5 * t; + return x * x / rate; + } + } + } + } + + /** Returns the probability distribution function. + * @param x Where to compute the density function. + * + * @return The value of the gamma density at x. + */ + @Override + public double pdf(double x) { + if (x < 0) { + throw new IllegalArgumentException(); + } + if (x == 0) { + if (alpha == 1.0) { + return rate; + } else if (alpha < 1) { + return Double.POSITIVE_INFINITY; + } else { + return 0; + } + } + if (alpha == 1.0) { + return rate * Math.exp(-x * rate); + } + return rate * Math.exp((alpha - 1.0) * Math.log(x * rate) - x * rate - logGamma(alpha)); + } + + @Override + public String toString() { + return this.getClass().getName() + '(' + rate + ',' + alpha + ')'; + } + + /** Returns a quick approximation of <tt>log(gamma(x))</tt>. */ + public static double logGamma(double x) { + + if (x <= 0.0 /* || x > 1.3e19 */) { + return -999; + } + + double z; + for (z = 1.0; x < 11.0; x++) { + z *= x; + } + + double r = 1.0 / (x * x); + double c6 = -1.9175269175269175e-03; + double c5 = 8.4175084175084175e-04; + double c4 = -5.9523809523809524e-04; + double c3 = 7.9365079365079365e-04; + double c2 = -2.7777777777777777e-03; + double c1 = 8.3333333333333333e-02; + double g = c1 + r * (c2 + r * (c3 + r * (c4 + r * (c5 + r + c6)))); + double c0 = 9.1893853320467274e-01; + g = (x - 0.5) * Math.log(x) - x + c0 + g / x; + if (z == 1.0) { + return g; + } + return g - Math.log(z); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/NegativeBinomial.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/NegativeBinomial.java b/core/src/main/java/org/apache/mahout/math/jet/random/NegativeBinomial.java new file mode 100644 index 0000000..1e577eb --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/NegativeBinomial.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +import org.apache.mahout.math.jet.math.Arithmetic; +import org.apache.mahout.math.jet.stat.Probability; + +import java.util.Random; + +/** Mostly deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */ +public final class NegativeBinomial extends AbstractDiscreteDistribution { + + private final int r; + private final double p; + + private final Gamma gamma; + private final Poisson poisson; + + /** + * Constructs a Negative Binomial distribution which describes the probability of getting + * a particular number of negative trials (k) before getting a fixed number of positive + * trials (r) where each positive trial has probability (p) of being successful. + * + * @param r the required number of positive trials. + * @param p the probability of success. + * @param randomGenerator a uniform random number generator. + */ + public NegativeBinomial(int r, double p, Random randomGenerator) { + setRandomGenerator(randomGenerator); + this.r = r; + this.p = p; + this.gamma = new Gamma(r, 1, randomGenerator); + this.poisson = new Poisson(0.0, randomGenerator); + } + + /** + * Returns the cumulative distribution function. + */ + public double cdf(int k) { + return Probability.negativeBinomial(k, r, p); + } + + /** + * Returns the probability distribution function. + */ + public double pdf(int k) { + return Arithmetic.binomial(k + r - 1, r - 1) * Math.pow(p, r) * Math.pow(1.0 - p, k); + } + + @Override + public int nextInt() { + return nextInt(r, p); + } + + /** + * Returns a sample from this distribution. The value returned will + * be the number of negative samples required before achieving r + * positive samples. Each successive sample is taken independently + * from a Bernouli process with probability p of success. + * + * The algorithm used is taken from J.H. Ahrens, U. Dieter (1974): + * Computer methods for sampling from gamma, beta, Poisson and + * binomial distributions, Computing 12, 223--246. + * + * This algorithm is essentially the same as described at + * http://en.wikipedia.org/wiki/Negative_binomial_distribution#Gamma.E2.80.93Poisson_mixture + * except that the notion of positive and negative outcomes is uniformly + * inverted. Because the inversion is complete and consistent, this + * definition is effectively identical to that defined on wikipedia. + */ + public int nextInt(int r, double p) { + return this.poisson.nextInt(gamma.nextDouble(r, p / (1.0 - p))); + } + + /** + * Returns a String representation of the receiver. + */ + @Override + public String toString() { + return this.getClass().getName() + '(' + r + ',' + p + ')'; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/Normal.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/Normal.java b/core/src/main/java/org/apache/mahout/math/jet/random/Normal.java new file mode 100644 index 0000000..7ceac22 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/Normal.java @@ -0,0 +1,110 @@ +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +import org.apache.mahout.math.jet.stat.Probability; + +import java.util.Locale; +import java.util.Random; + +/** + * Implements a normal distribution specified mean and standard deviation. + */ +public class Normal extends AbstractContinousDistribution { + + private double mean; + private double variance; + private double standardDeviation; + + private double cache; // cache for Box-Mueller algorithm + private boolean cacheFilled; // Box-Mueller + + private double normalizer; // performance cache + + /** + * @param mean The mean of the resulting distribution. + * @param standardDeviation The standard deviation of the distribution. + * @param randomGenerator The random number generator to use. This can be null if you don't + * need to generate any numbers. + */ + public Normal(double mean, double standardDeviation, Random randomGenerator) { + setRandomGenerator(randomGenerator); + setState(mean, standardDeviation); + } + + /** + * Returns the cumulative distribution function. + */ + @Override + public double cdf(double x) { + return Probability.normal(mean, variance, x); + } + + /** Returns the probability density function. */ + @Override + public double pdf(double x) { + double diff = x - mean; + return normalizer * Math.exp(-(diff * diff) / (2.0 * variance)); + } + + /** + * Returns a random number from the distribution. + */ + @Override + public double nextDouble() { + // Uses polar Box-Muller transformation. + if (cacheFilled) { + cacheFilled = false; + return cache; + } + + double x; + double y; + double r; + do { + x = 2.0 * randomDouble() - 1.0; + y = 2.0 * randomDouble() - 1.0; + r = x * x + y * y; + } while (r >= 1.0); + + double z = Math.sqrt(-2.0 * Math.log(r) / r); + cache = this.mean + this.standardDeviation * x * z; + cacheFilled = true; + return this.mean + this.standardDeviation * y * z; + } + + /** Sets the uniform random generator internally used. */ + @Override + public final void setRandomGenerator(Random randomGenerator) { + super.setRandomGenerator(randomGenerator); + this.cacheFilled = false; + } + + /** + * Sets the mean and variance. + * @param mean The new value for the mean. + * @param standardDeviation The new value for the standard deviation. + */ + public final void setState(double mean, double standardDeviation) { + if (mean != this.mean || standardDeviation != this.standardDeviation) { + this.mean = mean; + this.standardDeviation = standardDeviation; + this.variance = standardDeviation * standardDeviation; + this.cacheFilled = false; + + this.normalizer = 1.0 / Math.sqrt(2.0 * Math.PI * variance); + } + } + + /** Returns a String representation of the receiver. */ + @Override + public String toString() { + return String.format(Locale.ENGLISH, "%s(m=%f, sd=%f)", this.getClass().getName(), mean, standardDeviation); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/Poisson.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/Poisson.java b/core/src/main/java/org/apache/mahout/math/jet/random/Poisson.java new file mode 100644 index 0000000..497691e --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/Poisson.java @@ -0,0 +1,296 @@ +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +import org.apache.mahout.math.jet.math.Arithmetic; + +import java.util.Random; + +/** Partially deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */ +public final class Poisson extends AbstractDiscreteDistribution { + + private final double mean; + + // precomputed and cached values (for performance only) + // cache for < SWITCH_MEAN + private double myOld = -1.0; + private double p; + private double q; + private double p0; + private final double[] pp = new double[36]; + private int llll; + + // cache for >= SWITCH_MEAN + private double myLast = -1.0; + private double ll; + private int k2; + private int k4; + private int k1; + private int k5; + private double dl; + private double dr; + private double r1; + private double r2; + private double r4; + private double r5; + private double lr; + private double lMy; + private double cPm; + private double f1; + private double f2; + private double f4; + private double f5; + private double p1; + private double p2; + private double p3; + private double p4; + private double p5; + private double p6; + + // cache for both; + + + private static final double MEAN_MAX = Integer.MAX_VALUE; + // for all means larger than that, we don't try to compute a poisson deviation, but return the mean. + private static final double SWITCH_MEAN = 10.0; // switch from method A to method B + + + /** Constructs a poisson distribution. Example: mean=1.0. */ + public Poisson(double mean, Random randomGenerator) { + setRandomGenerator(randomGenerator); + this.mean = mean; + } + + private static double f(int k, double lNu, double cPm) { + return Math.exp(k * lNu - Arithmetic.logFactorial(k) - cPm); + } + + @Override + public int nextInt() { + return nextInt(mean); + } + + /** Returns a random number from the distribution; bypasses the internal state. */ + public int nextInt(double theMean) { + /****************************************************************** + * * + * Poisson Distribution - Patchwork Rejection/Inversion * + * * + ****************************************************************** + * * + * For parameter my < 10 Tabulated Inversion is applied. * + * For my >= 10 Patchwork Rejection is employed: * + * The area below the histogram function f(x) is rearranged in * + * its body by certain point reflections. Within a large center * + * interval variates are sampled efficiently by rejection from * + * uniform hats. Rectangular immediate acceptance regions speed * + * up the generation. The remaining tails are covered by * + * exponential functions. * + * * + *****************************************************************/ + Random gen = getRandomGenerator(); + + //double t, g, my_k; + + //double gx, gy, px, py, e, x, xx, delta, v; + //int sign; + + //static double p,q,p0,pp[36]; + //static long ll,m; + + int m; + if (theMean < SWITCH_MEAN) { // CASE B: Inversion- start new table and calculate p0 + if (theMean != myOld) { + myOld = theMean; + llll = 0; + p = Math.exp(-theMean); + q = p; + p0 = p; + //for (k=pp.length; --k >=0;) pp[k] = 0; + } + m = theMean > 1.0 ? (int) theMean : 1; + while (true) { + double u = gen.nextDouble(); + int k = 0; + if (u <= p0) { + return k; + } + if (llll != 0) { // Step T. Table comparison + int i = u > 0.458 ? Math.min(llll, m) : 1; + for (k = i; k <= llll; k++) { + if (u <= pp[k]) { + return k; + } + } + if (llll == 35) { + continue; + } + } + for (k = llll + 1; k <= 35; k++) { // Step C. Creation of new prob. + p *= theMean / k; + q += p; + pp[k] = q; + if (u <= q) { + llll = k; + return k; + } + } + llll = 35; + } + // end my < SWITCH_MEAN + } else if (theMean < MEAN_MAX) { // CASE A: acceptance complement + //static double my_last = -1.0; + //static long int m, k2, k4, k1, k5; + //static double dl, dr, r1, r2, r4, r5, ll, lr, l_my, c_pm, + // f1, f2, f4, f5, p1, p2, p3, p4, p5, p6; + + m = (int) theMean; + if (theMean != myLast) { // set-up + myLast = theMean; + + // approximate deviation of reflection points k2, k4 from my - 1/2 + double Ds = Math.sqrt(theMean + 0.25); + + // mode m, reflection points k2 and k4, and points k1 and k5, which + // delimit the centre region of h(x) + k2 = (int) Math.ceil(theMean - 0.5 - Ds); + k4 = (int) (theMean - 0.5 + Ds); + k1 = k2 + k2 - m + 1; + k5 = k4 + k4 - m; + + // range width of the critical left and right centre region + dl = k2 - k1; + dr = k5 - k4; + + // recurrence constants r(k) = p(k)/p(k-1) at k = k1, k2, k4+1, k5+1 + r1 = theMean / k1; + r2 = theMean / k2; + r4 = theMean / (k4 + 1); + r5 = theMean / (k5 + 1); + + // reciprocal values of the scale parameters of expon. tail envelopes + ll = Math.log(r1); // expon. tail left + lr = -Math.log(r5); // expon. tail right + + // Poisson constants, necessary for computing function values f(k) + lMy = Math.log(theMean); + cPm = m * lMy - Arithmetic.logFactorial(m); + + // function values f(k) = p(k)/p(m) at k = k2, k4, k1, k5 + f2 = f(k2, lMy, cPm); + f4 = f(k4, lMy, cPm); + f1 = f(k1, lMy, cPm); + f5 = f(k5, lMy, cPm); + + // area of the two centre and the two exponential tail regions + // area of the two immediate acceptance regions between k2, k4 + p1 = f2 * (dl + 1.0); // immed. left + p2 = f2 * dl + p1; // centre left + p3 = f4 * (dr + 1.0) + p2; // immed. right + p4 = f4 * dr + p3; // centre right + p5 = f1 / ll + p4; // expon. tail left + p6 = f5 / lr + p5; // expon. tail right + } // end set-up + + while (true) { + // generate uniform number U -- U(0, p6) + // case distinction corresponding to U + double W; + double V; + double U; + int Y; + int X; + int Dk; + if ((U = gen.nextDouble() * p6) < p2) { // centre left + + // immediate acceptance region R2 = [k2, m) *[0, f2), X = k2, ... m -1 + if ((V = U - p1) < 0.0) { + return k2 + (int) (U / f2); + } + // immediate acceptance region R1 = [k1, k2)*[0, f1), X = k1, ... k2-1 + if ((W = V / dl) < f1) { + return k1 + (int) (V / f1); + } + + // computation of candidate X < k2, and its counterpart Y > k2 + // either squeeze-acceptance of X or acceptance-rejection of Y + Dk = gen.nextInt((int) dl) + 1; + if (W <= f2 - Dk * (f2 - f2 / r2)) { // quick accept of + return k2 - Dk; // X = k2 - Dk + } + if ((V = f2 + f2 - W) < 1.0) { // quick reject of Y + Y = k2 + Dk; + if (V <= f2 + Dk * (1.0 - f2) / (dl + 1.0)) { // quick accept of + return Y; // Y = k2 + Dk + } + if (V <= f(Y, lMy, cPm)) { + return Y; + } // final accept of Y + } + X = k2 - Dk; + } else if (U < p4) { // centre right + // immediate acceptance region R3 = [m, k4+1)*[0, f4), X = m, ... k4 + if ((V = U - p3) < 0.0) { + return k4 - (int) ((U - p2) / f4); + } + // immediate acceptance region R4 = [k4+1, k5+1)*[0, f5) + if ((W = V / dr) < f5) { + return k5 - (int) (V / f5); + } + + // computation of candidate X > k4, and its counterpart Y < k4 + // either squeeze-acceptance of X or acceptance-rejection of Y + Dk = gen.nextInt((int) dr) + 1; + if (W <= f4 - Dk * (f4 - f4 * r4)) { // quick accept of + return k4 + Dk; // X = k4 + Dk + } + if ((V = f4 + f4 - W) < 1.0) { // quick reject of Y + Y = k4 - Dk; + if (V <= f4 + Dk * (1.0 - f4) / dr) { // quick accept of + return Y; // Y = k4 - Dk + } + if (V <= f(Y, lMy, cPm)) { + return Y; + } // final accept of Y + } + X = k4 + Dk; + } else { + W = gen.nextDouble(); + if (U < p5) { // expon. tail left + Dk = (int) (1.0 - Math.log(W) / ll); + if ((X = k1 - Dk) < 0) { + continue; + } // 0 <= X <= k1 - 1 + W *= (U - p4) * ll; // W -- U(0, h(x)) + if (W <= f1 - Dk * (f1 - f1 / r1)) { + return X; + } // quick accept of X + } else { // expon. tail right + Dk = (int) (1.0 - Math.log(W) / lr); + X = k5 + Dk; // X >= k5 + 1 + W *= (U - p5) * lr; // W -- U(0, h(x)) + if (W <= f5 - Dk * (f5 - f5 * r5)) { + return X; + } // quick accept of X + } + } + + // acceptance-rejection test of candidate X from the original area + // test, whether W <= f(k), with W = U*h(x) and U -- U(0, 1) + // log f(X) = (X - m)*log(my) - log X! + log m! + if (Math.log(W) <= X * lMy - Arithmetic.logFactorial(X) - cPm) { + return X; + } + } + } else { // mean is too large + return (int) theMean; + } + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/Uniform.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/Uniform.java b/core/src/main/java/org/apache/mahout/math/jet/random/Uniform.java new file mode 100644 index 0000000..32c8b90 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/Uniform.java @@ -0,0 +1,164 @@ +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random; + +import org.apache.mahout.common.RandomUtils; + +import java.util.Random; + +public class Uniform extends AbstractContinousDistribution { + + private double min; + private double max; + + /** + * Constructs a uniform distribution with the given minimum and maximum, using a {@link + * org.apache.mahout.math.jet.random.engine.MersenneTwister} seeded with the given seed. + */ + public Uniform(double min, double max, int seed) { + this(min, max, RandomUtils.getRandom(seed)); + } + + /** Constructs a uniform distribution with the given minimum and maximum. */ + public Uniform(double min, double max, Random randomGenerator) { + setRandomGenerator(randomGenerator); + setState(min, max); + } + + /** Constructs a uniform distribution with <tt>min=0.0</tt> and <tt>max=1.0</tt>. */ + public Uniform(Random randomGenerator) { + this(0, 1, randomGenerator); + } + + /** Returns the cumulative distribution function (assuming a continous uniform distribution). */ + @Override + public double cdf(double x) { + if (x <= min) { + return 0.0; + } + if (x >= max) { + return 1.0; + } + return (x - min) / (max - min); + } + + /** Returns a uniformly distributed random <tt>boolean</tt>. */ + public boolean nextBoolean() { + return randomDouble() > 0.5; + } + + /** + * Returns a uniformly distributed random number in the open interval <tt>(min,max)</tt> (excluding <tt>min</tt> and + * <tt>max</tt>). + */ + @Override + public double nextDouble() { + return min + (max - min) * randomDouble(); + } + + /** + * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt> (excluding <tt>from</tt> and + * <tt>to</tt>). Pre conditions: <tt>from <= to</tt>. + */ + public double nextDoubleFromTo(double from, double to) { + return from + (to - from) * randomDouble(); + } + + /** + * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt> (excluding <tt>from</tt> and + * <tt>to</tt>). Pre conditions: <tt>from <= to</tt>. + */ + public float nextFloatFromTo(float from, float to) { + return (float) nextDoubleFromTo(from, to); + } + + /** + * Returns a uniformly distributed random number in the closed interval + * <tt>[from,to]</tt> (including <tt>from</tt> + * and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>. + */ + public int nextIntFromTo(int from, int to) { + return (int) (from + (long) ((1L + to - from) * randomDouble())); + } + + /** + * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt> (including <tt>from</tt> + * and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>. + */ + public long nextLongFromTo(long from, long to) { + /* Doing the thing turns out to be more tricky than expected. + avoids overflows and underflows. + treats cases like from=-1, to=1 and the like right. + the following code would NOT solve the problem: return (long) (Doubles.randomFromTo(from,to)); + + rounding avoids the unsymmetric behaviour of casts from double to long: (long) -0.7 = 0, (long) 0.7 = 0. + checking for overflows and underflows is also necessary. + */ + + // first the most likely and also the fastest case. + if (from >= 0 && to < Long.MAX_VALUE) { + return from + (long) nextDoubleFromTo(0.0, to - from + 1); + } + + // would we get a numeric overflow? + // if not, we can still handle the case rather efficient. + double diff = (double) to - (double) from + 1.0; + if (diff <= Long.MAX_VALUE) { + return from + (long) nextDoubleFromTo(0.0, diff); + } + + // now the pathologic boundary cases. + // they are handled rather slow. + long random; + if (from == Long.MIN_VALUE) { + if (to == Long.MAX_VALUE) { + //return Math.round(nextDoubleFromTo(from,to)); + int i1 = nextIntFromTo(Integer.MIN_VALUE, Integer.MAX_VALUE); + int i2 = nextIntFromTo(Integer.MIN_VALUE, Integer.MAX_VALUE); + return ((i1 & 0xFFFFFFFFL) << 32) | (i2 & 0xFFFFFFFFL); + } + random = Math.round(nextDoubleFromTo(Long.MIN_VALUE, to + 1)); + if (random > to) { + random = Long.MIN_VALUE; + } + } else { + random = Math.round(nextDoubleFromTo(from - 1, to)); + if (random < from) { + random = to; + } + } + return random; + } + + /** Returns the probability distribution function (assuming a continous uniform distribution). */ + @Override + public double pdf(double x) { + if (x <= min || x >= max) { + return 0.0; + } + return 1.0 / (max - min); + } + + /** Sets the internal state. */ + public void setState(double min, double max) { + if (max < min) { + setState(max, min); + return; + } + this.min = min; + this.max = max; + } + + + /** Returns a String representation of the receiver. */ + @Override + public String toString() { + return this.getClass().getName() + '(' + min + ',' + max + ')'; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java b/core/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java new file mode 100644 index 0000000..8bca895 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java @@ -0,0 +1,275 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ + +package org.apache.mahout.math.jet.random.engine; + +import java.util.Date; +/** + MersenneTwister (MT19937) is one of the strongest uniform pseudo-random number generators + known so far; at the same time it is quick. + Produces uniformly distributed <tt>int</tt>'s and <tt>long</tt>'s in the closed intervals + <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> and <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively, + as well as <tt>float</tt>'s and <tt>double</tt>'s in the open unit intervals <tt>(0.0f,1.0f)</tt> + and <tt>(0.0,1.0)</tt>, respectively. + The seed can be any 32-bit integer except <tt>0</tt>. Shawn J. Cokus commented that perhaps the + seed should preferably be odd. + <p> + <b>Quality:</b> MersenneTwister is designed to pass the k-distribution test. It has an + astronomically large period of 2<sup>19937</sup>-1 (=10<sup>6001</sup>) and 623-dimensional + equidistribution up to 32-bit accuracy. + It passes many stringent statistical tests, including the + <A HREF="http://stat.fsu.edu/~geo/diehard.html">diehard</A> test of G. Marsaglia + and the load test of P. Hellekalek and S. Wegenkittl. + <p> + <b>Performance:</b> Its speed is comparable to other modern generators (in particular, + as fast as <tt>java.util.Random.nextFloat()</tt>). + 2.5 million calls to <tt>raw()</tt> per second (Pentium Pro 200 Mhz, JDK 1.2, NT). + Be aware, however, that there is a non-negligible amount of overhead required to initialize the data + structures used by a MersenneTwister. Code like + {@code + double sum = 0.0; + for (int i=0; i<100000; ++i) { + RandomElement twister = new MersenneTwister(new Date()); + sum += twister.raw(); + } + } + will be wildly inefficient. Consider using + {@code + double sum = 0.0; + RandomElement twister = new MersenneTwister(new Date()); + for (int i=0; i<100000; ++i) { + sum += twister.raw(); + } + } + instead. This allows the cost of constructing the MersenneTwister object + to be borne only once, rather than once for each iteration in the loop. + <p> + <b>Implementation:</b> After M. Matsumoto and T. Nishimura, + "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform Pseudo-Random Number Generator", + ACM Transactions on Modeling and Computer Simulation, + Vol. 8, No. 1, January 1998, pp 3--30. + <dl> + <dt>More info on <a HREF="http://www.math.keio.ac.jp/~matumoto/eindex.html"> Masumoto's homepage</a>.</dt> + <dt>More info on <a HREF="http://www.ncsa.uiuc.edu/Apps/CMP/RNG/www-rng.html"> Pseudo-random number + generators is on the Web</a>.</dt> + <dt>Yet <a HREF="http://nhse.npac.syr.edu/random"> some more info</a>.</dt> + <p> + The correctness of this implementation has been verified against the published output sequence + <a href="http://www.math.keio.ac.jp/~nisimura/random/real2/mt19937-2.out">mt19937-2.out</a> of the C-implementation + <a href="http://www.math.keio.ac.jp/~nisimura/random/real2/mt19937-2.c">mt19937-2.c</a>. + (Call <tt>test(1000)</tt> to print the sequence). + <dt> + Note that this implementation is <b>not synchronized</b>.</dt> + </dl> + <p> + <b>Details:</b> MersenneTwister is designed with consideration of the flaws of various existing generators in mind. + It is an improved version of TT800, a very successful generator. + MersenneTwister is based on linear recurrences modulo 2. + Such generators are very fast, have extremely long periods, and appear quite robust. + MersenneTwister produces 32-bit numbers, and every <tt>k</tt>-dimensional vector of such + numbers appears the same number of times as <tt>k</tt> successive values over the + period length, for each <tt>k <= 623</tt> (except for the zero vector, which appears one time less). + If one looks at only the first <tt>n <= 16</tt> bits of each number, then the property holds + for even larger <tt>k</tt>, as shown in the following table (taken from the publication cited above): + <table width="75%" border="1" cellspacing="0" cellpadding="0" summary="property table"> + <tr> + <td width="2%" align="center"> <div>n</div> </td> + <td width="6%" align="center"> <div>1</div> </td> + <td width="5%" align="center"> <div>2</div> </td> + <td width="5%" align="center"> <div>3</div> </td> + <td width="5%" align="center"> <div>4</div> </td> + <td width="5%" align="center"> <div>5</div> </td> + <td width="5%" align="center"> <div>6</div> </td> + <td width="5%" align="center"> <div>7</div> </td> + <td width="5%" align="center"> <div>8</div> </td> + <td width="5%" align="center"> <div>9</div> </td> + <td width="5%" align="center"> <div>10</div> </td> + <td width="5%" align="center"> <div>11</div> </td> + <td width="10%" align="center"> <div>12 .. 16</div> </td> + <td width="10%" align="center"> <div>17 .. 32</div> </td> + </tr> + <tr> + <td width="2%" align="center"> <div>k</div> </td> + <td width="6%" align="center"> <div>19937</div> </td> + <td width="5%" align="center"> <div>9968</div> </td> + <td width="5%" align="center"> <div>6240</div> </td> + <td width="5%" align="center"> <div>4984</div> </td> + <td width="5%" align="center"> <div>3738</div> </td> + <td width="5%" align="center"> <div>3115</div> </td> + <td width="5%" align="center"> <div>2493</div> </td> + <td width="5%" align="center"> <div>2492</div> </td> + <td width="5%" align="center"> <div>1869</div> </td> + <td width="5%" align="center"> <div>1869</div> </td> + <td width="5%" align="center"> <div>1248</div> </td> + <td width="10%" align="center"> <div>1246</div> </td> + <td width="10%" align="center"> <div>623</div> </td> + </tr> + </table> + <p> + MersenneTwister generates random numbers in batches of 624 numbers at a time, so + the caching and pipelining of modern systems is exploited. + The generator is implemented to generate the output by using the fastest arithmetic + operations only: 32-bit additions and bit operations (no division, no multiplication, no mod). + These operations generate sequences of 32 random bits (<tt>int</tt>'s). + <tt>long</tt>'s are formed by concatenating two 32 bit <tt>int</tt>'s. + <tt>float</tt>'s are formed by dividing the interval <tt>[0.0,1.0]</tt> into 2<sup>32</sup> + sub intervals, then randomly choosing one subinterval. + <tt>double</tt>'s are formed by dividing the interval <tt>[0.0,1.0]</tt> into 2<sup>64</sup> + sub intervals, then randomly choosing one subinterval. + <p> + @author [email protected] + @version 1.0, 09/24/99 + @see java.util.Random + */ +public final class MersenneTwister extends RandomEngine { + + /* Period parameters */ + private static final int N = 624; + private static final int M = 397; + private static final int MATRIX_A = 0x9908b0df; /* constant vector a */ + private static final int UPPER_MASK = 0x80000000; /* most significant w-r bits */ + private static final int LOWER_MASK = 0x7fffffff; /* least significant r bits */ + + /* for tempering */ + private static final int TEMPERING_MASK_B = 0x9d2c5680; + private static final int TEMPERING_MASK_C = 0xefc60000; + + private static final int MAG0 = 0x0; + private static final int MAG1 = MATRIX_A; + //private static final int[] mag01=new int[] {0x0, MATRIX_A}; + /* mag01[x] = x * MATRIX_A for x=0,1 */ + + private static final int DEFAULT_SEED = 4357; + + private int mti; + private final int[] mt = new int[N]; /* set initial seeds: N = 624 words */ + + /** + * Constructs and returns a random number generator with a default seed, which is a <b>constant</b>. Thus using this + * constructor will yield generators that always produce exactly the same sequence. This method is mainly intended to + * ease testing and debugging. + */ + public MersenneTwister() { + this(DEFAULT_SEED); + } + + /** Constructs and returns a random number generator with the given seed. + * @param seed A number that is used to initialize the internal state of the generator. + */ + public MersenneTwister(int seed) { + setSeed(seed); + } + + /** + * Constructs and returns a random number generator seeded with the given date. + * + * @param d typically <tt>new Date()</tt> + */ + public MersenneTwister(Date d) { + this((int) d.getTime()); + } + + /** Generates N words at one time */ + void nextBlock() { + int y; + int kk; + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? MAG0 : MAG1); + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? MAG0 : MAG1); + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? MAG0 : MAG1); + + this.mti = 0; + } + + /** + * Returns a 32 bit uniformly distributed random number in the closed interval + * <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> + * (including <tt>Integer.MIN_VALUE</tt> and <tt>Integer.MAX_VALUE</tt>). + */ + @Override + public int nextInt() { + /* Each single bit including the sign bit will be random */ + if (mti == N) { + nextBlock(); + } // generate N ints at one time + + int y = mt[mti++]; + y ^= y >>> 11; // y ^= TEMPERING_SHIFT_U(y ); + y ^= (y << 7) & TEMPERING_MASK_B; // y ^= TEMPERING_SHIFT_S(y) & TEMPERING_MASK_B; + y ^= (y << 15) & TEMPERING_MASK_C; // y ^= TEMPERING_SHIFT_T(y) & TEMPERING_MASK_C; + // y &= 0xffffffff; //you may delete this line if word size = 32 + y ^= y >>> 18; // y ^= TEMPERING_SHIFT_L(y); + + return y; + } + + /** Sets the receiver's seed. This method resets the receiver's entire internal state. + * @param seed An integer that is used to reset the internal state of the generator */ + void setSeed(int seed) { + mt[0] = seed; + for (int i = 1; i < N; i++) { + mt[i] = 1812433253 * (mt[i - 1] ^ (mt[i - 1] >> 30)) + i; + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + //mt[i] &= 0xffffffff; + /* for >32 bit machines */ + } + //log.info("init done"); + mti = N; + } + + /** + * Sets the receiver's seed in a fashion compatible with the + * reference C implementation. See + * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/980409/mt19937int.c + * + * This method isn't as good as the default method due to poor distribution of the + * resulting states. + * + * @param seed An integer that is used to reset the internal state in the same way as + * done in the 1999 reference implementation. Should only be used for testing, not + * actual coding. + */ + void setReferenceSeed(int seed) { + for (int i = 0; i < N; i++) { + mt[i] = seed & 0xffff0000; + seed = 69069 * seed + 1; + mt[i] |= (seed & 0xffff0000) >>> 16; + seed = 69069 * seed + 1; + } + //log.info("init done"); + mti = N; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/engine/RandomEngine.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/engine/RandomEngine.java b/core/src/main/java/org/apache/mahout/math/jet/random/engine/RandomEngine.java new file mode 100644 index 0000000..f832b1d --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/engine/RandomEngine.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random.engine; + +import org.apache.mahout.math.function.DoubleFunction; +import org.apache.mahout.math.function.IntFunction; + +/** + * Abstract base class for uniform pseudo-random number generating engines. + * <p> + * Most probability distributions are obtained by using a <b>uniform</b> pseudo-random number generation engine + * followed by a transformation to the desired distribution. + * Thus, subclasses of this class are at the core of computational statistics, simulations, Monte Carlo methods, etc. + * <p> + * Subclasses produce uniformly distributed <tt>int</tt>'s and <tt>long</tt>'s in the closed intervals + * <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> and <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively, + * as well as <tt>float</tt>'s and <tt>double</tt>'s in the open unit intervals <tt>(0.0f,1.0f)</tt> and + * <tt>(0.0,1.0)</tt>, respectively. + * <p> + * Subclasses need to override one single method only: <tt>nextInt()</tt>. + * All other methods generating different data types or ranges are usually layered upon <tt>nextInt()</tt>. + * <tt>long</tt>'s are formed by concatenating two 32 bit <tt>int</tt>'s. + * <tt>float</tt>'s are formed by dividing the interval <tt>[0.0f,1.0f]</tt> into 2<sup>32</sup> sub intervals, + * then randomly choosing one subinterval. + * <tt>double</tt>'s are formed by dividing the interval <tt>[0.0,1.0]</tt> into 2<sup>64</sup> sub intervals, + * then randomly choosing one subinterval. + * <p> + * Note that this implementation is <b>not synchronized</b>. + * + * @see MersenneTwister + * @see java.util.Random + */ +public abstract class RandomEngine extends DoubleFunction implements IntFunction { + + /** + * Equivalent to <tt>raw()</tt>. This has the effect that random engines can now be used as function objects, + * returning a random number upon function evaluation. + */ + @Override + public double apply(double dummy) { + return raw(); + } + + /** + * Equivalent to <tt>nextInt()</tt>. This has the effect that random engines can now be used as function objects, + * returning a random number upon function evaluation. + */ + @Override + public int apply(int dummy) { + return nextInt(); + } + + /** + * @return a 64 bit uniformly distributed random number in the open unit interval {@code (0.0,1.0)} (excluding + * 0.0 and 1.0). + */ + public double nextDouble() { + double nextDouble; + + do { + // -9.223372036854776E18 == (double) Long.MIN_VALUE + // 5.421010862427522E-20 == 1 / Math.pow(2,64) == 1 / ((double) Long.MAX_VALUE - (double) Long.MIN_VALUE); + nextDouble = (nextLong() - -9.223372036854776E18) * 5.421010862427522E-20; + } + // catch loss of precision of long --> double conversion + while (!(nextDouble > 0.0 && nextDouble < 1.0)); + + // --> in (0.0,1.0) + return nextDouble; + + /* + nextLong == Long.MAX_VALUE --> 1.0 + nextLong == Long.MIN_VALUE --> 0.0 + nextLong == Long.MAX_VALUE-1 --> 1.0 + nextLong == Long.MAX_VALUE-100000L --> 0.9999999999999946 + nextLong == Long.MIN_VALUE+1 --> 0.0 + nextLong == Long.MIN_VALUE-100000L --> 0.9999999999999946 + nextLong == 1L --> 0.5 + nextLong == -1L --> 0.5 + nextLong == 2L --> 0.5 + nextLong == -2L --> 0.5 + nextLong == 2L+100000L --> 0.5000000000000054 + nextLong == -2L-100000L --> 0.49999999999999456 + */ + } + + /** + * @return a 32 bit uniformly distributed random number in the open unit interval {@code (0.0f, 1.0f)} (excluding + * 0.0f and 1.0f). + */ + public float nextFloat() { + // catch loss of precision of double --> float conversion which could result in a value == 1.0F + float nextFloat; + do { + nextFloat = (float) raw(); + } + while (nextFloat >= 1.0f); + + // --> in [0.0f,1.0f) + return nextFloat; + } + + /** + * @return a 32 bit uniformly distributed random number in the closed interval + * <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> + * (including <tt>Integer.MIN_VALUE</tt> and <tt>Integer.MAX_VALUE</tt>); + */ + public abstract int nextInt(); + + /** + * @return a 64 bit uniformly distributed random number in the closed interval + * <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt> + * (including <tt>Long.MIN_VALUE</tt> and <tt>Long.MAX_VALUE</tt>). + */ + public long nextLong() { + // concatenate two 32-bit strings into one 64-bit string + return ((nextInt() & 0xFFFFFFFFL) << 32) | (nextInt() & 0xFFFFFFFFL); + } + + /** + * @return a 32 bit uniformly distributed random number in the open unit interval {@code (0.0, 1.0)} (excluding + * 0.0 and 1.0). + */ + public double raw() { + int nextInt; + do { // accept anything but zero + nextInt = nextInt(); // in [Integer.MIN_VALUE,Integer.MAX_VALUE]-interval + } while (nextInt == 0); + + // transform to (0.0,1.0)-interval + // 2.3283064365386963E-10 == 1.0 / Math.pow(2,32) + return (nextInt & 0xFFFFFFFFL) * 2.3283064365386963E-10; + + /* + nextInt == Integer.MAX_VALUE --> 0.49999999976716936 + nextInt == Integer.MIN_VALUE --> 0.5 + nextInt == Integer.MAX_VALUE-1 --> 0.4999999995343387 + nextInt == Integer.MIN_VALUE+1 --> 0.5000000002328306 + nextInt == 1 --> 2.3283064365386963E-10 + nextInt == -1 --> 0.9999999997671694 + nextInt == 2 --> 4.6566128730773926E-10 + nextInt == -2 --> 0.9999999995343387 + */ + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/engine/package-info.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/engine/package-info.java b/core/src/main/java/org/apache/mahout/math/jet/random/engine/package-info.java new file mode 100644 index 0000000..e092010 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/engine/package-info.java @@ -0,0 +1,7 @@ +/** + * Engines generating strong uniformly distributed pseudo-random numbers; + * Needed by all JET probability distributions since they rely on uniform random numbers to generate random + * numbers from their own distribution. + * Thus, the classes of this package are at the core of computational statistics, simulations, Monte Carlo methods, etc. + */ +package org.apache.mahout.math.jet.random.engine;
